alita-sdk 0.3.365__py3-none-any.whl → 0.3.462__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release. This version of alita-sdk might be problematic; see the registry advisory for details.
- alita_sdk/cli/__init__.py +10 -0
- alita_sdk/cli/__main__.py +17 -0
- alita_sdk/cli/agent_executor.py +144 -0
- alita_sdk/cli/agent_loader.py +197 -0
- alita_sdk/cli/agent_ui.py +166 -0
- alita_sdk/cli/agents.py +1069 -0
- alita_sdk/cli/callbacks.py +576 -0
- alita_sdk/cli/cli.py +159 -0
- alita_sdk/cli/config.py +153 -0
- alita_sdk/cli/formatting.py +182 -0
- alita_sdk/cli/mcp_loader.py +315 -0
- alita_sdk/cli/toolkit.py +330 -0
- alita_sdk/cli/toolkit_loader.py +55 -0
- alita_sdk/cli/tools/__init__.py +9 -0
- alita_sdk/cli/tools/filesystem.py +905 -0
- alita_sdk/configurations/bitbucket.py +95 -0
- alita_sdk/configurations/confluence.py +96 -1
- alita_sdk/configurations/gitlab.py +79 -0
- alita_sdk/configurations/jira.py +103 -0
- alita_sdk/configurations/testrail.py +88 -0
- alita_sdk/configurations/xray.py +93 -0
- alita_sdk/configurations/zephyr_enterprise.py +93 -0
- alita_sdk/configurations/zephyr_essential.py +75 -0
- alita_sdk/runtime/clients/artifact.py +1 -1
- alita_sdk/runtime/clients/client.py +47 -10
- alita_sdk/runtime/clients/mcp_discovery.py +342 -0
- alita_sdk/runtime/clients/mcp_manager.py +262 -0
- alita_sdk/runtime/clients/sandbox_client.py +373 -0
- alita_sdk/runtime/langchain/assistant.py +70 -41
- alita_sdk/runtime/langchain/constants.py +6 -1
- alita_sdk/runtime/langchain/document_loaders/AlitaDocxMammothLoader.py +315 -3
- alita_sdk/runtime/langchain/document_loaders/AlitaJSONLoader.py +4 -1
- alita_sdk/runtime/langchain/document_loaders/constants.py +73 -100
- alita_sdk/runtime/langchain/langraph_agent.py +164 -38
- alita_sdk/runtime/langchain/utils.py +43 -7
- alita_sdk/runtime/models/mcp_models.py +61 -0
- alita_sdk/runtime/toolkits/__init__.py +24 -0
- alita_sdk/runtime/toolkits/application.py +8 -1
- alita_sdk/runtime/toolkits/artifact.py +5 -6
- alita_sdk/runtime/toolkits/mcp.py +895 -0
- alita_sdk/runtime/toolkits/tools.py +140 -50
- alita_sdk/runtime/tools/__init__.py +7 -2
- alita_sdk/runtime/tools/application.py +7 -0
- alita_sdk/runtime/tools/function.py +94 -5
- alita_sdk/runtime/tools/graph.py +10 -4
- alita_sdk/runtime/tools/image_generation.py +104 -8
- alita_sdk/runtime/tools/llm.py +204 -114
- alita_sdk/runtime/tools/mcp_inspect_tool.py +284 -0
- alita_sdk/runtime/tools/mcp_remote_tool.py +166 -0
- alita_sdk/runtime/tools/mcp_server_tool.py +3 -1
- alita_sdk/runtime/tools/sandbox.py +180 -79
- alita_sdk/runtime/tools/vectorstore.py +22 -21
- alita_sdk/runtime/tools/vectorstore_base.py +79 -26
- alita_sdk/runtime/utils/mcp_oauth.py +164 -0
- alita_sdk/runtime/utils/mcp_sse_client.py +405 -0
- alita_sdk/runtime/utils/streamlit.py +34 -3
- alita_sdk/runtime/utils/toolkit_utils.py +14 -4
- alita_sdk/runtime/utils/utils.py +1 -0
- alita_sdk/tools/__init__.py +48 -31
- alita_sdk/tools/ado/repos/__init__.py +1 -0
- alita_sdk/tools/ado/test_plan/__init__.py +1 -1
- alita_sdk/tools/ado/wiki/__init__.py +1 -5
- alita_sdk/tools/ado/work_item/__init__.py +1 -5
- alita_sdk/tools/ado/work_item/ado_wrapper.py +17 -8
- alita_sdk/tools/base_indexer_toolkit.py +194 -112
- alita_sdk/tools/bitbucket/__init__.py +1 -0
- alita_sdk/tools/chunkers/sematic/proposal_chunker.py +1 -1
- alita_sdk/tools/code/sonar/__init__.py +1 -1
- alita_sdk/tools/code_indexer_toolkit.py +15 -5
- alita_sdk/tools/confluence/__init__.py +2 -2
- alita_sdk/tools/confluence/api_wrapper.py +110 -63
- alita_sdk/tools/confluence/loader.py +10 -0
- alita_sdk/tools/elitea_base.py +22 -22
- alita_sdk/tools/github/__init__.py +2 -2
- alita_sdk/tools/gitlab/__init__.py +2 -1
- alita_sdk/tools/gitlab/api_wrapper.py +11 -7
- alita_sdk/tools/gitlab_org/__init__.py +1 -2
- alita_sdk/tools/google_places/__init__.py +2 -1
- alita_sdk/tools/jira/__init__.py +1 -0
- alita_sdk/tools/jira/api_wrapper.py +1 -1
- alita_sdk/tools/memory/__init__.py +1 -1
- alita_sdk/tools/non_code_indexer_toolkit.py +2 -2
- alita_sdk/tools/openapi/__init__.py +10 -1
- alita_sdk/tools/pandas/__init__.py +1 -1
- alita_sdk/tools/postman/__init__.py +2 -1
- alita_sdk/tools/postman/api_wrapper.py +18 -8
- alita_sdk/tools/postman/postman_analysis.py +8 -1
- alita_sdk/tools/pptx/__init__.py +2 -2
- alita_sdk/tools/qtest/__init__.py +3 -3
- alita_sdk/tools/qtest/api_wrapper.py +1708 -76
- alita_sdk/tools/rally/__init__.py +1 -2
- alita_sdk/tools/report_portal/__init__.py +1 -0
- alita_sdk/tools/salesforce/__init__.py +1 -0
- alita_sdk/tools/servicenow/__init__.py +2 -3
- alita_sdk/tools/sharepoint/__init__.py +1 -0
- alita_sdk/tools/sharepoint/api_wrapper.py +125 -34
- alita_sdk/tools/sharepoint/authorization_helper.py +191 -1
- alita_sdk/tools/sharepoint/utils.py +8 -2
- alita_sdk/tools/slack/__init__.py +1 -0
- alita_sdk/tools/sql/__init__.py +2 -1
- alita_sdk/tools/sql/api_wrapper.py +71 -23
- alita_sdk/tools/testio/__init__.py +1 -0
- alita_sdk/tools/testrail/__init__.py +1 -3
- alita_sdk/tools/utils/__init__.py +17 -0
- alita_sdk/tools/utils/content_parser.py +35 -24
- alita_sdk/tools/vector_adapters/VectorStoreAdapter.py +67 -21
- alita_sdk/tools/xray/__init__.py +2 -1
- alita_sdk/tools/zephyr/__init__.py +2 -1
- alita_sdk/tools/zephyr_enterprise/__init__.py +1 -0
- alita_sdk/tools/zephyr_essential/__init__.py +1 -0
- alita_sdk/tools/zephyr_scale/__init__.py +1 -0
- alita_sdk/tools/zephyr_squad/__init__.py +1 -0
- {alita_sdk-0.3.365.dist-info → alita_sdk-0.3.462.dist-info}/METADATA +8 -2
- {alita_sdk-0.3.365.dist-info → alita_sdk-0.3.462.dist-info}/RECORD +118 -93
- alita_sdk-0.3.462.dist-info/entry_points.txt +2 -0
- {alita_sdk-0.3.365.dist-info → alita_sdk-0.3.462.dist-info}/WHEEL +0 -0
- {alita_sdk-0.3.365.dist-info → alita_sdk-0.3.462.dist-info}/licenses/LICENSE +0 -0
- {alita_sdk-0.3.365.dist-info → alita_sdk-0.3.462.dist-info}/top_level.txt +0 -0
@@ -207,9 +207,9 @@ class VectorStoreWrapper(BaseToolApiWrapper):
             tool_name="_remove_collection"
         )

-    def _get_indexed_ids(self,
+    def _get_indexed_ids(self, index_name: Optional[str] = '') -> List[str]:
         """Get all indexed document IDs from vectorstore"""
-        return self.vector_adapter.get_indexed_ids(self,
+        return self.vector_adapter.get_indexed_ids(self, index_name)

     def list_collections(self) -> Any:
         """List all collections in the vectorstore.
@@ -233,7 +233,7 @@ class VectorStoreWrapper(BaseToolApiWrapper):
             return {"collections": [], "message": "No indexed collections"}
         return cols

-    def _clean_collection(self,
+    def _clean_collection(self, index_name: str = ''):
         """
         Clean the vectorstore collection by deleting all indexed data.
         """
@@ -241,15 +241,15 @@ class VectorStoreWrapper(BaseToolApiWrapper):
             f"Cleaning collection '{self.dataset}'",
             tool_name="_clean_collection"
         )
-        self.vector_adapter.clean_collection(self,
+        self.vector_adapter.clean_collection(self, index_name)
         self._log_data(
             f"Collection '{self.dataset}' has been cleaned. ",
             tool_name="_clean_collection"
         )

-    def _get_code_indexed_data(self,
+    def _get_code_indexed_data(self, index_name: str) -> Dict[str, Dict[str, Any]]:
         """ Get all indexed data from vectorstore for code content """
-        return self.vector_adapter.get_code_indexed_data(self,
+        return self.vector_adapter.get_code_indexed_data(self, index_name)

     def _add_to_collection(self, entry_id, new_collection_value):
         """Add a new collection name to the `collection` key in the `metadata` column."""
@@ -258,7 +258,7 @@ class VectorStoreWrapper(BaseToolApiWrapper):
     def _reduce_duplicates(
             self,
             documents: Generator[Any, None, None],
-
+            index_name: str,
             get_indexed_data: Callable,
             key_fn: Callable,
             compare_fn: Callable,
@@ -267,7 +267,7 @@ class VectorStoreWrapper(BaseToolApiWrapper):
     ) -> List[Any]:
         """Generic duplicate reduction logic for documents."""
         self._log_data(log_msg, tool_name="index_documents")
-        indexed_data = get_indexed_data(
+        indexed_data = get_indexed_data(index_name)
         indexed_keys = set(indexed_data.keys())
         if not indexed_keys:
             self._log_data("Vectorstore is empty, indexing all incoming documents", tool_name="index_documents")
@@ -279,14 +279,14 @@ class VectorStoreWrapper(BaseToolApiWrapper):
         for document in documents:
             key = key_fn(document)
             key = key if isinstance(key, str) else str(key)
-            if key in indexed_keys and
+            if key in indexed_keys and index_name == indexed_data[key]['metadata'].get('collection'):
                 if compare_fn(document, indexed_data[key]):
                     # Disabled addition of new collection to already indexed documents
                     # # check metadata.collection and update if needed
                     # for update_collection_id in remove_ids_fn(indexed_data, key):
                     #     self._add_to_collection(
                     #         update_collection_id,
-                    #
+                    #         index_name
                     #     )
                     continue
             final_docs.append(document)
@@ -303,10 +303,10 @@ class VectorStoreWrapper(BaseToolApiWrapper):

         return final_docs

-    def _reduce_code_duplicates(self, documents: Generator[Any, None, None],
+    def _reduce_code_duplicates(self, documents: Generator[Any, None, None], index_name: str) -> List[Any]:
         return self._reduce_duplicates(
             documents,
-
+            index_name,
             self._get_code_indexed_data,
             lambda doc: doc.metadata.get('filename'),
             lambda doc, idx: (
@@ -318,7 +318,7 @@ class VectorStoreWrapper(BaseToolApiWrapper):
             log_msg="Verification of code documents to index started"
         )

-    def index_documents(self, documents: Generator[Document, None, None],
+    def index_documents(self, documents: Generator[Document, None, None], index_name: str, progress_step: int = 20, clean_index: bool = True, is_code: bool = True):
         """ Index documents in the vectorstore.

         Args:
@@ -329,13 +329,13 @@ class VectorStoreWrapper(BaseToolApiWrapper):

         from ..langchain.interfaces.llm_processor import add_documents

-        self._log_tool_event(message=f"Starting the indexing... Parameters: {
+        self._log_tool_event(message=f"Starting the indexing... Parameters: {index_name=}, {clean_index=}, {is_code}", tool_name="index_documents")
         # pre-process documents if needed (find duplicates, etc.)
         if clean_index:
             logger.info("Cleaning index before re-indexing all documents.")
             self._log_data("Cleaning index before re-indexing all documents. Previous index will be removed", tool_name="index_documents")
             try:
-                self._clean_collection(
+                self._clean_collection(index_name)
                 self.vectoradapter.persist()
                 self.vectoradapter.vacuum()
                 self._log_data("Previous index has been removed",
@@ -349,7 +349,7 @@ class VectorStoreWrapper(BaseToolApiWrapper):
             message="Filter for duplicates",
             tool_name="index_documents")
         # remove duplicates based on metadata 'id' and 'updated_on' or 'commit_hash' fields
-        documents = self._reduce_code_duplicates(documents,
+        documents = self._reduce_code_duplicates(documents, index_name)
         self._log_tool_event(
             message="All the duplicates were filtered out. Proceeding with indexing.",
             tool_name="index_documents")
@@ -377,13 +377,13 @@ class VectorStoreWrapper(BaseToolApiWrapper):
         self._log_tool_event(message=f"Documents for indexing were processed. Total documents: {len(documents)}",
                              tool_name="index_documents")

-        # if
-        if
+        # if index_name is provided, add it to metadata of each document
+        if index_name:
             for doc in documents:
                 if not doc.metadata.get('collection'):
-                    doc.metadata['collection'] =
+                    doc.metadata['collection'] = index_name
                 else:
-                    doc.metadata['collection'] += f";{
+                    doc.metadata['collection'] += f";{index_name}"

         total_docs = len(documents)
         documents_count = 0
@@ -414,7 +414,8 @@ class VectorStoreWrapper(BaseToolApiWrapper):
                 return {"status": "error", "message": f"Error: {format_exc()}"}
         if _documents:
             add_documents(vectorstore=self.vectorstore, documents=_documents)
-        return {"status": "ok", "message": f"successfully indexed {documents_count} documents"
+        return {"status": "ok", "message": f"successfully indexed {documents_count} documents" if documents_count > 0
+                else "No new documents to index."}

     def search_documents(self, query:str, doctype: str = 'code',
                          filter:dict|str={}, cut_off: float=0.5,
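The hunks above thread a new index_name parameter through VectorStoreWrapper.index_documents and its duplicate-reduction helpers, and also stamp it into each document's metadata['collection']. Below is a minimal, hypothetical usage sketch of the new signature; the pre-configured wrapper instance and the document generator are assumptions for illustration and are not part of this diff.

from langchain_core.documents import Document

# `wrapper` is assumed to be an already configured VectorStoreWrapper instance.
docs = (
    Document(page_content=src, metadata={"filename": name})
    for name, src in [("a.py", "a = 1"), ("b.py", "b = 2")]
)

result = wrapper.index_documents(
    docs,
    index_name="my_repo_main",  # new parameter; also written into doc.metadata['collection']
    clean_index=False,          # keep documents already indexed under other collections
    is_code=True,
)
print(result)  # e.g. {"status": "ok", "message": "successfully indexed 2 documents"}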
@@ -1,16 +1,18 @@
 import json
-import math
 from collections import OrderedDict
 from logging import getLogger
 from typing import Any, Optional, List, Dict, Generator

+import math
 from langchain_core.documents import Document
 from langchain_core.messages import HumanMessage
+from langchain_core.tools import ToolException
+from psycopg.errors import DataException
 from pydantic import BaseModel, model_validator, Field

 from alita_sdk.tools.elitea_base import BaseToolApiWrapper
 from alita_sdk.tools.vector_adapters.VectorStoreAdapter import VectorStoreAdapterFactory
-from
+from ...runtime.utils.utils import IndexerKeywords

 logger = getLogger(__name__)

@@ -175,6 +177,37 @@ class VectorStoreWrapperBase(BaseToolApiWrapper):
         except Exception as e:
             logger.error(f"Failed to initialize PGVectorSearch: {str(e)}")

+    def _similarity_search_with_score(self, query: str, filter: dict = None, k: int = 10):
+        """
+        Perform similarity search with proper exception handling for DataException.
+
+        Args:
+            query: Search query string
+            filter: Optional filter dictionary
+            k: Number of results to return
+
+        Returns:
+            List of (Document, score) tuples
+
+        Raises:
+            ToolException: When DataException occurs or other search errors
+        """
+        try:
+            return self.vectorstore.similarity_search_with_score(
+                query, filter=filter, k=k
+            )
+        except DataException as dimException:
+            exception_str = str(dimException)
+            if 'different vector dimensions' in exception_str:
+                logger.error(f"Data exception: {exception_str}")
+                raise ToolException(f"Global search cannot be completed since collections were indexed using "
+                                    f"different embedding models. Use search within a single collection."
+                                    f"\nDetails: {exception_str}")
+            raise ToolException(f"Data exception during search. Possibly invalid filter: {exception_str}")
+        except Exception as e:
+            logger.error(f"Error during similarity search: {str(e)}")
+            raise ToolException(f"Search failed: {str(e)}")
+
     def list_collections(self) -> List[str]:
         """List all collections in the vectorstore."""

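The new _similarity_search_with_score helper centralizes error handling for the search paths further down in this file: psycopg DataException errors (typically raised when collections built with different embedding dimensions are searched together) are converted into ToolException with an actionable message. A hedged sketch of how a caller might consume it follows; `wrapper` stands for an assumed, already-initialized VectorStoreWrapperBase instance and the filter shape is only illustrative.

from langchain_core.tools import ToolException

try:
    # Same call shape the search paths below now use internally.
    hits = wrapper._similarity_search_with_score("login flow", filter={"collection": "docs"}, k=5)
    for doc, score in hits:
        print(f"{score:.3f}", doc.metadata.get("source"))
except ToolException as exc:
    # e.g. a cross-collection search over indexes built with different embedding models
    print(f"Search rejected: {exc}")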
@@ -183,7 +216,28 @@ class VectorStoreWrapperBase(BaseToolApiWrapper):
             return "No indexed collections"
         return collections

-    def
+    def get_index_meta(self, index_name: str):
+        index_metas = self.vector_adapter.get_index_meta(self, index_name)
+        if len(index_metas) > 1:
+            raise RuntimeError(f"Multiple index_meta documents found: {index_metas}")
+        return index_metas[0] if index_metas else None
+
+    def get_indexed_count(self, index_name: str) -> int:
+        from sqlalchemy.orm import Session
+        from sqlalchemy import func, or_
+
+        with Session(self.vectorstore.session_maker.bind) as session:
+            return session.query(
+                self.vectorstore.EmbeddingStore.id,
+            ).filter(
+                func.jsonb_extract_path_text(self.vectorstore.EmbeddingStore.cmetadata, 'collection') == index_name,
+                or_(
+                    func.jsonb_extract_path_text(self.vectorstore.EmbeddingStore.cmetadata, 'type').is_(None),
+                    func.jsonb_extract_path_text(self.vectorstore.EmbeddingStore.cmetadata, 'type') != IndexerKeywords.INDEX_META_TYPE.value
+                )
+            ).count()
+
+    def _clean_collection(self, index_name: str = ''):
         """
         Clean the vectorstore collection by deleting all indexed data.
         """
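get_indexed_count counts the embeddings whose cmetadata collection value equals the given index name while skipping internal index_meta rows, and get_index_meta returns that single bookkeeping document (or None). A hypothetical call site, assuming a PGVector-backed wrapper that is already configured:

# `wrapper` is an assumed VectorStoreWrapperBase instance backed by PGVector.
index_name = "my_repo_main"

meta = wrapper.get_index_meta(index_name)      # None if no index_meta document exists
count = wrapper.get_indexed_count(index_name)  # embeddings only, index_meta rows excluded
print(f"'{index_name}': {count} indexed chunks, meta={meta}")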
@@ -191,13 +245,13 @@ class VectorStoreWrapperBase(BaseToolApiWrapper):
             f"Cleaning collection '{self.dataset}'",
             tool_name="_clean_collection"
         )
-        self.vector_adapter.clean_collection(self,
+        self.vector_adapter.clean_collection(self, index_name)
         self._log_tool_event(
             f"Collection '{self.dataset}' has been cleaned. ",
             tool_name="_clean_collection"
         )

-    def index_documents(self, documents: Generator[Document, None, None],
+    def index_documents(self, documents: Generator[Document, None, None], index_name: str, progress_step: int = 20, clean_index: bool = True):
         """ Index documents in the vectorstore.

         Args:
@@ -206,21 +260,21 @@ class VectorStoreWrapperBase(BaseToolApiWrapper):
             clean_index (bool): If True, clean the index before re-indexing all documents.
         """
         if clean_index:
-            self._clean_index(
+            self._clean_index(index_name)

-        return self._save_index(list(documents),
+        return self._save_index(list(documents), index_name, progress_step)

-    def _clean_index(self,
+    def _clean_index(self, index_name: str):
         logger.info("Cleaning index before re-indexing all documents.")
         self._log_tool_event("Cleaning index before re-indexing all documents. Previous index will be removed", tool_name="index_documents")
         try:
-            self._clean_collection(
+            self._clean_collection(index_name)
             self._log_tool_event("Previous index has been removed",
                                  tool_name="index_documents")
         except Exception as e:
             logger.warning(f"Failed to clean index: {str(e)}. Continuing with re-indexing.")

-    def _save_index(self, documents: list[Document],
+    def _save_index(self, documents: list[Document], index_name: Optional[str] = None, progress_step: int = 20):
         from ..langchain.interfaces.llm_processor import add_documents
         #
         for doc in documents:
@@ -229,13 +283,13 @@ class VectorStoreWrapperBase(BaseToolApiWrapper):

         logger.debug(f"Indexing documents: {documents}")

-        # if
-        if
+        # if index_name is provided, add it to metadata of each document
+        if index_name:
             for doc in documents:
                 if not doc.metadata.get('collection'):
-                    doc.metadata['collection'] =
+                    doc.metadata['collection'] = index_name
                 else:
-                    doc.metadata['collection'] += f";{
+                    doc.metadata['collection'] += f";{index_name}"

         total_docs = len(documents)
         documents_count = 0
@@ -269,7 +323,8 @@ class VectorStoreWrapperBase(BaseToolApiWrapper):
                 return {"status": "error", "message": f"Error: {format_exc()}"}
         if _documents:
             add_documents(vectorstore=self.vectorstore, documents=_documents)
-        return {"status": "ok", "message": f"successfully indexed {documents_count} documents"
+        return {"status": "ok", "message": f"successfully indexed {documents_count} documents" if documents_count > 0
+                else "no documents to index"}

     def search_documents(self, query:str, doctype: str = 'code',
                          filter:dict|str={}, cut_off: float=0.5,
@@ -303,7 +358,7 @@ class VectorStoreWrapperBase(BaseToolApiWrapper):
             }

             try:
-                document_items = self.
+                document_items = self._similarity_search_with_score(
                     query, filter=document_filter, k=search_top
                 )
                 # Add document results to unique docs
@@ -336,18 +391,16 @@ class VectorStoreWrapperBase(BaseToolApiWrapper):
                 }

                 try:
-                    chunk_items = self.
+                    chunk_items = self._similarity_search_with_score(
                         query, filter=chunk_filter, k=search_top
                     )
-
-                    logger.debug(f"Chunk items for {chunk_type}: {chunk_items[0]}")
-
+
                     for doc, score in chunk_items:
                         # Create unique identifier for document
                         source = doc.metadata.get('source')
                         chunk_id = doc.metadata.get('chunk_id')
                         doc_id = f"{source}_{chunk_id}" if source and chunk_id else str(doc.metadata.get('id', id(doc)))
-
+
                         # Store document and its score
                         if doc_id not in unique_docs:
                             unique_docs[doc_id] = doc
@@ -367,9 +420,9 @@ class VectorStoreWrapperBase(BaseToolApiWrapper):
                     doc_filter = {
                         "$and": doc_filter_parts
                     }
-
+
                     try:
-                        fetch_items = self.
+                        fetch_items = self._similarity_search_with_score(
                             query, filter=doc_filter, k=1
                         )
                         if fetch_items:
@@ -383,7 +436,7 @@ class VectorStoreWrapperBase(BaseToolApiWrapper):
         else:
             # Default search behavior (unchanged)
             max_search_results = 30 if search_top * 3 > 30 else search_top * 3
-            vector_items = self.
+            vector_items = self._similarity_search_with_score(
                 query, filter=filter, k=max_search_results
             )

@@ -401,7 +454,7 @@ class VectorStoreWrapperBase(BaseToolApiWrapper):
         doc_map = OrderedDict(
             sorted(doc_map.items(), key=lambda x: x[1][1], reverse=True)
         )
-
+
         # Process full-text search if configured
         if full_text_search and full_text_search.get('enabled') and full_text_search.get('fields'):
             language = full_text_search.get('language', 'english')
@@ -414,7 +467,7 @@ class VectorStoreWrapperBase(BaseToolApiWrapper):
             for field_name in full_text_search.get('fields', []):
                 try:
                     text_results = self.pg_helper.full_text_search(field_name, query)
-
+
                     # Combine text search results with vector results
                     for result in text_results:
                         doc_id = result['id']
@@ -0,0 +1,164 @@
+import json
+import logging
+import re
+from typing import Any, Dict, Optional
+from urllib.parse import urlparse
+
+import requests
+from langchain_core.tools import ToolException
+
+logger = logging.getLogger(__name__)
+
+
+class McpAuthorizationRequired(ToolException):
+    """Raised when an MCP server requires OAuth authorization before use."""
+
+    def __init__(
+        self,
+        message: str,
+        server_url: str,
+        resource_metadata_url: Optional[str] = None,
+        www_authenticate: Optional[str] = None,
+        resource_metadata: Optional[Dict[str, Any]] = None,
+        status: Optional[int] = None,
+        tool_name: Optional[str] = None,
+    ):
+        super().__init__(message)
+        self.server_url = server_url
+        self.resource_metadata_url = resource_metadata_url
+        self.www_authenticate = www_authenticate
+        self.resource_metadata = resource_metadata
+        self.status = status
+        self.tool_name = tool_name
+
+    def to_dict(self) -> Dict[str, Any]:
+        return {
+            "message": str(self),
+            "server_url": self.server_url,
+            "resource_metadata_url": self.resource_metadata_url,
+            "www_authenticate": self.www_authenticate,
+            "resource_metadata": self.resource_metadata,
+            "status": self.status,
+            "tool_name": self.tool_name,
+        }
+
+
+def extract_resource_metadata_url(www_authenticate: Optional[str], server_url: Optional[str] = None) -> Optional[str]:
+    """
+    Pull the resource_metadata URL from a WWW-Authenticate header if present.
+    If not found and server_url is provided, try to construct resource metadata URLs.
+    """
+    if not www_authenticate and not server_url:
+        return None
+
+    # RFC9728 returns `resource_metadata="<url>"` inside the header value
+    if www_authenticate:
+        match = re.search(r'resource_metadata\s*=\s*\"?([^\", ]+)\"?', www_authenticate)
+        if match:
+            return match.group(1)
+
+    # For servers that don't provide resource_metadata in WWW-Authenticate,
+    # we'll return None and rely on inferring authorization servers from the realm
+    # or using well-known OAuth discovery endpoints directly
+    return None
+
+
+def fetch_oauth_authorization_server_metadata(base_url: str, timeout: int = 10) -> Optional[Dict[str, Any]]:
+    """
+    Fetch OAuth authorization server metadata from well-known endpoints.
+    Tries both oauth-authorization-server and openid-configuration discovery endpoints.
+    """
+    discovery_endpoints = [
+        f"{base_url}/.well-known/oauth-authorization-server",
+        f"{base_url}/.well-known/openid-configuration",
+    ]
+
+    for endpoint in discovery_endpoints:
+        try:
+            resp = requests.get(endpoint, timeout=timeout)
+            if resp.status_code == 200:
+                return resp.json()
+        except Exception as exc:
+            logger.debug(f"Failed to fetch OAuth metadata from {endpoint}: {exc}")
+            continue
+
+    return None
+
+
+def infer_authorization_servers_from_realm(www_authenticate: Optional[str], server_url: str) -> Optional[list]:
+    """
+    Infer authorization server URLs from WWW-Authenticate realm or server URL.
+    This is used when the server doesn't provide resource_metadata endpoint.
+    """
+    if not www_authenticate and not server_url:
+        return None
+
+    authorization_servers = []
+
+    # Try to extract realm from WWW-Authenticate header
+    realm = None
+    if www_authenticate:
+        realm_match = re.search(r'realm\s*=\s*\"([^\"]+)\"', www_authenticate)
+        if realm_match:
+            realm = realm_match.group(1)
+
+    # Parse the server URL to get base domain
+    parsed = urlparse(server_url)
+    base_url = f"{parsed.scheme}://{parsed.netloc}"
+
+    # Return the base authorization server URL (not the discovery endpoint)
+    # The client will append .well-known paths when fetching metadata
+    authorization_servers.append(base_url)
+
+    return authorization_servers if authorization_servers else None
+
+
+def fetch_resource_metadata(resource_metadata_url: str, timeout: int = 10) -> Optional[Dict[str, Any]]:
+    """Fetch and parse the protected resource metadata document."""
+    try:
+        resp = requests.get(resource_metadata_url, timeout=timeout)
+        resp.raise_for_status()
+        return resp.json()
+    except Exception as exc:  # broad catch – we want to surface auth requirement even if this fails
+        logger.warning("Failed to fetch resource metadata from %s: %s", resource_metadata_url, exc)
+        return None
+
+
+async def fetch_resource_metadata_async(resource_metadata_url: str, session=None, timeout: int = 10) -> Optional[Dict[str, Any]]:
+    """Async variant for fetching protected resource metadata."""
+    try:
+        import aiohttp
+
+        client_timeout = aiohttp.ClientTimeout(total=timeout)
+        if session:
+            async with session.get(resource_metadata_url, timeout=client_timeout) as resp:
+                text = await resp.text()
+        else:
+            async with aiohttp.ClientSession(timeout=client_timeout) as local_session:
+                async with local_session.get(resource_metadata_url) as resp:
+                    text = await resp.text()
+
+        try:
+            return json.loads(text)
+        except json.JSONDecodeError:
+            logger.warning("Resource metadata at %s is not valid JSON: %s", resource_metadata_url, text[:200])
+            return None
+    except Exception as exc:
+        logger.warning("Failed to fetch resource metadata from %s: %s", resource_metadata_url, exc)
+        return None
+
+
+def canonical_resource(server_url: str) -> str:
+    """Produce a canonical resource identifier for the MCP server."""
+    parsed = urlparse(server_url)
+    # Normalize scheme/host casing per RFC guidance
+    normalized = parsed._replace(
+        scheme=parsed.scheme.lower(),
+        netloc=parsed.netloc.lower(),
+    )
+    resource = normalized.geturl()
+
+    # Prefer form without trailing slash unless path is meaningful
+    if resource.endswith("/") and parsed.path in ("", "/"):
+        resource = resource[:-1]
+    return resource
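This added module (listed above as alita_sdk/runtime/utils/mcp_oauth.py) supplies the OAuth discovery primitives for the MCP clients: parse the WWW-Authenticate challenge, fetch protected-resource and authorization-server metadata, and raise McpAuthorizationRequired so a caller can start an authorization flow. A hedged sketch of how these pieces could be combined on a 401 response follows; the server URL and the probing request body are invented for illustration, and this is not necessarily how the SDK's MCP clients wire the helpers internally.

import requests

from alita_sdk.runtime.utils.mcp_oauth import (
    McpAuthorizationRequired,
    canonical_resource,
    extract_resource_metadata_url,
    fetch_resource_metadata,
    infer_authorization_servers_from_realm,
)

server_url = "https://mcp.example.com/sse"  # placeholder URL for illustration
resp = requests.post(server_url, json={"jsonrpc": "2.0", "method": "ping", "id": 1}, timeout=10)

if resp.status_code == 401:
    www_auth = resp.headers.get("WWW-Authenticate")
    metadata_url = extract_resource_metadata_url(www_auth, server_url)
    metadata = fetch_resource_metadata(metadata_url) if metadata_url else None
    if metadata is None:
        # Server did not expose resource metadata: fall back to guessing the
        # authorization server from the realm / base URL.
        metadata = {
            "resource": canonical_resource(server_url),
            "authorization_servers": infer_authorization_servers_from_realm(www_auth, server_url) or [],
        }
    raise McpAuthorizationRequired(
        "MCP server requires OAuth authorization",
        server_url=server_url,
        resource_metadata_url=metadata_url,
        www_authenticate=www_auth,
        resource_metadata=metadata,
        status=resp.status_code,
    )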