alita-sdk 0.3.376__py3-none-any.whl → 0.3.435__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of alita-sdk might be problematic.

Files changed (60)
  1. alita_sdk/configurations/bitbucket.py +95 -0
  2. alita_sdk/configurations/confluence.py +96 -1
  3. alita_sdk/configurations/gitlab.py +79 -0
  4. alita_sdk/configurations/jira.py +103 -0
  5. alita_sdk/configurations/testrail.py +88 -0
  6. alita_sdk/configurations/xray.py +93 -0
  7. alita_sdk/configurations/zephyr_enterprise.py +93 -0
  8. alita_sdk/configurations/zephyr_essential.py +75 -0
  9. alita_sdk/runtime/clients/client.py +9 -4
  10. alita_sdk/runtime/clients/mcp_discovery.py +342 -0
  11. alita_sdk/runtime/clients/mcp_manager.py +262 -0
  12. alita_sdk/runtime/clients/sandbox_client.py +8 -0
  13. alita_sdk/runtime/langchain/assistant.py +41 -38
  14. alita_sdk/runtime/langchain/constants.py +5 -1
  15. alita_sdk/runtime/langchain/document_loaders/AlitaDocxMammothLoader.py +315 -3
  16. alita_sdk/runtime/langchain/document_loaders/AlitaJSONLoader.py +4 -1
  17. alita_sdk/runtime/langchain/document_loaders/constants.py +28 -12
  18. alita_sdk/runtime/langchain/langraph_agent.py +91 -27
  19. alita_sdk/runtime/langchain/utils.py +24 -4
  20. alita_sdk/runtime/models/mcp_models.py +57 -0
  21. alita_sdk/runtime/toolkits/__init__.py +24 -0
  22. alita_sdk/runtime/toolkits/application.py +8 -1
  23. alita_sdk/runtime/toolkits/mcp.py +787 -0
  24. alita_sdk/runtime/toolkits/tools.py +98 -50
  25. alita_sdk/runtime/tools/__init__.py +7 -2
  26. alita_sdk/runtime/tools/application.py +7 -0
  27. alita_sdk/runtime/tools/function.py +20 -28
  28. alita_sdk/runtime/tools/graph.py +10 -4
  29. alita_sdk/runtime/tools/image_generation.py +104 -8
  30. alita_sdk/runtime/tools/llm.py +146 -114
  31. alita_sdk/runtime/tools/mcp_inspect_tool.py +284 -0
  32. alita_sdk/runtime/tools/mcp_server_tool.py +79 -10
  33. alita_sdk/runtime/tools/sandbox.py +166 -63
  34. alita_sdk/runtime/tools/vectorstore.py +3 -2
  35. alita_sdk/runtime/tools/vectorstore_base.py +4 -3
  36. alita_sdk/runtime/utils/streamlit.py +34 -3
  37. alita_sdk/runtime/utils/toolkit_utils.py +5 -2
  38. alita_sdk/runtime/utils/utils.py +1 -0
  39. alita_sdk/tools/__init__.py +48 -31
  40. alita_sdk/tools/ado/work_item/ado_wrapper.py +17 -8
  41. alita_sdk/tools/base_indexer_toolkit.py +75 -66
  42. alita_sdk/tools/chunkers/sematic/proposal_chunker.py +1 -1
  43. alita_sdk/tools/code_indexer_toolkit.py +13 -3
  44. alita_sdk/tools/confluence/api_wrapper.py +29 -7
  45. alita_sdk/tools/confluence/loader.py +10 -0
  46. alita_sdk/tools/elitea_base.py +7 -7
  47. alita_sdk/tools/gitlab/api_wrapper.py +11 -7
  48. alita_sdk/tools/jira/api_wrapper.py +1 -1
  49. alita_sdk/tools/openapi/__init__.py +10 -1
  50. alita_sdk/tools/qtest/api_wrapper.py +522 -74
  51. alita_sdk/tools/sharepoint/api_wrapper.py +104 -33
  52. alita_sdk/tools/sharepoint/authorization_helper.py +175 -1
  53. alita_sdk/tools/sharepoint/utils.py +8 -2
  54. alita_sdk/tools/utils/content_parser.py +27 -16
  55. alita_sdk/tools/vector_adapters/VectorStoreAdapter.py +19 -6
  56. {alita_sdk-0.3.376.dist-info → alita_sdk-0.3.435.dist-info}/METADATA +1 -1
  57. {alita_sdk-0.3.376.dist-info → alita_sdk-0.3.435.dist-info}/RECORD +60 -55
  58. {alita_sdk-0.3.376.dist-info → alita_sdk-0.3.435.dist-info}/WHEEL +0 -0
  59. {alita_sdk-0.3.376.dist-info → alita_sdk-0.3.435.dist-info}/licenses/LICENSE +0 -0
  60. {alita_sdk-0.3.376.dist-info → alita_sdk-0.3.435.dist-info}/top_level.txt +0 -0
@@ -90,62 +90,79 @@ available_count = len(AVAILABLE_TOOLS)
 total_attempted = len(AVAILABLE_TOOLS) + len(FAILED_IMPORTS)
 logger.info(f"Tool imports completed: {available_count}/{total_attempted} successful")
 
+
 def get_tools(tools_list, alita, llm, store: Optional[BaseStore] = None, *args, **kwargs):
     tools = []
+
     for tool in tools_list:
-        # validate tool name syntax - it cannot be started with _
-        for tool_name in tool.get('settings', {}).get('selected_tools', []):
-            if isinstance(tool_name, str) and tool_name.startswith('_'):
-                raise ValueError(f"Tool name '{tool_name}' from toolkit '{tool.get('type', '')}' cannot start with '_'")
-
-        tool['settings']['alita'] = alita
-        tool['settings']['llm'] = llm
-        tool['settings']['store'] = store
+        settings = tool.get('settings')
+
+        # Skip tools without settings early
+        if not settings:
+            logger.warning(f"Tool '{tool.get('type', '')}' has no settings, skipping...")
+            continue
+
+        # Validate tool names once
+        selected_tools = settings.get('selected_tools', [])
+        invalid_tools = [name for name in selected_tools if isinstance(name, str) and name.startswith('_')]
+        if invalid_tools:
+            raise ValueError(f"Tool names {invalid_tools} from toolkit '{tool.get('type', '')}' cannot start with '_'")
+
+        # Cache tool type and add common settings
         tool_type = tool['type']
+        settings['alita'] = alita
+        settings['llm'] = llm
+        settings['store'] = store
+
+        # Set pgvector collection schema if present
+        if settings.get('pgvector_configuration'):
+            settings['pgvector_configuration']['collection_schema'] = str(tool['id'])
 
-        # Handle special cases for ADO tools
+        # Handle ADO special cases
         if tool_type in ['ado_boards', 'ado_wiki', 'ado_plans']:
             tools.extend(AVAILABLE_TOOLS['ado']['get_tools'](tool_type, tool))
+            continue
 
-        # Check if tool is available and has get_tools function
-        elif tool_type in AVAILABLE_TOOLS and 'get_tools' in AVAILABLE_TOOLS[tool_type]:
+        # Handle ADO repos aliases
+        if tool_type in ['ado_repos', 'azure_devops_repos'] and 'ado_repos' in AVAILABLE_TOOLS:
             try:
-                get_tools_func = AVAILABLE_TOOLS[tool_type]['get_tools']
-                tools.extend(get_tools_func(tool))
-
+                tools.extend(AVAILABLE_TOOLS['ado_repos']['get_tools'](tool))
             except Exception as e:
-                logger.error(f"Error getting tools for {tool_type}: {e}")
-                raise ToolException(f"Error getting tools for {tool_type}: {e}")
+                logger.error(f"Error getting ADO repos tools: {e}")
+            continue
 
-        # Handle ADO repos special case (it might be requested as azure_devops_repos)
-        elif tool_type in ['ado_repos', 'azure_devops_repos'] and 'ado_repos' in AVAILABLE_TOOLS:
+        # Skip MCP toolkit - it's handled by runtime/toolkits/tools.py to avoid duplicate loading
+        if tool_type == 'mcp':
+            logger.debug(f"Skipping MCP toolkit '{tool.get('toolkit_name')}' - handled by runtime toolkit system")
+            continue
+
+        # Handle standard tools
+        if tool_type in AVAILABLE_TOOLS and 'get_tools' in AVAILABLE_TOOLS[tool_type]:
             try:
-                get_tools_func = AVAILABLE_TOOLS['ado_repos']['get_tools']
-                tools.extend(get_tools_func(tool))
+                tools.extend(AVAILABLE_TOOLS[tool_type]['get_tools'](tool))
             except Exception as e:
-                logger.error(f"Error getting ADO repos tools: {e}")
+                logger.error(f"Error getting tools for {tool_type}: {e}")
+                raise ToolException(f"Error getting tools for {tool_type}: {e}")
+            continue
 
         # Handle custom modules
-        elif tool.get("settings", {}).get("module"):
+        if settings.get("module"):
             try:
-                settings = tool.get("settings", {})
                 mod = import_module(settings.pop("module"))
                 tkitclass = getattr(mod, settings.pop("class"))
-                #
-                get_toolkit_params = tool["settings"].copy()
+                get_toolkit_params = settings.copy()
                 get_toolkit_params["name"] = tool.get("name")
-                #
                 toolkit = tkitclass.get_toolkit(**get_toolkit_params)
                 tools.extend(toolkit.get_tools())
             except Exception as e:
                 logger.error(f"Error in getting custom toolkit: {e}")
+            continue
 
+        # Tool not available
+        if tool_type in FAILED_IMPORTS:
+            logger.warning(f"Tool '{tool_type}' is not available: {FAILED_IMPORTS[tool_type]}")
         else:
-            # Tool not available or not found
-            if tool_type in FAILED_IMPORTS:
-                logger.warning(f"Tool '{tool_type}' is not available: {FAILED_IMPORTS[tool_type]}")
-            else:
-                logger.warning(f"Unknown tool type: {tool_type}")
+            logger.warning(f"Unknown tool type: {tool_type}")
 
     return tools
 
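For orientation, here is a minimal sketch of the guard-clause dispatch pattern the hunk above moves to: each toolkit entry is expected to carry a type, an id, and a settings dict, and entries without settings are skipped instead of failing. The AVAILABLE_TOOLS/FAILED_IMPORTS stand-ins and the entry shape below are illustrative only, not the SDK's real registries.

# Hypothetical, simplified stand-in for the dispatch loop above.
import logging

logger = logging.getLogger(__name__)

AVAILABLE_TOOLS = {"jira": {"get_tools": lambda tool: [f"jira tools for toolkit {tool['id']}"]}}
FAILED_IMPORTS = {"qtest": "ModuleNotFoundError: qtest client missing"}

def get_tools_sketch(tools_list):
    tools = []
    for tool in tools_list:
        settings = tool.get("settings")
        if not settings:  # entries without settings are skipped, not fatal
            logger.warning("Tool '%s' has no settings, skipping", tool.get("type", ""))
            continue
        tool_type = tool["type"]
        if tool_type in AVAILABLE_TOOLS:  # standard path
            tools.extend(AVAILABLE_TOOLS[tool_type]["get_tools"](tool))
            continue
        if tool_type in FAILED_IMPORTS:  # import failed at startup
            logger.warning("Tool '%s' is not available: %s", tool_type, FAILED_IMPORTS[tool_type])
        else:
            logger.warning("Unknown tool type: %s", tool_type)
    return tools

print(get_tools_sketch([{"id": 7, "type": "jira", "settings": {"selected_tools": []}}]))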
@@ -329,11 +329,14 @@ class AzureDevOpsApiWrapper(NonCodeIndexerToolkit):
         parsed_item.update(fields_data)
 
         # extract relations if any
-        relations_data = work_item.relations
+        relations_data = None
+        if expand and str(expand).lower() in ("relations", "all"):
+            try:
+                relations_data = getattr(work_item, 'relations', None)
+            except KeyError:
+                relations_data = None
         if relations_data:
-            parsed_item['relations'] = []
-            for relation in relations_data:
-                parsed_item['relations'].append(relation.as_dict())
+            parsed_item['relations'] = [relation.as_dict() for relation in relations_data]
 
         if parse_attachments:
             # describe images in work item fields if present
@@ -344,13 +347,19 @@ class AzureDevOpsApiWrapper(NonCodeIndexerToolkit):
                 for img in images:
                     src = img.get('src')
                     if src:
-                        description = self.parse_attachment_by_url(src, image_description_prompt)
+                        description = self.parse_attachment_by_url(src, image_description_prompt=image_description_prompt)
                         img['image-description'] = description
                 parsed_item[field_name] = str(soup)
             # parse attached documents if present
-            if parsed_item['relations']:
-                for attachment in parsed_item['relations']:
-                    attachment['content'] = self.parse_attachment_by_url(attachment['url'], attachment['attributes']['name'], image_description_prompt)
+            for relation in parsed_item.get('relations', []):
+                # Only process actual file attachments
+                if relation.get('rel') == 'AttachedFile':
+                    file_name = relation.get('attributes', {}).get('name')
+                    if file_name:
+                        try:
+                            relation['content'] = self.parse_attachment_by_url(relation['url'], file_name, image_description_prompt=image_description_prompt)
+                        except Exception as att_e:
+                            logger.warning(f"Failed to parse attachment {file_name}: {att_e}")
 
 
         return parsed_item
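The hunks above make AzureDevOpsApiWrapper fetch relations only when the expand argument asks for them, and parse content only for relations whose rel is 'AttachedFile', so ordinary work-item links are left alone and one broken attachment no longer fails the whole item. A standalone sketch of that filtering, with hypothetical relation dicts and a stubbed parser in place of parse_attachment_by_url:

# Sketch only: relation dicts and the parser callable are illustrative stand-ins.
def describe_attachments(parsed_item, parse_attachment_by_url, image_description_prompt=None):
    for relation in parsed_item.get('relations', []):
        # Only 'AttachedFile' relations point at downloadable attachments;
        # hierarchy/related links are skipped.
        if relation.get('rel') != 'AttachedFile':
            continue
        file_name = relation.get('attributes', {}).get('name')
        if not file_name:
            continue
        try:
            relation['content'] = parse_attachment_by_url(
                relation['url'], file_name, image_description_prompt=image_description_prompt)
        except Exception as exc:  # a single bad attachment does not abort the item
            relation['content'] = f"failed to parse: {exc}"

item = {'relations': [
    {'rel': 'AttachedFile', 'url': 'https://dev.azure.com/att/1', 'attributes': {'name': 'spec.docx'}},
    {'rel': 'System.LinkTypes.Related', 'url': 'https://dev.azure.com/wit/2', 'attributes': {}},
]}
describe_attachments(item, lambda url, name, **_: f"parsed {name} from {url}")
print(item['relations'][0]['content'])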
@@ -7,7 +7,6 @@ from typing import Any, Optional, List, Dict, Generator
 from langchain_core.documents import Document
 from pydantic import create_model, Field, SecretStr
 
-from .utils import make_json_serializable
 from .utils.content_parser import file_extension_by_chunker, process_document_by_type
 from .vector_adapters.VectorStoreAdapter import VectorStoreAdapterFactory
 from ..runtime.langchain.document_loaders.constants import loaders_allowed_to_override
@@ -111,7 +110,7 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
     def __init__(self, **kwargs):
         conn = kwargs.get('connection_string', None)
         connection_string = conn.get_secret_value() if isinstance(conn, SecretStr) else conn
-        collection_name = kwargs.get('collection_name')
+        collection_name = kwargs.get('collection_schema')
 
         if 'vectorstore_type' not in kwargs:
             kwargs['vectorstore_type'] = 'PGVector'
@@ -152,39 +151,45 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
 
     def index_data(self, **kwargs):
         index_name = kwargs.get("index_name")
-        progress_step = kwargs.get("progress_step")
         clean_index = kwargs.get("clean_index")
         chunking_tool = kwargs.get("chunking_tool")
         chunking_config = kwargs.get("chunking_config")
+        result = {"count": 0}
         #
-        if clean_index:
-            self._clean_index(index_name)
-        #
-        self.index_meta_init(index_name, kwargs)
-        #
-        self._log_tool_event(f"Indexing data into collection with suffix '{index_name}'. It can take some time...")
-        self._log_tool_event(f"Loading the documents to index...{kwargs}")
-        documents = self._base_loader(**kwargs)
-        documents = list(documents) # consume/exhaust generator to count items
-        documents_count = len(documents)
-        documents = (doc for doc in documents)
-        self._log_tool_event(f"Base documents were pre-loaded. "
-                             f"Search for possible document duplicates and remove them from the indexing list...")
-        documents = self._reduce_duplicates(documents, index_name)
-        self._log_tool_event(f"Duplicates were removed. "
-                             f"Processing documents to collect dependencies and prepare them for indexing...")
-        result = self._save_index_generator(documents, documents_count, chunking_tool, chunking_config, index_name=index_name, progress_step=progress_step)
-        #
-        self.index_meta_update(index_name, IndexerKeywords.INDEX_META_COMPLETED.value, result)
-        #
-        return {"status": "ok", "message": f"successfully indexed {result} documents"}
-
-    def _save_index_generator(self, base_documents: Generator[Document, None, None], base_total: int, chunking_tool, chunking_config, index_name: Optional[str] = None, progress_step: int = 20):
+        try:
+            if clean_index:
+                self._clean_index(index_name)
+            #
+            self.index_meta_init(index_name, kwargs)
+            #
+            self._log_tool_event(f"Indexing data into collection with suffix '{index_name}'. It can take some time...")
+            self._log_tool_event(f"Loading the documents to index...{kwargs}")
+            documents = self._base_loader(**kwargs)
+            documents = list(documents) # consume/exhaust generator to count items
+            documents_count = len(documents)
+            documents = (doc for doc in documents)
+            self._log_tool_event(f"Base documents were pre-loaded. "
+                                 f"Search for possible document duplicates and remove them from the indexing list...")
+            documents = self._reduce_duplicates(documents, index_name)
+            self._log_tool_event(f"Duplicates were removed. "
+                                 f"Processing documents to collect dependencies and prepare them for indexing...")
+            self._save_index_generator(documents, documents_count, chunking_tool, chunking_config, index_name=index_name, result=result)
+            #
+            results_count = result["count"]
+            self.index_meta_update(index_name, IndexerKeywords.INDEX_META_COMPLETED.value, results_count)
+            #
+            return {"status": "ok", "message": f"successfully indexed {results_count} documents" if results_count > 0
+                    else "no new documents to index"}
+        except Exception as e:
+            self.index_meta_update(index_name, IndexerKeywords.INDEX_META_FAILED.value, result["count"])
+            raise e
+
+
+    def _save_index_generator(self, base_documents: Generator[Document, None, None], base_total: int, chunking_tool, chunking_config, result, index_name: Optional[str] = None):
         self._log_tool_event(f"Base documents are ready for indexing. {base_total} base documents in total to index.")
         from ..runtime.langchain.interfaces.llm_processor import add_documents
         #
         base_doc_counter = 0
-        total_counter = 0
         pg_vector_add_docs_chunk = []
         for base_doc in base_documents:
             base_doc_counter += 1
@@ -232,10 +237,9 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
             msg = f"Indexed base document #{base_doc_counter} out of {base_total} (with {dependent_docs_counter} dependencies)."
             logger.debug(msg)
             self._log_tool_event(msg)
-            total_counter += dependent_docs_counter
+            result["count"] += dependent_docs_counter
         if pg_vector_add_docs_chunk:
             add_documents(vectorstore=self.vectorstore, documents=pg_vector_add_docs_chunk)
-        return total_counter
 
     def _apply_loaders_chunkers(self, documents: Generator[Document, None, None], chunking_tool: str=None, chunking_config=None) -> Generator[Document, None, None]:
         from ..tools.chunkers import __all__ as chunkers
@@ -343,7 +347,7 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
         """Cleans the indexed data in the collection."""
         super()._clean_collection(index_name=index_name)
         return (f"Collection '{index_name}' has been removed from the vector store.\n"
-                f"Available collections: {self.list_indexes()}") if index_name \
+                f"Available collections: {self.list_collections()}") if index_name \
             else "All collections have been removed from the vector store."
 
     def _build_collection_filter(self, filter: dict | str, index_name: str = "") -> dict:
@@ -385,7 +389,7 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
         """ Searches indexed documents in the vector store."""
         # build filter on top of index_name
 
-        available_collections = super().list_indexes()
+        available_collections = super().list_collections()
         if index_name and index_name not in available_collections:
             return f"Collection '{index_name}' not found. Available collections: {available_collections}"
 
@@ -454,37 +458,28 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
         )
 
     def index_meta_init(self, index_name: str, index_configuration: dict[str, Any]):
-        index_meta_raw = super().get_index_meta(index_name)
-        from ..runtime.langchain.interfaces.llm_processor import add_documents
-        created_on = time.time()
-        metadata = {
-            "collection": index_name,
-            "type": IndexerKeywords.INDEX_META_TYPE.value,
-            "indexed": 0,
-            "state": IndexerKeywords.INDEX_META_IN_PROGRESS.value,
-            "index_configuration": index_configuration,
-            "created_on": created_on,
-            "updated_on": created_on,
-            "history": "[]",
-        }
-        index_meta_ids = None
-        #
-        if index_meta_raw:
-            history_raw = index_meta_raw.get("metadata", {}).get("history", "[]")
-            if isinstance(history_raw, str) and history_raw.strip():
-                try:
-                    history = json.loads(history_raw)
-                except (json.JSONDecodeError, TypeError):
-                    history = []
-            else:
-                history = []
-            new_history_item = {k: v for k, v in index_meta_raw.get("metadata", {}).items() if k != "history"}
-            history.append(new_history_item)
-            metadata["history"] = json.dumps(history)
-            index_meta_ids = [index_meta_raw.get("id")]
-        #
-        index_meta_doc = Document(page_content=f"{IndexerKeywords.INDEX_META_TYPE.value}_{index_name}", metadata=metadata)
-        add_documents(vectorstore=self.vectorstore, documents=[index_meta_doc], ids=index_meta_ids)
+        index_meta = super().get_index_meta(index_name)
+        if not index_meta:
+            self._log_tool_event(
+                f"There is no existing index_meta for collection '{index_name}'. Initializing it.",
+                tool_name="index_data"
+            )
+            from ..runtime.langchain.interfaces.llm_processor import add_documents
+            created_on = time.time()
+            metadata = {
+                "collection": index_name,
+                "type": IndexerKeywords.INDEX_META_TYPE.value,
+                "indexed": 0,
+                "state": IndexerKeywords.INDEX_META_IN_PROGRESS.value,
+                "index_configuration": index_configuration,
+                "created_on": created_on,
+                "updated_on": created_on,
+                "task_id": None,
+                "conversation_id": None,
+            }
+            metadata["history"] = json.dumps([metadata])
+            index_meta_doc = Document(page_content=f"{IndexerKeywords.INDEX_META_TYPE.value}_{index_name}", metadata=metadata)
+            add_documents(vectorstore=self.vectorstore, documents=[index_meta_doc])
 
     def index_meta_update(self, index_name: str, state: str, result: int):
         index_meta_raw = super().get_index_meta(index_name)
@@ -495,6 +490,20 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
         metadata["indexed"] = result
         metadata["state"] = state
         metadata["updated_on"] = time.time()
+        #
+        history_raw = metadata.pop("history", "[]")
+        try:
+            history = json.loads(history_raw) if history_raw.strip() else []
+            # replace the last history item with updated metadata
+            if history and isinstance(history, list):
+                history[-1] = metadata
+            else:
+                history = [metadata]
+        except (json.JSONDecodeError, TypeError):
+            logger.warning(f"Failed to load index history: {history_raw}. Create new with only current item.")
+            history = [metadata]
+        #
+        metadata["history"] = json.dumps(history)
         index_meta_doc = Document(page_content=index_meta_raw.get("content", ""), metadata=metadata)
         add_documents(vectorstore=self.vectorstore, documents=[index_meta_doc], ids=[index_meta_raw.get("id")])
 
@@ -547,10 +556,10 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
                 "args_schema": RemoveIndexParams
             },
             {
-                "name": "list_indexes",
-                "mode": "list_indexes",
-                "ref": self.list_indexes,
-                "description": self.list_indexes.__doc__,
+                "name": "list_collections",
+                "mode": "list_collections",
+                "ref": self.list_collections,
+                "description": self.list_collections.__doc__,
                 "args_schema": create_model("ListCollectionsParams") # No parameters
             },
         ]
@@ -6,7 +6,7 @@ from langchain_core.prompts import ChatPromptTemplate
 from langchain.text_splitter import TokenTextSplitter
 
 from typing import Optional, List
-from langchain_core.pydantic_v1 import BaseModel
+from pydantic import BaseModel
 from ..utils import tiktoken_length
 
 logger = getLogger(__name__)
@@ -1,5 +1,6 @@
 import ast
 import fnmatch
+import json
 import logging
 from typing import Optional, List, Generator
 
@@ -21,7 +22,7 @@ class CodeIndexerToolkit(BaseIndexerToolkit):
         return self.vector_adapter.get_code_indexed_data(self, index_name)
 
     def key_fn(self, document: Document):
-        return document.metadata.get('id')
+        return document.metadata.get("filename")
 
     def compare_fn(self, document: Document, idx_data):
         return (document.metadata.get('commit_hash') and
@@ -46,7 +47,7 @@ class CodeIndexerToolkit(BaseIndexerToolkit):
         )
 
     def _extend_data(self, documents: Generator[Document, None, None]):
-        yield from parse_code_files_for_db(documents)
+        yield from documents
 
     def _index_tool_params(self):
         """Return the parameters for indexing data."""
@@ -117,6 +118,15 @@ class CodeIndexerToolkit(BaseIndexerToolkit):
                 if not file_content:
                     # empty file, skip
                     continue
+                #
+                # ensure file content is a string
+                if isinstance(file_content, bytes):
+                    file_content = file_content.decode("utf-8", errors="ignore")
+                elif isinstance(file_content, dict) and file.endswith('.json'):
+                    file_content = json.dumps(file_content)
+                elif not isinstance(file_content, str):
+                    file_content = str(file_content)
+                #
                 # hash the file content to ensure uniqueness
                 import hashlib
                 file_hash = hashlib.sha256(file_content.encode("utf-8")).hexdigest()
@@ -127,7 +137,7 @@ class CodeIndexerToolkit(BaseIndexerToolkit):
                 self._log_tool_event(message=f"{idx} out of {total_files} files have been read", tool_name="loader")
             self._log_tool_event(message=f"{len(_files)} have been read", tool_name="loader")
 
-        return file_content_generator()
+        return parse_code_files_for_db(file_content_generator())
 
     def __handle_get_files(self, path: str, branch: str):
         """
@@ -7,12 +7,14 @@ from json import JSONDecodeError
 from typing import Optional, List, Any, Dict, Callable, Generator, Literal
 
 import requests
+from atlassian.errors import ApiError
 from langchain_community.document_loaders.confluence import ContentFormat
 from langchain_core.documents import Document
 from langchain_core.messages import HumanMessage
 from langchain_core.tools import ToolException
 from markdownify import markdownify
 from pydantic import Field, PrivateAttr, model_validator, create_model, SecretStr
+from requests import HTTPError
 from tenacity import retry, stop_after_attempt, wait_exponential, before_sleep_log
 
 from alita_sdk.tools.non_code_indexer_toolkit import NonCodeIndexerToolkit
@@ -194,6 +196,7 @@ class ConfluenceAPIWrapper(NonCodeIndexerToolkit):
     keep_markdown_format: Optional[bool] = True
     ocr_languages: Optional[str] = None
     keep_newlines: Optional[bool] = True
+    _errors: Optional[list[str]] = None
     _image_cache: ImageDescriptionCache = PrivateAttr(default_factory=ImageDescriptionCache)
 
     @model_validator(mode='before')
@@ -498,7 +501,9 @@ class ConfluenceAPIWrapper(NonCodeIndexerToolkit):
         restrictions = self.client.get_all_restrictions_for_content(page["id"])
 
         return (
-            page["status"] == "current"
+            (page["status"] == "current"
+             # allow user to see archived content if needed
+             or page["status"] == "archived")
             and not restrictions["read"]["restrictions"]["user"]["results"]
             and not restrictions["read"]["restrictions"]["group"]["results"]
         )
@@ -518,18 +523,35 @@ class ConfluenceAPIWrapper(NonCodeIndexerToolkit):
                 ),
                 before_sleep=before_sleep_log(logger, logging.WARNING),
             )(self.client.get_page_by_id)
-            page = get_page(
-                page_id=page_id, expand=f"{self.content_format.value},version"
-            )
-            if not self.include_restricted_content and not self.is_public_page(page):
-                continue
+            try:
+                page = get_page(
+                    page_id=page_id, expand=f"{self.content_format.value},version"
+                )
+            except (ApiError, HTTPError) as e:
+                logger.error(f"Error fetching page with ID {page_id}: {e}")
+                page_content_temp = f"Confluence API Error: cannot fetch the page with ID {page_id}: {e}"
+                # store errors
+                if self._errors is None:
+                    self._errors = []
+                self._errors.append(page_content_temp)
+                return Document(page_content=page_content_temp,
+                                metadata={})
+            # TODO: update on toolkit advanced settings level as a separate feature
+            # if not self.include_restricted_content and not self.is_public_page(page):
+            #     continue
             yield self.process_page(page, skip_images)
 
+    def _log_errors(self):
+        """ Log errors encountered during toolkit execution. """
+        if self._errors:
+            logger.info(f"Errors encountered during toolkit execution: {self._errors}")
+
     def read_page_by_id(self, page_id: str, skip_images: bool = False):
         """Reads a page by its id in the Confluence space. If id is not available, but there is a title - use get_page_id first."""
         result = list(self.get_pages_by_id([page_id], skip_images))
         if not result:
-            "Page not found"
+            return f"Pages not found. Errors: {self._errors}" if self._errors \
+                else "Pages not found or you do not have access to them."
         return result[0].page_content
         # return self._strip_base64_images(result[0].page_content) if skip_images else result[0].page_content
 
@@ -3,6 +3,7 @@ from typing import Optional, List
 from logging import getLogger
 
 import requests
+from langchain_core.documents import Document
 
 logger = getLogger(__name__)
 from PIL import Image
@@ -193,6 +194,15 @@ class AlitaConfluenceLoader(ConfluenceLoader):
         else:
             return super().process_image(link, ocr_languages)
 
+    def process_page(self, page: dict, include_attachments: bool, include_comments: bool, include_labels: bool,
+                     content_format: ContentFormat, ocr_languages: Optional[str] = None,
+                     keep_markdown_format: Optional[bool] = False, keep_newlines: bool = False) -> Document:
+        if not page.get("title"):
+            # if 'include_restricted_content' set to True, draft pages are loaded and can have no title
+            page["title"] = "Untitled"
+        return super().process_page(page, include_attachments, include_comments, include_labels, content_format,
+                                    ocr_languages, keep_markdown_format, keep_newlines)
+
     # TODO review usage
     # def process_svg(
     #     self,
@@ -17,7 +17,7 @@ from ..runtime.utils.utils import IndexerKeywords
 
 logger = logging.getLogger(__name__)
 
-INDEX_TOOL_NAMES = ['index_data', 'remove_index', 'list_indexes', 'search_index', 'stepback_search_index',
+INDEX_TOOL_NAMES = ['index_data', 'remove_index', 'list_collections', 'search_index', 'stepback_search_index',
                     'stepback_summary_index']
 
 LoaderSchema = create_model(
@@ -403,9 +403,9 @@ class BaseVectorStoreToolApiWrapper(BaseToolApiWrapper):
         """Cleans the indexed data in the collection."""
         self._init_vector_store()._clean_collection(index_name=index_name)
         return (f"Collection '{index_name}' has been removed from the vector store.\n"
-                f"Available collections: {self.list_indexes()}")
+                f"Available collections: {self.list_collections()}")
 
-    def list_indexes(self):
+    def list_collections(self):
         """Lists all collections in the vector store."""
         vectorstore_wrapper = self._init_vector_store()
         return vectorstore_wrapper.list_collections()
@@ -537,10 +537,10 @@ class BaseVectorStoreToolApiWrapper(BaseToolApiWrapper):
                 "args_schema": RemoveIndexParams
             },
             {
-                "name": "list_indexes",
-                "mode": "list_indexes",
-                "ref": self.list_indexes,
-                "description": self.list_indexes.__doc__,
+                "name": "list_collections",
+                "mode": "list_collections",
+                "ref": self.list_collections,
+                "description": self.list_collections.__doc__,
                 "args_schema": create_model("ListCollectionsParams") # No parameters
             },
 
@@ -117,7 +117,11 @@ class GitLabAPIWrapper(CodeIndexerToolkit):
 
     @model_validator(mode='before')
     @classmethod
-    def validate_toolkit(cls, values: Dict) -> Dict:
+    def validate_toolkit_before(cls, values: Dict) -> Dict:
+        return super().validate_toolkit(values)
+
+    @model_validator(mode='after')
+    def validate_toolkit(self):
         try:
             import gitlab
         except ImportError:
@@ -125,17 +129,17 @@ class GitLabAPIWrapper(CodeIndexerToolkit):
                 "python-gitlab is not installed. "
                 "Please install it with `pip install python-gitlab`"
             )
-        values['repository'] = cls._sanitize_url(values['repository'])
+        self.repository = self._sanitize_url(self.repository)
         g = gitlab.Gitlab(
-            url=cls._sanitize_url(values['url']),
-            private_token=values['private_token'],
+            url=self._sanitize_url(self.url),
+            private_token=self.private_token.get_secret_value(),
             keep_base_url=True,
         )
 
         g.auth()
-        cls._git = g
-        cls._active_branch = values.get('branch')
-        return super().validate_toolkit(values)
+        self._git = g
+        self._active_branch = self.branch
+        return self
 
     @property
     def repo_instance(self):
@@ -563,7 +563,7 @@ class JiraApiWrapper(NonCodeIndexerToolkit):
         Use the appropriate issue link type (e.g., "Test", "Relates", "Blocks").
         If we use "Test" linktype, the test is inward issue, the story/other issue is outward issue.."""
 
-        comment = "This test is linked to the story."
+        comment = f"Issue {inward_issue_key} was linked to {outward_issue_key}."
         comment_body = {"content": [{"content": [{"text": comment,"type": "text"}],"type": "paragraph"}],"type": "doc","version": 1} if self.api_version == "3" else comment
         link_data = {
             "type": {"name": f"{linktype}"},
@@ -1,6 +1,7 @@
 import json
 import re
 import logging
+import yaml
 from typing import List, Any, Optional, Dict
 from langchain_core.tools import BaseTool, BaseToolkit, ToolException
 from requests_openapi import Operation, Client, Server
@@ -101,7 +102,15 @@ class AlitaOpenAPIToolkit(BaseToolkit):
         else:
             tools_set = {}
         if isinstance(openapi_spec, str):
-            openapi_spec = json.loads(openapi_spec)
+            # Try to detect if it's YAML or JSON by attempting to parse as JSON first
+            try:
+                openapi_spec = json.loads(openapi_spec)
+            except json.JSONDecodeError:
+                # If JSON parsing fails, try YAML
+                try:
+                    openapi_spec = yaml.safe_load(openapi_spec)
+                except yaml.YAMLError as e:
+                    raise ToolException(f"Failed to parse OpenAPI spec as JSON or YAML: {e}")
         c = Client()
         c.load_spec(openapi_spec)
         if headers:
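With this change, AlitaOpenAPIToolkit accepts OpenAPI specs passed as either JSON or YAML text by trying json.loads first and falling back to yaml.safe_load. A standalone sketch of that fallback (toy spec strings, requires PyYAML; the helper name is illustrative):

# Sketch of the JSON-first, YAML-fallback spec parsing added above.
import json
import yaml

def load_spec(text: str) -> dict:
    try:
        return json.loads(text)
    except json.JSONDecodeError:
        try:
            return yaml.safe_load(text)
        except yaml.YAMLError as e:
            raise ValueError(f"Failed to parse OpenAPI spec as JSON or YAML: {e}")

print(load_spec('{"openapi": "3.0.0"}')["openapi"])
print(load_spec("openapi: 3.0.0\ninfo:\n  title: demo\n")["info"]["title"])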