alita-sdk 0.3.379__py3-none-any.whl → 0.3.462__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of alita-sdk might be problematic.

Files changed (110):
  1. alita_sdk/cli/__init__.py +10 -0
  2. alita_sdk/cli/__main__.py +17 -0
  3. alita_sdk/cli/agent_executor.py +144 -0
  4. alita_sdk/cli/agent_loader.py +197 -0
  5. alita_sdk/cli/agent_ui.py +166 -0
  6. alita_sdk/cli/agents.py +1069 -0
  7. alita_sdk/cli/callbacks.py +576 -0
  8. alita_sdk/cli/cli.py +159 -0
  9. alita_sdk/cli/config.py +153 -0
  10. alita_sdk/cli/formatting.py +182 -0
  11. alita_sdk/cli/mcp_loader.py +315 -0
  12. alita_sdk/cli/toolkit.py +330 -0
  13. alita_sdk/cli/toolkit_loader.py +55 -0
  14. alita_sdk/cli/tools/__init__.py +9 -0
  15. alita_sdk/cli/tools/filesystem.py +905 -0
  16. alita_sdk/configurations/bitbucket.py +95 -0
  17. alita_sdk/configurations/confluence.py +96 -1
  18. alita_sdk/configurations/gitlab.py +79 -0
  19. alita_sdk/configurations/jira.py +103 -0
  20. alita_sdk/configurations/testrail.py +88 -0
  21. alita_sdk/configurations/xray.py +93 -0
  22. alita_sdk/configurations/zephyr_enterprise.py +93 -0
  23. alita_sdk/configurations/zephyr_essential.py +75 -0
  24. alita_sdk/runtime/clients/client.py +47 -10
  25. alita_sdk/runtime/clients/mcp_discovery.py +342 -0
  26. alita_sdk/runtime/clients/mcp_manager.py +262 -0
  27. alita_sdk/runtime/clients/sandbox_client.py +8 -0
  28. alita_sdk/runtime/langchain/assistant.py +37 -16
  29. alita_sdk/runtime/langchain/constants.py +6 -1
  30. alita_sdk/runtime/langchain/document_loaders/AlitaDocxMammothLoader.py +315 -3
  31. alita_sdk/runtime/langchain/document_loaders/AlitaJSONLoader.py +4 -1
  32. alita_sdk/runtime/langchain/document_loaders/constants.py +28 -12
  33. alita_sdk/runtime/langchain/langraph_agent.py +146 -31
  34. alita_sdk/runtime/langchain/utils.py +39 -7
  35. alita_sdk/runtime/models/mcp_models.py +61 -0
  36. alita_sdk/runtime/toolkits/__init__.py +24 -0
  37. alita_sdk/runtime/toolkits/application.py +8 -1
  38. alita_sdk/runtime/toolkits/artifact.py +5 -6
  39. alita_sdk/runtime/toolkits/mcp.py +895 -0
  40. alita_sdk/runtime/toolkits/tools.py +137 -56
  41. alita_sdk/runtime/tools/__init__.py +7 -2
  42. alita_sdk/runtime/tools/application.py +7 -0
  43. alita_sdk/runtime/tools/function.py +29 -25
  44. alita_sdk/runtime/tools/graph.py +10 -4
  45. alita_sdk/runtime/tools/image_generation.py +104 -8
  46. alita_sdk/runtime/tools/llm.py +204 -114
  47. alita_sdk/runtime/tools/mcp_inspect_tool.py +284 -0
  48. alita_sdk/runtime/tools/mcp_remote_tool.py +166 -0
  49. alita_sdk/runtime/tools/mcp_server_tool.py +3 -1
  50. alita_sdk/runtime/tools/sandbox.py +57 -43
  51. alita_sdk/runtime/tools/vectorstore.py +2 -1
  52. alita_sdk/runtime/tools/vectorstore_base.py +19 -3
  53. alita_sdk/runtime/utils/mcp_oauth.py +164 -0
  54. alita_sdk/runtime/utils/mcp_sse_client.py +405 -0
  55. alita_sdk/runtime/utils/streamlit.py +34 -3
  56. alita_sdk/runtime/utils/toolkit_utils.py +14 -4
  57. alita_sdk/tools/__init__.py +46 -31
  58. alita_sdk/tools/ado/repos/__init__.py +1 -0
  59. alita_sdk/tools/ado/test_plan/__init__.py +1 -1
  60. alita_sdk/tools/ado/wiki/__init__.py +1 -5
  61. alita_sdk/tools/ado/work_item/__init__.py +1 -5
  62. alita_sdk/tools/ado/work_item/ado_wrapper.py +17 -8
  63. alita_sdk/tools/base_indexer_toolkit.py +105 -43
  64. alita_sdk/tools/bitbucket/__init__.py +1 -0
  65. alita_sdk/tools/chunkers/sematic/proposal_chunker.py +1 -1
  66. alita_sdk/tools/code/sonar/__init__.py +1 -1
  67. alita_sdk/tools/code_indexer_toolkit.py +13 -3
  68. alita_sdk/tools/confluence/__init__.py +2 -2
  69. alita_sdk/tools/confluence/api_wrapper.py +29 -7
  70. alita_sdk/tools/confluence/loader.py +10 -0
  71. alita_sdk/tools/github/__init__.py +2 -2
  72. alita_sdk/tools/gitlab/__init__.py +2 -1
  73. alita_sdk/tools/gitlab/api_wrapper.py +11 -7
  74. alita_sdk/tools/gitlab_org/__init__.py +1 -2
  75. alita_sdk/tools/google_places/__init__.py +2 -1
  76. alita_sdk/tools/jira/__init__.py +1 -0
  77. alita_sdk/tools/jira/api_wrapper.py +1 -1
  78. alita_sdk/tools/memory/__init__.py +1 -1
  79. alita_sdk/tools/openapi/__init__.py +10 -1
  80. alita_sdk/tools/pandas/__init__.py +1 -1
  81. alita_sdk/tools/postman/__init__.py +2 -1
  82. alita_sdk/tools/pptx/__init__.py +2 -2
  83. alita_sdk/tools/qtest/__init__.py +3 -3
  84. alita_sdk/tools/qtest/api_wrapper.py +1708 -76
  85. alita_sdk/tools/rally/__init__.py +1 -2
  86. alita_sdk/tools/report_portal/__init__.py +1 -0
  87. alita_sdk/tools/salesforce/__init__.py +1 -0
  88. alita_sdk/tools/servicenow/__init__.py +2 -3
  89. alita_sdk/tools/sharepoint/__init__.py +1 -0
  90. alita_sdk/tools/sharepoint/api_wrapper.py +125 -34
  91. alita_sdk/tools/sharepoint/authorization_helper.py +191 -1
  92. alita_sdk/tools/sharepoint/utils.py +8 -2
  93. alita_sdk/tools/slack/__init__.py +1 -0
  94. alita_sdk/tools/sql/__init__.py +2 -1
  95. alita_sdk/tools/testio/__init__.py +1 -0
  96. alita_sdk/tools/testrail/__init__.py +1 -3
  97. alita_sdk/tools/utils/content_parser.py +27 -16
  98. alita_sdk/tools/vector_adapters/VectorStoreAdapter.py +18 -5
  99. alita_sdk/tools/xray/__init__.py +2 -1
  100. alita_sdk/tools/zephyr/__init__.py +2 -1
  101. alita_sdk/tools/zephyr_enterprise/__init__.py +1 -0
  102. alita_sdk/tools/zephyr_essential/__init__.py +1 -0
  103. alita_sdk/tools/zephyr_scale/__init__.py +1 -0
  104. alita_sdk/tools/zephyr_squad/__init__.py +1 -0
  105. {alita_sdk-0.3.379.dist-info → alita_sdk-0.3.462.dist-info}/METADATA +8 -2
  106. {alita_sdk-0.3.379.dist-info → alita_sdk-0.3.462.dist-info}/RECORD +110 -86
  107. alita_sdk-0.3.462.dist-info/entry_points.txt +2 -0
  108. {alita_sdk-0.3.379.dist-info → alita_sdk-0.3.462.dist-info}/WHEEL +0 -0
  109. {alita_sdk-0.3.379.dist-info → alita_sdk-0.3.462.dist-info}/licenses/LICENSE +0 -0
  110. {alita_sdk-0.3.379.dist-info → alita_sdk-0.3.462.dist-info}/top_level.txt +0 -0
alita_sdk/tools/ado/work_item/__init__.py
@@ -23,11 +23,6 @@ class AzureDevOpsWorkItemsToolkit(BaseToolkit):
     AzureDevOpsWorkItemsToolkit.toolkit_max_length = get_max_toolkit_length(selected_tools)
     m = create_model(
         name,
-        name=(str, Field(description="Toolkit name",
-                         json_schema_extra={
-                             'toolkit_name': True,
-                             'max_toolkit_length': AzureDevOpsWorkItemsToolkit.toolkit_max_length})
-             ),
         ado_configuration=(AdoConfiguration, Field(description="Ado Work Item configuration", json_schema_extra={'configuration_types': ['ado']})),
         limit=(Optional[int], Field(description="ADO plans limit used for limitation of the list with results", default=5)),
         selected_tools=(List[Literal[tuple(selected_tools)]], Field(default=[], json_schema_extra={'args_schemas': selected_tools})),
@@ -42,6 +37,7 @@ class AzureDevOpsWorkItemsToolkit(BaseToolkit):
         'metadata': {
             "label": "ADO boards",
             "icon_url": "ado-boards-icon.svg",
+            "max_length": AzureDevOpsWorkItemsToolkit.toolkit_max_length,
             "categories": ["project management"],
             "extra_categories": ["work item management", "issue tracking", "agile boards"],
             "sections": {
alita_sdk/tools/ado/work_item/ado_wrapper.py
@@ -329,11 +329,14 @@ class AzureDevOpsApiWrapper(NonCodeIndexerToolkit):
         parsed_item.update(fields_data)

         # extract relations if any
-        relations_data = work_item.relations
+        relations_data = None
+        if expand and str(expand).lower() in ("relations", "all"):
+            try:
+                relations_data = getattr(work_item, 'relations', None)
+            except KeyError:
+                relations_data = None
         if relations_data:
-            parsed_item['relations'] = []
-            for relation in relations_data:
-                parsed_item['relations'].append(relation.as_dict())
+            parsed_item['relations'] = [relation.as_dict() for relation in relations_data]

         if parse_attachments:
             # describe images in work item fields if present
@@ -344,13 +347,19 @@ class AzureDevOpsApiWrapper(NonCodeIndexerToolkit):
                 for img in images:
                     src = img.get('src')
                     if src:
-                        description = self.parse_attachment_by_url(src, image_description_prompt)
+                        description = self.parse_attachment_by_url(src, image_description_prompt=image_description_prompt)
                         img['image-description'] = description
                 parsed_item[field_name] = str(soup)
             # parse attached documents if present
-            if parsed_item['relations']:
-                for attachment in parsed_item['relations']:
-                    attachment['content'] = self.parse_attachment_by_url(attachment['url'], attachment['attributes']['name'], image_description_prompt)
+            for relation in parsed_item.get('relations', []):
+                # Only process actual file attachments
+                if relation.get('rel') == 'AttachedFile':
+                    file_name = relation.get('attributes', {}).get('name')
+                    if file_name:
+                        try:
+                            relation['content'] = self.parse_attachment_by_url(relation['url'], file_name, image_description_prompt=image_description_prompt)
+                        except Exception as att_e:
+                            logger.warning(f"Failed to parse attachment {file_name}: {att_e}")

         return parsed_item
alita_sdk/tools/base_indexer_toolkit.py
@@ -4,10 +4,10 @@ import logging
 import time
 from typing import Any, Optional, List, Dict, Generator

+from langchain_core.callbacks import dispatch_custom_event
 from langchain_core.documents import Document
 from pydantic import create_model, Field, SecretStr

-from .utils import make_json_serializable
 from .utils.content_parser import file_extension_by_chunker, process_document_by_type
 from .vector_adapters.VectorStoreAdapter import VectorStoreAdapterFactory
 from ..runtime.langchain.document_loaders.constants import loaders_allowed_to_override
@@ -16,6 +16,8 @@ from ..runtime.utils.utils import IndexerKeywords

 logger = logging.getLogger(__name__)

+DEFAULT_CUT_OFF = 0.2
+
 # Base Vector Store Schema Models
 BaseIndexParams = create_model(
     "BaseIndexParams",
@@ -38,7 +40,7 @@ BaseSearchParams = create_model(
         default={},
         examples=["{\"key\": \"value\"}", "{\"status\": \"active\"}"]
     )),
-    cut_off=(Optional[float], Field(description="Cut-off score for search results", default=0.5, ge=0, le=1)),
+    cut_off=(Optional[float], Field(description="Cut-off score for search results", default=DEFAULT_CUT_OFF, ge=0, le=1)),
     search_top=(Optional[int], Field(description="Number of top results to return", default=10)),
     full_text_search=(Optional[Dict[str, Any]], Field(
         description="Full text search parameters. Can be a dictionary with search options.",
@@ -68,7 +70,7 @@ BaseStepbackSearchParams = create_model(
         default={},
         examples=["{\"key\": \"value\"}", "{\"status\": \"active\"}"]
     )),
-    cut_off=(Optional[float], Field(description="Cut-off score for search results", default=0.5, ge=0, le=1)),
+    cut_off=(Optional[float], Field(description="Cut-off score for search results", default=DEFAULT_CUT_OFF, ge=0, le=1)),
     search_top=(Optional[int], Field(description="Number of top results to return", default=10)),
     full_text_search=(Optional[Dict[str, Any]], Field(
         description="Full text search parameters. Can be a dictionary with search options.",
@@ -111,7 +113,7 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
     def __init__(self, **kwargs):
         conn = kwargs.get('connection_string', None)
         connection_string = conn.get_secret_value() if isinstance(conn, SecretStr) else conn
-        collection_name = kwargs.get('collection_name')
+        collection_name = kwargs.get('collection_schema')

         if 'vectorstore_type' not in kwargs:
             kwargs['vectorstore_type'] = 'PGVector'
@@ -176,11 +178,15 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
                 f"Processing documents to collect dependencies and prepare them for indexing...")
             self._save_index_generator(documents, documents_count, chunking_tool, chunking_config, index_name=index_name, result=result)
             #
-            self.index_meta_update(index_name, IndexerKeywords.INDEX_META_COMPLETED.value, result["count"])
+            results_count = result["count"]
+            self.index_meta_update(index_name, IndexerKeywords.INDEX_META_COMPLETED.value, results_count)
+            self._emit_index_event(index_name)
             #
-            return {"status": "ok", "message": f"successfully indexed {result["count"]} documents"}
+            return {"status": "ok", "message": f"successfully indexed {results_count} documents" if results_count > 0
+                    else "no new documents to index"}
         except Exception as e:
             self.index_meta_update(index_name, IndexerKeywords.INDEX_META_FAILED.value, result["count"])
+            self._emit_index_event(index_name, error=str(e))
             raise e

@@ -379,7 +385,7 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
     def search_index(self,
                      query: str,
                      index_name: str = "",
-                     filter: dict | str = {}, cut_off: float = 0.5,
+                     filter: dict | str = {}, cut_off: float = DEFAULT_CUT_OFF,
                      search_top: int = 10, reranker: dict = {},
                      full_text_search: Optional[Dict[str, Any]] = None,
                      reranking_config: Optional[Dict[str, Dict[str, Any]]] = None,
@@ -410,7 +416,7 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
                      query: str,
                      messages: List[Dict[str, Any]] = [],
                      index_name: str = "",
-                     filter: dict | str = {}, cut_off: float = 0.5,
+                     filter: dict | str = {}, cut_off: float = DEFAULT_CUT_OFF,
                      search_top: int = 10, reranker: dict = {},
                      full_text_search: Optional[Dict[str, Any]] = None,
                      reranking_config: Optional[Dict[str, Dict[str, Any]]] = None,
@@ -435,7 +441,7 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
                      query: str,
                      messages: List[Dict[str, Any]] = [],
                      index_name: str = "",
-                     filter: dict | str = {}, cut_off: float = 0.5,
+                     filter: dict | str = {}, cut_off: float = DEFAULT_CUT_OFF,
                      search_top: int = 10, reranker: dict = {},
                      full_text_search: Optional[Dict[str, Any]] = None,
                      reranking_config: Optional[Dict[str, Dict[str, Any]]] = None,
@@ -457,37 +463,29 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
         )

     def index_meta_init(self, index_name: str, index_configuration: dict[str, Any]):
-        index_meta_raw = super().get_index_meta(index_name)
-        from ..runtime.langchain.interfaces.llm_processor import add_documents
-        created_on = time.time()
-        metadata = {
-            "collection": index_name,
-            "type": IndexerKeywords.INDEX_META_TYPE.value,
-            "indexed": 0,
-            "state": IndexerKeywords.INDEX_META_IN_PROGRESS.value,
-            "index_configuration": index_configuration,
-            "created_on": created_on,
-            "updated_on": created_on,
-            "history": "[]",
-        }
-        index_meta_ids = None
-        #
-        if index_meta_raw:
-            history_raw = index_meta_raw.get("metadata", {}).get("history", "[]")
-            if isinstance(history_raw, str) and history_raw.strip():
-                try:
-                    history = json.loads(history_raw)
-                except (json.JSONDecodeError, TypeError):
-                    history = []
-            else:
-                history = []
-            new_history_item = {k: v for k, v in index_meta_raw.get("metadata", {}).items() if k != "history"}
-            history.append(new_history_item)
-            metadata["history"] = json.dumps(history)
-            index_meta_ids = [index_meta_raw.get("id")]
-        #
-        index_meta_doc = Document(page_content=f"{IndexerKeywords.INDEX_META_TYPE.value}_{index_name}", metadata=metadata)
-        add_documents(vectorstore=self.vectorstore, documents=[index_meta_doc], ids=index_meta_ids)
+        index_meta = super().get_index_meta(index_name)
+        if not index_meta:
+            self._log_tool_event(
+                f"There is no existing index_meta for collection '{index_name}'. Initializing it.",
+                tool_name="index_data"
+            )
+            from ..runtime.langchain.interfaces.llm_processor import add_documents
+            created_on = time.time()
+            metadata = {
+                "collection": index_name,
+                "type": IndexerKeywords.INDEX_META_TYPE.value,
+                "indexed": 0,
+                "updated": 0,
+                "state": IndexerKeywords.INDEX_META_IN_PROGRESS.value,
+                "index_configuration": index_configuration,
+                "created_on": created_on,
+                "updated_on": created_on,
+                "task_id": None,
+                "conversation_id": None,
+            }
+            metadata["history"] = json.dumps([metadata])
+            index_meta_doc = Document(page_content=f"{IndexerKeywords.INDEX_META_TYPE.value}_{index_name}", metadata=metadata)
+            add_documents(vectorstore=self.vectorstore, documents=[index_meta_doc])

     def index_meta_update(self, index_name: str, state: str, result: int):
         index_meta_raw = super().get_index_meta(index_name)
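
Vector-store metadata fields generally hold scalars, which is why the run history here is kept as a single JSON string inside the metadata rather than a nested list: the in-progress snapshot is written as the last history entry, then replaced in place once the run settles. A minimal stdlib round-trip sketch of that pattern (names illustrative, not the SDK API):

import json
import time

# hypothetical snapshot of one indexing run, mirroring the keys above
run = {"collection": "docs", "indexed": 0, "state": "in_progress",
       "created_on": time.time()}

# write: serialize the history list into a single string-valued field
metadata = dict(run)
metadata["history"] = json.dumps([run])

# update: parse the string back, overwrite the last (in-progress) entry,
# and re-serialize before storing the document again
history = json.loads(metadata.pop("history", "[]") or "[]")
run = {**run, "state": "completed", "updated_on": time.time()}
history[-1] = run
metadata = {**run, "history": json.dumps(history)}
print(metadata["history"])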
@@ -495,12 +493,75 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
         #
         if index_meta_raw:
             metadata = copy.deepcopy(index_meta_raw.get("metadata", {}))
-            metadata["indexed"] = result
+            metadata["indexed"] = self.get_indexed_count(index_name)
+            metadata["updated"] = result
             metadata["state"] = state
             metadata["updated_on"] = time.time()
+            #
+            history_raw = metadata.pop("history", "[]")
+            try:
+                history = json.loads(history_raw) if history_raw.strip() else []
+                # replace the last history item with updated metadata
+                if history and isinstance(history, list):
+                    history[-1] = metadata
+                else:
+                    history = [metadata]
+            except (json.JSONDecodeError, TypeError):
+                logger.warning(f"Failed to load index history: {history_raw}. Create new with only current item.")
+                history = [metadata]
+            #
+            metadata["history"] = json.dumps(history)
             index_meta_doc = Document(page_content=index_meta_raw.get("content", ""), metadata=metadata)
             add_documents(vectorstore=self.vectorstore, documents=[index_meta_doc], ids=[index_meta_raw.get("id")])

+    def _emit_index_event(self, index_name: str, error: Optional[str] = None):
+        """
+        Emit custom event for index data operation.
+
+        Args:
+            index_name: The name of the index
+            error: Error message if the operation failed, None otherwise
+        """
+        index_meta = super().get_index_meta(index_name)
+
+        if not index_meta:
+            logger.warning(
+                f"No index_meta found for index '{index_name}'. "
+                "Cannot emit index event."
+            )
+            return
+
+        metadata = index_meta.get("metadata", {})
+
+        # Determine if this is a reindex operation
+        history_raw = metadata.get("history", "[]")
+        try:
+            history = json.loads(history_raw) if history_raw.strip() else []
+            is_reindex = len(history) > 1
+        except (json.JSONDecodeError, TypeError):
+            is_reindex = False
+
+        # Build event message
+        event_data = {
+            "id": index_meta.get("id"),
+            "index_name": index_name,
+            "state": metadata.get("state"),
+            "error": error,
+            "reindex": is_reindex,
+            "indexed": metadata.get("indexed", 0),
+            "updated": metadata.get("updated", 0),
+        }
+
+        # Emit the event
+        try:
+            dispatch_custom_event("index_data_status", event_data)
+            logger.debug(
+                f"Emitted index_data_status event for index "
+                f"'{index_name}': {event_data}"
+            )
+        except Exception as e:
+            logger.warning(f"Failed to emit index_data_status event: {e}")
+
     def get_available_tools(self):
         """
         Returns the standardized vector search tools (search operations only).
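
dispatch_custom_event only works when the tool runs inside a runnable context with callbacks attached; a handler implementing on_custom_event can then surface the index_data_status payload, e.g. for progress reporting. A minimal consumer sketch (handler and invoke call are illustrative, assuming a langchain-core recent enough to ship custom events, 0.2.15+):

from typing import Any
from langchain_core.callbacks import BaseCallbackHandler

class IndexStatusHandler(BaseCallbackHandler):
    """Illustrative consumer for the index_data_status events emitted above."""

    def on_custom_event(self, name: str, data: Any, **kwargs: Any) -> None:
        if name != "index_data_status":
            return
        if data.get("error"):
            print(f"index {data['index_name']} failed: {data['error']}")
        else:
            print(f"index {data['index_name']}: state={data['state']}, "
                  f"updated={data['updated']}, indexed={data['indexed']}")

# attached via the run config when invoking a tool or agent, e.g.:
# tool.invoke(args, config={"callbacks": [IndexStatusHandler()]})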
@@ -554,6 +615,7 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
             "mode": "list_collections",
             "ref": self.list_collections,
             "description": self.list_collections.__doc__,
-            "args_schema": create_model("ListCollectionsParams") # No parameters
+            # No parameters
+            "args_schema": create_model("ListCollectionsParams")
         },
-    ]
+    ]
alita_sdk/tools/bitbucket/__init__.py
@@ -61,6 +61,7 @@ class AlitaBitbucketToolkit(BaseToolkit):
         'metadata':
             {
                 "label": "Bitbucket", "icon_url": "bitbucket-icon.svg",
+                "max_length": AlitaBitbucketToolkit.toolkit_max_length,
                 "categories": ["code repositories"],
                 "extra_categories": ["bitbucket", "git", "repository", "code", "version control"],
             }
alita_sdk/tools/chunkers/sematic/proposal_chunker.py
@@ -6,7 +6,7 @@ from langchain_core.prompts import ChatPromptTemplate
 from langchain.text_splitter import TokenTextSplitter

 from typing import Optional, List
-from langchain_core.pydantic_v1 import BaseModel
+from pydantic import BaseModel
 from ..utils import tiktoken_length

 logger = getLogger(__name__)
alita_sdk/tools/code/sonar/__init__.py
@@ -29,7 +29,7 @@ class SonarToolkit(BaseToolkit):
         SonarToolkit.toolkit_max_length = get_max_toolkit_length(selected_tools)
         return create_model(
             name,
-            sonar_project_name=(str, Field(description="Project name of the desired repository", json_schema_extra={'toolkit_name': True, 'max_toolkit_length': SonarToolkit.toolkit_max_length})),
+            sonar_project_name=(str, Field(description="Project name of the desired repository")),
             sonar_configuration=(SonarConfiguration, Field(description="Sonar Configuration", json_schema_extra={'configuration_types': ['sonar']})),
             selected_tools=(List[Literal[tuple(selected_tools)]], Field(default=[], json_schema_extra={'args_schemas': selected_tools})),
             __config__=ConfigDict(json_schema_extra=
alita_sdk/tools/code_indexer_toolkit.py
@@ -1,5 +1,6 @@
 import ast
 import fnmatch
+import json
 import logging
 from typing import Optional, List, Generator

@@ -21,7 +22,7 @@ class CodeIndexerToolkit(BaseIndexerToolkit):
         return self.vector_adapter.get_code_indexed_data(self, index_name)

     def key_fn(self, document: Document):
-        return document.metadata.get('id')
+        return document.metadata.get("filename")

     def compare_fn(self, document: Document, idx_data):
         return (document.metadata.get('commit_hash') and
@@ -46,7 +47,7 @@ class CodeIndexerToolkit(BaseIndexerToolkit):
         )

     def _extend_data(self, documents: Generator[Document, None, None]):
-        yield from parse_code_files_for_db(documents)
+        yield from documents

     def _index_tool_params(self):
         """Return the parameters for indexing data."""
@@ -117,6 +118,15 @@ class CodeIndexerToolkit(BaseIndexerToolkit):
                 if not file_content:
                     # empty file, skip
                     continue
+                #
+                # ensure file content is a string
+                if isinstance(file_content, bytes):
+                    file_content = file_content.decode("utf-8", errors="ignore")
+                elif isinstance(file_content, dict) and file.endswith('.json'):
+                    file_content = json.dumps(file_content)
+                elif not isinstance(file_content, str):
+                    file_content = str(file_content)
+                #
                 # hash the file content to ensure uniqueness
                 import hashlib
                 file_hash = hashlib.sha256(file_content.encode("utf-8")).hexdigest()
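
Loaders can hand back bytes, parsed JSON dicts, or other objects; coercing everything to str before hashing keeps the SHA-256 de-duplication key well-defined. A standalone illustration of the same coercion rules:

import hashlib
import json

def normalize(file_name: str, content) -> str:
    # mirrors the branches above, extracted for illustration
    if isinstance(content, bytes):
        return content.decode("utf-8", errors="ignore")
    if isinstance(content, dict) and file_name.endswith(".json"):
        return json.dumps(content)
    return content if isinstance(content, str) else str(content)

for name, raw in [("a.py", b"print(1)"), ("b.json", {"k": 1}), ("c.txt", 42)]:
    text = normalize(name, raw)
    print(name, hashlib.sha256(text.encode("utf-8")).hexdigest()[:12])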
@@ -127,7 +137,7 @@ class CodeIndexerToolkit(BaseIndexerToolkit):
                     self._log_tool_event(message=f"{idx} out of {total_files} files have been read", tool_name="loader")
                 self._log_tool_event(message=f"{len(_files)} have been read", tool_name="loader")

-        return file_content_generator()
+        return parse_code_files_for_db(file_content_generator())

     def __handle_get_files(self, path: str, branch: str):
         """
alita_sdk/tools/confluence/__init__.py
@@ -67,8 +67,7 @@ class ConfluenceToolkit(BaseToolkit):

         model = create_model(
             name,
-            space=(str, Field(description="Space", json_schema_extra={'toolkit_name': True,
-                                                                      'max_toolkit_length': ConfluenceToolkit.toolkit_max_length})),
+            space=(str, Field(description="Space")),
             cloud=(bool, Field(description="Hosting Option", json_schema_extra={'configuration': True})),
             limit=(int, Field(description="Pages limit per request", default=5)),
             labels=(Optional[str], Field(
@@ -95,6 +94,7 @@ class ConfluenceToolkit(BaseToolkit):
             'metadata': {
                 "label": "Confluence",
                 "icon_url": None,
+                "max_length": ConfluenceToolkit.toolkit_max_length,
                 "categories": ["documentation"],
                 "extra_categories": ["confluence", "wiki", "knowledge base", "documentation", "atlassian"]
             }
alita_sdk/tools/confluence/api_wrapper.py
@@ -7,12 +7,14 @@ from json import JSONDecodeError
 from typing import Optional, List, Any, Dict, Callable, Generator, Literal

 import requests
+from atlassian.errors import ApiError
 from langchain_community.document_loaders.confluence import ContentFormat
 from langchain_core.documents import Document
 from langchain_core.messages import HumanMessage
 from langchain_core.tools import ToolException
 from markdownify import markdownify
 from pydantic import Field, PrivateAttr, model_validator, create_model, SecretStr
+from requests import HTTPError
 from tenacity import retry, stop_after_attempt, wait_exponential, before_sleep_log

 from alita_sdk.tools.non_code_indexer_toolkit import NonCodeIndexerToolkit
@@ -194,6 +196,7 @@ class ConfluenceAPIWrapper(NonCodeIndexerToolkit):
     keep_markdown_format: Optional[bool] = True
     ocr_languages: Optional[str] = None
     keep_newlines: Optional[bool] = True
+    _errors: Optional[list[str]] = None
     _image_cache: ImageDescriptionCache = PrivateAttr(default_factory=ImageDescriptionCache)

     @model_validator(mode='before')
@@ -498,7 +501,9 @@ class ConfluenceAPIWrapper(NonCodeIndexerToolkit):
         restrictions = self.client.get_all_restrictions_for_content(page["id"])

         return (
-            page["status"] == "current"
+            (page["status"] == "current"
+             # allow user to see archived content if needed
+             or page["status"] == "archived")
             and not restrictions["read"]["restrictions"]["user"]["results"]
             and not restrictions["read"]["restrictions"]["group"]["results"]
         )
@@ -518,18 +523,35 @@ class ConfluenceAPIWrapper(NonCodeIndexerToolkit):
             ),
             before_sleep=before_sleep_log(logger, logging.WARNING),
         )(self.client.get_page_by_id)
-        page = get_page(
-            page_id=page_id, expand=f"{self.content_format.value},version"
-        )
-        if not self.include_restricted_content and not self.is_public_page(page):
-            continue
+        try:
+            page = get_page(
+                page_id=page_id, expand=f"{self.content_format.value},version"
+            )
+        except (ApiError, HTTPError) as e:
+            logger.error(f"Error fetching page with ID {page_id}: {e}")
+            page_content_temp = f"Confluence API Error: cannot fetch the page with ID {page_id}: {e}"
+            # store errors
+            if self._errors is None:
+                self._errors = []
+            self._errors.append(page_content_temp)
+            return Document(page_content=page_content_temp,
+                            metadata={})
+        # TODO: update on toolkit advanced settings level as a separate feature
+        # if not self.include_restricted_content and not self.is_public_page(page):
+        #     continue
         yield self.process_page(page, skip_images)

+    def _log_errors(self):
+        """ Log errors encountered during toolkit execution. """
+        if self._errors:
+            logger.info(f"Errors encountered during toolkit execution: {self._errors}")
+
     def read_page_by_id(self, page_id: str, skip_images: bool = False):
         """Reads a page by its id in the Confluence space. If id is not available, but there is a title - use get_page_id first."""
         result = list(self.get_pages_by_id([page_id], skip_images))
         if not result:
-            "Page not found"
+            return f"Pages not found. Errors: {self._errors}" if self._errors \
+                else "Pages not found or you do not have access to them."
         return result[0].page_content
         # return self._strip_base64_images(result[0].page_content) if skip_images else result[0].page_content

alita_sdk/tools/confluence/loader.py
@@ -3,6 +3,7 @@ from typing import Optional, List
 from logging import getLogger

 import requests
+from langchain_core.documents import Document

 logger = getLogger(__name__)
 from PIL import Image
@@ -193,6 +194,15 @@ class AlitaConfluenceLoader(ConfluenceLoader):
         else:
             return super().process_image(link, ocr_languages)

+    def process_page(self, page: dict, include_attachments: bool, include_comments: bool, include_labels: bool,
+                     content_format: ContentFormat, ocr_languages: Optional[str] = None,
+                     keep_markdown_format: Optional[bool] = False, keep_newlines: bool = False) -> Document:
+        if not page.get("title"):
+            # if 'include_restricted_content' set to True, draft pages are loaded and can have no title
+            page["title"] = "Untitled"
+        return super().process_page(page, include_attachments, include_comments, include_labels, content_format,
+                                    ocr_languages, keep_markdown_format, keep_newlines)
+
     # TODO review usage
     # def process_svg(
     #     self,
alita_sdk/tools/github/__init__.py
@@ -53,6 +53,7 @@ class AlitaGitHubToolkit(BaseToolkit):
             'metadata': {
                 "label": "GitHub",
                 "icon_url": None,
+                "max_length": AlitaGitHubToolkit.toolkit_max_length,
                 "categories": ["code repositories"],
                 "extra_categories": ["github", "git", "repository", "code", "version control"],
             },
@@ -62,8 +63,7 @@ class AlitaGitHubToolkit(BaseToolkit):
                                                     json_schema_extra={'configuration_types': ['github']})),
             pgvector_configuration=(Optional[PgVectorConfiguration], Field(description="PgVector configuration", default=None,
                                                                            json_schema_extra={'configuration_types': ['pgvector']})),
-            repository=(str, Field(description="Github repository", json_schema_extra={'toolkit_name': True,
-                                                                                       'max_toolkit_length': AlitaGitHubToolkit.toolkit_max_length})),
+            repository=(str, Field(description="Github repository")),
             active_branch=(Optional[str], Field(description="Active branch", default="main")),
             base_branch=(Optional[str], Field(description="Github Base branch", default="main")),
             # embedder settings
alita_sdk/tools/gitlab/__init__.py
@@ -43,7 +43,7 @@ class AlitaGitlabToolkit(BaseToolkit):
         AlitaGitlabToolkit.toolkit_max_length = get_max_toolkit_length(selected_tools)
         return create_model(
             name,
-            repository=(str, Field(description="GitLab repository", json_schema_extra={'toolkit_name': True, 'max_toolkit_length': AlitaGitlabToolkit.toolkit_max_length})),
+            repository=(str, Field(description="GitLab repository")),
             gitlab_configuration=(GitlabConfiguration, Field(description="GitLab configuration", json_schema_extra={'configuration_types': ['gitlab']})),
             branch=(str, Field(description="Main branch", default="main")),
             # indexer settings
@@ -57,6 +57,7 @@ class AlitaGitlabToolkit(BaseToolkit):
             'metadata': {
                 "label": "GitLab",
                 "icon_url": None,
+                "max_length": AlitaGitlabToolkit.toolkit_max_length,
                 "categories": ["code repositories"],
                 "extra_categories": ["gitlab", "git", "repository", "code", "version control"],
             }
alita_sdk/tools/gitlab/api_wrapper.py
@@ -117,7 +117,11 @@ class GitLabAPIWrapper(CodeIndexerToolkit):

     @model_validator(mode='before')
     @classmethod
-    def validate_toolkit(cls, values: Dict) -> Dict:
+    def validate_toolkit_before(cls, values: Dict) -> Dict:
+        return super().validate_toolkit(values)
+
+    @model_validator(mode='after')
+    def validate_toolkit(self):
         try:
             import gitlab
         except ImportError:
@@ -125,17 +129,17 @@ class GitLabAPIWrapper(CodeIndexerToolkit):
                 "python-gitlab is not installed. "
                 "Please install it with `pip install python-gitlab`"
             )
-        values['repository'] = cls._sanitize_url(values['repository'])
+        self.repository = self._sanitize_url(self.repository)
         g = gitlab.Gitlab(
-            url=cls._sanitize_url(values['url']),
-            private_token=values['private_token'],
+            url=self._sanitize_url(self.url),
+            private_token=self.private_token.get_secret_value(),
             keep_base_url=True,
         )

         g.auth()
-        cls._git = g
-        cls._active_branch = values.get('branch')
-        return super().validate_toolkit(values)
+        self._git = g
+        self._active_branch = self.branch
+        return self

     @property
     def repo_instance(self):
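
The split mirrors pydantic v2's two validation phases: a mode='before' hook still sees the raw input dict (so the parent's dict-based validate_toolkit keeps working), while the new mode='after' hook runs on the typed instance, where private_token is already a SecretStr and needs .get_secret_value(). A minimal sketch of the pattern with an illustrative model:

from pydantic import BaseModel, SecretStr, model_validator

class GitLabConfigSketch(BaseModel):
    url: str
    private_token: SecretStr

    @model_validator(mode='before')
    @classmethod
    def normalize(cls, values: dict) -> dict:
        # runs on the raw input dict, before field coercion
        values["url"] = values["url"].rstrip("/")
        return values

    @model_validator(mode='after')
    def connect(self) -> "GitLabConfigSketch":
        # runs on the typed instance: private_token is already a SecretStr
        assert self.private_token.get_secret_value()
        return self

cfg = GitLabConfigSketch(url="https://gitlab.example.com/", private_token="token")
print(cfg.url)  # https://gitlab.example.com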
alita_sdk/tools/gitlab_org/__init__.py
@@ -30,8 +30,6 @@ class AlitaGitlabSpaceToolkit(BaseToolkit):
         AlitaGitlabSpaceToolkit.toolkit_max_length = get_max_toolkit_length(selected_tools)
         return create_model(
             name,
-            name=(str, Field(description="Toolkit name", json_schema_extra={'toolkit_name': True,
-                                                                            'max_toolkit_length': AlitaGitlabSpaceToolkit.toolkit_max_length})),
             gitlab_configuration=(GitlabConfiguration, Field(description="GitLab configuration",
                                                              json_schema_extra={
                                                                  'configuration_types': ['gitlab']})),
@@ -46,6 +44,7 @@ class AlitaGitlabSpaceToolkit(BaseToolkit):
             'metadata': {
                 "label": "GitLab Org",
                 "icon_url": None,
+                "max_length": AlitaGitlabSpaceToolkit.toolkit_max_length,
                 "categories": ["code repositories"],
                 "extra_categories": ["gitlab", "git", "repository", "code", "version control"],
             }
alita_sdk/tools/google_places/__init__.py
@@ -30,7 +30,7 @@ class GooglePlacesToolkit(BaseToolkit):
         GooglePlacesToolkit.toolkit_max_length = get_max_toolkit_length(selected_tools)
         return create_model(
             name,
-            results_count=(Optional[int], Field(description="Results number to show", default=None, json_schema_extra={'toolkit_name': True, 'max_toolkit_length': GooglePlacesToolkit.toolkit_max_length})),
+            results_count=(Optional[int], Field(description="Results number to show", default=None)),
             google_places_configuration=(GooglePlacesConfiguration, Field(description="Google Places Configuration", json_schema_extra={'configuration_types': ['google_places']})),
             selected_tools=(List[Literal[tuple(selected_tools)]], Field(default=[], json_schema_extra={'args_schemas': selected_tools})),
             __config__=ConfigDict(json_schema_extra=
@@ -38,6 +38,7 @@ class GooglePlacesToolkit(BaseToolkit):
             'metadata':
                 {
                     "label": "Google Places", "icon_url": "gplaces-icon.svg",
+                    "max_length": GooglePlacesToolkit.toolkit_max_length,
                     "categories": ["other"],
                     "extra_categories": ["google", "places", "maps", "location",
                                          "geolocation"],
alita_sdk/tools/jira/__init__.py
@@ -89,6 +89,7 @@ class JiraToolkit(BaseToolkit):
             'metadata': {
                 "label": "Jira",
                 "icon_url": "jira-icon.svg",
+                "max_length": JiraToolkit.toolkit_max_length,
                 "categories": ["project management"],
                 "extra_categories": ["jira", "atlassian", "issue tracking", "project management", "task management"],
             }
alita_sdk/tools/jira/api_wrapper.py
@@ -563,7 +563,7 @@ class JiraApiWrapper(NonCodeIndexerToolkit):
         Use the appropriate issue link type (e.g., "Test", "Relates", "Blocks").
         If we use "Test" linktype, the test is inward issue, the story/other issue is outward issue.."""

-        comment = "This test is linked to the story."
+        comment = f"Issue {inward_issue_key} was linked to {outward_issue_key}."
         comment_body = {"content": [{"content": [{"text": comment,"type": "text"}],"type": "paragraph"}],"type": "doc","version": 1} if self.api_version == "3" else comment
         link_data = {
             "type": {"name": f"{linktype}"},
alita_sdk/tools/memory/__init__.py
@@ -61,7 +61,7 @@ class MemoryToolkit(BaseToolkit):

         return create_model(
             'memory',
-            namespace=(str, Field(description="Memory namespace", json_schema_extra={'toolkit_name': True})),
+            namespace=(str, Field(description="Memory namespace")),
             pgvector_configuration=(PgVectorConfiguration, Field(description="PgVector Configuration",
                                                                  json_schema_extra={
                                                                      'configuration_types': ['pgvector']})),