alita-sdk 0.3.365__py3-none-any.whl → 0.3.462__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of alita-sdk might be problematic.
- alita_sdk/cli/__init__.py +10 -0
- alita_sdk/cli/__main__.py +17 -0
- alita_sdk/cli/agent_executor.py +144 -0
- alita_sdk/cli/agent_loader.py +197 -0
- alita_sdk/cli/agent_ui.py +166 -0
- alita_sdk/cli/agents.py +1069 -0
- alita_sdk/cli/callbacks.py +576 -0
- alita_sdk/cli/cli.py +159 -0
- alita_sdk/cli/config.py +153 -0
- alita_sdk/cli/formatting.py +182 -0
- alita_sdk/cli/mcp_loader.py +315 -0
- alita_sdk/cli/toolkit.py +330 -0
- alita_sdk/cli/toolkit_loader.py +55 -0
- alita_sdk/cli/tools/__init__.py +9 -0
- alita_sdk/cli/tools/filesystem.py +905 -0
- alita_sdk/configurations/bitbucket.py +95 -0
- alita_sdk/configurations/confluence.py +96 -1
- alita_sdk/configurations/gitlab.py +79 -0
- alita_sdk/configurations/jira.py +103 -0
- alita_sdk/configurations/testrail.py +88 -0
- alita_sdk/configurations/xray.py +93 -0
- alita_sdk/configurations/zephyr_enterprise.py +93 -0
- alita_sdk/configurations/zephyr_essential.py +75 -0
- alita_sdk/runtime/clients/artifact.py +1 -1
- alita_sdk/runtime/clients/client.py +47 -10
- alita_sdk/runtime/clients/mcp_discovery.py +342 -0
- alita_sdk/runtime/clients/mcp_manager.py +262 -0
- alita_sdk/runtime/clients/sandbox_client.py +373 -0
- alita_sdk/runtime/langchain/assistant.py +70 -41
- alita_sdk/runtime/langchain/constants.py +6 -1
- alita_sdk/runtime/langchain/document_loaders/AlitaDocxMammothLoader.py +315 -3
- alita_sdk/runtime/langchain/document_loaders/AlitaJSONLoader.py +4 -1
- alita_sdk/runtime/langchain/document_loaders/constants.py +73 -100
- alita_sdk/runtime/langchain/langraph_agent.py +164 -38
- alita_sdk/runtime/langchain/utils.py +43 -7
- alita_sdk/runtime/models/mcp_models.py +61 -0
- alita_sdk/runtime/toolkits/__init__.py +24 -0
- alita_sdk/runtime/toolkits/application.py +8 -1
- alita_sdk/runtime/toolkits/artifact.py +5 -6
- alita_sdk/runtime/toolkits/mcp.py +895 -0
- alita_sdk/runtime/toolkits/tools.py +140 -50
- alita_sdk/runtime/tools/__init__.py +7 -2
- alita_sdk/runtime/tools/application.py +7 -0
- alita_sdk/runtime/tools/function.py +94 -5
- alita_sdk/runtime/tools/graph.py +10 -4
- alita_sdk/runtime/tools/image_generation.py +104 -8
- alita_sdk/runtime/tools/llm.py +204 -114
- alita_sdk/runtime/tools/mcp_inspect_tool.py +284 -0
- alita_sdk/runtime/tools/mcp_remote_tool.py +166 -0
- alita_sdk/runtime/tools/mcp_server_tool.py +3 -1
- alita_sdk/runtime/tools/sandbox.py +180 -79
- alita_sdk/runtime/tools/vectorstore.py +22 -21
- alita_sdk/runtime/tools/vectorstore_base.py +79 -26
- alita_sdk/runtime/utils/mcp_oauth.py +164 -0
- alita_sdk/runtime/utils/mcp_sse_client.py +405 -0
- alita_sdk/runtime/utils/streamlit.py +34 -3
- alita_sdk/runtime/utils/toolkit_utils.py +14 -4
- alita_sdk/runtime/utils/utils.py +1 -0
- alita_sdk/tools/__init__.py +48 -31
- alita_sdk/tools/ado/repos/__init__.py +1 -0
- alita_sdk/tools/ado/test_plan/__init__.py +1 -1
- alita_sdk/tools/ado/wiki/__init__.py +1 -5
- alita_sdk/tools/ado/work_item/__init__.py +1 -5
- alita_sdk/tools/ado/work_item/ado_wrapper.py +17 -8
- alita_sdk/tools/base_indexer_toolkit.py +194 -112
- alita_sdk/tools/bitbucket/__init__.py +1 -0
- alita_sdk/tools/chunkers/sematic/proposal_chunker.py +1 -1
- alita_sdk/tools/code/sonar/__init__.py +1 -1
- alita_sdk/tools/code_indexer_toolkit.py +15 -5
- alita_sdk/tools/confluence/__init__.py +2 -2
- alita_sdk/tools/confluence/api_wrapper.py +110 -63
- alita_sdk/tools/confluence/loader.py +10 -0
- alita_sdk/tools/elitea_base.py +22 -22
- alita_sdk/tools/github/__init__.py +2 -2
- alita_sdk/tools/gitlab/__init__.py +2 -1
- alita_sdk/tools/gitlab/api_wrapper.py +11 -7
- alita_sdk/tools/gitlab_org/__init__.py +1 -2
- alita_sdk/tools/google_places/__init__.py +2 -1
- alita_sdk/tools/jira/__init__.py +1 -0
- alita_sdk/tools/jira/api_wrapper.py +1 -1
- alita_sdk/tools/memory/__init__.py +1 -1
- alita_sdk/tools/non_code_indexer_toolkit.py +2 -2
- alita_sdk/tools/openapi/__init__.py +10 -1
- alita_sdk/tools/pandas/__init__.py +1 -1
- alita_sdk/tools/postman/__init__.py +2 -1
- alita_sdk/tools/postman/api_wrapper.py +18 -8
- alita_sdk/tools/postman/postman_analysis.py +8 -1
- alita_sdk/tools/pptx/__init__.py +2 -2
- alita_sdk/tools/qtest/__init__.py +3 -3
- alita_sdk/tools/qtest/api_wrapper.py +1708 -76
- alita_sdk/tools/rally/__init__.py +1 -2
- alita_sdk/tools/report_portal/__init__.py +1 -0
- alita_sdk/tools/salesforce/__init__.py +1 -0
- alita_sdk/tools/servicenow/__init__.py +2 -3
- alita_sdk/tools/sharepoint/__init__.py +1 -0
- alita_sdk/tools/sharepoint/api_wrapper.py +125 -34
- alita_sdk/tools/sharepoint/authorization_helper.py +191 -1
- alita_sdk/tools/sharepoint/utils.py +8 -2
- alita_sdk/tools/slack/__init__.py +1 -0
- alita_sdk/tools/sql/__init__.py +2 -1
- alita_sdk/tools/sql/api_wrapper.py +71 -23
- alita_sdk/tools/testio/__init__.py +1 -0
- alita_sdk/tools/testrail/__init__.py +1 -3
- alita_sdk/tools/utils/__init__.py +17 -0
- alita_sdk/tools/utils/content_parser.py +35 -24
- alita_sdk/tools/vector_adapters/VectorStoreAdapter.py +67 -21
- alita_sdk/tools/xray/__init__.py +2 -1
- alita_sdk/tools/zephyr/__init__.py +2 -1
- alita_sdk/tools/zephyr_enterprise/__init__.py +1 -0
- alita_sdk/tools/zephyr_essential/__init__.py +1 -0
- alita_sdk/tools/zephyr_scale/__init__.py +1 -0
- alita_sdk/tools/zephyr_squad/__init__.py +1 -0
- {alita_sdk-0.3.365.dist-info → alita_sdk-0.3.462.dist-info}/METADATA +8 -2
- {alita_sdk-0.3.365.dist-info → alita_sdk-0.3.462.dist-info}/RECORD +118 -93
- alita_sdk-0.3.462.dist-info/entry_points.txt +2 -0
- {alita_sdk-0.3.365.dist-info → alita_sdk-0.3.462.dist-info}/WHEEL +0 -0
- {alita_sdk-0.3.365.dist-info → alita_sdk-0.3.462.dist-info}/licenses/LICENSE +0 -0
- {alita_sdk-0.3.365.dist-info → alita_sdk-0.3.462.dist-info}/top_level.txt +0 -0
```diff
@@ -24,11 +24,6 @@ class AzureDevOpsWikiToolkit(BaseToolkit):
         AzureDevOpsWikiToolkit.toolkit_max_length = get_max_toolkit_length(selected_tools)
         m = create_model(
             name_alias,
-            name=(str, Field(description="Toolkit name",
-                             json_schema_extra={
-                                 'toolkit_name': True,
-                                 'max_toolkit_length': AzureDevOpsWikiToolkit.toolkit_max_length})
-                 ),
             ado_configuration=(AdoConfiguration, Field(description="Ado configuration", json_schema_extra={'configuration_types': ['ado']})),
             # indexer settings
             pgvector_configuration=(Optional[PgVectorConfiguration], Field(default=None,
@@ -42,6 +37,7 @@ class AzureDevOpsWikiToolkit(BaseToolkit):
                 'metadata': {
                     "label": "ADO wiki",
                     "icon_url": "ado-wiki-icon.svg",
+                    "max_length": AzureDevOpsWikiToolkit.toolkit_max_length,
                     "categories": ["documentation"],
                     "extra_categories": ["knowledge base", "documentation management", "wiki"],
                     "sections": {
@@ -23,11 +23,6 @@ class AzureDevOpsWorkItemsToolkit(BaseToolkit):
         AzureDevOpsWorkItemsToolkit.toolkit_max_length = get_max_toolkit_length(selected_tools)
         m = create_model(
             name,
-            name=(str, Field(description="Toolkit name",
-                             json_schema_extra={
-                                 'toolkit_name': True,
-                                 'max_toolkit_length': AzureDevOpsWorkItemsToolkit.toolkit_max_length})
-                 ),
             ado_configuration=(AdoConfiguration, Field(description="Ado Work Item configuration", json_schema_extra={'configuration_types': ['ado']})),
             limit=(Optional[int], Field(description="ADO plans limit used for limitation of the list with results", default=5)),
             selected_tools=(List[Literal[tuple(selected_tools)]], Field(default=[], json_schema_extra={'args_schemas': selected_tools})),
@@ -42,6 +37,7 @@ class AzureDevOpsWorkItemsToolkit(BaseToolkit):
                 'metadata': {
                     "label": "ADO boards",
                     "icon_url": "ado-boards-icon.svg",
+                    "max_length": AzureDevOpsWorkItemsToolkit.toolkit_max_length,
                     "categories": ["project management"],
                     "extra_categories": ["work item management", "issue tracking", "agile boards"],
                     "sections": {
```
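Both ADO toolkit schemas get the same treatment: the explicit `name` field (which carried `toolkit_name` and `max_toolkit_length` through `json_schema_extra`) is dropped from the generated config model, and the computed length limit is instead published as `max_length` inside the toolkit's `metadata`. A minimal sketch of that pattern with pydantic's `create_model`, assuming made-up tool names and a 64-character limit (not the SDK's actual values):

```python
# Illustrative sketch only, not the SDK's code: build a toolkit config model with
# create_model and publish the name-length limit via json_schema_extra metadata.
from typing import List, Literal
from pydantic import ConfigDict, Field, create_model

selected_tools = ["get_wiki_page", "modify_wiki_page"]  # assumed tool names
toolkit_max_length = 64                                  # assumed limit

ToolkitConfig = create_model(
    "AzureDevOpsWikiConfig",
    selected_tools=(List[Literal[tuple(selected_tools)]], Field(default=[])),
    __config__=ConfigDict(json_schema_extra={
        "metadata": {
            "label": "ADO wiki",
            "max_length": toolkit_max_length,  # read by schema consumers, not enforced
        }
    }),
)

schema = ToolkitConfig.model_json_schema()
print(schema["metadata"]["label"], schema["metadata"]["max_length"])
```

Because the limit now lives in the schema's metadata rather than in a validated field, it is advisory information for schema consumers instead of a validation constraint.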
```diff
@@ -329,11 +329,14 @@ class AzureDevOpsApiWrapper(NonCodeIndexerToolkit):
         parsed_item.update(fields_data)
 
         # extract relations if any
-        relations_data =
+        relations_data = None
+        if expand and str(expand).lower() in ("relations", "all"):
+            try:
+                relations_data = getattr(work_item, 'relations', None)
+            except KeyError:
+                relations_data = None
         if relations_data:
-            parsed_item['relations'] = []
-            for relation in relations_data:
-                parsed_item['relations'].append(relation.as_dict())
+            parsed_item['relations'] = [relation.as_dict() for relation in relations_data]
 
         if parse_attachments:
             # describe images in work item fields if present
@@ -344,13 +347,19 @@ class AzureDevOpsApiWrapper(NonCodeIndexerToolkit):
                 for img in images:
                     src = img.get('src')
                     if src:
-                        description = self.parse_attachment_by_url(src, image_description_prompt)
+                        description = self.parse_attachment_by_url(src, image_description_prompt=image_description_prompt)
                         img['image-description'] = description
                 parsed_item[field_name] = str(soup)
             # parse attached documents if present
-
-
-
+            for relation in parsed_item.get('relations', []):
+                # Only process actual file attachments
+                if relation.get('rel') == 'AttachedFile':
+                    file_name = relation.get('attributes', {}).get('name')
+                    if file_name:
+                        try:
+                            relation['content'] = self.parse_attachment_by_url(relation['url'], file_name, image_description_prompt=image_description_prompt)
+                        except Exception as att_e:
+                            logger.warning(f"Failed to parse attachment {file_name}: {att_e}")
 
 
         return parsed_item
```
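The added block above enriches only `AttachedFile` relations and logs per-attachment failures instead of aborting the whole work-item parse. A self-contained sketch of that pattern, with assumed data shapes and a stubbed `parse_attachment_by_url`:

```python
# Minimal sketch of the attachment-handling pattern; the parser is a stand-in.
import logging
from typing import Optional

logger = logging.getLogger(__name__)

def parse_attachment_by_url(url: str, file_name: str, image_description_prompt: Optional[str] = None) -> str:
    # stand-in for the wrapper's real downloader/parser
    return f"parsed {file_name} from {url}"

def attach_contents(parsed_item: dict, image_description_prompt: Optional[str] = None) -> dict:
    """Enrich AttachedFile relations with parsed content; skip failures instead of raising."""
    for relation in parsed_item.get("relations", []):
        if relation.get("rel") != "AttachedFile":
            continue  # ignore links, parent/child relations, etc.
        file_name = relation.get("attributes", {}).get("name")
        if not file_name:
            continue
        try:
            relation["content"] = parse_attachment_by_url(
                relation["url"], file_name, image_description_prompt=image_description_prompt
            )
        except Exception as err:
            logger.warning("Failed to parse attachment %s: %s", file_name, err)
    return parsed_item

item = {"relations": [{"rel": "AttachedFile", "url": "https://dev.azure.com/att/1",
                       "attributes": {"name": "spec.docx"}}]}
print(attach_contents(item)["relations"][0]["content"])
```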
```diff
@@ -1,41 +1,46 @@
+import copy
 import json
 import logging
 import time
 from typing import Any, Optional, List, Dict, Generator
 
+from langchain_core.callbacks import dispatch_custom_event
 from langchain_core.documents import Document
 from pydantic import create_model, Field, SecretStr
 
 from .utils.content_parser import file_extension_by_chunker, process_document_by_type
 from .vector_adapters.VectorStoreAdapter import VectorStoreAdapterFactory
+from ..runtime.langchain.document_loaders.constants import loaders_allowed_to_override
 from ..runtime.tools.vectorstore_base import VectorStoreWrapperBase
 from ..runtime.utils.utils import IndexerKeywords
 
 logger = logging.getLogger(__name__)
 
+DEFAULT_CUT_OFF = 0.2
+
 # Base Vector Store Schema Models
 BaseIndexParams = create_model(
     "BaseIndexParams",
-
+    index_name=(str, Field(description="Index name (max 7 characters)", min_length=1, max_length=7)),
 )
 
 RemoveIndexParams = create_model(
     "RemoveIndexParams",
-
+    index_name=(Optional[str], Field(description="Optional index name (max 7 characters)", default="", max_length=7)),
 )
 
 BaseSearchParams = create_model(
     "BaseSearchParams",
     query=(str, Field(description="Query text to search in the index")),
-
-        description="Optional
+    index_name=(Optional[str], Field(
+        description="Optional index name (max 7 characters). Leave empty to search across all datasets",
         default="", max_length=7)),
     filter=(Optional[dict | str], Field(
         description="Filter to apply to the search results. Can be a dictionary or a JSON string.",
         default={},
         examples=["{\"key\": \"value\"}", "{\"status\": \"active\"}"]
     )),
-    cut_off=(Optional[float], Field(description="Cut-off score for search results", default=
+    cut_off=(Optional[float], Field(description="Cut-off score for search results", default=DEFAULT_CUT_OFF, ge=0, le=1)),
     search_top=(Optional[int], Field(description="Number of top results to return", default=10)),
     full_text_search=(Optional[Dict[str, Any]], Field(
         description="Full text search parameters. Can be a dictionary with search options.",
@@ -58,14 +63,14 @@ BaseSearchParams = create_model(
 BaseStepbackSearchParams = create_model(
     "BaseStepbackSearchParams",
     query=(str, Field(description="Query text to search in the index")),
-
+    index_name=(Optional[str], Field(description="Optional index name (max 7 characters)", default="", max_length=7)),
     messages=(Optional[List], Field(description="Chat messages for stepback search context", default=[])),
     filter=(Optional[dict | str], Field(
         description="Filter to apply to the search results. Can be a dictionary or a JSON string.",
         default={},
         examples=["{\"key\": \"value\"}", "{\"status\": \"active\"}"]
     )),
-    cut_off=(Optional[float], Field(description="Cut-off score for search results", default=
+    cut_off=(Optional[float], Field(description="Cut-off score for search results", default=DEFAULT_CUT_OFF, ge=0, le=1)),
     search_top=(Optional[int], Field(description="Number of top results to return", default=10)),
     full_text_search=(Optional[Dict[str, Any]], Field(
         description="Full text search parameters. Can be a dictionary with search options.",
@@ -92,7 +97,7 @@ BaseIndexDataParams = create_model(
                                  description="Optional flag to enforce clean existing index before indexing new data")),
     progress_step=(Optional[int], Field(default=10, ge=0, le=100,
                                  description="Optional step size for progress reporting during indexing")),
-    chunking_config=(Optional[dict], Field(description="Chunking tool configuration",
+    chunking_config=(Optional[dict], Field(description="Chunking tool configuration", default=loaders_allowed_to_override)),
 )
 
 
@@ -108,7 +113,7 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
     def __init__(self, **kwargs):
         conn = kwargs.get('connection_string', None)
         connection_string = conn.get_secret_value() if isinstance(conn, SecretStr) else conn
-        collection_name = kwargs.get('
+        collection_name = kwargs.get('collection_schema')
 
         if 'vectorstore_type' not in kwargs:
             kwargs['vectorstore_type'] = 'PGVector'
```
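The new `index_name` parameters are ordinary pydantic fields, so the 7-character limit is enforced at validation time rather than by convention. A small sketch of that behaviour, with the field set reduced to `index_name` and an arbitrary model name:

```python
# Sketch of how a params model like BaseIndexParams validates index_name (pydantic v2).
from pydantic import Field, ValidationError, create_model

IndexParams = create_model(
    "IndexParams",
    index_name=(str, Field(description="Index name (max 7 characters)", min_length=1, max_length=7)),
)

print(IndexParams(index_name="docs").index_name)   # ok: 4 characters
try:
    IndexParams(index_name="too-long-name")          # 13 characters exceeds max_length=7
except ValidationError as err:
    print(err.errors()[0]["type"])                   # 'string_too_long'
```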
```diff
@@ -148,55 +153,48 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
         yield from ()
 
     def index_data(self, **kwargs):
-
-        collection_suffix = kwargs.get("collection_suffix")
-        progress_step = kwargs.get("progress_step")
+        index_name = kwargs.get("index_name")
         clean_index = kwargs.get("clean_index")
         chunking_tool = kwargs.get("chunking_tool")
         chunking_config = kwargs.get("chunking_config")
+        result = {"count": 0}
         #
-
-
-
-
-
-
-        "
-        "
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        index_meta_doc.metadata["state"] = IndexerKeywords.INDEX_META_COMPLETED.value
-        index_meta_doc.metadata["updated_on"] = time.time()
-        add_documents(vectorstore=self.vectorstore, documents=[index_meta_doc], ids=index_meta_ids)
-        #
-        return {"status": "ok", "message": f"successfully indexed {result} documents"}
+        try:
+            if clean_index:
+                self._clean_index(index_name)
+            #
+            self.index_meta_init(index_name, kwargs)
+            #
+            self._log_tool_event(f"Indexing data into collection with suffix '{index_name}'. It can take some time...")
+            self._log_tool_event(f"Loading the documents to index...{kwargs}")
+            documents = self._base_loader(**kwargs)
+            documents = list(documents)  # consume/exhaust generator to count items
+            documents_count = len(documents)
+            documents = (doc for doc in documents)
+            self._log_tool_event(f"Base documents were pre-loaded. "
+                                 f"Search for possible document duplicates and remove them from the indexing list...")
+            documents = self._reduce_duplicates(documents, index_name)
+            self._log_tool_event(f"Duplicates were removed. "
+                                 f"Processing documents to collect dependencies and prepare them for indexing...")
+            self._save_index_generator(documents, documents_count, chunking_tool, chunking_config, index_name=index_name, result=result)
+            #
+            results_count = result["count"]
+            self.index_meta_update(index_name, IndexerKeywords.INDEX_META_COMPLETED.value, results_count)
+            self._emit_index_event(index_name)
+            #
+            return {"status": "ok", "message": f"successfully indexed {results_count} documents" if results_count > 0
+                    else "no new documents to index"}
+        except Exception as e:
+            self.index_meta_update(index_name, IndexerKeywords.INDEX_META_FAILED.value, result["count"])
+            self._emit_index_event(index_name, error=str(e))
+            raise e
+
 
-    def _save_index_generator(self, base_documents: Generator[Document, None, None], base_total: int, chunking_tool, chunking_config,
+    def _save_index_generator(self, base_documents: Generator[Document, None, None], base_total: int, chunking_tool, chunking_config, result, index_name: Optional[str] = None):
         self._log_tool_event(f"Base documents are ready for indexing. {base_total} base documents in total to index.")
         from ..runtime.langchain.interfaces.llm_processor import add_documents
         #
         base_doc_counter = 0
-        total_counter = 0
         pg_vector_add_docs_chunk = []
         for base_doc in base_documents:
             base_doc_counter += 1
@@ -223,12 +221,12 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
             if 'id' not in doc.metadata or 'updated_on' not in doc.metadata:
                 logger.warning(f"Document is missing required metadata field 'id' or 'updated_on': {doc.metadata}")
             #
-            # if
-            if
+            # if index_name is provided, add it to metadata of each document
+            if index_name:
                 if not doc.metadata.get('collection'):
-                    doc.metadata['collection'] =
+                    doc.metadata['collection'] = index_name
                 else:
-                    doc.metadata['collection'] += f";{
+                    doc.metadata['collection'] += f";{index_name}"
             #
             try:
                 pg_vector_add_docs_chunk.append(doc)
```
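The rewritten `index_data` wraps the whole pipeline in a try/except: initialize the index-meta record, load and deduplicate documents, save them while a shared `result` counter is mutated by the saver, then mark the record completed (or failed) and emit a status event. A condensed, illustrative skeleton of that flow (method bodies are stubs, and state names are assumed strings):

```python
# Condensed sketch of the index_data control flow; not the SDK's implementation.
class IndexerSketch:
    def __init__(self):
        self.events = []

    # --- stubs standing in for the real helpers ---
    def index_meta_init(self, index_name, cfg): ...
    def index_meta_update(self, index_name, state, count): ...
    def _emit_index_event(self, index_name, error=None):
        self.events.append((index_name, error))
    def _base_loader(self, **kwargs):
        yield from ({"id": i} for i in range(3))
    def _reduce_duplicates(self, docs, index_name):
        yield from docs
    def _save_index_generator(self, docs, total, result):
        result["count"] = sum(1 for _ in docs)

    def index_data(self, index_name, **kwargs):
        result = {"count": 0}                              # shared counter mutated by the saver
        try:
            self.index_meta_init(index_name, kwargs)
            documents = list(self._base_loader(**kwargs))  # materialize to count items
            documents = self._reduce_duplicates(iter(documents), index_name)
            self._save_index_generator(documents, 0, result)
            self.index_meta_update(index_name, "completed", result["count"])
            self._emit_index_event(index_name)
            return {"status": "ok", "count": result["count"]}
        except Exception as exc:
            self.index_meta_update(index_name, "failed", result["count"])
            self._emit_index_event(index_name, error=str(exc))
            raise

print(IndexerSketch().index_data("docs"))   # {'status': 'ok', 'count': 3}
```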
```diff
@@ -244,10 +242,9 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
                 msg = f"Indexed base document #{base_doc_counter} out of {base_total} (with {dependent_docs_counter} dependencies)."
                 logger.debug(msg)
                 self._log_tool_event(msg)
-
+                result["count"] += dependent_docs_counter
         if pg_vector_add_docs_chunk:
             add_documents(vectorstore=self.vectorstore, documents=pg_vector_add_docs_chunk)
-        return total_counter
 
     def _apply_loaders_chunkers(self, documents: Generator[Document, None, None], chunking_tool: str=None, chunking_config=None) -> Generator[Document, None, None]:
         from ..tools.chunkers import __all__ as chunkers
@@ -307,12 +304,12 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
     def _reduce_duplicates(
             self,
             documents: Generator[Any, None, None],
-
+            index_name: str,
             log_msg: str = "Verification of documents to index started"
     ) -> Generator[Document, None, None]:
         """Generic duplicate reduction logic for documents."""
         self._log_tool_event(log_msg, tool_name="index_documents")
-        indexed_data = self._get_indexed_data(
+        indexed_data = self._get_indexed_data(index_name)
         indexed_keys = set(indexed_data.keys())
         if not indexed_keys:
             self._log_tool_event("Vectorstore is empty, indexing all incoming documents", tool_name="index_documents")
@@ -324,7 +321,7 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
         for document in documents:
             key = self.key_fn(document)
             key = key if isinstance(key, str) else str(key)
-            if key in indexed_keys and
+            if key in indexed_keys and index_name == indexed_data[key]['metadata'].get('collection'):
                 if self.compare_fn(document, indexed_data[key]):
                     continue
             yield document
@@ -339,7 +336,7 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
             )
             self.vectorstore.delete(ids=list(docs_to_remove))
 
-    def _get_indexed_data(self,
+    def _get_indexed_data(self, index_name: str):
         raise NotImplementedError("Subclasses must implement this method")
 
     def key_fn(self, document: Document):
```
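The duplicate reduction above keys each incoming document, skips it only when the same key is already indexed in the same collection and the stored copy compares equal, and yields everything else. A standalone sketch with simplified data shapes:

```python
# Simplified sketch of the _reduce_duplicates idea; data shapes are assumptions.
from typing import Any, Dict, Iterable, Iterator

def reduce_duplicates(
    documents: Iterable[Dict[str, Any]],
    indexed_data: Dict[str, Dict[str, Any]],
    index_name: str,
) -> Iterator[Dict[str, Any]]:
    for doc in documents:
        key = str(doc["id"])
        already_indexed = (
            key in indexed_data
            and indexed_data[key]["metadata"].get("collection") == index_name
        )
        if already_indexed and indexed_data[key]["metadata"].get("updated_on") == doc.get("updated_on"):
            continue          # unchanged duplicate: nothing to re-index
        yield doc             # new or updated document

indexed = {"1": {"metadata": {"collection": "docs", "updated_on": 100}}}
incoming = [{"id": 1, "updated_on": 100}, {"id": 2, "updated_on": 200}]
print([d["id"] for d in reduce_duplicates(incoming, indexed, "docs")])   # [2]
```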
```diff
@@ -351,73 +348,57 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
     def remove_ids_fn(self, idx_data, key: str):
         raise NotImplementedError("Subclasses must implement this method")
 
-    def remove_index(self,
+    def remove_index(self, index_name: str = ""):
         """Cleans the indexed data in the collection."""
-        super()._clean_collection(
-        return (f"Collection '{
-                f"Available collections: {self.list_collections()}") if
+        super()._clean_collection(index_name=index_name)
+        return (f"Collection '{index_name}' has been removed from the vector store.\n"
+                f"Available collections: {self.list_collections()}") if index_name \
             else "All collections have been removed from the vector store."
 
-    def _build_collection_filter(self, filter: dict | str,
+    def _build_collection_filter(self, filter: dict | str, index_name: str = "") -> dict:
         """Builds a filter for the collection based on the provided suffix."""
 
         filter = filter if isinstance(filter, dict) else json.loads(filter)
-        if
+        if index_name:
             filter.update({"collection": {
-                "$eq":
+                "$eq": index_name.strip()
             }})
-        filter = {
-            "$and": [
-                filter,
-                {"$or": [
-                    {"type": {"$exists": False}},
-                    {"type": {"$ne": IndexerKeywords.INDEX_META_TYPE.value}}
-                ]},
-            ]
-        }
-        return filter
 
-
-
-
-
-
-
-
-
-
-                store.EmbeddingStore.cmetadata
-            ).filter(
-                func.jsonb_extract_path_text(store.EmbeddingStore.cmetadata, 'type') == IndexerKeywords.INDEX_META_TYPE.value
-            ).all()
-            return [
-                {"id": id_, "metadata": cmetadata}
-                for id_, cmetadata in meta
+        if filter:
+            # Exclude index meta documents from search results
+            filter = {
+                "$and": [
+                    filter,
+                    {"$or": [
+                        {"type": {"$exists": False}},
+                        {"type": {"$ne": IndexerKeywords.INDEX_META_TYPE.value}}
+                    ]},
                 ]
-
-
-
-
-
-
+            }
+        else:
+            filter = {"$or": [
+                {"type": {"$exists": False}},
+                {"type": {"$ne": IndexerKeywords.INDEX_META_TYPE.value}}
+            ]}
+        return filter
 
     def search_index(self,
                      query: str,
-
-                     filter: dict | str = {}, cut_off: float =
+                     index_name: str = "",
+                     filter: dict | str = {}, cut_off: float = DEFAULT_CUT_OFF,
                      search_top: int = 10, reranker: dict = {},
                      full_text_search: Optional[Dict[str, Any]] = None,
                      reranking_config: Optional[Dict[str, Dict[str, Any]]] = None,
                      extended_search: Optional[List[str]] = None,
                      **kwargs):
         """ Searches indexed documents in the vector store."""
-        # build filter on top of
+        # build filter on top of index_name
 
         available_collections = super().list_collections()
-        if
-            return f"Collection '{
+        if index_name and index_name not in available_collections:
+            return f"Collection '{index_name}' not found. Available collections: {available_collections}"
 
-        filter = self._build_collection_filter(filter,
+        filter = self._build_collection_filter(filter, index_name)
         found_docs = super().search_documents(
             query,
             doctype=self.doctype,
@@ -434,15 +415,15 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
     def stepback_search_index(self,
                               query: str,
                               messages: List[Dict[str, Any]] = [],
-
-                              filter: dict | str = {}, cut_off: float =
+                              index_name: str = "",
+                              filter: dict | str = {}, cut_off: float = DEFAULT_CUT_OFF,
                               search_top: int = 10, reranker: dict = {},
                               full_text_search: Optional[Dict[str, Any]] = None,
                               reranking_config: Optional[Dict[str, Dict[str, Any]]] = None,
                               extended_search: Optional[List[str]] = None,
                               **kwargs):
         """ Searches indexed documents in the vector store."""
-        filter = self._build_collection_filter(filter,
+        filter = self._build_collection_filter(filter, index_name)
         found_docs = super().stepback_search(
             query,
             messages,
@@ -459,8 +440,8 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
     def stepback_summary_index(self,
                                query: str,
                                messages: List[Dict[str, Any]] = [],
-
-                               filter: dict | str = {}, cut_off: float =
+                               index_name: str = "",
+                               filter: dict | str = {}, cut_off: float = DEFAULT_CUT_OFF,
                                search_top: int = 10, reranker: dict = {},
                                full_text_search: Optional[Dict[str, Any]] = None,
                                reranking_config: Optional[Dict[str, Dict[str, Any]]] = None,
@@ -468,7 +449,7 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
                                **kwargs):
         """ Generates a summary of indexed documents using stepback technique."""
 
-        filter = self._build_collection_filter(filter,
+        filter = self._build_collection_filter(filter, index_name)
         return super().stepback_summary(
             query,
             messages,
```
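`_build_collection_filter` now always appends a clause that excludes the internal index-meta documents, and wraps the user filter in `$and` only when one was supplied. A standalone sketch of that rule; the `type` marker string stands in for `IndexerKeywords.INDEX_META_TYPE.value`:

```python
# Sketch of the filter-building rule; INDEX_META_TYPE is an assumed stand-in value.
import json

INDEX_META_TYPE = "index_meta"

def build_collection_filter(filter_: dict | str, index_name: str = "") -> dict:
    filter_ = filter_ if isinstance(filter_, dict) else json.loads(filter_)
    if index_name:
        filter_.update({"collection": {"$eq": index_name.strip()}})
    exclude_meta = {"$or": [{"type": {"$exists": False}},
                            {"type": {"$ne": INDEX_META_TYPE}}]}
    return {"$and": [filter_, exclude_meta]} if filter_ else exclude_meta

print(build_collection_filter('{"status": "active"}', "docs"))
print(build_collection_filter({}))   # empty filter -> just the meta exclusion
```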
```diff
@@ -480,6 +461,106 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
             reranking_config=reranking_config,
             extended_search=extended_search
         )
+
+    def index_meta_init(self, index_name: str, index_configuration: dict[str, Any]):
+        index_meta = super().get_index_meta(index_name)
+        if not index_meta:
+            self._log_tool_event(
+                f"There is no existing index_meta for collection '{index_name}'. Initializing it.",
+                tool_name="index_data"
+            )
+            from ..runtime.langchain.interfaces.llm_processor import add_documents
+            created_on = time.time()
+            metadata = {
+                "collection": index_name,
+                "type": IndexerKeywords.INDEX_META_TYPE.value,
+                "indexed": 0,
+                "updated": 0,
+                "state": IndexerKeywords.INDEX_META_IN_PROGRESS.value,
+                "index_configuration": index_configuration,
+                "created_on": created_on,
+                "updated_on": created_on,
+                "task_id": None,
+                "conversation_id": None,
+            }
+            metadata["history"] = json.dumps([metadata])
+            index_meta_doc = Document(page_content=f"{IndexerKeywords.INDEX_META_TYPE.value}_{index_name}", metadata=metadata)
+            add_documents(vectorstore=self.vectorstore, documents=[index_meta_doc])
+
+    def index_meta_update(self, index_name: str, state: str, result: int):
+        index_meta_raw = super().get_index_meta(index_name)
+        from ..runtime.langchain.interfaces.llm_processor import add_documents
+        #
+        if index_meta_raw:
+            metadata = copy.deepcopy(index_meta_raw.get("metadata", {}))
+            metadata["indexed"] = self.get_indexed_count(index_name)
+            metadata["updated"] = result
+            metadata["state"] = state
+            metadata["updated_on"] = time.time()
+            #
+            history_raw = metadata.pop("history", "[]")
+            try:
+                history = json.loads(history_raw) if history_raw.strip() else []
+                # replace the last history item with updated metadata
+                if history and isinstance(history, list):
+                    history[-1] = metadata
+                else:
+                    history = [metadata]
+            except (json.JSONDecodeError, TypeError):
+                logger.warning(f"Failed to load index history: {history_raw}. Create new with only current item.")
+                history = [metadata]
+            #
+            metadata["history"] = json.dumps(history)
+            index_meta_doc = Document(page_content=index_meta_raw.get("content", ""), metadata=metadata)
+            add_documents(vectorstore=self.vectorstore, documents=[index_meta_doc], ids=[index_meta_raw.get("id")])
+
+    def _emit_index_event(self, index_name: str, error: Optional[str] = None):
+        """
+        Emit custom event for index data operation.
+
+        Args:
+            index_name: The name of the index
+            error: Error message if the operation failed, None otherwise
+        """
+        index_meta = super().get_index_meta(index_name)
+
+        if not index_meta:
+            logger.warning(
+                f"No index_meta found for index '{index_name}'. "
+                "Cannot emit index event."
+            )
+            return
+
+        metadata = index_meta.get("metadata", {})
+
+        # Determine if this is a reindex operation
+        history_raw = metadata.get("history", "[]")
+        try:
+            history = json.loads(history_raw) if history_raw.strip() else []
+            is_reindex = len(history) > 1
+        except (json.JSONDecodeError, TypeError):
+            is_reindex = False
+
+        # Build event message
+        event_data = {
+            "id": index_meta.get("id"),
+            "index_name": index_name,
+            "state": metadata.get("state"),
+            "error": error,
+            "reindex": is_reindex,
+            "indexed": metadata.get("indexed", 0),
+            "updated": metadata.get("updated", 0),
+        }
+
+        # Emit the event
+        try:
+            dispatch_custom_event("index_data_status", event_data)
+            logger.debug(
+                f"Emitted index_data_status event for index "
+                f"'{index_name}': {event_data}"
+            )
+        except Exception as e:
+            logger.warning(f"Failed to emit index_data_status event: {e}")
 
     def get_available_tools(self):
         """
@@ -534,6 +615,7 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
                 "mode": "list_collections",
                 "ref": self.list_collections,
                 "description": self.list_collections.__doc__,
-
+                # No parameters
+                "args_schema": create_model("ListCollectionsParams")
             },
-        ]
+        ]
```
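The index-meta record keeps its `history` as a JSON string inside the document metadata: every update replaces the most recent entry, and `_emit_index_event` later flags the run as a reindex once more than one entry exists. A reduced sketch of that bookkeeping (field set trimmed; the real methods also persist the record back to the vector store with `add_documents`):

```python
# Reduced sketch of the index-meta history round-trip; not the SDK's exact code.
import json
import time

def update_index_meta(metadata: dict, state: str, updated: int) -> dict:
    meta = {**metadata, "state": state, "updated": updated, "updated_on": time.time()}
    history_raw = meta.pop("history", "[]")
    try:
        history = json.loads(history_raw) if history_raw.strip() else []
    except (json.JSONDecodeError, TypeError):
        history = []
    if history:
        history[-1] = meta.copy()      # overwrite the in-progress entry
    else:
        history = [meta.copy()]
    meta["history"] = json.dumps(history)
    return meta

meta = update_index_meta({"collection": "docs", "history": "[]"}, "completed", 3)
is_reindex = len(json.loads(meta["history"])) > 1
print(meta["state"], is_reindex)       # completed False (first indexing run)
```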
```diff
@@ -61,6 +61,7 @@ class AlitaBitbucketToolkit(BaseToolkit):
             'metadata':
                 {
                     "label": "Bitbucket", "icon_url": "bitbucket-icon.svg",
+                    "max_length": AlitaBitbucketToolkit.toolkit_max_length,
                     "categories": ["code repositories"],
                     "extra_categories": ["bitbucket", "git", "repository", "code", "version control"],
                 }
@@ -6,7 +6,7 @@ from langchain_core.prompts import ChatPromptTemplate
 from langchain.text_splitter import TokenTextSplitter
 
 from typing import Optional, List
-from
+from pydantic import BaseModel
 from ..utils import tiktoken_length
 
 logger = getLogger(__name__)
@@ -29,7 +29,7 @@ class SonarToolkit(BaseToolkit):
         SonarToolkit.toolkit_max_length = get_max_toolkit_length(selected_tools)
         return create_model(
             name,
-            sonar_project_name=(str, Field(description="Project name of the desired repository"
+            sonar_project_name=(str, Field(description="Project name of the desired repository")),
             sonar_configuration=(SonarConfiguration, Field(description="Sonar Configuration", json_schema_extra={'configuration_types': ['sonar']})),
             selected_tools=(List[Literal[tuple(selected_tools)]], Field(default=[], json_schema_extra={'args_schemas': selected_tools})),
             __config__=ConfigDict(json_schema_extra=
```