PyPI - alita-sdk - Versions diffs - 0.3.370__py3-none-any.whl → 0.3.372__py3-none-any.whl - Mend

alita-sdk 0.3.370__py3-none-any.whl → 0.3.372__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of alita-sdk might be problematic. Click here for more details.

Files changed (8) hide show

alita_sdk/runtime/tools/vectorstore_base.py CHANGED Viewed

@@ -185,6 +185,12 @@ class VectorStoreWrapperBase(BaseToolApiWrapper):
             return "No indexed collections"
         return collections
+    def get_index_meta(self, collection_suffix: str):
+        index_metas = self.vector_adapter.get_index_meta(self, collection_suffix)
+        if len(index_metas) > 1:
+            raise RuntimeError(f"Multiple index_meta documents found: {index_metas}")
+        return index_metas[0] if index_metas else None
     def _clean_collection(self, collection_suffix: str = ''):
         """
         Clean the vectorstore collection by deleting all indexed data.

alita_sdk/tools/base_indexer_toolkit.py CHANGED Viewed

@@ -1,3 +1,4 @@
+import copy
 import json
 import logging
 import time
@@ -148,7 +149,6 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
         yield from ()
     def index_data(self, **kwargs):
-        from ..runtime.langchain.interfaces.llm_processor import add_documents
         collection_suffix = kwargs.get("collection_suffix")
         progress_step = kwargs.get("progress_step")
         clean_index = kwargs.get("clean_index")
@@ -158,17 +158,7 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
         if clean_index:
             self._clean_index(collection_suffix)
         #
-        # create and add initial index meta document
-        index_meta_doc = Document(page_content=f"{IndexerKeywords.INDEX_META_TYPE.value}_{collection_suffix}", metadata={
-            "collection": collection_suffix,
-            "type": IndexerKeywords.INDEX_META_TYPE.value,
-            "indexed": 0,
-            "state": IndexerKeywords.INDEX_META_IN_PROGRESS.value,
-            "index_configuration": kwargs,
-            "created_on": time.time(),
-            "updated_on": time.time(),
-        })
-        index_meta_ids = add_documents(vectorstore=self.vectorstore, documents=[index_meta_doc])
+        self.index_meta_init(collection_suffix, kwargs)
         #
         self._log_tool_event(f"Indexing data into collection with suffix '{collection_suffix}'. It can take some time...")
         self._log_tool_event(f"Loading the documents to index...{kwargs}")
@@ -183,11 +173,7 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
                              f"Processing documents to collect dependencies and prepare them for indexing...")
         result = self._save_index_generator(documents, documents_count, chunking_tool, chunking_config, collection_suffix=collection_suffix, progress_step=progress_step)
         #
-        # update index meta document
-        index_meta_doc.metadata["indexed"] = result
-        index_meta_doc.metadata["state"] = IndexerKeywords.INDEX_META_COMPLETED.value
-        index_meta_doc.metadata["updated_on"] = time.time()
-        add_documents(vectorstore=self.vectorstore, documents=[index_meta_doc], ids=index_meta_ids)
+        self.index_meta_update(collection_suffix, IndexerKeywords.INDEX_META_COMPLETED.value, result)
         #
         return {"status": "ok", "message": f"successfully indexed {result} documents"}
@@ -366,6 +352,9 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
             filter.update({"collection": {
                 "$eq": collection_suffix.strip()
             }})
+        if filter:
+            # Exclude index meta documents from search results
             filter = {
                 "$and": [
                     filter,
@@ -375,32 +364,13 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
                     ]},
                 ]
             }
+        else:
+            filter = {"$or": [
+                {"type": {"$exists": False}},
+                {"type": {"$ne": IndexerKeywords.INDEX_META_TYPE.value}}
+            ]}
         return filter
-    def index_meta_read(self):
-        from sqlalchemy import func
-        from sqlalchemy.orm import Session
-        store = self.vectorstore
-        try:
-            with Session(store.session_maker.bind) as session:
-                meta = session.query(
-                        store.EmbeddingStore.id,
-                        store.EmbeddingStore.cmetadata
-                    ).filter(
-                        func.jsonb_extract_path_text(store.EmbeddingStore.cmetadata, 'type') == IndexerKeywords.INDEX_META_TYPE.value
-                    ).all()
-                return [
-                    {"id": id_, "metadata": cmetadata}
-                    for id_, cmetadata in meta
-                ]
-        except Exception as e:
-            logger.error(f"Failed to get index_meta from PGVector: {str(e)}")
-            return []
-    def index_meta_delete(self, index_meta_ids: list[str]):
-        self.vectorstore.delete(ids=index_meta_ids)
     def search_index(self,
                      query: str,
                      collection_suffix: str = "",
@@ -480,6 +450,51 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
             reranking_config=reranking_config,
             extended_search=extended_search
         )
+    def index_meta_init(self, collection_suffix: str, index_configuration: dict[str, Any]):
+        index_meta_raw = super().get_index_meta(collection_suffix)
+        from ..runtime.langchain.interfaces.llm_processor import add_documents
+        created_on = time.time()
+        metadata = {
+            "collection": collection_suffix,
+            "type": IndexerKeywords.INDEX_META_TYPE.value,
+            "indexed": 0,
+            "state": IndexerKeywords.INDEX_META_IN_PROGRESS.value,
+            "index_configuration": index_configuration,
+            "created_on": created_on,
+            "updated_on": created_on,
+            "history": "[]",
+        }
+        index_meta_ids = None
+        #
+        if index_meta_raw:
+            history_raw = index_meta_raw.get("metadata", {}).get("history", "[]")
+            if isinstance(history_raw, str) and history_raw.strip():
+                try:
+                    history = json.loads(history_raw)
+                except (json.JSONDecodeError, TypeError):
+                    history = []
+            else:
+                history = []
+            new_history_item = {k: v for k, v in index_meta_raw.get("metadata", {}).items() if k != "history"}
+            history.append(new_history_item)
+            metadata["history"] = json.dumps(history)
+            index_meta_ids = [index_meta_raw.get("id")]
+        #
+        index_meta_doc = Document(page_content=f"{IndexerKeywords.INDEX_META_TYPE.value}_{collection_suffix}", metadata=metadata)
+        add_documents(vectorstore=self.vectorstore, documents=[index_meta_doc], ids=index_meta_ids)
+    def index_meta_update(self, collection_suffix: str, state: str, result: int):
+        index_meta_raw = super().get_index_meta(collection_suffix)
+        from ..runtime.langchain.interfaces.llm_processor import add_documents
+        #
+        if index_meta_raw:
+            metadata = copy.deepcopy(index_meta_raw.get("metadata", {}))
+            metadata["indexed"] = result
+            metadata["state"] = state
+            metadata["updated_on"] = time.time()
+            index_meta_doc = Document(page_content=index_meta_raw.get("content", ""), metadata=metadata)
+            add_documents(vectorstore=self.vectorstore, documents=[index_meta_doc], ids=[index_meta_raw.get("id")])
     def get_available_tools(self):
         """

alita_sdk/tools/vector_adapters/VectorStoreAdapter.py CHANGED Viewed

@@ -2,6 +2,8 @@ from abc import ABC, abstractmethod
 from typing import Any, Dict, Optional, List
 from logging import getLogger
+from ...runtime.utils.utils import IndexerKeywords
 logger = getLogger(__name__)
@@ -48,6 +50,11 @@ class VectorStoreAdapter(ABC):
         """Add a new collection name to the metadata"""
         pass
+    @abstractmethod
+    def get_index_meta(self, vectorstore_wrapper, collection_suffix: str) -> List[Dict[str, Any]]:
+        """Get all index_meta entries from the vector store."""
+        pass
 class PGVectorAdapter(VectorStoreAdapter):
     """Adapter for PGVector database operations."""
@@ -265,6 +272,29 @@ class PGVectorAdapter(VectorStoreAdapter):
         except Exception as e:
             logger.error(f"Failed to update collection for entry ID {entry_id}: {str(e)}")
+    def get_index_meta(self, vectorstore_wrapper, collection_suffix: str) -> List[Dict[str, Any]]:
+        from sqlalchemy.orm import Session
+        from sqlalchemy import func
+        store = vectorstore_wrapper.vectorstore
+        try:
+            with Session(store.session_maker.bind) as session:
+                meta = session.query(
+                    store.EmbeddingStore.id,
+                    store.EmbeddingStore.document,
+                    store.EmbeddingStore.cmetadata
+                ).filter(
+                    store.EmbeddingStore.cmetadata['type'].astext == IndexerKeywords.INDEX_META_TYPE.value,
+                    func.jsonb_extract_path_text(store.EmbeddingStore.cmetadata, 'collection') == collection_suffix
+                ).all()
+                result = []
+                for id, document, cmetadata in meta:
+                    result.append({"id": id, "content": document, "metadata": cmetadata})
+                return result
+        except Exception as e:
+            logger.error(f"Failed to get index_meta from PGVector: {str(e)}")
+            raise e
 class ChromaAdapter(VectorStoreAdapter):
     """Adapter for Chroma database operations."""
@@ -361,6 +391,9 @@ class ChromaAdapter(VectorStoreAdapter):
         # This is a simplified implementation - in practice, you might need more complex logic
         logger.warning("add_to_collection for Chroma is not fully implemented yet")
+    def get_index_meta(self, vectorstore_wrapper, collection_suffix: str) -> List[Dict[str, Any]]:
+        logger.warning("get_index_meta for Chroma is not implemented yet")
 class VectorStoreAdapterFactory:
     """Factory for creating vector store adapters."""

{alita_sdk-0.3.370.dist-info → alita_sdk-0.3.372.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: alita_sdk
-Version: 0.3.370
+Version: 0.3.372
 Summary: SDK for building langchain agents using resources from Alita
 Author-email: Artem Rozumenko <artyom.rozumenko@gmail.com>, Mikalai Biazruchka <mikalai_biazruchka@epam.com>, Roman Mitusov <roman_mitusov@epam.com>, Ivan Krakhmaliuk <lifedj27@gmail.com>, Artem Dubrovskiy <ad13box@gmail.com>
 License-Expression: Apache-2.0

{alita_sdk-0.3.370.dist-info → alita_sdk-0.3.372.dist-info}/RECORD RENAMED Viewed

@@ -123,7 +123,7 @@ alita_sdk/runtime/tools/router.py,sha256=p7e0tX6YAWw2M2Nq0A_xqw1E2P-Xz1DaJvhUstf
 alita_sdk/runtime/tools/sandbox.py,sha256=WNz-aUMtkGCPg84dDy_0BPkyp-6YjoYB-xjIEFFrtKw,11601
 alita_sdk/runtime/tools/tool.py,sha256=lE1hGi6qOAXG7qxtqxarD_XMQqTghdywf261DZawwno,5631
 alita_sdk/runtime/tools/vectorstore.py,sha256=8vRhi1lGFEs3unvnflEi2p59U2MfV32lStpEizpDms0,34467
-alita_sdk/runtime/tools/vectorstore_base.py,sha256=1DYmMQEBMLetxQgi6D9Wd_vM_xVCa9qGTAfLOo2kNC0,27533
+alita_sdk/runtime/tools/vectorstore_base.py,sha256=WF-v3sGQKo9q8D8ULyuBo5dPdFcx79X0DCRjyoOd7DI,27844
 alita_sdk/runtime/utils/AlitaCallback.py,sha256=E4LlSBuCHWiUq6W7IZExERHZY0qcmdjzc_rJlF2iQIw,7356
 alita_sdk/runtime/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 alita_sdk/runtime/utils/constants.py,sha256=Xntx1b_uxUzT4clwqHA_U6K8y5bBqf_4lSQwXdcWrp4,13586
@@ -135,7 +135,7 @@ alita_sdk/runtime/utils/toolkit_runtime.py,sha256=MU63Fpxj0b5_r1IUUc0Q3-PN9VwL7r
 alita_sdk/runtime/utils/toolkit_utils.py,sha256=I9QFqnaqfVgN26LUr6s3XlBlG6y0CoHURnCzG7XcwVs,5311
 alita_sdk/runtime/utils/utils.py,sha256=BVEVLkYiiotcUD0XsHyx-wACpHfALsQg7PLZpObqvK8,1008
 alita_sdk/tools/__init__.py,sha256=jUj1ztC2FbkIUB-YYmiqaz_rqW7Il5kWzDPn1mJmj5w,10545
-alita_sdk/tools/base_indexer_toolkit.py,sha256=jaUzLqzGwY0YJ4ZGeRHfyrWOiuTpOawUqGrLVqXHtFo,26137
+alita_sdk/tools/base_indexer_toolkit.py,sha256=GpeIYY7kJZCjv0-gGcHNStY0uCtEBRl-I5XiISM0Tdo,26900
 alita_sdk/tools/code_indexer_toolkit.py,sha256=6QvI1by0OFdnKTx5TfNoDJjnMrvnTi9T56xaDxzeleU,7306
 alita_sdk/tools/elitea_base.py,sha256=up3HshASSDfjlHV_HPrs1aD4JIwwX0Ug26WGTzgIYvY,34724
 alita_sdk/tools/non_code_indexer_toolkit.py,sha256=B3QvhpT1F9QidkCcsOi3J_QrTOaNlTxqWFwe90VivQQ,1329
@@ -331,7 +331,7 @@ alita_sdk/tools/testrail/api_wrapper.py,sha256=tQcGlFJmftvs5ZiO4tsP19fCo4CrJeq_U
 alita_sdk/tools/utils/__init__.py,sha256=W9rCCUPtHCP5nGAbWp0n5jaNA84572aiRoqKneBnaS4,3330
 alita_sdk/tools/utils/available_tools_decorator.py,sha256=IbrdfeQkswxUFgvvN7-dyLMZMyXLiwvX7kgi3phciCk,273
 alita_sdk/tools/utils/content_parser.py,sha256=TuKAPUzIZx9F-pzHiVyrCFpI5emrGaOF8DgWHJP2cM4,15235
-alita_sdk/tools/vector_adapters/VectorStoreAdapter.py,sha256=ypBEAkFRGHv5edW0N9rdo1yKurNGQ4pRVEWtrN_7SeA,17656
+alita_sdk/tools/vector_adapters/VectorStoreAdapter.py,sha256=p_9Cu5eausnfiKNsitbVxwu5eimZHRv3R-OMw7lBrts,19173
 alita_sdk/tools/vector_adapters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 alita_sdk/tools/xray/__init__.py,sha256=eOMWP8VamFbbJgt1xrGpGPqB9ByOTA0Cd3LCaETzGk4,4376
 alita_sdk/tools/xray/api_wrapper.py,sha256=uj5kzUgPdo_Oct9WCNMOpkb6o_3L7J4LZrEGtrwYMmc,30157
@@ -352,8 +352,8 @@ alita_sdk/tools/zephyr_scale/api_wrapper.py,sha256=kT0TbmMvuKhDUZc0i7KO18O38JM9S
 alita_sdk/tools/zephyr_squad/__init__.py,sha256=0ne8XLJEQSLOWfzd2HdnqOYmQlUliKHbBED5kW_Vias,2895
 alita_sdk/tools/zephyr_squad/api_wrapper.py,sha256=kmw_xol8YIYFplBLWTqP_VKPRhL_1ItDD0_vXTe_UuI,14906
 alita_sdk/tools/zephyr_squad/zephyr_squad_cloud_client.py,sha256=R371waHsms4sllHCbijKYs90C-9Yu0sSR3N4SUfQOgU,5066
-alita_sdk-0.3.370.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-alita_sdk-0.3.370.dist-info/METADATA,sha256=7o5P_ba4fUU5FVQU9htx-olWpTUnrpVOcfl2o3DwSEs,19071
-alita_sdk-0.3.370.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-alita_sdk-0.3.370.dist-info/top_level.txt,sha256=0vJYy5p_jK6AwVb1aqXr7Kgqgk3WDtQ6t5C-XI9zkmg,10
-alita_sdk-0.3.370.dist-info/RECORD,,
+alita_sdk-0.3.372.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+alita_sdk-0.3.372.dist-info/METADATA,sha256=A3hNTePpqTE8uzQhDG7RbgX5Iv7MoyOYgkucgkqKEpI,19071
+alita_sdk-0.3.372.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+alita_sdk-0.3.372.dist-info/top_level.txt,sha256=0vJYy5p_jK6AwVb1aqXr7Kgqgk3WDtQ6t5C-XI9zkmg,10
+alita_sdk-0.3.372.dist-info/RECORD,,

{alita_sdk-0.3.370.dist-info → alita_sdk-0.3.372.dist-info}/WHEEL RENAMED Viewed

File without changes

{alita_sdk-0.3.370.dist-info → alita_sdk-0.3.372.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{alita_sdk-0.3.370.dist-info → alita_sdk-0.3.372.dist-info}/top_level.txt RENAMED Viewed

File without changes

alita-sdk 0.3.370__py3-none-any.whl → 0.3.372__py3-none-any.whl

Potentially problematic release.

alita-sdk 0.3.370__py3-none-any.whl → 0.3.372__py3-none-any.whl