alita-sdk 0.3.217__py3-none-any.whl → 0.3.218__py3-none-any.whl
This diff shows the content of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- alita_sdk/runtime/tools/vectorstore.py +71 -10
- {alita_sdk-0.3.217.dist-info → alita_sdk-0.3.218.dist-info}/METADATA +1 -1
- {alita_sdk-0.3.217.dist-info → alita_sdk-0.3.218.dist-info}/RECORD +6 -6
- {alita_sdk-0.3.217.dist-info → alita_sdk-0.3.218.dist-info}/WHEEL +0 -0
- {alita_sdk-0.3.217.dist-info → alita_sdk-0.3.218.dist-info}/licenses/LICENSE +0 -0
- {alita_sdk-0.3.217.dist-info → alita_sdk-0.3.218.dist-info}/top_level.txt +0 -0

alita_sdk/runtime/tools/vectorstore.py

@@ -196,35 +196,95 @@ class VectorStoreWrapper(BaseToolApiWrapper):
             f"Cleaning collection '{self.dataset}'",
             tool_name="_clean_collection"
         )
-
-
-
+        # This logic deletes the entire collection
+        # Works for PGVector and Chroma
+        self.vectoradapter.vectorstore.delete_collection()
+        # This logic deletes all data from the vectorstore collection without removal of collection.
+        # data = self.vectoradapter.vectorstore.get(include=['metadatas'])
+        # if data['ids']:
+        #     self.vectoradapter.vectorstore.delete(ids=data['ids'])
         self._log_data(
             f"Collection '{self.dataset}' has been cleaned. ",
             tool_name="_clean_collection"
         )
 
+    # TODO: refactor to use common method for different vectorstores in a separate vectorstore wrappers
     def _get_indexed_data(self, store):
         """ Get all indexed data from vectorstore for non-code content """
 
-        #
+        # Check if this is a PGVector store
+        if hasattr(store, 'session_maker') and hasattr(store, 'EmbeddingStore'):
+            return self._get_pgvector_indexed_data(store)
+        else:
+            # Fall back to original Chroma implementation
+            return self._get_chroma_indexed_data(store)
+
+    def _get_pgvector_indexed_data(self, store):
+        """ Get all indexed data from PGVector for non-code content """
+        from sqlalchemy.orm import Session
+
         result = {}
         try:
-            self._log_data("Retrieving already indexed data from vectorstore",
+            self._log_data("Retrieving already indexed data from PGVector vectorstore",
+                           tool_name="index_documents")
+
+            with Session(store.session_maker.bind) as session:
+                docs = session.query(
+                    store.EmbeddingStore.id,
+                    store.EmbeddingStore.document,
+                    store.EmbeddingStore.cmetadata
+                ).all()
+
+                # Process the retrieved data
+                for doc in docs:
+                    db_id = doc.id
+                    meta = doc.cmetadata or {}
+
+                    # Get document id from metadata
+                    doc_id = str(meta.get('id', db_id))
+                    dependent_docs = meta.get(IndexerKeywords.DEPENDENT_DOCS.value, [])
+                    if dependent_docs:
+                        dependent_docs = [d.strip() for d in dependent_docs.split(';') if d.strip()]
+                    parent_id = meta.get(IndexerKeywords.PARENT.value, -1)
+
+                    chunk_id = meta.get('chunk_id')
+                    if doc_id in result and chunk_id:
+                        # If document with the same id already saved, add db_id for current one as chunk
+                        result[doc_id]['all_chunks'].append(db_id)
+                    else:
+                        result[doc_id] = {
+                            'metadata': meta,
+                            'id': db_id,
+                            'all_chunks': [db_id],
+                            IndexerKeywords.DEPENDENT_DOCS.value: dependent_docs,
+                            IndexerKeywords.PARENT.value: parent_id
+                        }
+
+        except Exception as e:
+            logger.error(f"Failed to get indexed data from PGVector: {str(e)}. Continuing with empty index.")
+
+        return result
+
+    def _get_chroma_indexed_data(self, store):
+        """ Get all indexed data from Chroma for non-code content """
+        result = {}
+        try:
+            self._log_data("Retrieving already indexed data from Chroma vectorstore",
                            tool_name="index_documents")
             data = store.get(include=['metadatas'])
-
+
+            # Re-structure data to be more usable
             for meta, db_id in zip(data['metadatas'], data['ids']):
-                #
+                # Get document id from metadata
                 doc_id = str(meta['id'])
                 dependent_docs = meta.get(IndexerKeywords.DEPENDENT_DOCS.value, [])
                 if dependent_docs:
                     dependent_docs = [d.strip() for d in dependent_docs.split(';') if d.strip()]
                 parent_id = meta.get(IndexerKeywords.PARENT.value, -1)
-
+
                 chunk_id = meta.get('chunk_id')
                 if doc_id in result and chunk_id:
-                    #
+                    # If document with the same id already saved, add db_id for current one as chunk
                     result[doc_id]['all_chunks'].append(db_id)
                 else:
                     result[doc_id] = {
@@ -235,7 +295,8 @@ class VectorStoreWrapper(BaseToolApiWrapper):
                         IndexerKeywords.PARENT.value: parent_id
                     }
         except Exception as e:
-            logger.error(f"Failed to get indexed data from
+            logger.error(f"Failed to get indexed data from Chroma: {str(e)}. Continuing with empty index.")
+
         return result
 
     def _get_code_indexed_data(self, store) -> Dict[str, Dict[str, Any]]:
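
For context on the change above: the new _get_indexed_data dispatch is duck-typed (a store exposing both session_maker and EmbeddingStore attributes is treated as PGVector, anything else falls back to the Chroma reader), and _get_pgvector_indexed_data queries (id, document, cmetadata) rows through a SQLAlchemy session before folding chunks of the same logical document into a single entry. The following is a minimal, self-contained sketch of that pattern against an in-memory SQLite table; EmbeddingRow, is_pgvector and read_indexed_data are illustrative stand-ins rather than SDK code, and the dependent-docs/parent bookkeeping is omitted for brevity.

from sqlalchemy import JSON, Column, String, create_engine
from sqlalchemy.orm import Session, declarative_base

Base = declarative_base()

class EmbeddingRow(Base):
    # Illustrative stand-in for langchain PGVector's EmbeddingStore model
    # (id, document, cmetadata are the three columns the wrapper queries).
    __tablename__ = "langchain_pg_embedding"
    id = Column(String, primary_key=True)
    document = Column(String)
    cmetadata = Column(JSON)

def is_pgvector(store) -> bool:
    # Same duck-typing check the wrapper now performs in _get_indexed_data.
    return hasattr(store, "session_maker") and hasattr(store, "EmbeddingStore")

def read_indexed_data(engine) -> dict:
    # Mirrors the shape of _get_pgvector_indexed_data: query (id, document,
    # cmetadata) rows, then fold chunks of the same logical document into one
    # entry keyed by the 'id' stored in metadata.
    result = {}
    with Session(engine) as session:
        rows = session.query(EmbeddingRow.id, EmbeddingRow.document,
                             EmbeddingRow.cmetadata).all()
    for row in rows:
        meta = row.cmetadata or {}
        doc_id = str(meta.get("id", row.id))
        if doc_id in result and meta.get("chunk_id"):
            result[doc_id]["all_chunks"].append(row.id)
        else:
            result[doc_id] = {"metadata": meta, "id": row.id, "all_chunks": [row.id]}
    return result

class _FakePGVectorStore:
    # Exposes only the two attributes the duck-typing check looks for.
    session_maker = object()
    EmbeddingStore = EmbeddingRow

engine = create_engine("sqlite://")  # in-memory stand-in for the PostgreSQL bind
Base.metadata.create_all(engine)
with Session(engine) as session:
    session.add_all([
        EmbeddingRow(id="db-1", document="chunk 1", cmetadata={"id": "doc-1", "chunk_id": 1}),
        EmbeddingRow(id="db-2", document="chunk 2", cmetadata={"id": "doc-1", "chunk_id": 2}),
    ])
    session.commit()

print(is_pgvector(_FakePGVectorStore()))  # True  -> PGVector branch
print(is_pgvector(object()))              # False -> Chroma fallback
print(read_indexed_data(engine))          # one 'doc-1' entry collecting both db ids

Checking attributes instead of importing the PGVector class keeps the dispatch cheap and, presumably, avoids a hard dependency on the PGVector backend for Chroma-only setups.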

{alita_sdk-0.3.217.dist-info → alita_sdk-0.3.218.dist-info}/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: alita_sdk
-Version: 0.3.217
+Version: 0.3.218
 Summary: SDK for building langchain agents using resources from Alita
 Author-email: Artem Rozumenko <artyom.rozumenko@gmail.com>, Mikalai Biazruchka <mikalai_biazruchka@epam.com>, Roman Mitusov <roman_mitusov@epam.com>, Ivan Krakhmaliuk <lifedjik@gmail.com>, Artem Dubrovskiy <ad13box@gmail.com>
 License-Expression: Apache-2.0

{alita_sdk-0.3.217.dist-info → alita_sdk-0.3.218.dist-info}/RECORD

@@ -82,7 +82,7 @@ alita_sdk/runtime/tools/pgvector_search.py,sha256=NN2BGAnq4SsDHIhUcFZ8d_dbEOM8Qw
 alita_sdk/runtime/tools/prompt.py,sha256=nJafb_e5aOM1Rr3qGFCR-SKziU9uCsiP2okIMs9PppM,741
 alita_sdk/runtime/tools/router.py,sha256=wCvZjVkdXK9dMMeEerrgKf5M790RudH68pDortnHSz0,1517
 alita_sdk/runtime/tools/tool.py,sha256=lE1hGi6qOAXG7qxtqxarD_XMQqTghdywf261DZawwno,5631
-alita_sdk/runtime/tools/vectorstore.py,sha256=
+alita_sdk/runtime/tools/vectorstore.py,sha256=w9NbsBFnO-3H9i0U8p5lzJkU-1K30jAlbKDfgFbiIAE,36631
 alita_sdk/runtime/utils/AlitaCallback.py,sha256=E4LlSBuCHWiUq6W7IZExERHZY0qcmdjzc_rJlF2iQIw,7356
 alita_sdk/runtime/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 alita_sdk/runtime/utils/constants.py,sha256=Xntx1b_uxUzT4clwqHA_U6K8y5bBqf_4lSQwXdcWrp4,13586
@@ -305,8 +305,8 @@ alita_sdk/tools/zephyr_scale/api_wrapper.py,sha256=9CzQqQKv45LqZCmwSe4zzEXvBtStI
 alita_sdk/tools/zephyr_squad/__init__.py,sha256=0AI_j27xVO5Gk5HQMFrqPTd4uvuVTpiZUicBrdfEpKg,2796
 alita_sdk/tools/zephyr_squad/api_wrapper.py,sha256=kmw_xol8YIYFplBLWTqP_VKPRhL_1ItDD0_vXTe_UuI,14906
 alita_sdk/tools/zephyr_squad/zephyr_squad_cloud_client.py,sha256=R371waHsms4sllHCbijKYs90C-9Yu0sSR3N4SUfQOgU,5066
-alita_sdk-0.3.
-alita_sdk-0.3.
-alita_sdk-0.3.
-alita_sdk-0.3.
-alita_sdk-0.3.
+alita_sdk-0.3.218.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+alita_sdk-0.3.218.dist-info/METADATA,sha256=9JpdmaYNFhDeebVRZjzWjDTB0PU1eo-JkZn4i-XqhDw,18917
+alita_sdk-0.3.218.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+alita_sdk-0.3.218.dist-info/top_level.txt,sha256=0vJYy5p_jK6AwVb1aqXr7Kgqgk3WDtQ6t5C-XI9zkmg,10
+alita_sdk-0.3.218.dist-info/RECORD,,

{alita_sdk-0.3.217.dist-info → alita_sdk-0.3.218.dist-info}/WHEEL: file without changes
{alita_sdk-0.3.217.dist-info → alita_sdk-0.3.218.dist-info}/licenses/LICENSE: file without changes
{alita_sdk-0.3.217.dist-info → alita_sdk-0.3.218.dist-info}/top_level.txt: file without changes
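
A note on the RECORD hunks above: per the wheel format, each RECORD line is path,sha256=<urlsafe base64 digest with padding stripped>,size-in-bytes, which is why vectorstore.py gets both a new digest and a new size (36631) in 0.3.218. A short sketch of how such an entry can be recomputed locally; record_entry is a hypothetical helper, not part of the SDK.

import base64
import hashlib
from pathlib import Path

def record_entry(path: str) -> str:
    # Build a wheel RECORD-style line: path,sha256=<urlsafe b64, '=' stripped>,size.
    data = Path(path).read_bytes()
    digest = base64.urlsafe_b64encode(hashlib.sha256(data).digest()).rstrip(b"=").decode()
    return f"{path},sha256={digest},{len(data)}"

# Example (run against a file from an unpacked wheel):
# print(record_entry("alita_sdk/runtime/tools/vectorstore.py"))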