PyPI - alita-sdk - Versions diffs - 0.3.217__py3-none-any.whl → 0.3.219__py3-none-any.whl - Mend

alita-sdk 0.3.217__py3-none-any.whl → 0.3.219__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

alita_sdk/runtime/tools/vectorstore.py CHANGED Viewed

@@ -188,6 +188,59 @@ class VectorStoreWrapper(BaseToolApiWrapper):
             except Exception as e:
                 logger.error(f"Failed to initialize PGVectorSearch: {str(e)}")
+    def _remove_collection(self):
+        """
+        Remove the vectorstore collection entirely.
+        """
+        self._log_data(
+            f"Remove collection '{self.dataset}'",
+            tool_name="_remove_collection"
+        )
+        from sqlalchemy import text
+        from sqlalchemy.orm import Session
+        schema_name = self.vectorstore.collection_name
+        with Session(self.vectorstore.session_maker.bind) as session:
+            drop_schema_query = text(f"DROP SCHEMA IF EXISTS {schema_name} CASCADE;")
+            session.execute(drop_schema_query)
+            session.commit()
+            logger.info(f"Schema '{schema_name}' has been dropped.")
+        self._log_data(
+            f"Collection '{self.dataset}' has been removed. ",
+            tool_name="_remove_collection"
+        )
+    def _get_indexed_ids(self, store):
+        """Get all indexed document IDs from vectorstore"""
+        # Check if this is a PGVector store
+        if hasattr(store, 'session_maker') and hasattr(store, 'EmbeddingStore'):
+            return self._get_pgvector_indexed_ids(store)
+        else:
+            # Fall back to Chroma implementation
+            return self._get_chroma_indexed_ids(store)
+    def _get_pgvector_indexed_ids(self, store):
+        """Get all indexed document IDs from PGVector"""
+        from sqlalchemy.orm import Session
+        try:
+            with Session(store.session_maker.bind) as session:
+                ids = session.query(store.EmbeddingStore.id).all()
+            return [str(id_tuple[0]) for id_tuple in ids]
+        except Exception as e:
+            logger.error(f"Failed to get indexed IDs from PGVector: {str(e)}")
+            return []
+    def _get_chroma_indexed_ids(self, store):
+        """Get all indexed document IDs from Chroma"""
+        try:
+            data = store.get(include=[])  # Only get IDs, no metadata
+            return data.get('ids', [])
+        except Exception as e:
+            logger.error(f"Failed to get indexed IDs from Chroma: {str(e)}")
+            return []
     def _clean_collection(self):
         """
         Clean the vectorstore collection by deleting all indexed data.
@@ -196,35 +249,92 @@ class VectorStoreWrapper(BaseToolApiWrapper):
             f"Cleaning collection '{self.dataset}'",
             tool_name="_clean_collection"
         )
-        data = self.vectoradapter.vectorstore.get(include=['metadatas'])
-        if data['ids']:
-            self.vectoradapter.vectorstore.delete(ids=data['ids'])
+        # This logic deletes all data from the vectorstore collection without removal of collection.
+        # Collection itself remains available for future indexing.
+        self.vectoradapter.vectorstore.delete(ids=self._get_indexed_ids(self.vectoradapter.vectorstore))
         self._log_data(
             f"Collection '{self.dataset}' has been cleaned. ",
             tool_name="_clean_collection"
         )
+    # TODO: refactor to use common method for different vectorstores in a separate vectorstore wrappers
     def _get_indexed_data(self, store):
         """ Get all indexed data from vectorstore for non-code content """
-        # get already indexed data
+        # Check if this is a PGVector store
+        if hasattr(store, 'session_maker') and hasattr(store, 'EmbeddingStore'):
+            return self._get_pgvector_indexed_data(store)
+        else:
+            # Fall back to original Chroma implementation
+            return self._get_chroma_indexed_data(store)
+    def _get_pgvector_indexed_data(self, store):
+        """ Get all indexed data from PGVector for non-code content """
+        from sqlalchemy.orm import Session
         result = {}
         try:
-            self._log_data("Retrieving already indexed data from vectorstore",
+            self._log_data("Retrieving already indexed data from PGVector vectorstore",
+                           tool_name="index_documents")
+            with Session(store.session_maker.bind) as session:
+                docs = session.query(
+                    store.EmbeddingStore.id,
+                    store.EmbeddingStore.document,
+                    store.EmbeddingStore.cmetadata
+                ).all()
+            # Process the retrieved data
+            for doc in docs:
+                db_id = doc.id
+                meta = doc.cmetadata or {}
+                # Get document id from metadata
+                doc_id = str(meta.get('id', db_id))
+                dependent_docs = meta.get(IndexerKeywords.DEPENDENT_DOCS.value, [])
+                if dependent_docs:
+                    dependent_docs = [d.strip() for d in dependent_docs.split(';') if d.strip()]
+                parent_id = meta.get(IndexerKeywords.PARENT.value, -1)
+                chunk_id = meta.get('chunk_id')
+                if doc_id in result and chunk_id:
+                    # If document with the same id already saved, add db_id for current one as chunk
+                    result[doc_id]['all_chunks'].append(db_id)
+                else:
+                    result[doc_id] = {
+                        'metadata': meta,
+                        'id': db_id,
+                        'all_chunks': [db_id],
+                        IndexerKeywords.DEPENDENT_DOCS.value: dependent_docs,
+                        IndexerKeywords.PARENT.value: parent_id
+                    }
+        except Exception as e:
+            logger.error(f"Failed to get indexed data from PGVector: {str(e)}. Continuing with empty index.")
+        return result
+    def _get_chroma_indexed_data(self, store):
+        """ Get all indexed data from Chroma for non-code content """
+        result = {}
+        try:
+            self._log_data("Retrieving already indexed data from Chroma vectorstore",
                            tool_name="index_documents")
             data = store.get(include=['metadatas'])
-            # re-structure data to be more usable
+            # Re-structure data to be more usable
             for meta, db_id in zip(data['metadatas'], data['ids']):
-                # get document id from metadata
+                # Get document id from metadata
                 doc_id = str(meta['id'])
                 dependent_docs = meta.get(IndexerKeywords.DEPENDENT_DOCS.value, [])
                 if dependent_docs:
                     dependent_docs = [d.strip() for d in dependent_docs.split(';') if d.strip()]
                 parent_id = meta.get(IndexerKeywords.PARENT.value, -1)
-                #
                 chunk_id = meta.get('chunk_id')
                 if doc_id in result and chunk_id:
-                    # if document with the same id already saved, add db_id fof current one as chunk
+                    # If document with the same id already saved, add db_id for current one as chunk
                     result[doc_id]['all_chunks'].append(db_id)
                 else:
                     result[doc_id] = {
@@ -235,7 +345,8 @@ class VectorStoreWrapper(BaseToolApiWrapper):
                         IndexerKeywords.PARENT.value: parent_id
                     }
         except Exception as e:
-            logger.error(f"Failed to get indexed data from vectorstore: {str(e)}. Continuing with empty index.")
+            logger.error(f"Failed to get indexed data from Chroma: {str(e)}. Continuing with empty index.")
         return result
     def _get_code_indexed_data(self, store) -> Dict[str, Dict[str, Any]]:

alita_sdk/tools/elitea_base.py CHANGED Viewed

@@ -375,12 +375,30 @@ class BaseVectorStoreToolApiWrapper(BaseToolApiWrapper):
             Cleans the indexed data in the collection
         """
-        self._init_vector_store(collection_suffix)._clean_collection()
+        self._init_vector_store(collection_suffix)._remove_collection()
     def list_collections(self):
         """
             Lists all collections in the vector store
         """
+        if self.vectorstore_type == 'PGVector'.lower():
+            from sqlalchemy import text
+            from sqlalchemy.orm import Session
+            # schema_name = self.vectorstore.collection_name
+            with Session(self._init_vector_store().vectorstore.session_maker.bind) as session:
+                get_collections = text("""
+                    SELECT table_schema
+                    FROM information_schema.columns
+                    WHERE udt_name = 'vector';
+                """)
+                # Execute the raw SQL query
+                result = session.execute(get_collections)
+                # Fetch all rows from the result
+                docs = result.fetchall()
+            return str(docs)
         vector_client = self._init_vector_store().vectoradapter.vectorstore._client
         return ','.join([collection.name for collection in vector_client.list_collections()])

{alita_sdk-0.3.217.dist-info → alita_sdk-0.3.219.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: alita_sdk
-Version: 0.3.217
+Version: 0.3.219
 Summary: SDK for building langchain agents using resources from Alita
 Author-email: Artem Rozumenko <artyom.rozumenko@gmail.com>, Mikalai Biazruchka <mikalai_biazruchka@epam.com>, Roman Mitusov <roman_mitusov@epam.com>, Ivan Krakhmaliuk <lifedjik@gmail.com>, Artem Dubrovskiy <ad13box@gmail.com>
 License-Expression: Apache-2.0

{alita_sdk-0.3.217.dist-info → alita_sdk-0.3.219.dist-info}/RECORD RENAMED Viewed

@@ -82,7 +82,7 @@ alita_sdk/runtime/tools/pgvector_search.py,sha256=NN2BGAnq4SsDHIhUcFZ8d_dbEOM8Qw
 alita_sdk/runtime/tools/prompt.py,sha256=nJafb_e5aOM1Rr3qGFCR-SKziU9uCsiP2okIMs9PppM,741
 alita_sdk/runtime/tools/router.py,sha256=wCvZjVkdXK9dMMeEerrgKf5M790RudH68pDortnHSz0,1517
 alita_sdk/runtime/tools/tool.py,sha256=lE1hGi6qOAXG7qxtqxarD_XMQqTghdywf261DZawwno,5631
-alita_sdk/runtime/tools/vectorstore.py,sha256=R7Xy2HMIcXSoJ3exvPH_BYbzZfTSnRY23Tn46tqKIiU,33961
+alita_sdk/runtime/tools/vectorstore.py,sha256=lpbpS2yukyT9xRkT2tNQl9YqnlO5F0rNCyj0nU7OJDE,38537
 alita_sdk/runtime/utils/AlitaCallback.py,sha256=E4LlSBuCHWiUq6W7IZExERHZY0qcmdjzc_rJlF2iQIw,7356
 alita_sdk/runtime/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 alita_sdk/runtime/utils/constants.py,sha256=Xntx1b_uxUzT4clwqHA_U6K8y5bBqf_4lSQwXdcWrp4,13586
@@ -94,7 +94,7 @@ alita_sdk/runtime/utils/toolkit_runtime.py,sha256=MU63Fpxj0b5_r1IUUc0Q3-PN9VwL7r
 alita_sdk/runtime/utils/toolkit_utils.py,sha256=I9QFqnaqfVgN26LUr6s3XlBlG6y0CoHURnCzG7XcwVs,5311
 alita_sdk/runtime/utils/utils.py,sha256=CpEl3LCeLbhzQySz08lkKPm7Auac6IiLF7WB8wmArMI,589
 alita_sdk/tools/__init__.py,sha256=1AHqP2xyLjn92xVm70l9XIke6FkfHkLo5OoQVe4BuP8,10421
-alita_sdk/tools/elitea_base.py,sha256=iGWoskj7mUCMKz7yubcyrLYEHr1YJQMGwsuTGxJyrv8,30356
+alita_sdk/tools/elitea_base.py,sha256=7mi-Bg3DHkaCCfldovroy8zqp5IIROVfWGHG5k-Zl7o,31083
 alita_sdk/tools/ado/__init__.py,sha256=2NMQwt2pjIukSC9nSZ7CLocdGpK7002x7ixKr_wunxk,1313
 alita_sdk/tools/ado/utils.py,sha256=PTCludvaQmPLakF2EbCGy66Mro4-rjDtavVP-xcB2Wc,1252
 alita_sdk/tools/ado/repos/__init__.py,sha256=guYY95Gtyb0S4Jj0V1qO0x2jlRoH0H1cKjHXNwmShow,6388
@@ -305,8 +305,8 @@ alita_sdk/tools/zephyr_scale/api_wrapper.py,sha256=9CzQqQKv45LqZCmwSe4zzEXvBtStI
 alita_sdk/tools/zephyr_squad/__init__.py,sha256=0AI_j27xVO5Gk5HQMFrqPTd4uvuVTpiZUicBrdfEpKg,2796
 alita_sdk/tools/zephyr_squad/api_wrapper.py,sha256=kmw_xol8YIYFplBLWTqP_VKPRhL_1ItDD0_vXTe_UuI,14906
 alita_sdk/tools/zephyr_squad/zephyr_squad_cloud_client.py,sha256=R371waHsms4sllHCbijKYs90C-9Yu0sSR3N4SUfQOgU,5066
-alita_sdk-0.3.217.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-alita_sdk-0.3.217.dist-info/METADATA,sha256=Bdw9AsoShDiMxiNJlCg-PegsjzUFDeryEMKuwkH66lQ,18917
-alita_sdk-0.3.217.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-alita_sdk-0.3.217.dist-info/top_level.txt,sha256=0vJYy5p_jK6AwVb1aqXr7Kgqgk3WDtQ6t5C-XI9zkmg,10
-alita_sdk-0.3.217.dist-info/RECORD,,
+alita_sdk-0.3.219.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+alita_sdk-0.3.219.dist-info/METADATA,sha256=8OiIgT_-Gr1auGCJvupch3DVJFcQ3bGTgU59Wj43Idk,18917
+alita_sdk-0.3.219.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+alita_sdk-0.3.219.dist-info/top_level.txt,sha256=0vJYy5p_jK6AwVb1aqXr7Kgqgk3WDtQ6t5C-XI9zkmg,10
+alita_sdk-0.3.219.dist-info/RECORD,,

{alita_sdk-0.3.217.dist-info → alita_sdk-0.3.219.dist-info}/WHEEL RENAMED Viewed

File without changes

{alita_sdk-0.3.217.dist-info → alita_sdk-0.3.219.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{alita_sdk-0.3.217.dist-info → alita_sdk-0.3.219.dist-info}/top_level.txt RENAMED Viewed

File without changes

alita-sdk 0.3.217__py3-none-any.whl → 0.3.219__py3-none-any.whl

alita-sdk 0.3.217__py3-none-any.whl → 0.3.219__py3-none-any.whl