alita-sdk 0.3.230__py3-none-any.whl → 0.3.231__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

--- alita_sdk/runtime/tools/vectorstore.py
+++ alita_sdk/runtime/tools/vectorstore.py
@@ -210,38 +210,48 @@ class VectorStoreWrapper(BaseToolApiWrapper):
             tool_name="_remove_collection"
         )
 
-    def _get_indexed_ids(self, store):
+    def _get_indexed_ids(self, collection_suffix: Optional[str] = '') -> List[str]:
         """Get all indexed document IDs from vectorstore"""
 
         # Check if this is a PGVector store
-        if hasattr(store, 'session_maker') and hasattr(store, 'EmbeddingStore'):
-            return self._get_pgvector_indexed_ids(store)
+        if self._is_pgvector():
+            return self._get_pgvector_indexed_ids(collection_suffix)
         else:
             # Fall back to Chroma implementation
-            return self._get_chroma_indexed_ids(store)
+            # TODO: update filter by collection_suffix for Chroma
+            return self._get_chroma_indexed_ids(collection_suffix)
 
-    def _get_pgvector_indexed_ids(self, store):
+    def _get_pgvector_indexed_ids(self, collection_suffix: Optional[str] = ''):
         """Get all indexed document IDs from PGVector"""
         from sqlalchemy.orm import Session
+        from sqlalchemy import func
 
+        store = self.vectorstore
         try:
             with Session(store.session_maker.bind) as session:
-                ids = session.query(store.EmbeddingStore.id).all()
-                return [str(id_tuple[0]) for id_tuple in ids]
+                # Start building the query
+                query = session.query(store.EmbeddingStore.id)
+                # Apply filter only if collection_suffix is provided
+                if collection_suffix:
+                    query = query.filter(
+                        func.jsonb_extract_path_text(store.EmbeddingStore.cmetadata, 'collection') == collection_suffix
+                    )
+                ids = query.all()
+                return [str(id_tuple[0]) for id_tuple in ids]
         except Exception as e:
             logger.error(f"Failed to get indexed IDs from PGVector: {str(e)}")
             return []
 
-    def _get_chroma_indexed_ids(self, store):
+    def _get_chroma_indexed_ids(self, collection_suffix: Optional[str] = ''):
         """Get all indexed document IDs from Chroma"""
         try:
-            data = store.get(include=[])  # Only get IDs, no metadata
+            data = self.vectorstore.get(include=[])  # Only get IDs, no metadata
             return data.get('ids', [])
         except Exception as e:
             logger.error(f"Failed to get indexed IDs from Chroma: {str(e)}")
             return []
 
-    def _clean_collection(self):
+    def _clean_collection(self, collection_suffix: str = ''):
         """
         Clean the vectorstore collection by deleting all indexed data.
         """
@@ -251,33 +261,37 @@ class VectorStoreWrapper(BaseToolApiWrapper):
         )
         # This logic deletes all data from the vectorstore collection without removal of collection.
         # Collection itself remains available for future indexing.
-        self.vectoradapter.vectorstore.delete(ids=self._get_indexed_ids(self.vectoradapter.vectorstore))
+        self.vectorstore.delete(ids=self._get_indexed_ids(collection_suffix))
 
         self._log_data(
             f"Collection '{self.dataset}' has been cleaned. ",
             tool_name="_clean_collection"
         )
 
+    def _is_pgvector(self) -> bool:
+        """Check if the vectorstore is a PGVector store."""
+        return hasattr(self.vectorstore, 'session_maker') and hasattr(self.vectorstore, 'EmbeddingStore')
+
     # TODO: refactor to use common method for different vectorstores in a separate vectorstore wrappers
-    def _get_indexed_data(self, store):
+    def _get_indexed_data(self):
         """ Get all indexed data from vectorstore for non-code content """
 
         # Check if this is a PGVector store
-        if hasattr(store, 'session_maker') and hasattr(store, 'EmbeddingStore'):
-            return self._get_pgvector_indexed_data(store)
+        if self._is_pgvector():
+            return self._get_pgvector_indexed_data()
         else:
             # Fall back to original Chroma implementation
-            return self._get_chroma_indexed_data(store)
+            return self._get_chroma_indexed_data(self.vectorstore)
 
-    def _get_pgvector_indexed_data(self, store):
+    def _get_pgvector_indexed_data(self):
         """ Get all indexed data from PGVector for non-code content """
         from sqlalchemy.orm import Session
 
         result = {}
         try:
             self._log_data("Retrieving already indexed data from PGVector vectorstore",
-                           tool_name="index_documents")
-
+                           tool_name="get_indexed_data")
+            store = self.vectorstore
             with Session(store.session_maker.bind) as session:
                 docs = session.query(
                     store.EmbeddingStore.id,
@@ -320,7 +334,7 @@ class VectorStoreWrapper(BaseToolApiWrapper):
         result = {}
         try:
             self._log_data("Retrieving already indexed data from Chroma vectorstore",
-                           tool_name="index_documents")
+                           tool_name="get_indexed_data")
             data = store.get(include=['metadatas'])
 
             # Re-structure data to be more usable
@@ -349,21 +363,63 @@ class VectorStoreWrapper(BaseToolApiWrapper):
 
         return result
 
-    def _get_code_indexed_data(self, store) -> Dict[str, Dict[str, Any]]:
+    def _get_code_indexed_data(self) -> Dict[str, Dict[str, Any]]:
         """ Get all indexed data from vectorstore for code content """
 
         # get already indexed data
+        if self._is_pgvector():
+            result = self._get_pgvector_code_indexed_data()
+        else:
+            result = self._get_chroma_code_indexed_data()
+        return result
+
+    def _get_chroma_code_indexed_data(self) -> Dict[str, Dict[str, Any]]:
+        """Get all indexed code data from Chroma."""
         result = {}
         try:
-            self._log_data("Retrieving already indexed code data from vectorstore",
-                           tool_name="index_documents")
-            data = store.get(include=['metadatas'])
-            # re-structure data to be more usable
+            self._log_data("Retrieving already indexed code data from Chroma vectorstore",
+                           tool_name="index_code_data")
+            data = self.vectorstore.get(include=['metadatas'])
             for meta, db_id in zip(data['metadatas'], data['ids']):
-                filename = meta['filename']
+                filename = meta.get('filename')
+                commit_hash = meta.get('commit_hash')
+                if not filename:
+                    continue
+                if filename not in result:
+                    result[filename] = {
+                        'commit_hashes': [],
+                        'ids': []
+                    }
+                if commit_hash is not None:
+                    result[filename]['commit_hashes'].append(commit_hash)
+                result[filename]['ids'].append(db_id)
+        except Exception as e:
+            logger.error(f"Failed to get indexed code data from Chroma: {str(e)}. Continuing with empty index.")
+        return result
+
+    def _get_pgvector_code_indexed_data(self) -> Dict[str, Dict[str, Any]]:
+        """Get all indexed code data from PGVector."""
+        from sqlalchemy.orm import Session
+
+        result = {}
+        try:
+            self._log_data("Retrieving already indexed code data from PGVector vectorstore",
+                           tool_name="index_code_data")
+            store = self.vectorstore
+            with Session(store.session_maker.bind) as session:
+                docs = session.query(
+                    store.EmbeddingStore.id,
+                    store.EmbeddingStore.cmetadata
+                ).all()
+
+                for db_id, meta in docs:
+                    filename = meta.get('filename')
                     commit_hash = meta.get('commit_hash')
+                    if not filename:
+                        continue
                     if filename not in result:
                         result[filename] = {
+                            'metadata': meta,
                             'commit_hashes': [],
                             'ids': []
                         }
@@ -371,13 +427,64 @@ class VectorStoreWrapper(BaseToolApiWrapper):
                         result[filename]['commit_hashes'].append(commit_hash)
                     result[filename]['ids'].append(db_id)
         except Exception as e:
-            logger.error(f"Failed to get indexed code data from vectorstore: {str(e)}. Continuing with empty index.")
+            logger.error(f"Failed to get indexed code data from PGVector: {str(e)}. Continuing with empty index.")
         return result
 
+
+    def _add_to_collection(self, entry_id, new_collection_value):
+        """Add a new collection name to the `collection` key in the `metadata` column."""
+
+        from sqlalchemy import func
+        from sqlalchemy.orm import Session
+
+        store = self.vectorstore
+        try:
+            with Session(store.session_maker.bind) as session:
+                # Query the current value of the `collection` key
+                current_collection_query = session.query(
+                    func.jsonb_extract_path_text(store.EmbeddingStore.cmetadata, 'collection')
+                ).filter(store.EmbeddingStore.id == entry_id).scalar()
+
+                # If the `collection` key is NULL or doesn't contain the new value, update it
+                if current_collection_query is None:
+                    # If `collection` is NULL, initialize it with the new value
+                    session.query(store.EmbeddingStore).filter(
+                        store.EmbeddingStore.id == entry_id
+                    ).update(
+                        {
+                            store.EmbeddingStore.cmetadata: func.jsonb_set(
+                                func.coalesce(store.EmbeddingStore.cmetadata, '{}'),
+                                '{collection}',  # Path to the `collection` key
+                                f'"{new_collection_value}"',  # New value for the `collection` key
+                                True  # Create the key if it doesn't exist
+                            )
+                        }
+                    )
+                elif new_collection_value not in current_collection_query.split(";"):
+                    # If `collection` exists but doesn't contain the new value, append it
+                    updated_collection_value = f"{current_collection_query};{new_collection_value}"
+                    session.query(store.EmbeddingStore).filter(
+                        store.EmbeddingStore.id == entry_id
+                    ).update(
+                        {
+                            store.EmbeddingStore.cmetadata: func.jsonb_set(
+                                store.EmbeddingStore.cmetadata,
+                                '{collection}',  # Path to the `collection` key
+                                f'"{updated_collection_value}"',  # Concatenated value as a valid JSON string
+                                True  # Create the key if it doesn't exist
+                            )
+                        }
+                    )
+
+                session.commit()
+                logger.info(f"Successfully updated collection for entry ID {entry_id}.")
+        except Exception as e:
+            logger.error(f"Failed to update collection for entry ID {entry_id}: {str(e)}")
+
     def _reduce_duplicates(
         self,
         documents: Generator[Any, None, None],
-        store,
+        collection_suffix: str,
         get_indexed_data: Callable,
         key_fn: Callable,
         compare_fn: Callable,
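
Note: `_add_to_collection` stores several collection names under one JSONB key as a `;`-separated string. A small pure-Python sketch of that merge rule (no database involved; the helper name is illustrative):

    def merge_collection(current, new):
        # Mirrors the two jsonb_set branches above: initialize when the key
        # is NULL, append ';'-separated when the name is not yet present.
        if current is None:
            return new
        if new in current.split(";"):
            return current
        return f"{current};{new}"

    assert merge_collection(None, "docs1") == "docs1"
    assert merge_collection("docs1", "code2") == "docs1;code2"
    assert merge_collection("docs1;code2", "docs1") == "docs1;code2"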
@@ -386,7 +493,7 @@ class VectorStoreWrapper(BaseToolApiWrapper):
     ) -> List[Any]:
         """Generic duplicate reduction logic for documents."""
         self._log_data(log_msg, tool_name="index_documents")
-        indexed_data = get_indexed_data(store)
+        indexed_data = get_indexed_data()
         indexed_keys = set(indexed_data.keys())
         if not indexed_keys:
             self._log_data("Vectorstore is empty, indexing all incoming documents", tool_name="index_documents")
@@ -397,8 +504,15 @@ class VectorStoreWrapper(BaseToolApiWrapper):
 
         for document in documents:
             key = key_fn(document)
-            if key in indexed_keys:
+            if key in indexed_keys and collection_suffix == indexed_data[key]['metadata'].get('collection'):
                 if compare_fn(document, indexed_data[key]):
+                    # Disabled addition of new collection to already indexed documents
+                    # # check metadata.collection and update if needed
+                    # for update_collection_id in remove_ids_fn(indexed_data, key):
+                    #     self._add_to_collection(
+                    #         update_collection_id,
+                    #         collection_suffix
+                    #     )
                     continue
             final_docs.append(document)
             docs_to_remove.update(remove_ids_fn(indexed_data, key))
@@ -410,14 +524,14 @@ class VectorStoreWrapper(BaseToolApiWrapper):
                 f"Removing {len(docs_to_remove)} documents from vectorstore that are already indexed with different updated_on.",
                 tool_name="index_documents"
             )
-            store.delete(ids=list(docs_to_remove))
+            self.vectorstore.delete(ids=list(docs_to_remove))
 
         return final_docs
 
-    def _reduce_non_code_duplicates(self, documents: Generator[Any, None, None], store) -> List[Any]:
+    def _reduce_non_code_duplicates(self, documents: Generator[Any, None, None], collection_suffix: str) -> List[Any]:
         return self._reduce_duplicates(
             documents,
-            store,
+            collection_suffix,
             self._get_indexed_data,
             lambda doc: doc.metadata.get('id'),
             lambda doc, idx: (
@@ -434,10 +548,10 @@ class VectorStoreWrapper(BaseToolApiWrapper):
             log_msg="Verification of documents to index started"
         )
 
-    def _reduce_code_duplicates(self, documents: Generator[Any, None, None], store) -> List[Any]:
+    def _reduce_code_duplicates(self, documents: Generator[Any, None, None], collection_suffix: str) -> List[Any]:
         return self._reduce_duplicates(
             documents,
-            store,
+            collection_suffix,
             self._get_code_indexed_data,
             lambda doc: doc.metadata.get('filename'),
             lambda doc, idx: (
@@ -449,7 +563,7 @@ class VectorStoreWrapper(BaseToolApiWrapper):
             log_msg="Verification of code documents to index started"
         )
 
-    def index_documents(self, documents: Generator[Document, None, None], progress_step: int = 20, clean_index: bool = True, is_code: bool = False):
+    def index_documents(self, documents: Generator[Document, None, None], collection_suffix: str, progress_step: int = 20, clean_index: bool = True, is_code: bool = False):
         """ Index documents in the vectorstore.
 
         Args:
@@ -465,7 +579,7 @@ class VectorStoreWrapper(BaseToolApiWrapper):
             logger.info("Cleaning index before re-indexing all documents.")
             self._log_data("Cleaning index before re-indexing all documents. Previous index will be removed", tool_name="index_documents")
             try:
-                self._clean_collection()
+                self._clean_collection(collection_suffix)
                 self.vectoradapter.persist()
                 self.vectoradapter.vacuum()
                 self._log_data("Previous index has been removed",
@@ -476,8 +590,8 @@ class VectorStoreWrapper(BaseToolApiWrapper):
             documents = list(documents)
         else:
             # remove duplicates based on metadata 'id' and 'updated_on' or 'commit_hash' fields
-            documents = self._reduce_code_duplicates(documents, self.vectoradapter.vectorstore) if is_code \
-                else self._reduce_non_code_duplicates(documents, self.vectoradapter.vectorstore)
+            documents = self._reduce_code_duplicates(documents, collection_suffix) if is_code \
+                else self._reduce_non_code_duplicates(documents, collection_suffix)
 
         if not documents or len(documents) == 0:
             logger.info("No new documents to index after duplicate check.")
@@ -498,6 +612,15 @@ class VectorStoreWrapper(BaseToolApiWrapper):
         logger.debug(self.vectoradapter)
 
         documents = documents + list(dependent_docs_generator)
+
+        # if collection_suffix is provided, add it to metadata of each document
+        if collection_suffix:
+            for doc in documents:
+                if not doc.metadata.get('collection'):
+                    doc.metadata['collection'] = collection_suffix
+                else:
+                    doc.metadata['collection'] += f";{collection_suffix}"
+
         total_docs = len(documents)
         documents_count = 0
         _documents = []
@@ -511,8 +634,7 @@ class VectorStoreWrapper(BaseToolApiWrapper):
             try:
                 _documents.append(document)
                 if len(_documents) >= self.max_docs_per_add:
-                    add_documents(vectorstore=self.vectoradapter.vectorstore, documents=_documents)
-                    self.vectoradapter.persist()
+                    add_documents(vectorstore=self.vectorstore, documents=_documents)
                     _documents = []
 
                 percent = math.floor((documents_count / total_docs) * 100)
@@ -526,8 +648,7 @@ class VectorStoreWrapper(BaseToolApiWrapper):
                 logger.error(f"Error: {format_exc()}")
                 return {"status": "error", "message": f"Error: {format_exc()}"}
         if _documents:
-            add_documents(vectorstore=self.vectoradapter.vectorstore, documents=_documents)
-            self.vectoradapter.persist()
+            add_documents(vectorstore=self.vectorstore, documents=_documents)
         return {"status": "ok", "message": f"successfully indexed {documents_count} documents"}
 
     def search_documents(self, query:str, doctype: str = 'code',
@@ -562,7 +683,7 @@ class VectorStoreWrapper(BaseToolApiWrapper):
             }
 
         try:
-            document_items = self.vectoradapter.vectorstore.similarity_search_with_score(
+            document_items = self.vectorstore.similarity_search_with_score(
                 query, filter=document_filter, k=search_top
             )
             # Add document results to unique docs
@@ -595,7 +716,7 @@ class VectorStoreWrapper(BaseToolApiWrapper):
             }
 
         try:
-            chunk_items = self.vectoradapter.vectorstore.similarity_search_with_score(
+            chunk_items = self.vectorstore.similarity_search_with_score(
                 query, filter=chunk_filter, k=search_top
             )
 
@@ -628,7 +749,7 @@ class VectorStoreWrapper(BaseToolApiWrapper):
             }
 
         try:
-            fetch_items = self.vectoradapter.vectorstore.similarity_search_with_score(
+            fetch_items = self.vectorstore.similarity_search_with_score(
                 query, filter=doc_filter, k=1
             )
             if fetch_items:
@@ -642,7 +763,7 @@ class VectorStoreWrapper(BaseToolApiWrapper):
         else:
             # Default search behavior (unchanged)
             max_search_results = 30 if search_top * 3 > 30 else search_top * 3
-            vector_items = self.vectoradapter.vectorstore.similarity_search_with_score(
+            vector_items = self.vectorstore.similarity_search_with_score(
                 query, filter=filter, k=max_search_results
             )
 

--- alita_sdk/tools/elitea_base.py
+++ alita_sdk/tools/elitea_base.py
@@ -30,13 +30,13 @@ LoaderSchema = create_model(
 # Base Vector Store Schema Models
 BaseIndexParams = create_model(
     "BaseIndexParams",
-    collection_suffix=(Optional[str], Field(description="Optional suffix for collection name (max 7 characters)", default="", max_length=7)),
+    collection_suffix=(str, Field(description="Suffix for collection name (max 7 characters) used to separate datasets", min_length=1, max_length=7)),
     vectorstore_type=(Optional[str], Field(description="Vectorstore type (Chroma, PGVector, Elastic, etc.)", default="PGVector")),
 )
 
 BaseCodeIndexParams = create_model(
     "BaseCodeIndexParams",
-    collection_suffix=(Optional[str], Field(description="Optional suffix for collection name (max 7 characters)", default="", max_length=7)),
+    collection_suffix=(str, Field(description="Suffix for collection name (max 7 characters) used to separate datasets", min_length=1, max_length=7)),
     vectorstore_type=(Optional[str], Field(description="Vectorstore type (Chroma, PGVector, Elastic, etc.)", default="PGVector")),
     branch=(Optional[str], Field(description="Branch to index files from. Defaults to active branch if None.", default=None)),
     whitelist=(Optional[List[str]], Field(description="File extensions or paths to include. Defaults to all files if None.", default=None)),
@@ -51,7 +51,9 @@ RemoveIndexParams = create_model(
 BaseSearchParams = create_model(
     "BaseSearchParams",
     query=(str, Field(description="Query text to search in the index")),
-    collection_suffix=(Optional[str], Field(description="Optional suffix for collection name (max 7 characters)", default="", max_length=7)),
+    collection_suffix=(Optional[str], Field(
+        description="Optional suffix for collection name (max 7 characters). Leave empty to search across all datasets",
+        default="", max_length=7)),
     vectorstore_type=(Optional[str], Field(description="Vectorstore type (Chroma, PGVector, Elastic, etc.)", default="PGVector")),
     filter=(Optional[dict | str], Field(
         description="Filter to apply to the search results. Can be a dictionary or a JSON string.",
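
Note: `collection_suffix` changes from optional to required (1-7 characters) on the index schemas above. A quick sketch of how those Field constraints behave under pydantic v2 (the model name and values are illustrative):

    from pydantic import Field, ValidationError, create_model

    Params = create_model(
        "Params",
        collection_suffix=(str, Field(min_length=1, max_length=7)),
    )

    Params(collection_suffix="docs1")  # accepted
    try:
        Params(collection_suffix="toolongname")  # 11 characters > 7
    except ValidationError as err:
        print(err.errors()[0]["type"])  # string_too_long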
@@ -219,6 +221,7 @@ class BaseVectorStoreToolApiWrapper(BaseToolApiWrapper):
     embedding_model: Optional[str] = "HuggingFaceEmbeddings"
     embedding_model_params: Optional[Dict[str, Any]] = {"model_name": "sentence-transformers/all-MiniLM-L6-v2"}
     vectorstore_type: Optional[str] = "PGVector"
+    _vector_store: Optional[Any] = None
 
     def __init__(self, **kwargs):
         super().__init__(**kwargs)
@@ -297,9 +300,9 @@ class BaseVectorStoreToolApiWrapper(BaseToolApiWrapper):
         collection_suffix = kwargs.get("collection_suffix")
         progress_step = kwargs.get("progress_step")
         clean_index = kwargs.get("clean_index")
-        vs = self._init_vector_store(collection_suffix, embeddings=embedding)
+        vs = self._init_vector_store(embeddings=embedding)
         #
-        return vs.index_documents(docs, progress_step=progress_step, clean_index=clean_index)
+        return vs.index_documents(docs, collection_suffix=collection_suffix, progress_step=progress_step, clean_index=clean_index)
 
     def _process_documents(self, documents: List[Document]) -> Generator[Document, None, None]:
         """
@@ -333,42 +336,31 @@ class BaseVectorStoreToolApiWrapper(BaseToolApiWrapper):
 
 
     # TODO: init store once and re-use the instance
-    def _init_vector_store(self, collection_suffix: str = "", embeddings: Optional[Any] = None):
+    def _init_vector_store(self, embeddings: Optional[Any] = None):
         """Initializes the vector store wrapper with the provided parameters."""
         try:
             from alita_sdk.runtime.tools.vectorstore import VectorStoreWrapper
         except ImportError:
             from alita_sdk.runtime.tools.vectorstore import VectorStoreWrapper
 
-        # Validate collection_suffix length
-        if collection_suffix and len(collection_suffix.strip()) > 7:
-            raise ToolException("collection_suffix must be 7 characters or less")
-
-        # Create collection name with suffix if provided
-        collection_name = str(self.collection_name)
-        if collection_suffix and collection_suffix.strip():
-            collection_name = f"{self.collection_name}_{collection_suffix.strip()}"
-
-        # Get database-specific parameters using adapter
-        connection_string = self.connection_string.get_secret_value() if self.connection_string else None
-        vectorstore_params = self._adapter.get_vectorstore_params(collection_name, connection_string)
-
-        return VectorStoreWrapper(
-            llm=self.llm,
-            vectorstore_type=self.vectorstore_type,
-            embedding_model=self.embedding_model,
-            embedding_model_params=self.embedding_model_params,
-            vectorstore_params=vectorstore_params,
-            embeddings=embeddings,
-            process_document_func=self._process_documents,
-        )
+        if not self._vector_store:
+            connection_string = self.connection_string.get_secret_value() if self.connection_string else None
+            vectorstore_params = self._adapter.get_vectorstore_params(self.collection_name, connection_string)
+            self._vector_store = VectorStoreWrapper(
+                llm=self.llm,
+                vectorstore_type=self.vectorstore_type,
+                embedding_model=self.embedding_model,
+                embedding_model_params=self.embedding_model_params,
+                vectorstore_params=vectorstore_params,
+                embeddings=embeddings,
+                process_document_func=self._process_documents,
+            )
+        return self._vector_store
 
     def remove_index(self, collection_suffix: str = ""):
         """Cleans the indexed data in the collection."""
-        vectorstore_wrapper = self._init_vector_store(collection_suffix)
-        collection_name = f"{self.collection_name}_{collection_suffix}" if collection_suffix else str(self.collection_name)
-        self._adapter.remove_collection(vectorstore_wrapper, collection_name)
-        return (f"Collection '{collection_name}' has been removed from the vector store.\n"
+        self._init_vector_store()._clean_collection(collection_suffix=collection_suffix)
+        return (f"Collection '{collection_suffix}' has been removed from the vector store.\n"
                 f"Available collections: {self.list_collections()}")
 
     def list_collections(self):
@@ -386,7 +378,14 @@ class BaseVectorStoreToolApiWrapper(BaseToolApiWrapper):
                    extended_search: Optional[List[str]] = None,
                    **kwargs):
         """ Searches indexed documents in the vector store."""
-        vectorstore = self._init_vector_store(collection_suffix)
+        vectorstore = self._init_vector_store()
+        # build filter on top of collection_suffix
+        filter = filter if isinstance(filter, dict) else json.loads(filter)
+        if collection_suffix:
+            filter.update({"collection": {
+                "$eq": collection_suffix.strip()
+            }})
+
         found_docs = vectorstore.search_documents(
             query,
             doctype=self.doctype,
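
Note: search no longer selects a per-suffix collection; it narrows results with a metadata filter instead. A short sketch of the filter that gets built (values are illustrative):

    import json

    collection_suffix = "docs1"
    filter = '{"source": {"$eq": "confluence"}}'  # may arrive as a JSON string

    filter = filter if isinstance(filter, dict) else json.loads(filter)
    if collection_suffix:
        filter.update({"collection": {"$eq": collection_suffix.strip()}})

    print(filter)
    # {'source': {'$eq': 'confluence'}, 'collection': {'$eq': 'docs1'}}

Since `$eq` compares the stored string exactly, a document whose `collection` metadata has grown to `docs1;code2` via the `;`-append path would not, on its own, match the suffix `docs1`.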
@@ -579,22 +578,20 @@ class BaseCodeToolApiWrapper(BaseVectorStoreToolApiWrapper):
         return parse_code_files_for_db(file_content_generator())
 
     def index_data(self,
+                   collection_suffix: str,
                    branch: Optional[str] = None,
                    whitelist: Optional[List[str]] = None,
                    blacklist: Optional[List[str]] = None,
-                   collection_suffix: str = "",
                    **kwargs) -> str:
         """Index repository files in the vector store using code parsing."""
 
-
-
         documents = self.loader(
             branch=branch,
             whitelist=whitelist,
             blacklist=blacklist
         )
-        vectorstore = self._init_vector_store(collection_suffix)
-        return vectorstore.index_documents(documents, clean_index=False, is_code=True)
+        vectorstore = self._init_vector_store()
+        return vectorstore.index_documents(documents, collection_suffix=collection_suffix, clean_index=False, is_code=True)
 
     def _get_vector_search_tools(self):
         """

--- alita_sdk/tools/vector_adapters/VectorStoreAdapter.py
+++ alita_sdk/tools/vector_adapters/VectorStoreAdapter.py
@@ -1,5 +1,8 @@
 from abc import ABC, abstractmethod
 from typing import Any, Dict, Optional
+from logging import getLogger
+
+logger = getLogger(__name__)
 
 
 class VectorStoreAdapter(ABC):
@@ -36,19 +39,23 @@ class PGVectorAdapter(VectorStoreAdapter):
         }
 
     def list_collections(self, vectorstore_wrapper, collection_name) -> str:
-        from sqlalchemy import text
+        from sqlalchemy import func
         from sqlalchemy.orm import Session
 
-        with Session(vectorstore_wrapper.vectorstore.session_maker.bind) as session:
-            get_collections = text(f"""
-                SELECT table_schema
-                FROM information_schema.columns
-                WHERE udt_name = 'vector'
-                AND table_schema LIKE '%{collection_name}%';
-            """)
-            result = session.execute(get_collections)
-            docs = result.fetchall()
-            return str(docs)
+        store = vectorstore_wrapper.vectorstore
+        try:
+            with Session(store.session_maker.bind) as session:
+                collections = (
+                    session.query(
+                        func.distinct(func.jsonb_extract_path_text(store.EmbeddingStore.cmetadata, 'collection'))
+                    )
+                    .filter(store.EmbeddingStore.cmetadata.isnot(None))
+                    .all()
+                )
+                return [collection[0] for collection in collections if collection[0] is not None]
+        except Exception as e:
+            logger.error(f"Failed to get unique collections from PGVector: {str(e)}")
+            return []
 
     def remove_collection(self, vectorstore_wrapper, collection_name: str):
         vectorstore_wrapper._remove_collection()

--- alita_sdk-0.3.230.dist-info/METADATA
+++ alita_sdk-0.3.231.dist-info/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: alita_sdk
-Version: 0.3.230
+Version: 0.3.231
 Summary: SDK for building langchain agents using resources from Alita
 Author-email: Artem Rozumenko <artyom.rozumenko@gmail.com>, Mikalai Biazruchka <mikalai_biazruchka@epam.com>, Roman Mitusov <roman_mitusov@epam.com>, Ivan Krakhmaliuk <lifedjik@gmail.com>, Artem Dubrovskiy <ad13box@gmail.com>
 License-Expression: Apache-2.0

--- alita_sdk-0.3.230.dist-info/RECORD
+++ alita_sdk-0.3.231.dist-info/RECORD
@@ -100,7 +100,7 @@ alita_sdk/runtime/tools/pgvector_search.py,sha256=NN2BGAnq4SsDHIhUcFZ8d_dbEOM8Qw
 alita_sdk/runtime/tools/prompt.py,sha256=nJafb_e5aOM1Rr3qGFCR-SKziU9uCsiP2okIMs9PppM,741
 alita_sdk/runtime/tools/router.py,sha256=wCvZjVkdXK9dMMeEerrgKf5M790RudH68pDortnHSz0,1517
 alita_sdk/runtime/tools/tool.py,sha256=lE1hGi6qOAXG7qxtqxarD_XMQqTghdywf261DZawwno,5631
-alita_sdk/runtime/tools/vectorstore.py,sha256=ItOkyorjusvoyZQcszs72FzDgo2ri9Xh3yWFhA4wdKM,38549
+alita_sdk/runtime/tools/vectorstore.py,sha256=R6M6emjP7VUkXwufI_tfTicx4EKn-lZwxQ16-WzIwMA,44557
 alita_sdk/runtime/utils/AlitaCallback.py,sha256=E4LlSBuCHWiUq6W7IZExERHZY0qcmdjzc_rJlF2iQIw,7356
 alita_sdk/runtime/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 alita_sdk/runtime/utils/constants.py,sha256=Xntx1b_uxUzT4clwqHA_U6K8y5bBqf_4lSQwXdcWrp4,13586
@@ -112,7 +112,7 @@ alita_sdk/runtime/utils/toolkit_runtime.py,sha256=MU63Fpxj0b5_r1IUUc0Q3-PN9VwL7r
 alita_sdk/runtime/utils/toolkit_utils.py,sha256=I9QFqnaqfVgN26LUr6s3XlBlG6y0CoHURnCzG7XcwVs,5311
 alita_sdk/runtime/utils/utils.py,sha256=CpEl3LCeLbhzQySz08lkKPm7Auac6IiLF7WB8wmArMI,589
 alita_sdk/tools/__init__.py,sha256=1AHqP2xyLjn92xVm70l9XIke6FkfHkLo5OoQVe4BuP8,10421
-alita_sdk/tools/elitea_base.py,sha256=22P97EmUlO-eC8DoGPiSNjHNjTv8ru0Q05qnR3U9WWs,30572
+alita_sdk/tools/elitea_base.py,sha256=Wg9HqeWlsJ_R5--_Xfg7bm8rqKd68aqHm4a1sKAvahI,30365
 alita_sdk/tools/ado/__init__.py,sha256=j4lt6MLWlpkIIVkHmAyVG3i_qQeQ3ZmL_g8BfMhVhVI,1289
 alita_sdk/tools/ado/utils.py,sha256=PTCludvaQmPLakF2EbCGy66Mro4-rjDtavVP-xcB2Wc,1252
 alita_sdk/tools/ado/repos/__init__.py,sha256=kc4ZJI3B9CDUp4q3jRSj7JZNc3fJwwMTsV40CiKO7Po,6111
@@ -304,7 +304,7 @@ alita_sdk/tools/testrail/__init__.py,sha256=577XVaOAoXG3mDkojCsy5XCUlxCsdJf_2-_5
 alita_sdk/tools/testrail/api_wrapper.py,sha256=Aax0jspgidXYNxLIw6qTWu3dO2JOIS0ALIqsCzQuFbQ,32087
 alita_sdk/tools/utils/__init__.py,sha256=155xepXPr4OEzs2Mz5YnjXcBpxSv1X2eznRUVoPtyK0,3268
 alita_sdk/tools/utils/content_parser.py,sha256=yi1IDLreqfM41w-PnoFEvVLtSV50qpNvKshJwbDTgqs,7172
-alita_sdk/tools/vector_adapters/VectorStoreAdapter.py,sha256=wZ_MhVWPEgoPhKRvbPB6Qs34EPyWx0IW7ydY_GVBZFQ,3344
+alita_sdk/tools/vector_adapters/VectorStoreAdapter.py,sha256=KhxojgddWlEQ4TZA7jEL5ZEp86PcXfmfgRXixsjj7-M,3634
 alita_sdk/tools/vector_adapters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 alita_sdk/tools/xray/__init__.py,sha256=OYa1wveTm-lAhsJaGXMnwOrDQWl6ch--NjNLBeR63eM,4331
 alita_sdk/tools/xray/api_wrapper.py,sha256=A8PJmY2k7TowaD_vk6ZxkMnSUoZUt9A6g4TJrZfNTAw,32225
@@ -325,8 +325,8 @@ alita_sdk/tools/zephyr_scale/api_wrapper.py,sha256=JAeWf-RXohsxheUpT0iMDClc_izj-
 alita_sdk/tools/zephyr_squad/__init__.py,sha256=0AI_j27xVO5Gk5HQMFrqPTd4uvuVTpiZUicBrdfEpKg,2796
 alita_sdk/tools/zephyr_squad/api_wrapper.py,sha256=kmw_xol8YIYFplBLWTqP_VKPRhL_1ItDD0_vXTe_UuI,14906
 alita_sdk/tools/zephyr_squad/zephyr_squad_cloud_client.py,sha256=R371waHsms4sllHCbijKYs90C-9Yu0sSR3N4SUfQOgU,5066
-alita_sdk-0.3.230.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-alita_sdk-0.3.230.dist-info/METADATA,sha256=jFOYe8oP7YbfYBFrKSoV_BtsZYIcsH2RPMmiQ-X2-tw,18896
-alita_sdk-0.3.230.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-alita_sdk-0.3.230.dist-info/top_level.txt,sha256=0vJYy5p_jK6AwVb1aqXr7Kgqgk3WDtQ6t5C-XI9zkmg,10
-alita_sdk-0.3.230.dist-info/RECORD,,
+alita_sdk-0.3.231.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+alita_sdk-0.3.231.dist-info/METADATA,sha256=8JHRCwh-SnyBtnD3wL4yJDUL1ov5fLXL1WUyfSIynf0,18896
+alita_sdk-0.3.231.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+alita_sdk-0.3.231.dist-info/top_level.txt,sha256=0vJYy5p_jK6AwVb1aqXr7Kgqgk3WDtQ6t5C-XI9zkmg,10
+alita_sdk-0.3.231.dist-info/RECORD,,