alita-sdk 0.3.231__py3-none-any.whl → 0.3.232__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
--- a/alita_sdk/runtime/tools/vectorstore.py
+++ b/alita_sdk/runtime/tools/vectorstore.py
@@ -8,6 +8,7 @@ from pydantic import BaseModel, model_validator, Field
 from ..langchain.tools.vector import VectorAdapter
 from langchain_core.messages import HumanMessage
 from alita_sdk.tools.elitea_base import BaseToolApiWrapper
+from alita_sdk.tools.vector_adapters.VectorStoreAdapter import VectorStoreAdapterFactory
 from logging import getLogger
 
 from ..utils.logging import dispatch_custom_event
@@ -141,11 +142,14 @@ class VectorStoreWrapper(BaseToolApiWrapper):
     dataset: str = None
     embedding: Any = None
     vectorstore: Any = None
+    # Review usage of old adapter
     vectoradapter: Any = None
     pg_helper: Any = None
     embeddings: Any = None
     process_document_func: Optional[Callable] = None
-
+    # New adapter for vector database operations
+    vector_adapter: Any = None
+
     @model_validator(mode='before')
     @classmethod
     def validate_toolkit(cls, values):
@@ -170,6 +174,8 @@ class VectorStoreWrapper(BaseToolApiWrapper):
             embeddings=values['embeddings'],
             quota_params=None,
         )
+        # Initialize the new vector adapter
+        values['vector_adapter'] = VectorStoreAdapterFactory.create_adapter(values['vectorstore_type'])
         logger.debug(f"Vectorstore wrapper initialized: {values}")
         return values
 
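The create_adapter call above is a plain factory dispatch on the vectorstore_type string. A minimal sketch of the pattern, assuming a simple class map (the real VectorStoreAdapterFactory lives in alita_sdk/tools/vector_adapters/VectorStoreAdapter.py, whose changes appear later in this diff; its registration details may differ):

    # Hedged sketch only: the mapping keys are assumptions inferred from the
    # PGVectorAdapter and ChromaAdapter classes shown later in this diff.
    class VectorStoreAdapterFactory:
        _adapters = {
            "PGVector": PGVectorAdapter,
            "Chroma": ChromaAdapter,
        }

        @classmethod
        def create_adapter(cls, vectorstore_type: str) -> VectorStoreAdapter:
            try:
                return cls._adapters[vectorstore_type]()
            except KeyError:
                raise ValueError(f"Unsupported vectorstore type: {vectorstore_type}")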
@@ -196,15 +202,7 @@ class VectorStoreWrapper(BaseToolApiWrapper):
             f"Remove collection '{self.dataset}'",
             tool_name="_remove_collection"
         )
-        from sqlalchemy import text
-        from sqlalchemy.orm import Session
-
-        schema_name = self.vectorstore.collection_name
-        with Session(self.vectorstore.session_maker.bind) as session:
-            drop_schema_query = text(f"DROP SCHEMA IF EXISTS {schema_name} CASCADE;")
-            session.execute(drop_schema_query)
-            session.commit()
-        logger.info(f"Schema '{schema_name}' has been dropped.")
+        self.vector_adapter.remove_collection(self, self.dataset)
         self._log_data(
             f"Collection '{self.dataset}' has been removed. ",
             tool_name="_remove_collection"
@@ -212,44 +210,12 @@ class VectorStoreWrapper(BaseToolApiWrapper):
 
     def _get_indexed_ids(self, collection_suffix: Optional[str] = '') -> List[str]:
         """Get all indexed document IDs from vectorstore"""
+        return self.vector_adapter.get_indexed_ids(self, collection_suffix)
 
-        # Check if this is a PGVector store
-        if self._is_pgvector():
-            return self._get_pgvector_indexed_ids(collection_suffix)
-        else:
-            # Fall back to Chroma implementation
-            # TODO: update filter by collection_suffix for Chroma
-            return self._get_chroma_indexed_ids(collection_suffix)
+    def list_collections(self) -> List[str]:
+        """List all collections in the vectorstore."""
 
-    def _get_pgvector_indexed_ids(self, collection_suffix: Optional[str] = ''):
-        """Get all indexed document IDs from PGVector"""
-        from sqlalchemy.orm import Session
-        from sqlalchemy import func
-
-        store = self.vectorstore
-        try:
-            with Session(store.session_maker.bind) as session:
-                # Start building the query
-                query = session.query(store.EmbeddingStore.id)
-                # Apply filter only if collection_suffix is provided
-                if collection_suffix:
-                    query = query.filter(
-                        func.jsonb_extract_path_text(store.EmbeddingStore.cmetadata, 'collection') == collection_suffix
-                    )
-                ids = query.all()
-                return [str(id_tuple[0]) for id_tuple in ids]
-        except Exception as e:
-            logger.error(f"Failed to get indexed IDs from PGVector: {str(e)}")
-            return []
-
-    def _get_chroma_indexed_ids(self, collection_suffix: Optional[str] = ''):
-        """Get all indexed document IDs from Chroma"""
-        try:
-            data = self.vectorstore.get(include=[])  # Only get IDs, no metadata
-            return data.get('ids', [])
-        except Exception as e:
-            logger.error(f"Failed to get indexed IDs from Chroma: {str(e)}")
-            return []
+        return self.vector_adapter.list_collections(self)
 
     def _clean_collection(self, collection_suffix: str = ''):
         """
@@ -259,227 +225,23 @@ class VectorStoreWrapper(BaseToolApiWrapper):
             f"Cleaning collection '{self.dataset}'",
             tool_name="_clean_collection"
         )
-        # This logic deletes all data from the vectorstore collection without removal of collection.
-        # Collection itself remains available for future indexing.
-        self.vectorstore.delete(ids=self._get_indexed_ids(collection_suffix))
-
+        self.vector_adapter.clean_collection(self, collection_suffix)
         self._log_data(
             f"Collection '{self.dataset}' has been cleaned. ",
             tool_name="_clean_collection"
         )
 
-    def _is_pgvector(self) -> bool:
-        """Check if the vectorstore is a PGVector store."""
-        return hasattr(self.vectorstore, 'session_maker') and hasattr(self.vectorstore, 'EmbeddingStore')
-
-    # TODO: refactor to use common method for different vectorstores in a separate vectorstore wrappers
     def _get_indexed_data(self):
         """ Get all indexed data from vectorstore for non-code content """
-
-        # Check if this is a PGVector store
-        if self._is_pgvector():
-            return self._get_pgvector_indexed_data()
-        else:
-            # Fall back to original Chroma implementation
-            return self._get_chroma_indexed_data(self.vectorstore)
-
-    def _get_pgvector_indexed_data(self):
-        """ Get all indexed data from PGVector for non-code content """
-        from sqlalchemy.orm import Session
-
-        result = {}
-        try:
-            self._log_data("Retrieving already indexed data from PGVector vectorstore",
-                           tool_name="get_indexed_data")
-            store = self.vectorstore
-            with Session(store.session_maker.bind) as session:
-                docs = session.query(
-                    store.EmbeddingStore.id,
-                    store.EmbeddingStore.document,
-                    store.EmbeddingStore.cmetadata
-                ).all()
-
-            # Process the retrieved data
-            for doc in docs:
-                db_id = doc.id
-                meta = doc.cmetadata or {}
-
-                # Get document id from metadata
-                doc_id = str(meta.get('id', db_id))
-                dependent_docs = meta.get(IndexerKeywords.DEPENDENT_DOCS.value, [])
-                if dependent_docs:
-                    dependent_docs = [d.strip() for d in dependent_docs.split(';') if d.strip()]
-                parent_id = meta.get(IndexerKeywords.PARENT.value, -1)
-
-                chunk_id = meta.get('chunk_id')
-                if doc_id in result and chunk_id:
-                    # If document with the same id already saved, add db_id for current one as chunk
-                    result[doc_id]['all_chunks'].append(db_id)
-                else:
-                    result[doc_id] = {
-                        'metadata': meta,
-                        'id': db_id,
-                        'all_chunks': [db_id],
-                        IndexerKeywords.DEPENDENT_DOCS.value: dependent_docs,
-                        IndexerKeywords.PARENT.value: parent_id
-                    }
-
-        except Exception as e:
-            logger.error(f"Failed to get indexed data from PGVector: {str(e)}. Continuing with empty index.")
-
-        return result
-
-    def _get_chroma_indexed_data(self, store):
-        """ Get all indexed data from Chroma for non-code content """
-        result = {}
-        try:
-            self._log_data("Retrieving already indexed data from Chroma vectorstore",
-                           tool_name="get_indexed_data")
-            data = store.get(include=['metadatas'])
-
-            # Re-structure data to be more usable
-            for meta, db_id in zip(data['metadatas'], data['ids']):
-                # Get document id from metadata
-                doc_id = str(meta['id'])
-                dependent_docs = meta.get(IndexerKeywords.DEPENDENT_DOCS.value, [])
-                if dependent_docs:
-                    dependent_docs = [d.strip() for d in dependent_docs.split(';') if d.strip()]
-                parent_id = meta.get(IndexerKeywords.PARENT.value, -1)
-
-                chunk_id = meta.get('chunk_id')
-                if doc_id in result and chunk_id:
-                    # If document with the same id already saved, add db_id for current one as chunk
-                    result[doc_id]['all_chunks'].append(db_id)
-                else:
-                    result[doc_id] = {
-                        'metadata': meta,
-                        'id': db_id,
-                        'all_chunks': [db_id],
-                        IndexerKeywords.DEPENDENT_DOCS.value: dependent_docs,
-                        IndexerKeywords.PARENT.value: parent_id
-                    }
-        except Exception as e:
-            logger.error(f"Failed to get indexed data from Chroma: {str(e)}. Continuing with empty index.")
-
-        return result
+        return self.vector_adapter.get_indexed_data(self)
 
     def _get_code_indexed_data(self) -> Dict[str, Dict[str, Any]]:
         """ Get all indexed data from vectorstore for code content """
-
-        # get already indexed data
-        if self._is_pgvector():
-            result = self._get_pgvector_code_indexed_data()
-        else:
-            result = self._get_chroma_code_indexed_data()
-        return result
-
-    def _get_chroma_code_indexed_data(self) -> Dict[str, Dict[str, Any]]:
-        """Get all indexed code data from Chroma."""
-        result = {}
-        try:
-            self._log_data("Retrieving already indexed code data from Chroma vectorstore",
-                           tool_name="index_code_data")
-            data = self.vectorstore.get(include=['metadatas'])
-            for meta, db_id in zip(data['metadatas'], data['ids']):
-                filename = meta.get('filename')
-                commit_hash = meta.get('commit_hash')
-                if not filename:
-                    continue
-                if filename not in result:
-                    result[filename] = {
-                        'commit_hashes': [],
-                        'ids': []
-                    }
-                if commit_hash is not None:
-                    result[filename]['commit_hashes'].append(commit_hash)
-                result[filename]['ids'].append(db_id)
-        except Exception as e:
-            logger.error(f"Failed to get indexed code data from Chroma: {str(e)}. Continuing with empty index.")
-        return result
-
-    def _get_pgvector_code_indexed_data(self) -> Dict[str, Dict[str, Any]]:
-        """Get all indexed code data from PGVector."""
-        from sqlalchemy.orm import Session
-
-        result = {}
-        try:
-            self._log_data("Retrieving already indexed code data from PGVector vectorstore",
-                           tool_name="index_code_data")
-            store = self.vectorstore
-            with Session(store.session_maker.bind) as session:
-                docs = session.query(
-                    store.EmbeddingStore.id,
-                    store.EmbeddingStore.cmetadata
-                ).all()
-
-            for db_id, meta in docs:
-                filename = meta.get('filename')
-                commit_hash = meta.get('commit_hash')
-                if not filename:
-                    continue
-                if filename not in result:
-                    result[filename] = {
-                        'metadata': meta,
-                        'commit_hashes': [],
-                        'ids': []
-                    }
-                if commit_hash is not None:
-                    result[filename]['commit_hashes'].append(commit_hash)
-                result[filename]['ids'].append(db_id)
-        except Exception as e:
-            logger.error(f"Failed to get indexed code data from PGVector: {str(e)}. Continuing with empty index.")
-        return result
-
+        return self.vector_adapter.get_code_indexed_data(self)
 
     def _add_to_collection(self, entry_id, new_collection_value):
         """Add a new collection name to the `collection` key in the `metadata` column."""
-
-        from sqlalchemy import func
-        from sqlalchemy.orm import Session
-
-        store = self.vectorstore
-        try:
-            with Session(store.session_maker.bind) as session:
-                # Query the current value of the `collection` key
-                current_collection_query = session.query(
-                    func.jsonb_extract_path_text(store.EmbeddingStore.cmetadata, 'collection')
-                ).filter(store.EmbeddingStore.id == entry_id).scalar()
-
-                # If the `collection` key is NULL or doesn't contain the new value, update it
-                if current_collection_query is None:
-                    # If `collection` is NULL, initialize it with the new value
-                    session.query(store.EmbeddingStore).filter(
-                        store.EmbeddingStore.id == entry_id
-                    ).update(
-                        {
-                            store.EmbeddingStore.cmetadata: func.jsonb_set(
-                                func.coalesce(store.EmbeddingStore.cmetadata, '{}'),
-                                '{collection}',  # Path to the `collection` key
-                                f'"{new_collection_value}"',  # New value for the `collection` key
-                                True  # Create the key if it doesn't exist
-                            )
-                        }
-                    )
-                elif new_collection_value not in current_collection_query.split(";"):
-                    # If `collection` exists but doesn't contain the new value, append it
-                    updated_collection_value = f"{current_collection_query};{new_collection_value}"
-                    session.query(store.EmbeddingStore).filter(
-                        store.EmbeddingStore.id == entry_id
-                    ).update(
-                        {
-                            store.EmbeddingStore.cmetadata: func.jsonb_set(
-                                store.EmbeddingStore.cmetadata,
-                                '{collection}',  # Path to the `collection` key
-                                f'"{updated_collection_value}"',  # Concatenated value as a valid JSON string
-                                True  # Create the key if it doesn't exist
-                            )
-                        }
-                    )
-
-                session.commit()
-                logger.info(f"Successfully updated collection for entry ID {entry_id}.")
-        except Exception as e:
-            logger.error(f"Failed to update collection for entry ID {entry_id}: {str(e)}")
+        self.vector_adapter.add_to_collection(self, entry_id, new_collection_value)
 
     def _reduce_duplicates(
         self,
@@ -983,4 +745,5 @@ class VectorStoreWrapper(BaseToolApiWrapper):
             "description": "Get summary of search results using stepback technique",
             "args_schema": StepBackSearchDocumentsModel
         }
-    ]
+    ]
+
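Taken together, the hunks above strip every PGVector/Chroma branch out of VectorStoreWrapper and replace it with one-line delegation to a backend-specific adapter object (a strategy pattern): the wrapper keeps its public surface while the adapter owns the storage details. An illustrative call flow, assuming a minimal configuration (the wrapper's validator requires more fields than shown here):

    # Illustrative only: constructor arguments are placeholders, not a working config.
    wrapper = VectorStoreWrapper(
        vectorstore_type="PGVector",   # mapped to PGVectorAdapter by the factory
        dataset="my_collection",
        embeddings=embeddings,         # assumed to be prepared elsewhere
    )
    ids = wrapper._get_indexed_ids()    # forwards to vector_adapter.get_indexed_ids(wrapper, '')
    wrapper._clean_collection()         # forwards to vector_adapter.clean_collection(wrapper, '')
    names = wrapper.list_collections()  # forwards to vector_adapter.list_collections(wrapper)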
--- a/alita_sdk/tools/elitea_base.py
+++ b/alita_sdk/tools/elitea_base.py
@@ -366,7 +366,7 @@ class BaseVectorStoreToolApiWrapper(BaseToolApiWrapper):
     def list_collections(self):
         """Lists all collections in the vector store."""
         vectorstore_wrapper = self._init_vector_store()
-        return self._adapter.list_collections(vectorstore_wrapper, self.collection_name or "")
+        return vectorstore_wrapper.list_collections()
 
     def search_index(self,
                      query: str,
@@ -498,6 +498,8 @@ class BaseVectorStoreToolApiWrapper(BaseToolApiWrapper):
 
 class BaseCodeToolApiWrapper(BaseVectorStoreToolApiWrapper):
 
+    doctype: Optional[str] = 'code'
+
     def _get_files(self):
         raise NotImplementedError("Subclasses should implement this method")
 
--- a/alita_sdk/tools/github/api_wrapper.py
+++ b/alita_sdk/tools/github/api_wrapper.py
@@ -37,9 +37,9 @@ class AlitaGitHubAPIWrapper(BaseCodeToolApiWrapper):
     Wrapper for GitHub API that integrates both REST and GraphQL functionality.
     """
     # Authentication config
-    github_access_token: Optional[str] = None
-    github_username: Optional[str] = None
-    github_password: Optional[str] = None
+    github_access_token: Optional[SecretStr] = None
+    github_username: Optional[SecretStr] = None
+    github_password: Optional[SecretStr] = None
     github_app_id: Optional[str] = None
     github_app_private_key: Optional[str] = None
     github_base_url: Optional[str] = None
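Typing the token, username, and password as SecretStr keeps them out of logs: pydantic masks SecretStr values in repr() and serialized output, and the raw value is only reachable via get_secret_value(). A standalone illustration of that behavior (not code from this package):

    from typing import Optional
    from pydantic import BaseModel, SecretStr

    class Creds(BaseModel):
        github_access_token: Optional[SecretStr] = None

    c = Creds(github_access_token="ghp_example")
    print(c)                                         # github_access_token=SecretStr('**********')
    print(c.github_access_token.get_secret_value())  # ghp_example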
@@ -49,19 +49,9 @@ class AlitaGitHubAPIWrapper(BaseCodeToolApiWrapper):
     active_branch: Optional[str] = None
     github_base_branch: Optional[str] = None
 
-    # Add LLM instance
-    llm: Optional[Any] = None
     # Alita instance
     alita: Optional[Any] = None
 
-    # Vector store configuration
-    connection_string: Optional[SecretStr] = None
-    collection_name: Optional[str] = None
-    doctype: Optional[str] = 'code'  # GitHub uses 'code' doctype
-    embedding_model: Optional[str] = "HuggingFaceEmbeddings"
-    embedding_model_params: Optional[Dict[str, Any]] = {"model_name": "sentence-transformers/all-MiniLM-L6-v2"}
-    vectorstore_type: Optional[str] = "PGVector"
-
     # Client instances - renamed without leading underscores and marked as exclude=True
     github_client_instance: Optional[GitHubClient] = Field(default=None, exclude=True)
     graphql_client_instance: Optional[GraphQLClientWrapper] = Field(default=None, exclude=True)
@@ -84,12 +74,12 @@ class AlitaGitHubAPIWrapper(BaseCodeToolApiWrapper):
         from langchain.utils import get_from_dict_or_env
 
         # Get all authentication values
-        github_access_token = get_from_dict_or_env(values, "github_access_token", "GITHUB_ACCESS_TOKEN", default='')
-        github_username = get_from_dict_or_env(values, "github_username", "GITHUB_USERNAME", default='')
-        github_password = get_from_dict_or_env(values, "github_password", "GITHUB_PASSWORD", default='')
-        github_app_id = get_from_dict_or_env(values, "github_app_id", "GITHUB_APP_ID", default='')
-        github_app_private_key = get_from_dict_or_env(values, "github_app_private_key", "GITHUB_APP_PRIVATE_KEY", default='')
-        github_base_url = get_from_dict_or_env(values, "github_base_url", "GITHUB_BASE_URL", default='https://api.github.com')
+        github_access_token = get_from_dict_or_env(values, ["access_token", "github_access_token"], "GITHUB_ACCESS_TOKEN", default='')
+        github_username = get_from_dict_or_env(values, ["username", "github_username"], "GITHUB_USERNAME", default='')
+        github_password = get_from_dict_or_env(values, ["password", "github_password"], "GITHUB_PASSWORD", default='')
+        github_app_id = get_from_dict_or_env(values, ["app_id", "github_app_id"], "GITHUB_APP_ID", default='')
+        github_app_private_key = get_from_dict_or_env(values, ["app_private_key", "github_app_private_key"], "GITHUB_APP_PRIVATE_KEY", default='')
+        github_base_url = get_from_dict_or_env(values, ["base_url", "github_base_url"], "GITHUB_BASE_URL", default='https://api.github.com')
 
         auth_config = GitHubAuthConfig(
             github_access_token=github_access_token,
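Passing a list of keys to get_from_dict_or_env lets the validator accept both the short config names (access_token, username, and so on) and the older github_-prefixed ones: the dict keys are tried in order before falling back to the environment variable and then the default. A small illustration (values are placeholders):

    from langchain.utils import get_from_dict_or_env

    values = {"access_token": "ghp_example"}  # short key, as a toolkit config might pass it
    token = get_from_dict_or_env(
        values,
        ["access_token", "github_access_token"],  # tried in order
        "GITHUB_ACCESS_TOKEN",                    # environment fallback
        default="",
    )
    assert token == "ghp_example"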
--- a/alita_sdk/tools/vector_adapters/VectorStoreAdapter.py
+++ b/alita_sdk/tools/vector_adapters/VectorStoreAdapter.py
@@ -1,5 +1,5 @@
 from abc import ABC, abstractmethod
-from typing import Any, Dict, Optional
+from typing import Any, Dict, Optional, List
 from logging import getLogger
 
 logger = getLogger(__name__)
@@ -23,6 +23,31 @@ class VectorStoreAdapter(ABC):
         """Remove a collection from the vector store."""
         pass
 
+    @abstractmethod
+    def get_indexed_ids(self, vectorstore_wrapper, collection_suffix: Optional[str] = '') -> List[str]:
+        """Get all indexed document IDs from vectorstore"""
+        pass
+
+    @abstractmethod
+    def clean_collection(self, vectorstore_wrapper, collection_suffix: str = ''):
+        """Clean the vectorstore collection by deleting all indexed data."""
+        pass
+
+    @abstractmethod
+    def get_indexed_data(self, vectorstore_wrapper):
+        """Get all indexed data from vectorstore for non-code content"""
+        pass
+
+    @abstractmethod
+    def get_code_indexed_data(self, vectorstore_wrapper) -> Dict[str, Dict[str, Any]]:
+        """Get all indexed data from vectorstore for code content"""
+        pass
+
+    @abstractmethod
+    def add_to_collection(self, vectorstore_wrapper, entry_id, new_collection_value):
+        """Add a new collection name to the metadata"""
+        pass
+
 
 class PGVectorAdapter(VectorStoreAdapter):
     """Adapter for PGVector database operations."""
@@ -38,7 +63,7 @@ class PGVectorAdapter(VectorStoreAdapter):
             "connection_string": connection_string
         }
 
-    def list_collections(self, vectorstore_wrapper, collection_name) -> str:
+    def list_collections(self, vectorstore_wrapper) -> str:
         from sqlalchemy import func
         from sqlalchemy.orm import Session
 
@@ -58,7 +83,175 @@ class PGVectorAdapter(VectorStoreAdapter):
             return []
 
     def remove_collection(self, vectorstore_wrapper, collection_name: str):
-        vectorstore_wrapper._remove_collection()
+        from sqlalchemy import text
+        from sqlalchemy.orm import Session
+
+        schema_name = vectorstore_wrapper.vectorstore.collection_name
+        with Session(vectorstore_wrapper.vectorstore.session_maker.bind) as session:
+            drop_schema_query = text(f"DROP SCHEMA IF EXISTS {schema_name} CASCADE;")
+            session.execute(drop_schema_query)
+            session.commit()
+        logger.info(f"Schema '{schema_name}' has been dropped.")
+
+    def get_indexed_ids(self, vectorstore_wrapper, collection_suffix: Optional[str] = '') -> List[str]:
+        """Get all indexed document IDs from PGVector"""
+        from sqlalchemy.orm import Session
+        from sqlalchemy import func
+
+        store = vectorstore_wrapper.vectorstore
+        try:
+            with Session(store.session_maker.bind) as session:
+                # Start building the query
+                query = session.query(store.EmbeddingStore.id)
+                # Apply filter only if collection_suffix is provided
+                if collection_suffix:
+                    query = query.filter(
+                        func.jsonb_extract_path_text(store.EmbeddingStore.cmetadata, 'collection') == collection_suffix
+                    )
+                ids = query.all()
+                return [str(id_tuple[0]) for id_tuple in ids]
+        except Exception as e:
+            logger.error(f"Failed to get indexed IDs from PGVector: {str(e)}")
+            return []
+
+    def clean_collection(self, vectorstore_wrapper, collection_suffix: str = ''):
+        """Clean the vectorstore collection by deleting all indexed data."""
+        # This logic deletes all data from the vectorstore collection without removal of collection.
+        # Collection itself remains available for future indexing.
+        vectorstore_wrapper.vectorstore.delete(ids=self.get_indexed_ids(vectorstore_wrapper, collection_suffix))
+
+    def is_vectorstore_type(self, vectorstore) -> bool:
+        """Check if the vectorstore is a PGVector store."""
+        return hasattr(vectorstore, 'session_maker') and hasattr(vectorstore, 'EmbeddingStore')
+
+    def get_indexed_data(self, vectorstore_wrapper):
+        """Get all indexed data from PGVector for non-code content"""
+        from sqlalchemy.orm import Session
+        from ...runtime.utils.utils import IndexerKeywords
+
+        result = {}
+        try:
+            vectorstore_wrapper._log_data("Retrieving already indexed data from PGVector vectorstore",
+                                          tool_name="get_indexed_data")
+            store = vectorstore_wrapper.vectorstore
+            with Session(store.session_maker.bind) as session:
+                docs = session.query(
+                    store.EmbeddingStore.id,
+                    store.EmbeddingStore.document,
+                    store.EmbeddingStore.cmetadata
+                ).all()
+
+            # Process the retrieved data
+            for doc in docs:
+                db_id = doc.id
+                meta = doc.cmetadata or {}
+
+                # Get document id from metadata
+                doc_id = str(meta.get('id', db_id))
+                dependent_docs = meta.get(IndexerKeywords.DEPENDENT_DOCS.value, [])
+                if dependent_docs:
+                    dependent_docs = [d.strip() for d in dependent_docs.split(';') if d.strip()]
+                parent_id = meta.get(IndexerKeywords.PARENT.value, -1)
+
+                chunk_id = meta.get('chunk_id')
+                if doc_id in result and chunk_id:
+                    # If document with the same id already saved, add db_id for current one as chunk
+                    result[doc_id]['all_chunks'].append(db_id)
+                else:
+                    result[doc_id] = {
+                        'metadata': meta,
+                        'id': db_id,
+                        'all_chunks': [db_id],
+                        IndexerKeywords.DEPENDENT_DOCS.value: dependent_docs,
+                        IndexerKeywords.PARENT.value: parent_id
+                    }
+
+        except Exception as e:
+            logger.error(f"Failed to get indexed data from PGVector: {str(e)}. Continuing with empty index.")
+
+        return result
+
+    def get_code_indexed_data(self, vectorstore_wrapper) -> Dict[str, Dict[str, Any]]:
+        """Get all indexed code data from PGVector."""
+        from sqlalchemy.orm import Session
+
+        result = {}
+        try:
+            vectorstore_wrapper._log_data("Retrieving already indexed code data from PGVector vectorstore",
+                                          tool_name="index_code_data")
+            store = vectorstore_wrapper.vectorstore
+            with Session(store.session_maker.bind) as session:
+                docs = session.query(
+                    store.EmbeddingStore.id,
+                    store.EmbeddingStore.cmetadata
+                ).all()
+
+            for db_id, meta in docs:
+                filename = meta.get('filename')
+                commit_hash = meta.get('commit_hash')
+                if not filename:
+                    continue
+                if filename not in result:
+                    result[filename] = {
+                        'metadata': meta,
+                        'commit_hashes': [],
+                        'ids': []
+                    }
+                if commit_hash is not None:
+                    result[filename]['commit_hashes'].append(commit_hash)
+                result[filename]['ids'].append(db_id)
+        except Exception as e:
+            logger.error(f"Failed to get indexed code data from PGVector: {str(e)}. Continuing with empty index.")
+        return result
+
+    def add_to_collection(self, vectorstore_wrapper, entry_id, new_collection_value):
+        """Add a new collection name to the `collection` key in the `metadata` column."""
+        from sqlalchemy import func
+        from sqlalchemy.orm import Session
+
+        store = vectorstore_wrapper.vectorstore
+        try:
+            with Session(store.session_maker.bind) as session:
+                # Query the current value of the `collection` key
+                current_collection_query = session.query(
+                    func.jsonb_extract_path_text(store.EmbeddingStore.cmetadata, 'collection')
+                ).filter(store.EmbeddingStore.id == entry_id).scalar()
+
+                # If the `collection` key is NULL or doesn't contain the new value, update it
+                if current_collection_query is None:
+                    # If `collection` is NULL, initialize it with the new value
+                    session.query(store.EmbeddingStore).filter(
+                        store.EmbeddingStore.id == entry_id
+                    ).update(
+                        {
+                            store.EmbeddingStore.cmetadata: func.jsonb_set(
+                                func.coalesce(store.EmbeddingStore.cmetadata, '{}'),
+                                '{collection}',  # Path to the `collection` key
+                                f'"{new_collection_value}"',  # New value for the `collection` key
+                                True  # Create the key if it doesn't exist
+                            )
+                        }
+                    )
+                elif new_collection_value not in current_collection_query.split(";"):
+                    # If `collection` exists but doesn't contain the new value, append it
+                    updated_collection_value = f"{current_collection_query};{new_collection_value}"
+                    session.query(store.EmbeddingStore).filter(
+                        store.EmbeddingStore.id == entry_id
+                    ).update(
+                        {
+                            store.EmbeddingStore.cmetadata: func.jsonb_set(
+                                store.EmbeddingStore.cmetadata,
+                                '{collection}',  # Path to the `collection` key
+                                f'"{updated_collection_value}"',  # Concatenated value as a valid JSON string
+                                True  # Create the key if it doesn't exist
+                            )
+                        }
+                    )
+
+                session.commit()
+                logger.info(f"Successfully updated collection for entry ID {entry_id}.")
+        except Exception as e:
+            logger.error(f"Failed to update collection for entry ID {entry_id}: {str(e)}")
 
 
 class ChromaAdapter(VectorStoreAdapter):
@@ -71,11 +264,90 @@ class ChromaAdapter(VectorStoreAdapter):
         }
 
     def list_collections(self, vectorstore_wrapper) -> str:
-        vector_client = vectorstore_wrapper.vectoradapter.vectorstore._client
+        vector_client = vectorstore_wrapper.vectorstore._client
         return ','.join([collection.name for collection in vector_client.list_collections()])
 
     def remove_collection(self, vectorstore_wrapper, collection_name: str):
-        vectorstore_wrapper.vectoradapter.vectorstore.delete_collection()
+        vectorstore_wrapper.vectorstore.delete_collection()
+
+    def get_indexed_ids(self, vectorstore_wrapper, collection_suffix: Optional[str] = '') -> List[str]:
+        """Get all indexed document IDs from Chroma"""
+        try:
+            data = vectorstore_wrapper.vectorstore.get(include=[])  # Only get IDs, no metadata
+            return data.get('ids', [])
+        except Exception as e:
+            logger.error(f"Failed to get indexed IDs from Chroma: {str(e)}")
+            return []
+
+    def clean_collection(self, vectorstore_wrapper, collection_suffix: str = ''):
+        """Clean the vectorstore collection by deleting all indexed data."""
+        vectorstore_wrapper.vectorstore.delete(ids=self.get_indexed_ids(vectorstore_wrapper, collection_suffix))
+
+    def get_indexed_data(self, vectorstore_wrapper):
+        """Get all indexed data from Chroma for non-code content"""
+        from ...runtime.utils.utils import IndexerKeywords
+
+        result = {}
+        try:
+            vectorstore_wrapper._log_data("Retrieving already indexed data from Chroma vectorstore",
+                                          tool_name="get_indexed_data")
+            data = vectorstore_wrapper.vectorstore.get(include=['metadatas'])
+
+            # Re-structure data to be more usable
+            for meta, db_id in zip(data['metadatas'], data['ids']):
+                # Get document id from metadata
+                doc_id = str(meta['id'])
+                dependent_docs = meta.get(IndexerKeywords.DEPENDENT_DOCS.value, [])
+                if dependent_docs:
+                    dependent_docs = [d.strip() for d in dependent_docs.split(';') if d.strip()]
+                parent_id = meta.get(IndexerKeywords.PARENT.value, -1)
+
+                chunk_id = meta.get('chunk_id')
+                if doc_id in result and chunk_id:
+                    # If document with the same id already saved, add db_id for current one as chunk
+                    result[doc_id]['all_chunks'].append(db_id)
+                else:
+                    result[doc_id] = {
+                        'metadata': meta,
+                        'id': db_id,
+                        'all_chunks': [db_id],
+                        IndexerKeywords.DEPENDENT_DOCS.value: dependent_docs,
+                        IndexerKeywords.PARENT.value: parent_id
+                    }
+        except Exception as e:
+            logger.error(f"Failed to get indexed data from Chroma: {str(e)}. Continuing with empty index.")
+
+        return result
+
+    def get_code_indexed_data(self, vectorstore_wrapper) -> Dict[str, Dict[str, Any]]:
+        """Get all indexed code data from Chroma."""
+        result = {}
+        try:
+            vectorstore_wrapper._log_data("Retrieving already indexed code data from Chroma vectorstore",
+                                          tool_name="index_code_data")
+            data = vectorstore_wrapper.vectorstore.get(include=['metadatas'])
+            for meta, db_id in zip(data['metadatas'], data['ids']):
+                filename = meta.get('filename')
+                commit_hash = meta.get('commit_hash')
+                if not filename:
+                    continue
+                if filename not in result:
+                    result[filename] = {
+                        'commit_hashes': [],
+                        'ids': []
+                    }
+                if commit_hash is not None:
+                    result[filename]['commit_hashes'].append(commit_hash)
+                result[filename]['ids'].append(db_id)
+        except Exception as e:
+            logger.error(f"Failed to get indexed code data from Chroma: {str(e)}. Continuing with empty index.")
+        return result
+
+    def add_to_collection(self, vectorstore_wrapper, entry_id, new_collection_value):
+        """Add a new collection name to the metadata - Chroma implementation"""
+        # For Chroma, we would need to update the metadata through vectorstore operations
+        # This is a simplified implementation - in practice, you might need more complex logic
+        logger.warning("add_to_collection for Chroma is not fully implemented yet")
 
 
 class VectorStoreAdapterFactory:
--- a/alita_sdk-0.3.231.dist-info/METADATA
+++ b/alita_sdk-0.3.232.dist-info/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: alita_sdk
-Version: 0.3.231
+Version: 0.3.232
 Summary: SDK for building langchain agents using resources from Alita
 Author-email: Artem Rozumenko <artyom.rozumenko@gmail.com>, Mikalai Biazruchka <mikalai_biazruchka@epam.com>, Roman Mitusov <roman_mitusov@epam.com>, Ivan Krakhmaliuk <lifedjik@gmail.com>, Artem Dubrovskiy <ad13box@gmail.com>
 License-Expression: Apache-2.0
--- a/alita_sdk-0.3.231.dist-info/RECORD
+++ b/alita_sdk-0.3.232.dist-info/RECORD
@@ -100,7 +100,7 @@ alita_sdk/runtime/tools/pgvector_search.py,sha256=NN2BGAnq4SsDHIhUcFZ8d_dbEOM8Qw
 alita_sdk/runtime/tools/prompt.py,sha256=nJafb_e5aOM1Rr3qGFCR-SKziU9uCsiP2okIMs9PppM,741
 alita_sdk/runtime/tools/router.py,sha256=wCvZjVkdXK9dMMeEerrgKf5M790RudH68pDortnHSz0,1517
 alita_sdk/runtime/tools/tool.py,sha256=lE1hGi6qOAXG7qxtqxarD_XMQqTghdywf261DZawwno,5631
-alita_sdk/runtime/tools/vectorstore.py,sha256=R6M6emjP7VUkXwufI_tfTicx4EKn-lZwxQ16-WzIwMA,44557
+alita_sdk/runtime/tools/vectorstore.py,sha256=0VWmYRWgFvzGViFlhYbUk2fjkofrLlVQQg6Vnx6nxhs,33659
 alita_sdk/runtime/utils/AlitaCallback.py,sha256=E4LlSBuCHWiUq6W7IZExERHZY0qcmdjzc_rJlF2iQIw,7356
 alita_sdk/runtime/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 alita_sdk/runtime/utils/constants.py,sha256=Xntx1b_uxUzT4clwqHA_U6K8y5bBqf_4lSQwXdcWrp4,13586
@@ -112,7 +112,7 @@ alita_sdk/runtime/utils/toolkit_runtime.py,sha256=MU63Fpxj0b5_r1IUUc0Q3-PN9VwL7r
 alita_sdk/runtime/utils/toolkit_utils.py,sha256=I9QFqnaqfVgN26LUr6s3XlBlG6y0CoHURnCzG7XcwVs,5311
 alita_sdk/runtime/utils/utils.py,sha256=CpEl3LCeLbhzQySz08lkKPm7Auac6IiLF7WB8wmArMI,589
 alita_sdk/tools/__init__.py,sha256=1AHqP2xyLjn92xVm70l9XIke6FkfHkLo5OoQVe4BuP8,10421
-alita_sdk/tools/elitea_base.py,sha256=Wg9HqeWlsJ_R5--_Xfg7bm8rqKd68aqHm4a1sKAvahI,30365
+alita_sdk/tools/elitea_base.py,sha256=pxcUj_z4xDy5EQDbEkBuneDBh8QdUzevDcHkCKR35v4,30361
 alita_sdk/tools/ado/__init__.py,sha256=j4lt6MLWlpkIIVkHmAyVG3i_qQeQ3ZmL_g8BfMhVhVI,1289
 alita_sdk/tools/ado/utils.py,sha256=PTCludvaQmPLakF2EbCGy66Mro4-rjDtavVP-xcB2Wc,1252
 alita_sdk/tools/ado/repos/__init__.py,sha256=kc4ZJI3B9CDUp4q3jRSj7JZNc3fJwwMTsV40CiKO7Po,6111
@@ -214,7 +214,7 @@ alita_sdk/tools/elastic/api_wrapper.py,sha256=pl8CqQxteJAGwyOhMcld-ZgtOTFwwbv42O
 alita_sdk/tools/figma/__init__.py,sha256=281OU_aw4Y87Do09HhDSi5zL5ne9YlrsRLZQo8s1U8Q,5316
 alita_sdk/tools/figma/api_wrapper.py,sha256=Rtgt9FvR8VD0oPdYhlgvVyXLVqLTjtiOPTlwNeaV80w,20560
 alita_sdk/tools/github/__init__.py,sha256=CtU52t6-jd6JErWe3M2HF5XXWzFj9CqGmG7HBjUet6E,5348
-alita_sdk/tools/github/api_wrapper.py,sha256=JRhn7Cgg2j6uEwlvuQCMeISNYvRV2Yahx-v-p8HspUQ,8767
+alita_sdk/tools/github/api_wrapper.py,sha256=uDwYckdnpYRJtb0uZnDkaz2udvdDLVxuCh1tSwspsiU,8411
 alita_sdk/tools/github/github_client.py,sha256=nxnSXsDul2PPbWvYZS8TmAFFmR-5ALyakNoV5LN2D4U,86617
 alita_sdk/tools/github/graphql_client_wrapper.py,sha256=d3AGjzLGH_hdQV2V8HeAX92dJ4dlnE5OXqUlCO_PBr0,71539
 alita_sdk/tools/github/schemas.py,sha256=yFsqivfjCPRk9GxFJrL8sTz6nnjFCZ0j5DIfPtGSsvA,13852
@@ -304,7 +304,7 @@ alita_sdk/tools/testrail/__init__.py,sha256=577XVaOAoXG3mDkojCsy5XCUlxCsdJf_2-_5
 alita_sdk/tools/testrail/api_wrapper.py,sha256=Aax0jspgidXYNxLIw6qTWu3dO2JOIS0ALIqsCzQuFbQ,32087
 alita_sdk/tools/utils/__init__.py,sha256=155xepXPr4OEzs2Mz5YnjXcBpxSv1X2eznRUVoPtyK0,3268
 alita_sdk/tools/utils/content_parser.py,sha256=yi1IDLreqfM41w-PnoFEvVLtSV50qpNvKshJwbDTgqs,7172
-alita_sdk/tools/vector_adapters/VectorStoreAdapter.py,sha256=KhxojgddWlEQ4TZA7jEL5ZEp86PcXfmfgRXixsjj7-M,3634
+alita_sdk/tools/vector_adapters/VectorStoreAdapter.py,sha256=kB6KYN4IRisyNc3U4SYJ4PdOoPKH1wrRvRwvdrjZ0OQ,16850
 alita_sdk/tools/vector_adapters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 alita_sdk/tools/xray/__init__.py,sha256=OYa1wveTm-lAhsJaGXMnwOrDQWl6ch--NjNLBeR63eM,4331
 alita_sdk/tools/xray/api_wrapper.py,sha256=A8PJmY2k7TowaD_vk6ZxkMnSUoZUt9A6g4TJrZfNTAw,32225
@@ -325,8 +325,8 @@ alita_sdk/tools/zephyr_scale/api_wrapper.py,sha256=JAeWf-RXohsxheUpT0iMDClc_izj-
 alita_sdk/tools/zephyr_squad/__init__.py,sha256=0AI_j27xVO5Gk5HQMFrqPTd4uvuVTpiZUicBrdfEpKg,2796
 alita_sdk/tools/zephyr_squad/api_wrapper.py,sha256=kmw_xol8YIYFplBLWTqP_VKPRhL_1ItDD0_vXTe_UuI,14906
 alita_sdk/tools/zephyr_squad/zephyr_squad_cloud_client.py,sha256=R371waHsms4sllHCbijKYs90C-9Yu0sSR3N4SUfQOgU,5066
-alita_sdk-0.3.231.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-alita_sdk-0.3.231.dist-info/METADATA,sha256=8JHRCwh-SnyBtnD3wL4yJDUL1ov5fLXL1WUyfSIynf0,18896
-alita_sdk-0.3.231.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-alita_sdk-0.3.231.dist-info/top_level.txt,sha256=0vJYy5p_jK6AwVb1aqXr7Kgqgk3WDtQ6t5C-XI9zkmg,10
-alita_sdk-0.3.231.dist-info/RECORD,,
+alita_sdk-0.3.232.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+alita_sdk-0.3.232.dist-info/METADATA,sha256=HMHMoJWO6wQ3h3u5c-p_27RlppcpFUaw9BDyOL7Y9_c,18896
+alita_sdk-0.3.232.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+alita_sdk-0.3.232.dist-info/top_level.txt,sha256=0vJYy5p_jK6AwVb1aqXr7Kgqgk3WDtQ6t5C-XI9zkmg,10
+alita_sdk-0.3.232.dist-info/RECORD,,