alita-sdk 0.3.362__py3-none-any.whl → 0.3.364__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of alita-sdk might be problematic. See the release details below for more information.

@@ -173,7 +173,7 @@ def get_vectorstore(vectorstore_type, vectorstore_params, embedding_func=None):
173
173
  #
174
174
  raise RuntimeError(f"Unknown VectorStore type: {vectorstore_type}")
175
175
 
176
- def add_documents(vectorstore, documents):
176
+ def add_documents(vectorstore, documents, ids = None) -> list[str]:
177
177
  """ Add documents to vectorstore """
178
178
  if vectorstore is None:
179
179
  return None
@@ -189,7 +189,7 @@ def add_documents(vectorstore, documents):
189
189
  if isinstance(document.metadata[key], dict):
190
190
  document.metadata[key] = dumps(document.metadata[key])
191
191
  metadata.append(document.metadata)
192
- vectorstore.add_texts(texts, metadatas=metadata)
192
+ return vectorstore.add_texts(texts, metadatas=metadata, ids=ids)
193
193
 
194
194
 
195
195
  def generateResponse(
@@ -11,6 +11,9 @@ class IndexerKeywords(Enum):
11
11
  UPDATED_ON = 'updated_on'
12
12
  CONTENT_IN_BYTES = 'loader_content'
13
13
  CONTENT_FILE_NAME = 'loader_content_type'
14
+ INDEX_META_TYPE = 'index_meta'
15
+ INDEX_META_IN_PROGRESS = 'in_progress'
16
+ INDEX_META_COMPLETED = 'completed'
14
17
 
15
18
  # This pattern matches characters that are NOT alphanumeric, underscores, or hyphens
16
19
  clean_string_pattern = re.compile(r'[^a-zA-Z0-9_.-]')
@@ -1,6 +1,7 @@
1
1
  import json
2
2
  import logging
3
- from typing import Any, Optional, List, Literal, Dict, Generator
3
+ import time
4
+ from typing import Any, Optional, List, Dict, Generator
4
5
 
5
6
  from langchain_core.documents import Document
6
7
  from pydantic import create_model, Field, SecretStr
@@ -147,6 +148,7 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
147
148
  yield from ()
148
149
 
149
150
  def index_data(self, **kwargs):
151
+ from ..runtime.langchain.interfaces.llm_processor import add_documents
150
152
  collection_suffix = kwargs.get("collection_suffix")
151
153
  progress_step = kwargs.get("progress_step")
152
154
  clean_index = kwargs.get("clean_index")
@@ -156,6 +158,18 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
156
158
  if clean_index:
157
159
  self._clean_index(collection_suffix)
158
160
  #
161
+ # create and add initial index meta document
162
+ index_meta_doc = Document(page_content=f"{IndexerKeywords.INDEX_META_TYPE.value}_{collection_suffix}", metadata={
163
+ "collection": collection_suffix,
164
+ "type": IndexerKeywords.INDEX_META_TYPE.value,
165
+ "indexed": 0,
166
+ "state": IndexerKeywords.INDEX_META_IN_PROGRESS.value,
167
+ "index_configuration": kwargs,
168
+ "created_on": time.time(),
169
+ "updated_on": time.time(),
170
+ })
171
+ index_meta_ids = add_documents(vectorstore=self.vectorstore, documents=[index_meta_doc])
172
+ #
159
173
  self._log_tool_event(f"Indexing data into collection with suffix '{collection_suffix}'. It can take some time...")
160
174
  self._log_tool_event(f"Loading the documents to index...{kwargs}")
161
175
  documents = self._base_loader(**kwargs)
@@ -164,10 +178,18 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
164
178
  documents = (doc for doc in documents)
165
179
  self._log_tool_event(f"Base documents were pre-loaded. "
166
180
  f"Search for possible document duplicates and remove them from the indexing list...")
167
- # documents = self._reduce_duplicates(documents, collection_suffix)
181
+ documents = self._reduce_duplicates(documents, collection_suffix)
168
182
  self._log_tool_event(f"Duplicates were removed. "
169
183
  f"Processing documents to collect dependencies and prepare them for indexing...")
170
- return self._save_index_generator(documents, documents_count, chunking_tool, chunking_config, collection_suffix=collection_suffix, progress_step=progress_step)
184
+ result = self._save_index_generator(documents, documents_count, chunking_tool, chunking_config, collection_suffix=collection_suffix, progress_step=progress_step)
185
+ #
186
+ # update index meta document
187
+ index_meta_doc.metadata["indexed"] = result
188
+ index_meta_doc.metadata["state"] = IndexerKeywords.INDEX_META_COMPLETED.value
189
+ index_meta_doc.metadata["updated_on"] = time.time()
190
+ add_documents(vectorstore=self.vectorstore, documents=[index_meta_doc], ids=index_meta_ids)
191
+ #
192
+ return {"status": "ok", "message": f"successfully indexed {result} documents"}
171
193
 
172
194
  def _save_index_generator(self, base_documents: Generator[Document, None, None], base_total: int, chunking_tool, chunking_config, collection_suffix: Optional[str] = None, progress_step: int = 20):
173
195
  self._log_tool_event(f"Base documents are ready for indexing. {base_total} base documents in total to index.")
@@ -225,7 +247,7 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
225
247
  total_counter += dependent_docs_counter
226
248
  if pg_vector_add_docs_chunk:
227
249
  add_documents(vectorstore=self.vectorstore, documents=pg_vector_add_docs_chunk)
228
- return {"status": "ok", "message": f"successfully indexed {total_counter} documents"}
250
+ return total_counter
229
251
 
230
252
  def _apply_loaders_chunkers(self, documents: Generator[Document, None, None], chunking_tool: str=None, chunking_config=None) -> Generator[Document, None, None]:
231
253
  from ..tools.chunkers import __all__ as chunkers
@@ -344,8 +366,41 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
344
366
  filter.update({"collection": {
345
367
  "$eq": collection_suffix.strip()
346
368
  }})
369
+ filter = {
370
+ "$and": [
371
+ filter,
372
+ {"$or": [
373
+ {"type": {"$exists": False}},
374
+ {"type": {"$ne": IndexerKeywords.INDEX_META_TYPE.value}}
375
+ ]},
376
+ ]
377
+ }
347
378
  return filter
348
379
 
380
+ def index_meta_read(self):
381
+ from sqlalchemy import func
382
+ from sqlalchemy.orm import Session
383
+
384
+ store = self.vectorstore
385
+ try:
386
+ with Session(store.session_maker.bind) as session:
387
+ meta = session.query(
388
+ store.EmbeddingStore.id,
389
+ store.EmbeddingStore.cmetadata
390
+ ).filter(
391
+ func.jsonb_extract_path_text(store.EmbeddingStore.cmetadata, 'type') == IndexerKeywords.INDEX_META_TYPE.value
392
+ ).all()
393
+ return [
394
+ {"id": id_, "metadata": cmetadata}
395
+ for id_, cmetadata in meta
396
+ ]
397
+ except Exception as e:
398
+ logger.error(f"Failed to get index_meta from PGVector: {str(e)}")
399
+ return []
400
+
401
+ def index_meta_delete(self, index_meta_ids: list[str]):
402
+ self.vectorstore.delete(ids=index_meta_ids)
403
+
349
404
  def search_index(self,
350
405
  query: str,
351
406
  collection_suffix: str = "",
@@ -1,13 +1,13 @@
1
1
  # api_wrapper.py
2
- from typing import Any, Dict, List, Optional
3
2
  import fnmatch
3
+ from typing import Any, Dict, List, Optional
4
4
 
5
5
  from langchain_core.tools import ToolException
6
-
7
- from ..code_indexer_toolkit import CodeIndexerToolkit
8
6
  from pydantic import create_model, Field, model_validator, SecretStr, PrivateAttr
9
7
 
8
+ from ..code_indexer_toolkit import CodeIndexerToolkit
10
9
  from ..utils.available_tools_decorator import extend_with_parent_available_tools
10
+ from ..utils.content_parser import parse_file_content
11
11
 
12
12
  AppendFileModel = create_model(
13
13
  "AppendFileModel",
@@ -318,7 +318,9 @@ class GitLabAPIWrapper(CodeIndexerToolkit):
318
318
  def read_file(self, file_path: str, branch: str) -> str:
319
319
  self.set_active_branch(branch)
320
320
  file = self.repo_instance.files.get(file_path, branch)
321
- return file.decode().decode("utf-8")
321
+ return parse_file_content(file_name=file_path,
322
+ file_content=file.decode(),
323
+ llm=self.llm)
322
324
 
323
325
  def update_file(self, file_query: str, branch: str) -> str:
324
326
  if branch == self.branch:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: alita_sdk
3
- Version: 0.3.362
3
+ Version: 0.3.364
4
4
  Summary: SDK for building langchain agents using resources from Alita
5
5
  Author-email: Artem Rozumenko <artyom.rozumenko@gmail.com>, Mikalai Biazruchka <mikalai_biazruchka@epam.com>, Roman Mitusov <roman_mitusov@epam.com>, Ivan Krakhmaliuk <lifedj27@gmail.com>, Artem Dubrovskiy <ad13box@gmail.com>
6
6
  License-Expression: Apache-2.0
@@ -74,7 +74,7 @@ alita_sdk/runtime/langchain/document_loaders/constants.py,sha256=XUNC63S7U2HjE_1
74
74
  alita_sdk/runtime/langchain/document_loaders/utils.py,sha256=9xghESf3axBbwxATyVuS0Yu-TWe8zWZnXgCD1ZVyNW0,2414
75
75
  alita_sdk/runtime/langchain/interfaces/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
76
76
  alita_sdk/runtime/langchain/interfaces/kwextractor.py,sha256=kSJA9L8g8UArmHu7Bd9dIO0Rrq86JPUb8RYNlnN68FQ,3072
77
- alita_sdk/runtime/langchain/interfaces/llm_processor.py,sha256=8vqkbGYBvjFNyeEgry26JtWGwrvvM-3A0rTX5Ey_v3g,8764
77
+ alita_sdk/runtime/langchain/interfaces/llm_processor.py,sha256=o4YwgTpR2v6v-rJM3iNq9sfzmAYnRZqZYyf2Si-74Ew,8805
78
78
  alita_sdk/runtime/langchain/interfaces/loaders.py,sha256=li-O2dubiDNYn-qfVcDsuD4LqP_IZ61cV2vHUZAqeXc,3337
79
79
  alita_sdk/runtime/langchain/interfaces/splitters.py,sha256=tW65-Ejj9VYyxXFZNgPts_CKILQ18bWp_1bZ-24FKGc,3630
80
80
  alita_sdk/runtime/langchain/retrievers/AlitaRetriever.py,sha256=osChtJxUlfpsFESpJSE5mnJAkxTXnzgFZnC6l5mUlbo,6148
@@ -133,9 +133,9 @@ alita_sdk/runtime/utils/save_dataframe.py,sha256=i-E1wp-t4wb17Zq3nA3xYwgSILjoXNi
133
133
  alita_sdk/runtime/utils/streamlit.py,sha256=GQ69CsjfRMcGXcCrslL0Uoj24Cl07Jeji0rZxELaKTQ,104930
134
134
  alita_sdk/runtime/utils/toolkit_runtime.py,sha256=MU63Fpxj0b5_r1IUUc0Q3-PN9VwL7rUxp2MRR4tmYR8,5136
135
135
  alita_sdk/runtime/utils/toolkit_utils.py,sha256=I9QFqnaqfVgN26LUr6s3XlBlG6y0CoHURnCzG7XcwVs,5311
136
- alita_sdk/runtime/utils/utils.py,sha256=iuCcyVZoBpXrHh0zQa8M-Gg_tIaznc7T9kEEUJ8a0l4,891
136
+ alita_sdk/runtime/utils/utils.py,sha256=BVEVLkYiiotcUD0XsHyx-wACpHfALsQg7PLZpObqvK8,1008
137
137
  alita_sdk/tools/__init__.py,sha256=jUj1ztC2FbkIUB-YYmiqaz_rqW7Il5kWzDPn1mJmj5w,10545
138
- alita_sdk/tools/base_indexer_toolkit.py,sha256=PyT3BDSn6gNJPXdbZw21tvTbE9WkhJD3m_pFWZJlYbU,23825
138
+ alita_sdk/tools/base_indexer_toolkit.py,sha256=dOdl-n_TUCryYCVuCNNyGYN3fwTQuLjNTMTU5axwzW8,26101
139
139
  alita_sdk/tools/code_indexer_toolkit.py,sha256=6QvI1by0OFdnKTx5TfNoDJjnMrvnTi9T56xaDxzeleU,7306
140
140
  alita_sdk/tools/elitea_base.py,sha256=up3HshASSDfjlHV_HPrs1aD4JIwwX0Ug26WGTzgIYvY,34724
141
141
  alita_sdk/tools/non_code_indexer_toolkit.py,sha256=B3QvhpT1F9QidkCcsOi3J_QrTOaNlTxqWFwe90VivQQ,1329
@@ -247,7 +247,7 @@ alita_sdk/tools/github/schemas.py,sha256=TxEWR3SjDKVwzo9i2tLnss_uPAv85Mh7oWjvQvY
247
247
  alita_sdk/tools/github/tool.py,sha256=Jnnv5lenV5ds8AAdyo2m8hSzyJ117HZBjzHC6T1ck-M,1037
248
248
  alita_sdk/tools/github/tool_prompts.py,sha256=y6ZW_FpUCE87Uop3WuQAZVRnzxO5t7xjBOI5bCqiluw,30194
249
249
  alita_sdk/tools/gitlab/__init__.py,sha256=iis7RHD3YgKWxF_ryTfdtA8RPGV-W8zUfy4BgiTDADw,4540
250
- alita_sdk/tools/gitlab/api_wrapper.py,sha256=jziPnjBkJE7TRIAyGsV7s9sX74NuL97yP1UiNKzzK8s,22626
250
+ alita_sdk/tools/gitlab/api_wrapper.py,sha256=gmL6o6yZDJKvAOVVgd-gG4wyjD3SlxJ4Ipoyz0GvqW8,22799
251
251
  alita_sdk/tools/gitlab/tools.py,sha256=vOGTlSaGaFmWn6LS6YFP-FuTqUPun9vnv1VrUcUHAZQ,16500
252
252
  alita_sdk/tools/gitlab/utils.py,sha256=Z2XiqIg54ouqqt1to-geFybmkCb1I6bpE91wfnINH1I,2320
253
253
  alita_sdk/tools/gitlab_org/__init__.py,sha256=PSTsC4BcPoyDv03Wj9VQHrEGUeR8hw4MRarB64VeqFg,3865
@@ -352,8 +352,8 @@ alita_sdk/tools/zephyr_scale/api_wrapper.py,sha256=kT0TbmMvuKhDUZc0i7KO18O38JM9S
352
352
  alita_sdk/tools/zephyr_squad/__init__.py,sha256=0ne8XLJEQSLOWfzd2HdnqOYmQlUliKHbBED5kW_Vias,2895
353
353
  alita_sdk/tools/zephyr_squad/api_wrapper.py,sha256=kmw_xol8YIYFplBLWTqP_VKPRhL_1ItDD0_vXTe_UuI,14906
354
354
  alita_sdk/tools/zephyr_squad/zephyr_squad_cloud_client.py,sha256=R371waHsms4sllHCbijKYs90C-9Yu0sSR3N4SUfQOgU,5066
355
- alita_sdk-0.3.362.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
356
- alita_sdk-0.3.362.dist-info/METADATA,sha256=-dQUAdfEQUBXMeIDx9i7d9eNDss9eUsj7_dWUT-pTO8,19071
357
- alita_sdk-0.3.362.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
358
- alita_sdk-0.3.362.dist-info/top_level.txt,sha256=0vJYy5p_jK6AwVb1aqXr7Kgqgk3WDtQ6t5C-XI9zkmg,10
359
- alita_sdk-0.3.362.dist-info/RECORD,,
355
+ alita_sdk-0.3.364.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
356
+ alita_sdk-0.3.364.dist-info/METADATA,sha256=g5WuZmVHZprcEd89flt9ni3Itkhj7tQu7znDpq2BOzs,19071
357
+ alita_sdk-0.3.364.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
358
+ alita_sdk-0.3.364.dist-info/top_level.txt,sha256=0vJYy5p_jK6AwVb1aqXr7Kgqgk3WDtQ6t5C-XI9zkmg,10
359
+ alita_sdk-0.3.364.dist-info/RECORD,,