alita-sdk 0.3.362__py3-none-any.whl → 0.3.364__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of alita-sdk might be problematic. See the release details below for more information.

@@ -173,7 +173,7 @@ def get_vectorstore(vectorstore_type, vectorstore_params, embedding_func=None):
173
173
  #
174
174
  raise RuntimeError(f"Unknown VectorStore type: {vectorstore_type}")
175
175
 
176
- def add_documents(vectorstore, documents):
176
+ def add_documents(vectorstore, documents, ids = None) -> list[str]:
177
177
  """ Add documents to vectorstore """
178
178
  if vectorstore is None:
179
179
  return None
@@ -189,7 +189,7 @@ def add_documents(vectorstore, documents):
189
189
  if isinstance(document.metadata[key], dict):
190
190
  document.metadata[key] = dumps(document.metadata[key])
191
191
  metadata.append(document.metadata)
192
- vectorstore.add_texts(texts, metadatas=metadata)
192
+ return vectorstore.add_texts(texts, metadatas=metadata, ids=ids)
193
193
 
194
194
 
195
195
  def generateResponse(
@@ -11,6 +11,9 @@ class IndexerKeywords(Enum):
11
11
  UPDATED_ON = 'updated_on'
12
12
  CONTENT_IN_BYTES = 'loader_content'
13
13
  CONTENT_FILE_NAME = 'loader_content_type'
14
+ INDEX_META_TYPE = 'index_meta'
15
+ INDEX_META_IN_PROGRESS = 'in_progress'
16
+ INDEX_META_COMPLETED = 'completed'
14
17
 
15
18
  # This pattern matches characters that are NOT alphanumeric, underscores, or hyphens
16
19
  clean_string_pattern = re.compile(r'[^a-zA-Z0-9_.-]')
@@ -1,6 +1,7 @@
1
1
  import json
2
2
  import logging
3
- from typing import Any, Optional, List, Literal, Dict, Generator
3
+ import time
4
+ from typing import Any, Optional, List, Dict, Generator
4
5
 
5
6
  from langchain_core.documents import Document
6
7
  from pydantic import create_model, Field, SecretStr
@@ -147,6 +148,7 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
147
148
  yield from ()
148
149
 
149
150
  def index_data(self, **kwargs):
151
+ from ..runtime.langchain.interfaces.llm_processor import add_documents
150
152
  collection_suffix = kwargs.get("collection_suffix")
151
153
  progress_step = kwargs.get("progress_step")
152
154
  clean_index = kwargs.get("clean_index")
@@ -156,6 +158,18 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
156
158
  if clean_index:
157
159
  self._clean_index(collection_suffix)
158
160
  #
161
+ # create and add initial index meta document
162
+ index_meta_doc = Document(page_content=f"{IndexerKeywords.INDEX_META_TYPE.value}_{collection_suffix}", metadata={
163
+ "collection": collection_suffix,
164
+ "type": IndexerKeywords.INDEX_META_TYPE.value,
165
+ "indexed": 0,
166
+ "state": IndexerKeywords.INDEX_META_IN_PROGRESS.value,
167
+ "index_configuration": kwargs,
168
+ "created_on": time.time(),
169
+ "updated_on": time.time(),
170
+ })
171
+ index_meta_ids = add_documents(vectorstore=self.vectorstore, documents=[index_meta_doc])
172
+ #
159
173
  self._log_tool_event(f"Indexing data into collection with suffix '{collection_suffix}'. It can take some time...")
160
174
  self._log_tool_event(f"Loading the documents to index...{kwargs}")
161
175
  documents = self._base_loader(**kwargs)
@@ -164,10 +178,18 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
164
178
  documents = (doc for doc in documents)
165
179
  self._log_tool_event(f"Base documents were pre-loaded. "
166
180
  f"Search for possible document duplicates and remove them from the indexing list...")
167
- # documents = self._reduce_duplicates(documents, collection_suffix)
181
+ documents = self._reduce_duplicates(documents, collection_suffix)
168
182
  self._log_tool_event(f"Duplicates were removed. "
169
183
  f"Processing documents to collect dependencies and prepare them for indexing...")
170
- return self._save_index_generator(documents, documents_count, chunking_tool, chunking_config, collection_suffix=collection_suffix, progress_step=progress_step)
184
+ result = self._save_index_generator(documents, documents_count, chunking_tool, chunking_config, collection_suffix=collection_suffix, progress_step=progress_step)
185
+ #
186
+ # update index meta document
187
+ index_meta_doc.metadata["indexed"] = result
188
+ index_meta_doc.metadata["state"] = IndexerKeywords.INDEX_META_COMPLETED.value
189
+ index_meta_doc.metadata["updated_on"] = time.time()
190
+ add_documents(vectorstore=self.vectorstore, documents=[index_meta_doc], ids=index_meta_ids)
191
+ #
192
+ return {"status": "ok", "message": f"successfully indexed {result} documents"}
171
193
 
172
194
  def _save_index_generator(self, base_documents: Generator[Document, None, None], base_total: int, chunking_tool, chunking_config, collection_suffix: Optional[str] = None, progress_step: int = 20):
173
195
  self._log_tool_event(f"Base documents are ready for indexing. {base_total} base documents in total to index.")
@@ -225,7 +247,7 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
225
247
  total_counter += dependent_docs_counter
226
248
  if pg_vector_add_docs_chunk:
227
249
  add_documents(vectorstore=self.vectorstore, documents=pg_vector_add_docs_chunk)
228
- return {"status": "ok", "message": f"successfully indexed {total_counter} documents"}
250
+ return total_counter
229
251
 
230
252
  def _apply_loaders_chunkers(self, documents: Generator[Document, None, None], chunking_tool: str=None, chunking_config=None) -> Generator[Document, None, None]:
231
253
  from ..tools.chunkers import __all__ as chunkers
@@ -344,8 +366,41 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
344
366
  filter.update({"collection": {
345
367
  "$eq": collection_suffix.strip()
346
368
  }})
369
+ filter = {
370
+ "$and": [
371
+ filter,
372
+ {"$or": [
373
+ {"type": {"$exists": False}},
374
+ {"type": {"$ne": IndexerKeywords.INDEX_META_TYPE.value}}
375
+ ]},
376
+ ]
377
+ }
347
378
  return filter
348
379
 
380
+ def index_meta_read(self):
381
+ from sqlalchemy import func
382
+ from sqlalchemy.orm import Session
383
+
384
+ store = self.vectorstore
385
+ try:
386
+ with Session(store.session_maker.bind) as session:
387
+ meta = session.query(
388
+ store.EmbeddingStore.id,
389
+ store.EmbeddingStore.cmetadata
390
+ ).filter(
391
+ func.jsonb_extract_path_text(store.EmbeddingStore.cmetadata, 'type') == IndexerKeywords.INDEX_META_TYPE.value
392
+ ).all()
393
+ return [
394
+ {"id": id_, "metadata": cmetadata}
395
+ for id_, cmetadata in meta
396
+ ]
397
+ except Exception as e:
398
+ logger.error(f"Failed to get index_meta from PGVector: {str(e)}")
399
+ return []
400
+
401
+ def index_meta_delete(self, index_meta_ids: list[str]):
402
+ self.vectorstore.delete(ids=index_meta_ids)
403
+
349
404
  def search_index(self,
350
405
  query: str,
351
406
  collection_suffix: str = "",
@@ -1,13 +1,13 @@
1
1
  # api_wrapper.py
2
- from typing import Any, Dict, List, Optional
3
2
  import fnmatch
3
+ from typing import Any, Dict, List, Optional
4
4
 
5
5
  from langchain_core.tools import ToolException
6
-
7
- from ..code_indexer_toolkit import CodeIndexerToolkit
8
6
  from pydantic import create_model, Field, model_validator, SecretStr, PrivateAttr
9
7
 
8
+ from ..code_indexer_toolkit import CodeIndexerToolkit
10
9
  from ..utils.available_tools_decorator import extend_with_parent_available_tools
10
+ from ..utils.content_parser import parse_file_content
11
11
 
12
12
  AppendFileModel = create_model(
13
13
  "AppendFileModel",
@@ -318,7 +318,9 @@ class GitLabAPIWrapper(CodeIndexerToolkit):
318
318
  def read_file(self, file_path: str, branch: str) -> str:
319
319
  self.set_active_branch(branch)
320
320
  file = self.repo_instance.files.get(file_path, branch)
321
- return file.decode().decode("utf-8")
321
+ return parse_file_content(file_name=file_path,
322
+ file_content=file.decode(),
323
+ llm=self.llm)
322
324
 
323
325
  def update_file(self, file_query: str, branch: str) -> str:
324
326
  if branch == self.branch:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: alita_sdk
3
- Version: 0.3.362
3
+ Version: 0.3.364
4
4
  Summary: SDK for building langchain agents using resources from Alita
5
5
  Author-email: Artem Rozumenko <artyom.rozumenko@gmail.com>, Mikalai Biazruchka <mikalai_biazruchka@epam.com>, Roman Mitusov <roman_mitusov@epam.com>, Ivan Krakhmaliuk <lifedj27@gmail.com>, Artem Dubrovskiy <ad13box@gmail.com>
6
6
  License-Expression: Apache-2.0
@@ -74,7 +74,7 @@ alita_sdk/runtime/langchain/document_loaders/constants.py,sha256=XUNC63S7U2HjE_1
74
74
  alita_sdk/runtime/langchain/document_loaders/utils.py,sha256=9xghESf3axBbwxATyVuS0Yu-TWe8zWZnXgCD1ZVyNW0,2414
75
75
  alita_sdk/runtime/langchain/interfaces/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
76
76
  alita_sdk/runtime/langchain/interfaces/kwextractor.py,sha256=kSJA9L8g8UArmHu7Bd9dIO0Rrq86JPUb8RYNlnN68FQ,3072
77
- alita_sdk/runtime/langchain/interfaces/llm_processor.py,sha256=8vqkbGYBvjFNyeEgry26JtWGwrvvM-3A0rTX5Ey_v3g,8764
77
+ alita_sdk/runtime/langchain/interfaces/llm_processor.py,sha256=o4YwgTpR2v6v-rJM3iNq9sfzmAYnRZqZYyf2Si-74Ew,8805
78
78
  alita_sdk/runtime/langchain/interfaces/loaders.py,sha256=li-O2dubiDNYn-qfVcDsuD4LqP_IZ61cV2vHUZAqeXc,3337
79
79
  alita_sdk/runtime/langchain/interfaces/splitters.py,sha256=tW65-Ejj9VYyxXFZNgPts_CKILQ18bWp_1bZ-24FKGc,3630
80
80
  alita_sdk/runtime/langchain/retrievers/AlitaRetriever.py,sha256=osChtJxUlfpsFESpJSE5mnJAkxTXnzgFZnC6l5mUlbo,6148
@@ -133,9 +133,9 @@ alita_sdk/runtime/utils/save_dataframe.py,sha256=i-E1wp-t4wb17Zq3nA3xYwgSILjoXNi
133
133
  alita_sdk/runtime/utils/streamlit.py,sha256=GQ69CsjfRMcGXcCrslL0Uoj24Cl07Jeji0rZxELaKTQ,104930
134
134
  alita_sdk/runtime/utils/toolkit_runtime.py,sha256=MU63Fpxj0b5_r1IUUc0Q3-PN9VwL7rUxp2MRR4tmYR8,5136
135
135
  alita_sdk/runtime/utils/toolkit_utils.py,sha256=I9QFqnaqfVgN26LUr6s3XlBlG6y0CoHURnCzG7XcwVs,5311
136
- alita_sdk/runtime/utils/utils.py,sha256=iuCcyVZoBpXrHh0zQa8M-Gg_tIaznc7T9kEEUJ8a0l4,891
136
+ alita_sdk/runtime/utils/utils.py,sha256=BVEVLkYiiotcUD0XsHyx-wACpHfALsQg7PLZpObqvK8,1008
137
137
  alita_sdk/tools/__init__.py,sha256=jUj1ztC2FbkIUB-YYmiqaz_rqW7Il5kWzDPn1mJmj5w,10545
138
- alita_sdk/tools/base_indexer_toolkit.py,sha256=PyT3BDSn6gNJPXdbZw21tvTbE9WkhJD3m_pFWZJlYbU,23825
138
+ alita_sdk/tools/base_indexer_toolkit.py,sha256=dOdl-n_TUCryYCVuCNNyGYN3fwTQuLjNTMTU5axwzW8,26101
139
139
  alita_sdk/tools/code_indexer_toolkit.py,sha256=6QvI1by0OFdnKTx5TfNoDJjnMrvnTi9T56xaDxzeleU,7306
140
140
  alita_sdk/tools/elitea_base.py,sha256=up3HshASSDfjlHV_HPrs1aD4JIwwX0Ug26WGTzgIYvY,34724
141
141
  alita_sdk/tools/non_code_indexer_toolkit.py,sha256=B3QvhpT1F9QidkCcsOi3J_QrTOaNlTxqWFwe90VivQQ,1329
@@ -247,7 +247,7 @@ alita_sdk/tools/github/schemas.py,sha256=TxEWR3SjDKVwzo9i2tLnss_uPAv85Mh7oWjvQvY
247
247
  alita_sdk/tools/github/tool.py,sha256=Jnnv5lenV5ds8AAdyo2m8hSzyJ117HZBjzHC6T1ck-M,1037
248
248
  alita_sdk/tools/github/tool_prompts.py,sha256=y6ZW_FpUCE87Uop3WuQAZVRnzxO5t7xjBOI5bCqiluw,30194
249
249
  alita_sdk/tools/gitlab/__init__.py,sha256=iis7RHD3YgKWxF_ryTfdtA8RPGV-W8zUfy4BgiTDADw,4540
250
- alita_sdk/tools/gitlab/api_wrapper.py,sha256=jziPnjBkJE7TRIAyGsV7s9sX74NuL97yP1UiNKzzK8s,22626
250
+ alita_sdk/tools/gitlab/api_wrapper.py,sha256=gmL6o6yZDJKvAOVVgd-gG4wyjD3SlxJ4Ipoyz0GvqW8,22799
251
251
  alita_sdk/tools/gitlab/tools.py,sha256=vOGTlSaGaFmWn6LS6YFP-FuTqUPun9vnv1VrUcUHAZQ,16500
252
252
  alita_sdk/tools/gitlab/utils.py,sha256=Z2XiqIg54ouqqt1to-geFybmkCb1I6bpE91wfnINH1I,2320
253
253
  alita_sdk/tools/gitlab_org/__init__.py,sha256=PSTsC4BcPoyDv03Wj9VQHrEGUeR8hw4MRarB64VeqFg,3865
@@ -352,8 +352,8 @@ alita_sdk/tools/zephyr_scale/api_wrapper.py,sha256=kT0TbmMvuKhDUZc0i7KO18O38JM9S
352
352
  alita_sdk/tools/zephyr_squad/__init__.py,sha256=0ne8XLJEQSLOWfzd2HdnqOYmQlUliKHbBED5kW_Vias,2895
353
353
  alita_sdk/tools/zephyr_squad/api_wrapper.py,sha256=kmw_xol8YIYFplBLWTqP_VKPRhL_1ItDD0_vXTe_UuI,14906
354
354
  alita_sdk/tools/zephyr_squad/zephyr_squad_cloud_client.py,sha256=R371waHsms4sllHCbijKYs90C-9Yu0sSR3N4SUfQOgU,5066
355
- alita_sdk-0.3.362.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
356
- alita_sdk-0.3.362.dist-info/METADATA,sha256=-dQUAdfEQUBXMeIDx9i7d9eNDss9eUsj7_dWUT-pTO8,19071
357
- alita_sdk-0.3.362.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
358
- alita_sdk-0.3.362.dist-info/top_level.txt,sha256=0vJYy5p_jK6AwVb1aqXr7Kgqgk3WDtQ6t5C-XI9zkmg,10
359
- alita_sdk-0.3.362.dist-info/RECORD,,
355
+ alita_sdk-0.3.364.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
356
+ alita_sdk-0.3.364.dist-info/METADATA,sha256=g5WuZmVHZprcEd89flt9ni3Itkhj7tQu7znDpq2BOzs,19071
357
+ alita_sdk-0.3.364.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
358
+ alita_sdk-0.3.364.dist-info/top_level.txt,sha256=0vJYy5p_jK6AwVb1aqXr7Kgqgk3WDtQ6t5C-XI9zkmg,10
359
+ alita_sdk-0.3.364.dist-info/RECORD,,