alita-sdk: alita_sdk-0.3.370-py3-none-any.whl → alita_sdk-0.3.371-py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of alita-sdk might be problematic. Click here for more details.

@@ -185,6 +185,12 @@ class VectorStoreWrapperBase(BaseToolApiWrapper):
185
185
  return "No indexed collections"
186
186
  return collections
187
187
 
188
+ def get_index_meta(self, collection_suffix: str):
189
+ index_metas = self.vector_adapter.get_index_meta(self, collection_suffix)
190
+ if len(index_metas) > 1:
191
+ raise RuntimeError(f"Multiple index_meta documents found: {index_metas}")
192
+ return index_metas[0] if index_metas else None
193
+
188
194
  def _clean_collection(self, collection_suffix: str = ''):
189
195
  """
190
196
  Clean the vectorstore collection by deleting all indexed data.
@@ -1,3 +1,4 @@
1
+ import copy
1
2
  import json
2
3
  import logging
3
4
  import time
@@ -148,7 +149,6 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
148
149
  yield from ()
149
150
 
150
151
  def index_data(self, **kwargs):
151
- from ..runtime.langchain.interfaces.llm_processor import add_documents
152
152
  collection_suffix = kwargs.get("collection_suffix")
153
153
  progress_step = kwargs.get("progress_step")
154
154
  clean_index = kwargs.get("clean_index")
@@ -158,17 +158,7 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
158
158
  if clean_index:
159
159
  self._clean_index(collection_suffix)
160
160
  #
161
- # create and add initial index meta document
162
- index_meta_doc = Document(page_content=f"{IndexerKeywords.INDEX_META_TYPE.value}_{collection_suffix}", metadata={
163
- "collection": collection_suffix,
164
- "type": IndexerKeywords.INDEX_META_TYPE.value,
165
- "indexed": 0,
166
- "state": IndexerKeywords.INDEX_META_IN_PROGRESS.value,
167
- "index_configuration": kwargs,
168
- "created_on": time.time(),
169
- "updated_on": time.time(),
170
- })
171
- index_meta_ids = add_documents(vectorstore=self.vectorstore, documents=[index_meta_doc])
161
+ self.index_meta_init(collection_suffix, kwargs)
172
162
  #
173
163
  self._log_tool_event(f"Indexing data into collection with suffix '{collection_suffix}'. It can take some time...")
174
164
  self._log_tool_event(f"Loading the documents to index...{kwargs}")
@@ -183,11 +173,7 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
183
173
  f"Processing documents to collect dependencies and prepare them for indexing...")
184
174
  result = self._save_index_generator(documents, documents_count, chunking_tool, chunking_config, collection_suffix=collection_suffix, progress_step=progress_step)
185
175
  #
186
- # update index meta document
187
- index_meta_doc.metadata["indexed"] = result
188
- index_meta_doc.metadata["state"] = IndexerKeywords.INDEX_META_COMPLETED.value
189
- index_meta_doc.metadata["updated_on"] = time.time()
190
- add_documents(vectorstore=self.vectorstore, documents=[index_meta_doc], ids=index_meta_ids)
176
+ self.index_meta_update(collection_suffix, IndexerKeywords.INDEX_META_COMPLETED.value, result)
191
177
  #
192
178
  return {"status": "ok", "message": f"successfully indexed {result} documents"}
193
179
 
@@ -377,30 +363,6 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
377
363
  }
378
364
  return filter
379
365
 
380
- def index_meta_read(self):
381
- from sqlalchemy import func
382
- from sqlalchemy.orm import Session
383
-
384
- store = self.vectorstore
385
- try:
386
- with Session(store.session_maker.bind) as session:
387
- meta = session.query(
388
- store.EmbeddingStore.id,
389
- store.EmbeddingStore.cmetadata
390
- ).filter(
391
- func.jsonb_extract_path_text(store.EmbeddingStore.cmetadata, 'type') == IndexerKeywords.INDEX_META_TYPE.value
392
- ).all()
393
- return [
394
- {"id": id_, "metadata": cmetadata}
395
- for id_, cmetadata in meta
396
- ]
397
- except Exception as e:
398
- logger.error(f"Failed to get index_meta from PGVector: {str(e)}")
399
- return []
400
-
401
- def index_meta_delete(self, index_meta_ids: list[str]):
402
- self.vectorstore.delete(ids=index_meta_ids)
403
-
404
366
  def search_index(self,
405
367
  query: str,
406
368
  collection_suffix: str = "",
@@ -480,6 +442,51 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
480
442
  reranking_config=reranking_config,
481
443
  extended_search=extended_search
482
444
  )
445
+
446
+ def index_meta_init(self, collection_suffix: str, index_configuration: dict[str, Any]):
447
+ index_meta_raw = super().get_index_meta(collection_suffix)
448
+ from ..runtime.langchain.interfaces.llm_processor import add_documents
449
+ created_on = time.time()
450
+ metadata = {
451
+ "collection": collection_suffix,
452
+ "type": IndexerKeywords.INDEX_META_TYPE.value,
453
+ "indexed": 0,
454
+ "state": IndexerKeywords.INDEX_META_IN_PROGRESS.value,
455
+ "index_configuration": index_configuration,
456
+ "created_on": created_on,
457
+ "updated_on": created_on,
458
+ "history": "[]",
459
+ }
460
+ index_meta_ids = None
461
+ #
462
+ if index_meta_raw:
463
+ history_raw = index_meta_raw.get("metadata", {}).get("history", "[]")
464
+ if isinstance(history_raw, str) and history_raw.strip():
465
+ try:
466
+ history = json.loads(history_raw)
467
+ except (json.JSONDecodeError, TypeError):
468
+ history = []
469
+ else:
470
+ history = []
471
+ new_history_item = {k: v for k, v in index_meta_raw.get("metadata", {}).items() if k != "history"}
472
+ history.append(new_history_item)
473
+ metadata["history"] = json.dumps(history)
474
+ index_meta_ids = [index_meta_raw.get("id")]
475
+ #
476
+ index_meta_doc = Document(page_content=f"{IndexerKeywords.INDEX_META_TYPE.value}_{collection_suffix}", metadata=metadata)
477
+ add_documents(vectorstore=self.vectorstore, documents=[index_meta_doc], ids=index_meta_ids)
478
+
479
+ def index_meta_update(self, collection_suffix: str, state: str, result: int):
480
+ index_meta_raw = super().get_index_meta(collection_suffix)
481
+ from ..runtime.langchain.interfaces.llm_processor import add_documents
482
+ #
483
+ if index_meta_raw:
484
+ metadata = copy.deepcopy(index_meta_raw.get("metadata", {}))
485
+ metadata["indexed"] = result
486
+ metadata["state"] = state
487
+ metadata["updated_on"] = time.time()
488
+ index_meta_doc = Document(page_content=index_meta_raw.get("content", ""), metadata=metadata)
489
+ add_documents(vectorstore=self.vectorstore, documents=[index_meta_doc], ids=[index_meta_raw.get("id")])
483
490
 
484
491
  def get_available_tools(self):
485
492
  """
@@ -2,6 +2,8 @@ from abc import ABC, abstractmethod
2
2
  from typing import Any, Dict, Optional, List
3
3
  from logging import getLogger
4
4
 
5
+ from ...runtime.utils.utils import IndexerKeywords
6
+
5
7
  logger = getLogger(__name__)
6
8
 
7
9
 
@@ -48,6 +50,11 @@ class VectorStoreAdapter(ABC):
48
50
  """Add a new collection name to the metadata"""
49
51
  pass
50
52
 
53
+ @abstractmethod
54
+ def get_index_meta(self, vectorstore_wrapper, collection_suffix: str) -> List[Dict[str, Any]]:
55
+ """Get all index_meta entries from the vector store."""
56
+ pass
57
+
51
58
 
52
59
  class PGVectorAdapter(VectorStoreAdapter):
53
60
  """Adapter for PGVector database operations."""
@@ -265,6 +272,29 @@ class PGVectorAdapter(VectorStoreAdapter):
265
272
  except Exception as e:
266
273
  logger.error(f"Failed to update collection for entry ID {entry_id}: {str(e)}")
267
274
 
275
+ def get_index_meta(self, vectorstore_wrapper, collection_suffix: str) -> List[Dict[str, Any]]:
276
+ from sqlalchemy.orm import Session
277
+ from sqlalchemy import func
278
+
279
+ store = vectorstore_wrapper.vectorstore
280
+ try:
281
+ with Session(store.session_maker.bind) as session:
282
+ meta = session.query(
283
+ store.EmbeddingStore.id,
284
+ store.EmbeddingStore.document,
285
+ store.EmbeddingStore.cmetadata
286
+ ).filter(
287
+ store.EmbeddingStore.cmetadata['type'].astext == IndexerKeywords.INDEX_META_TYPE.value,
288
+ func.jsonb_extract_path_text(store.EmbeddingStore.cmetadata, 'collection') == collection_suffix
289
+ ).all()
290
+ result = []
291
+ for id, document, cmetadata in meta:
292
+ result.append({"id": id, "content": document, "metadata": cmetadata})
293
+ return result
294
+ except Exception as e:
295
+ logger.error(f"Failed to get index_meta from PGVector: {str(e)}")
296
+ raise e
297
+
268
298
 
269
299
  class ChromaAdapter(VectorStoreAdapter):
270
300
  """Adapter for Chroma database operations."""
@@ -361,6 +391,9 @@ class ChromaAdapter(VectorStoreAdapter):
361
391
  # This is a simplified implementation - in practice, you might need more complex logic
362
392
  logger.warning("add_to_collection for Chroma is not fully implemented yet")
363
393
 
394
+ def get_index_meta(self, vectorstore_wrapper, collection_suffix: str) -> List[Dict[str, Any]]:
395
+ logger.warning("get_index_meta for Chroma is not implemented yet")
396
+
364
397
 
365
398
  class VectorStoreAdapterFactory:
366
399
  """Factory for creating vector store adapters."""
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: alita_sdk
3
- Version: 0.3.370
3
+ Version: 0.3.371
4
4
  Summary: SDK for building langchain agents using resources from Alita
5
5
  Author-email: Artem Rozumenko <artyom.rozumenko@gmail.com>, Mikalai Biazruchka <mikalai_biazruchka@epam.com>, Roman Mitusov <roman_mitusov@epam.com>, Ivan Krakhmaliuk <lifedj27@gmail.com>, Artem Dubrovskiy <ad13box@gmail.com>
6
6
  License-Expression: Apache-2.0
@@ -123,7 +123,7 @@ alita_sdk/runtime/tools/router.py,sha256=p7e0tX6YAWw2M2Nq0A_xqw1E2P-Xz1DaJvhUstf
123
123
  alita_sdk/runtime/tools/sandbox.py,sha256=WNz-aUMtkGCPg84dDy_0BPkyp-6YjoYB-xjIEFFrtKw,11601
124
124
  alita_sdk/runtime/tools/tool.py,sha256=lE1hGi6qOAXG7qxtqxarD_XMQqTghdywf261DZawwno,5631
125
125
  alita_sdk/runtime/tools/vectorstore.py,sha256=8vRhi1lGFEs3unvnflEi2p59U2MfV32lStpEizpDms0,34467
126
- alita_sdk/runtime/tools/vectorstore_base.py,sha256=1DYmMQEBMLetxQgi6D9Wd_vM_xVCa9qGTAfLOo2kNC0,27533
126
+ alita_sdk/runtime/tools/vectorstore_base.py,sha256=WF-v3sGQKo9q8D8ULyuBo5dPdFcx79X0DCRjyoOd7DI,27844
127
127
  alita_sdk/runtime/utils/AlitaCallback.py,sha256=E4LlSBuCHWiUq6W7IZExERHZY0qcmdjzc_rJlF2iQIw,7356
128
128
  alita_sdk/runtime/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
129
129
  alita_sdk/runtime/utils/constants.py,sha256=Xntx1b_uxUzT4clwqHA_U6K8y5bBqf_4lSQwXdcWrp4,13586
@@ -135,7 +135,7 @@ alita_sdk/runtime/utils/toolkit_runtime.py,sha256=MU63Fpxj0b5_r1IUUc0Q3-PN9VwL7r
135
135
  alita_sdk/runtime/utils/toolkit_utils.py,sha256=I9QFqnaqfVgN26LUr6s3XlBlG6y0CoHURnCzG7XcwVs,5311
136
136
  alita_sdk/runtime/utils/utils.py,sha256=BVEVLkYiiotcUD0XsHyx-wACpHfALsQg7PLZpObqvK8,1008
137
137
  alita_sdk/tools/__init__.py,sha256=jUj1ztC2FbkIUB-YYmiqaz_rqW7Il5kWzDPn1mJmj5w,10545
138
- alita_sdk/tools/base_indexer_toolkit.py,sha256=jaUzLqzGwY0YJ4ZGeRHfyrWOiuTpOawUqGrLVqXHtFo,26137
138
+ alita_sdk/tools/base_indexer_toolkit.py,sha256=LDiz9J8hxbRnZhc_3PekuR5PtktzDxDmh3jwqGH7loo,26638
139
139
  alita_sdk/tools/code_indexer_toolkit.py,sha256=6QvI1by0OFdnKTx5TfNoDJjnMrvnTi9T56xaDxzeleU,7306
140
140
  alita_sdk/tools/elitea_base.py,sha256=up3HshASSDfjlHV_HPrs1aD4JIwwX0Ug26WGTzgIYvY,34724
141
141
  alita_sdk/tools/non_code_indexer_toolkit.py,sha256=B3QvhpT1F9QidkCcsOi3J_QrTOaNlTxqWFwe90VivQQ,1329
@@ -331,7 +331,7 @@ alita_sdk/tools/testrail/api_wrapper.py,sha256=tQcGlFJmftvs5ZiO4tsP19fCo4CrJeq_U
331
331
  alita_sdk/tools/utils/__init__.py,sha256=W9rCCUPtHCP5nGAbWp0n5jaNA84572aiRoqKneBnaS4,3330
332
332
  alita_sdk/tools/utils/available_tools_decorator.py,sha256=IbrdfeQkswxUFgvvN7-dyLMZMyXLiwvX7kgi3phciCk,273
333
333
  alita_sdk/tools/utils/content_parser.py,sha256=TuKAPUzIZx9F-pzHiVyrCFpI5emrGaOF8DgWHJP2cM4,15235
334
- alita_sdk/tools/vector_adapters/VectorStoreAdapter.py,sha256=ypBEAkFRGHv5edW0N9rdo1yKurNGQ4pRVEWtrN_7SeA,17656
334
+ alita_sdk/tools/vector_adapters/VectorStoreAdapter.py,sha256=p_9Cu5eausnfiKNsitbVxwu5eimZHRv3R-OMw7lBrts,19173
335
335
  alita_sdk/tools/vector_adapters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
336
336
  alita_sdk/tools/xray/__init__.py,sha256=eOMWP8VamFbbJgt1xrGpGPqB9ByOTA0Cd3LCaETzGk4,4376
337
337
  alita_sdk/tools/xray/api_wrapper.py,sha256=uj5kzUgPdo_Oct9WCNMOpkb6o_3L7J4LZrEGtrwYMmc,30157
@@ -352,8 +352,8 @@ alita_sdk/tools/zephyr_scale/api_wrapper.py,sha256=kT0TbmMvuKhDUZc0i7KO18O38JM9S
352
352
  alita_sdk/tools/zephyr_squad/__init__.py,sha256=0ne8XLJEQSLOWfzd2HdnqOYmQlUliKHbBED5kW_Vias,2895
353
353
  alita_sdk/tools/zephyr_squad/api_wrapper.py,sha256=kmw_xol8YIYFplBLWTqP_VKPRhL_1ItDD0_vXTe_UuI,14906
354
354
  alita_sdk/tools/zephyr_squad/zephyr_squad_cloud_client.py,sha256=R371waHsms4sllHCbijKYs90C-9Yu0sSR3N4SUfQOgU,5066
355
- alita_sdk-0.3.370.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
356
- alita_sdk-0.3.370.dist-info/METADATA,sha256=7o5P_ba4fUU5FVQU9htx-olWpTUnrpVOcfl2o3DwSEs,19071
357
- alita_sdk-0.3.370.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
358
- alita_sdk-0.3.370.dist-info/top_level.txt,sha256=0vJYy5p_jK6AwVb1aqXr7Kgqgk3WDtQ6t5C-XI9zkmg,10
359
- alita_sdk-0.3.370.dist-info/RECORD,,
355
+ alita_sdk-0.3.371.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
356
+ alita_sdk-0.3.371.dist-info/METADATA,sha256=WP9MwZivEJP78MqLbiZNALJ_lgrPTKik1hfNddFJI-Y,19071
357
+ alita_sdk-0.3.371.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
358
+ alita_sdk-0.3.371.dist-info/top_level.txt,sha256=0vJYy5p_jK6AwVb1aqXr7Kgqgk3WDtQ6t5C-XI9zkmg,10
359
+ alita_sdk-0.3.371.dist-info/RECORD,,