alita-sdk 0.3.370__py3-none-any.whl → 0.3.372__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of alita-sdk might be problematic. Click here for more details.

@@ -185,6 +185,12 @@ class VectorStoreWrapperBase(BaseToolApiWrapper):
185
185
  return "No indexed collections"
186
186
  return collections
187
187
 
188
+ def get_index_meta(self, collection_suffix: str):
189
+ index_metas = self.vector_adapter.get_index_meta(self, collection_suffix)
190
+ if len(index_metas) > 1:
191
+ raise RuntimeError(f"Multiple index_meta documents found: {index_metas}")
192
+ return index_metas[0] if index_metas else None
193
+
188
194
  def _clean_collection(self, collection_suffix: str = ''):
189
195
  """
190
196
  Clean the vectorstore collection by deleting all indexed data.
@@ -1,3 +1,4 @@
1
+ import copy
1
2
  import json
2
3
  import logging
3
4
  import time
@@ -148,7 +149,6 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
148
149
  yield from ()
149
150
 
150
151
  def index_data(self, **kwargs):
151
- from ..runtime.langchain.interfaces.llm_processor import add_documents
152
152
  collection_suffix = kwargs.get("collection_suffix")
153
153
  progress_step = kwargs.get("progress_step")
154
154
  clean_index = kwargs.get("clean_index")
@@ -158,17 +158,7 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
158
158
  if clean_index:
159
159
  self._clean_index(collection_suffix)
160
160
  #
161
- # create and add initial index meta document
162
- index_meta_doc = Document(page_content=f"{IndexerKeywords.INDEX_META_TYPE.value}_{collection_suffix}", metadata={
163
- "collection": collection_suffix,
164
- "type": IndexerKeywords.INDEX_META_TYPE.value,
165
- "indexed": 0,
166
- "state": IndexerKeywords.INDEX_META_IN_PROGRESS.value,
167
- "index_configuration": kwargs,
168
- "created_on": time.time(),
169
- "updated_on": time.time(),
170
- })
171
- index_meta_ids = add_documents(vectorstore=self.vectorstore, documents=[index_meta_doc])
161
+ self.index_meta_init(collection_suffix, kwargs)
172
162
  #
173
163
  self._log_tool_event(f"Indexing data into collection with suffix '{collection_suffix}'. It can take some time...")
174
164
  self._log_tool_event(f"Loading the documents to index...{kwargs}")
@@ -183,11 +173,7 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
183
173
  f"Processing documents to collect dependencies and prepare them for indexing...")
184
174
  result = self._save_index_generator(documents, documents_count, chunking_tool, chunking_config, collection_suffix=collection_suffix, progress_step=progress_step)
185
175
  #
186
- # update index meta document
187
- index_meta_doc.metadata["indexed"] = result
188
- index_meta_doc.metadata["state"] = IndexerKeywords.INDEX_META_COMPLETED.value
189
- index_meta_doc.metadata["updated_on"] = time.time()
190
- add_documents(vectorstore=self.vectorstore, documents=[index_meta_doc], ids=index_meta_ids)
176
+ self.index_meta_update(collection_suffix, IndexerKeywords.INDEX_META_COMPLETED.value, result)
191
177
  #
192
178
  return {"status": "ok", "message": f"successfully indexed {result} documents"}
193
179
 
@@ -366,6 +352,9 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
366
352
  filter.update({"collection": {
367
353
  "$eq": collection_suffix.strip()
368
354
  }})
355
+
356
+ if filter:
357
+ # Exclude index meta documents from search results
369
358
  filter = {
370
359
  "$and": [
371
360
  filter,
@@ -375,32 +364,13 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
375
364
  ]},
376
365
  ]
377
366
  }
367
+ else:
368
+ filter = {"$or": [
369
+ {"type": {"$exists": False}},
370
+ {"type": {"$ne": IndexerKeywords.INDEX_META_TYPE.value}}
371
+ ]}
378
372
  return filter
379
373
 
380
- def index_meta_read(self):
381
- from sqlalchemy import func
382
- from sqlalchemy.orm import Session
383
-
384
- store = self.vectorstore
385
- try:
386
- with Session(store.session_maker.bind) as session:
387
- meta = session.query(
388
- store.EmbeddingStore.id,
389
- store.EmbeddingStore.cmetadata
390
- ).filter(
391
- func.jsonb_extract_path_text(store.EmbeddingStore.cmetadata, 'type') == IndexerKeywords.INDEX_META_TYPE.value
392
- ).all()
393
- return [
394
- {"id": id_, "metadata": cmetadata}
395
- for id_, cmetadata in meta
396
- ]
397
- except Exception as e:
398
- logger.error(f"Failed to get index_meta from PGVector: {str(e)}")
399
- return []
400
-
401
- def index_meta_delete(self, index_meta_ids: list[str]):
402
- self.vectorstore.delete(ids=index_meta_ids)
403
-
404
374
  def search_index(self,
405
375
  query: str,
406
376
  collection_suffix: str = "",
@@ -480,6 +450,51 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
480
450
  reranking_config=reranking_config,
481
451
  extended_search=extended_search
482
452
  )
453
+
454
+ def index_meta_init(self, collection_suffix: str, index_configuration: dict[str, Any]):
455
+ index_meta_raw = super().get_index_meta(collection_suffix)
456
+ from ..runtime.langchain.interfaces.llm_processor import add_documents
457
+ created_on = time.time()
458
+ metadata = {
459
+ "collection": collection_suffix,
460
+ "type": IndexerKeywords.INDEX_META_TYPE.value,
461
+ "indexed": 0,
462
+ "state": IndexerKeywords.INDEX_META_IN_PROGRESS.value,
463
+ "index_configuration": index_configuration,
464
+ "created_on": created_on,
465
+ "updated_on": created_on,
466
+ "history": "[]",
467
+ }
468
+ index_meta_ids = None
469
+ #
470
+ if index_meta_raw:
471
+ history_raw = index_meta_raw.get("metadata", {}).get("history", "[]")
472
+ if isinstance(history_raw, str) and history_raw.strip():
473
+ try:
474
+ history = json.loads(history_raw)
475
+ except (json.JSONDecodeError, TypeError):
476
+ history = []
477
+ else:
478
+ history = []
479
+ new_history_item = {k: v for k, v in index_meta_raw.get("metadata", {}).items() if k != "history"}
480
+ history.append(new_history_item)
481
+ metadata["history"] = json.dumps(history)
482
+ index_meta_ids = [index_meta_raw.get("id")]
483
+ #
484
+ index_meta_doc = Document(page_content=f"{IndexerKeywords.INDEX_META_TYPE.value}_{collection_suffix}", metadata=metadata)
485
+ add_documents(vectorstore=self.vectorstore, documents=[index_meta_doc], ids=index_meta_ids)
486
+
487
+ def index_meta_update(self, collection_suffix: str, state: str, result: int):
488
+ index_meta_raw = super().get_index_meta(collection_suffix)
489
+ from ..runtime.langchain.interfaces.llm_processor import add_documents
490
+ #
491
+ if index_meta_raw:
492
+ metadata = copy.deepcopy(index_meta_raw.get("metadata", {}))
493
+ metadata["indexed"] = result
494
+ metadata["state"] = state
495
+ metadata["updated_on"] = time.time()
496
+ index_meta_doc = Document(page_content=index_meta_raw.get("content", ""), metadata=metadata)
497
+ add_documents(vectorstore=self.vectorstore, documents=[index_meta_doc], ids=[index_meta_raw.get("id")])
483
498
 
484
499
  def get_available_tools(self):
485
500
  """
@@ -2,6 +2,8 @@ from abc import ABC, abstractmethod
2
2
  from typing import Any, Dict, Optional, List
3
3
  from logging import getLogger
4
4
 
5
+ from ...runtime.utils.utils import IndexerKeywords
6
+
5
7
  logger = getLogger(__name__)
6
8
 
7
9
 
@@ -48,6 +50,11 @@ class VectorStoreAdapter(ABC):
48
50
  """Add a new collection name to the metadata"""
49
51
  pass
50
52
 
53
+ @abstractmethod
54
+ def get_index_meta(self, vectorstore_wrapper, collection_suffix: str) -> List[Dict[str, Any]]:
55
+ """Get all index_meta entries from the vector store."""
56
+ pass
57
+
51
58
 
52
59
  class PGVectorAdapter(VectorStoreAdapter):
53
60
  """Adapter for PGVector database operations."""
@@ -265,6 +272,29 @@ class PGVectorAdapter(VectorStoreAdapter):
265
272
  except Exception as e:
266
273
  logger.error(f"Failed to update collection for entry ID {entry_id}: {str(e)}")
267
274
 
275
+ def get_index_meta(self, vectorstore_wrapper, collection_suffix: str) -> List[Dict[str, Any]]:
276
+ from sqlalchemy.orm import Session
277
+ from sqlalchemy import func
278
+
279
+ store = vectorstore_wrapper.vectorstore
280
+ try:
281
+ with Session(store.session_maker.bind) as session:
282
+ meta = session.query(
283
+ store.EmbeddingStore.id,
284
+ store.EmbeddingStore.document,
285
+ store.EmbeddingStore.cmetadata
286
+ ).filter(
287
+ store.EmbeddingStore.cmetadata['type'].astext == IndexerKeywords.INDEX_META_TYPE.value,
288
+ func.jsonb_extract_path_text(store.EmbeddingStore.cmetadata, 'collection') == collection_suffix
289
+ ).all()
290
+ result = []
291
+ for id, document, cmetadata in meta:
292
+ result.append({"id": id, "content": document, "metadata": cmetadata})
293
+ return result
294
+ except Exception as e:
295
+ logger.error(f"Failed to get index_meta from PGVector: {str(e)}")
296
+ raise e
297
+
268
298
 
269
299
  class ChromaAdapter(VectorStoreAdapter):
270
300
  """Adapter for Chroma database operations."""
@@ -361,6 +391,9 @@ class ChromaAdapter(VectorStoreAdapter):
361
391
  # This is a simplified implementation - in practice, you might need more complex logic
362
392
  logger.warning("add_to_collection for Chroma is not fully implemented yet")
363
393
 
394
+ def get_index_meta(self, vectorstore_wrapper, collection_suffix: str) -> List[Dict[str, Any]]:
395
+ logger.warning("get_index_meta for Chroma is not implemented yet")
396
+
364
397
 
365
398
  class VectorStoreAdapterFactory:
366
399
  """Factory for creating vector store adapters."""
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: alita_sdk
3
- Version: 0.3.370
3
+ Version: 0.3.372
4
4
  Summary: SDK for building langchain agents using resources from Alita
5
5
  Author-email: Artem Rozumenko <artyom.rozumenko@gmail.com>, Mikalai Biazruchka <mikalai_biazruchka@epam.com>, Roman Mitusov <roman_mitusov@epam.com>, Ivan Krakhmaliuk <lifedj27@gmail.com>, Artem Dubrovskiy <ad13box@gmail.com>
6
6
  License-Expression: Apache-2.0
@@ -123,7 +123,7 @@ alita_sdk/runtime/tools/router.py,sha256=p7e0tX6YAWw2M2Nq0A_xqw1E2P-Xz1DaJvhUstf
123
123
  alita_sdk/runtime/tools/sandbox.py,sha256=WNz-aUMtkGCPg84dDy_0BPkyp-6YjoYB-xjIEFFrtKw,11601
124
124
  alita_sdk/runtime/tools/tool.py,sha256=lE1hGi6qOAXG7qxtqxarD_XMQqTghdywf261DZawwno,5631
125
125
  alita_sdk/runtime/tools/vectorstore.py,sha256=8vRhi1lGFEs3unvnflEi2p59U2MfV32lStpEizpDms0,34467
126
- alita_sdk/runtime/tools/vectorstore_base.py,sha256=1DYmMQEBMLetxQgi6D9Wd_vM_xVCa9qGTAfLOo2kNC0,27533
126
+ alita_sdk/runtime/tools/vectorstore_base.py,sha256=WF-v3sGQKo9q8D8ULyuBo5dPdFcx79X0DCRjyoOd7DI,27844
127
127
  alita_sdk/runtime/utils/AlitaCallback.py,sha256=E4LlSBuCHWiUq6W7IZExERHZY0qcmdjzc_rJlF2iQIw,7356
128
128
  alita_sdk/runtime/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
129
129
  alita_sdk/runtime/utils/constants.py,sha256=Xntx1b_uxUzT4clwqHA_U6K8y5bBqf_4lSQwXdcWrp4,13586
@@ -135,7 +135,7 @@ alita_sdk/runtime/utils/toolkit_runtime.py,sha256=MU63Fpxj0b5_r1IUUc0Q3-PN9VwL7r
135
135
  alita_sdk/runtime/utils/toolkit_utils.py,sha256=I9QFqnaqfVgN26LUr6s3XlBlG6y0CoHURnCzG7XcwVs,5311
136
136
  alita_sdk/runtime/utils/utils.py,sha256=BVEVLkYiiotcUD0XsHyx-wACpHfALsQg7PLZpObqvK8,1008
137
137
  alita_sdk/tools/__init__.py,sha256=jUj1ztC2FbkIUB-YYmiqaz_rqW7Il5kWzDPn1mJmj5w,10545
138
- alita_sdk/tools/base_indexer_toolkit.py,sha256=jaUzLqzGwY0YJ4ZGeRHfyrWOiuTpOawUqGrLVqXHtFo,26137
138
+ alita_sdk/tools/base_indexer_toolkit.py,sha256=GpeIYY7kJZCjv0-gGcHNStY0uCtEBRl-I5XiISM0Tdo,26900
139
139
  alita_sdk/tools/code_indexer_toolkit.py,sha256=6QvI1by0OFdnKTx5TfNoDJjnMrvnTi9T56xaDxzeleU,7306
140
140
  alita_sdk/tools/elitea_base.py,sha256=up3HshASSDfjlHV_HPrs1aD4JIwwX0Ug26WGTzgIYvY,34724
141
141
  alita_sdk/tools/non_code_indexer_toolkit.py,sha256=B3QvhpT1F9QidkCcsOi3J_QrTOaNlTxqWFwe90VivQQ,1329
@@ -331,7 +331,7 @@ alita_sdk/tools/testrail/api_wrapper.py,sha256=tQcGlFJmftvs5ZiO4tsP19fCo4CrJeq_U
331
331
  alita_sdk/tools/utils/__init__.py,sha256=W9rCCUPtHCP5nGAbWp0n5jaNA84572aiRoqKneBnaS4,3330
332
332
  alita_sdk/tools/utils/available_tools_decorator.py,sha256=IbrdfeQkswxUFgvvN7-dyLMZMyXLiwvX7kgi3phciCk,273
333
333
  alita_sdk/tools/utils/content_parser.py,sha256=TuKAPUzIZx9F-pzHiVyrCFpI5emrGaOF8DgWHJP2cM4,15235
334
- alita_sdk/tools/vector_adapters/VectorStoreAdapter.py,sha256=ypBEAkFRGHv5edW0N9rdo1yKurNGQ4pRVEWtrN_7SeA,17656
334
+ alita_sdk/tools/vector_adapters/VectorStoreAdapter.py,sha256=p_9Cu5eausnfiKNsitbVxwu5eimZHRv3R-OMw7lBrts,19173
335
335
  alita_sdk/tools/vector_adapters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
336
336
  alita_sdk/tools/xray/__init__.py,sha256=eOMWP8VamFbbJgt1xrGpGPqB9ByOTA0Cd3LCaETzGk4,4376
337
337
  alita_sdk/tools/xray/api_wrapper.py,sha256=uj5kzUgPdo_Oct9WCNMOpkb6o_3L7J4LZrEGtrwYMmc,30157
@@ -352,8 +352,8 @@ alita_sdk/tools/zephyr_scale/api_wrapper.py,sha256=kT0TbmMvuKhDUZc0i7KO18O38JM9S
352
352
  alita_sdk/tools/zephyr_squad/__init__.py,sha256=0ne8XLJEQSLOWfzd2HdnqOYmQlUliKHbBED5kW_Vias,2895
353
353
  alita_sdk/tools/zephyr_squad/api_wrapper.py,sha256=kmw_xol8YIYFplBLWTqP_VKPRhL_1ItDD0_vXTe_UuI,14906
354
354
  alita_sdk/tools/zephyr_squad/zephyr_squad_cloud_client.py,sha256=R371waHsms4sllHCbijKYs90C-9Yu0sSR3N4SUfQOgU,5066
355
- alita_sdk-0.3.370.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
356
- alita_sdk-0.3.370.dist-info/METADATA,sha256=7o5P_ba4fUU5FVQU9htx-olWpTUnrpVOcfl2o3DwSEs,19071
357
- alita_sdk-0.3.370.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
358
- alita_sdk-0.3.370.dist-info/top_level.txt,sha256=0vJYy5p_jK6AwVb1aqXr7Kgqgk3WDtQ6t5C-XI9zkmg,10
359
- alita_sdk-0.3.370.dist-info/RECORD,,
355
+ alita_sdk-0.3.372.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
356
+ alita_sdk-0.3.372.dist-info/METADATA,sha256=A3hNTePpqTE8uzQhDG7RbgX5Iv7MoyOYgkucgkqKEpI,19071
357
+ alita_sdk-0.3.372.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
358
+ alita_sdk-0.3.372.dist-info/top_level.txt,sha256=0vJYy5p_jK6AwVb1aqXr7Kgqgk3WDtQ6t5C-XI9zkmg,10
359
+ alita_sdk-0.3.372.dist-info/RECORD,,