alita-sdk 0.3.370__py3-none-any.whl → 0.3.372__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of alita-sdk might be problematic. Click here for more details.
- alita_sdk/runtime/tools/vectorstore_base.py +6 -0
- alita_sdk/tools/base_indexer_toolkit.py +56 -41
- alita_sdk/tools/vector_adapters/VectorStoreAdapter.py +33 -0
- {alita_sdk-0.3.370.dist-info → alita_sdk-0.3.372.dist-info}/METADATA +1 -1
- {alita_sdk-0.3.370.dist-info → alita_sdk-0.3.372.dist-info}/RECORD +8 -8
- {alita_sdk-0.3.370.dist-info → alita_sdk-0.3.372.dist-info}/WHEEL +0 -0
- {alita_sdk-0.3.370.dist-info → alita_sdk-0.3.372.dist-info}/licenses/LICENSE +0 -0
- {alita_sdk-0.3.370.dist-info → alita_sdk-0.3.372.dist-info}/top_level.txt +0 -0
|
@@ -185,6 +185,12 @@ class VectorStoreWrapperBase(BaseToolApiWrapper):
|
|
|
185
185
|
return "No indexed collections"
|
|
186
186
|
return collections
|
|
187
187
|
|
|
188
|
+
def get_index_meta(self, collection_suffix: str):
|
|
189
|
+
index_metas = self.vector_adapter.get_index_meta(self, collection_suffix)
|
|
190
|
+
if len(index_metas) > 1:
|
|
191
|
+
raise RuntimeError(f"Multiple index_meta documents found: {index_metas}")
|
|
192
|
+
return index_metas[0] if index_metas else None
|
|
193
|
+
|
|
188
194
|
def _clean_collection(self, collection_suffix: str = ''):
|
|
189
195
|
"""
|
|
190
196
|
Clean the vectorstore collection by deleting all indexed data.
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import copy
|
|
1
2
|
import json
|
|
2
3
|
import logging
|
|
3
4
|
import time
|
|
@@ -148,7 +149,6 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
|
|
|
148
149
|
yield from ()
|
|
149
150
|
|
|
150
151
|
def index_data(self, **kwargs):
|
|
151
|
-
from ..runtime.langchain.interfaces.llm_processor import add_documents
|
|
152
152
|
collection_suffix = kwargs.get("collection_suffix")
|
|
153
153
|
progress_step = kwargs.get("progress_step")
|
|
154
154
|
clean_index = kwargs.get("clean_index")
|
|
@@ -158,17 +158,7 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
|
|
|
158
158
|
if clean_index:
|
|
159
159
|
self._clean_index(collection_suffix)
|
|
160
160
|
#
|
|
161
|
-
|
|
162
|
-
index_meta_doc = Document(page_content=f"{IndexerKeywords.INDEX_META_TYPE.value}_{collection_suffix}", metadata={
|
|
163
|
-
"collection": collection_suffix,
|
|
164
|
-
"type": IndexerKeywords.INDEX_META_TYPE.value,
|
|
165
|
-
"indexed": 0,
|
|
166
|
-
"state": IndexerKeywords.INDEX_META_IN_PROGRESS.value,
|
|
167
|
-
"index_configuration": kwargs,
|
|
168
|
-
"created_on": time.time(),
|
|
169
|
-
"updated_on": time.time(),
|
|
170
|
-
})
|
|
171
|
-
index_meta_ids = add_documents(vectorstore=self.vectorstore, documents=[index_meta_doc])
|
|
161
|
+
self.index_meta_init(collection_suffix, kwargs)
|
|
172
162
|
#
|
|
173
163
|
self._log_tool_event(f"Indexing data into collection with suffix '{collection_suffix}'. It can take some time...")
|
|
174
164
|
self._log_tool_event(f"Loading the documents to index...{kwargs}")
|
|
@@ -183,11 +173,7 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
|
|
|
183
173
|
f"Processing documents to collect dependencies and prepare them for indexing...")
|
|
184
174
|
result = self._save_index_generator(documents, documents_count, chunking_tool, chunking_config, collection_suffix=collection_suffix, progress_step=progress_step)
|
|
185
175
|
#
|
|
186
|
-
|
|
187
|
-
index_meta_doc.metadata["indexed"] = result
|
|
188
|
-
index_meta_doc.metadata["state"] = IndexerKeywords.INDEX_META_COMPLETED.value
|
|
189
|
-
index_meta_doc.metadata["updated_on"] = time.time()
|
|
190
|
-
add_documents(vectorstore=self.vectorstore, documents=[index_meta_doc], ids=index_meta_ids)
|
|
176
|
+
self.index_meta_update(collection_suffix, IndexerKeywords.INDEX_META_COMPLETED.value, result)
|
|
191
177
|
#
|
|
192
178
|
return {"status": "ok", "message": f"successfully indexed {result} documents"}
|
|
193
179
|
|
|
@@ -366,6 +352,9 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
|
|
|
366
352
|
filter.update({"collection": {
|
|
367
353
|
"$eq": collection_suffix.strip()
|
|
368
354
|
}})
|
|
355
|
+
|
|
356
|
+
if filter:
|
|
357
|
+
# Exclude index meta documents from search results
|
|
369
358
|
filter = {
|
|
370
359
|
"$and": [
|
|
371
360
|
filter,
|
|
@@ -375,32 +364,13 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
|
|
|
375
364
|
]},
|
|
376
365
|
]
|
|
377
366
|
}
|
|
367
|
+
else:
|
|
368
|
+
filter = {"$or": [
|
|
369
|
+
{"type": {"$exists": False}},
|
|
370
|
+
{"type": {"$ne": IndexerKeywords.INDEX_META_TYPE.value}}
|
|
371
|
+
]}
|
|
378
372
|
return filter
|
|
379
373
|
|
|
380
|
-
def index_meta_read(self):
|
|
381
|
-
from sqlalchemy import func
|
|
382
|
-
from sqlalchemy.orm import Session
|
|
383
|
-
|
|
384
|
-
store = self.vectorstore
|
|
385
|
-
try:
|
|
386
|
-
with Session(store.session_maker.bind) as session:
|
|
387
|
-
meta = session.query(
|
|
388
|
-
store.EmbeddingStore.id,
|
|
389
|
-
store.EmbeddingStore.cmetadata
|
|
390
|
-
).filter(
|
|
391
|
-
func.jsonb_extract_path_text(store.EmbeddingStore.cmetadata, 'type') == IndexerKeywords.INDEX_META_TYPE.value
|
|
392
|
-
).all()
|
|
393
|
-
return [
|
|
394
|
-
{"id": id_, "metadata": cmetadata}
|
|
395
|
-
for id_, cmetadata in meta
|
|
396
|
-
]
|
|
397
|
-
except Exception as e:
|
|
398
|
-
logger.error(f"Failed to get index_meta from PGVector: {str(e)}")
|
|
399
|
-
return []
|
|
400
|
-
|
|
401
|
-
def index_meta_delete(self, index_meta_ids: list[str]):
|
|
402
|
-
self.vectorstore.delete(ids=index_meta_ids)
|
|
403
|
-
|
|
404
374
|
def search_index(self,
|
|
405
375
|
query: str,
|
|
406
376
|
collection_suffix: str = "",
|
|
@@ -480,6 +450,51 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
|
|
|
480
450
|
reranking_config=reranking_config,
|
|
481
451
|
extended_search=extended_search
|
|
482
452
|
)
|
|
453
|
+
|
|
454
|
+
def index_meta_init(self, collection_suffix: str, index_configuration: dict[str, Any]):
|
|
455
|
+
index_meta_raw = super().get_index_meta(collection_suffix)
|
|
456
|
+
from ..runtime.langchain.interfaces.llm_processor import add_documents
|
|
457
|
+
created_on = time.time()
|
|
458
|
+
metadata = {
|
|
459
|
+
"collection": collection_suffix,
|
|
460
|
+
"type": IndexerKeywords.INDEX_META_TYPE.value,
|
|
461
|
+
"indexed": 0,
|
|
462
|
+
"state": IndexerKeywords.INDEX_META_IN_PROGRESS.value,
|
|
463
|
+
"index_configuration": index_configuration,
|
|
464
|
+
"created_on": created_on,
|
|
465
|
+
"updated_on": created_on,
|
|
466
|
+
"history": "[]",
|
|
467
|
+
}
|
|
468
|
+
index_meta_ids = None
|
|
469
|
+
#
|
|
470
|
+
if index_meta_raw:
|
|
471
|
+
history_raw = index_meta_raw.get("metadata", {}).get("history", "[]")
|
|
472
|
+
if isinstance(history_raw, str) and history_raw.strip():
|
|
473
|
+
try:
|
|
474
|
+
history = json.loads(history_raw)
|
|
475
|
+
except (json.JSONDecodeError, TypeError):
|
|
476
|
+
history = []
|
|
477
|
+
else:
|
|
478
|
+
history = []
|
|
479
|
+
new_history_item = {k: v for k, v in index_meta_raw.get("metadata", {}).items() if k != "history"}
|
|
480
|
+
history.append(new_history_item)
|
|
481
|
+
metadata["history"] = json.dumps(history)
|
|
482
|
+
index_meta_ids = [index_meta_raw.get("id")]
|
|
483
|
+
#
|
|
484
|
+
index_meta_doc = Document(page_content=f"{IndexerKeywords.INDEX_META_TYPE.value}_{collection_suffix}", metadata=metadata)
|
|
485
|
+
add_documents(vectorstore=self.vectorstore, documents=[index_meta_doc], ids=index_meta_ids)
|
|
486
|
+
|
|
487
|
+
def index_meta_update(self, collection_suffix: str, state: str, result: int):
|
|
488
|
+
index_meta_raw = super().get_index_meta(collection_suffix)
|
|
489
|
+
from ..runtime.langchain.interfaces.llm_processor import add_documents
|
|
490
|
+
#
|
|
491
|
+
if index_meta_raw:
|
|
492
|
+
metadata = copy.deepcopy(index_meta_raw.get("metadata", {}))
|
|
493
|
+
metadata["indexed"] = result
|
|
494
|
+
metadata["state"] = state
|
|
495
|
+
metadata["updated_on"] = time.time()
|
|
496
|
+
index_meta_doc = Document(page_content=index_meta_raw.get("content", ""), metadata=metadata)
|
|
497
|
+
add_documents(vectorstore=self.vectorstore, documents=[index_meta_doc], ids=[index_meta_raw.get("id")])
|
|
483
498
|
|
|
484
499
|
def get_available_tools(self):
|
|
485
500
|
"""
|
|
@@ -2,6 +2,8 @@ from abc import ABC, abstractmethod
|
|
|
2
2
|
from typing import Any, Dict, Optional, List
|
|
3
3
|
from logging import getLogger
|
|
4
4
|
|
|
5
|
+
from ...runtime.utils.utils import IndexerKeywords
|
|
6
|
+
|
|
5
7
|
logger = getLogger(__name__)
|
|
6
8
|
|
|
7
9
|
|
|
@@ -48,6 +50,11 @@ class VectorStoreAdapter(ABC):
|
|
|
48
50
|
"""Add a new collection name to the metadata"""
|
|
49
51
|
pass
|
|
50
52
|
|
|
53
|
+
@abstractmethod
|
|
54
|
+
def get_index_meta(self, vectorstore_wrapper, collection_suffix: str) -> List[Dict[str, Any]]:
|
|
55
|
+
"""Get all index_meta entries from the vector store."""
|
|
56
|
+
pass
|
|
57
|
+
|
|
51
58
|
|
|
52
59
|
class PGVectorAdapter(VectorStoreAdapter):
|
|
53
60
|
"""Adapter for PGVector database operations."""
|
|
@@ -265,6 +272,29 @@ class PGVectorAdapter(VectorStoreAdapter):
|
|
|
265
272
|
except Exception as e:
|
|
266
273
|
logger.error(f"Failed to update collection for entry ID {entry_id}: {str(e)}")
|
|
267
274
|
|
|
275
|
+
def get_index_meta(self, vectorstore_wrapper, collection_suffix: str) -> List[Dict[str, Any]]:
|
|
276
|
+
from sqlalchemy.orm import Session
|
|
277
|
+
from sqlalchemy import func
|
|
278
|
+
|
|
279
|
+
store = vectorstore_wrapper.vectorstore
|
|
280
|
+
try:
|
|
281
|
+
with Session(store.session_maker.bind) as session:
|
|
282
|
+
meta = session.query(
|
|
283
|
+
store.EmbeddingStore.id,
|
|
284
|
+
store.EmbeddingStore.document,
|
|
285
|
+
store.EmbeddingStore.cmetadata
|
|
286
|
+
).filter(
|
|
287
|
+
store.EmbeddingStore.cmetadata['type'].astext == IndexerKeywords.INDEX_META_TYPE.value,
|
|
288
|
+
func.jsonb_extract_path_text(store.EmbeddingStore.cmetadata, 'collection') == collection_suffix
|
|
289
|
+
).all()
|
|
290
|
+
result = []
|
|
291
|
+
for id, document, cmetadata in meta:
|
|
292
|
+
result.append({"id": id, "content": document, "metadata": cmetadata})
|
|
293
|
+
return result
|
|
294
|
+
except Exception as e:
|
|
295
|
+
logger.error(f"Failed to get index_meta from PGVector: {str(e)}")
|
|
296
|
+
raise e
|
|
297
|
+
|
|
268
298
|
|
|
269
299
|
class ChromaAdapter(VectorStoreAdapter):
|
|
270
300
|
"""Adapter for Chroma database operations."""
|
|
@@ -361,6 +391,9 @@ class ChromaAdapter(VectorStoreAdapter):
|
|
|
361
391
|
# This is a simplified implementation - in practice, you might need more complex logic
|
|
362
392
|
logger.warning("add_to_collection for Chroma is not fully implemented yet")
|
|
363
393
|
|
|
394
|
+
def get_index_meta(self, vectorstore_wrapper, collection_suffix: str) -> List[Dict[str, Any]]:
|
|
395
|
+
logger.warning("get_index_meta for Chroma is not implemented yet")
|
|
396
|
+
|
|
364
397
|
|
|
365
398
|
class VectorStoreAdapterFactory:
|
|
366
399
|
"""Factory for creating vector store adapters."""
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: alita_sdk
|
|
3
|
-
Version: 0.3.370
|
|
3
|
+
Version: 0.3.372
|
|
4
4
|
Summary: SDK for building langchain agents using resources from Alita
|
|
5
5
|
Author-email: Artem Rozumenko <artyom.rozumenko@gmail.com>, Mikalai Biazruchka <mikalai_biazruchka@epam.com>, Roman Mitusov <roman_mitusov@epam.com>, Ivan Krakhmaliuk <lifedj27@gmail.com>, Artem Dubrovskiy <ad13box@gmail.com>
|
|
6
6
|
License-Expression: Apache-2.0
|
|
@@ -123,7 +123,7 @@ alita_sdk/runtime/tools/router.py,sha256=p7e0tX6YAWw2M2Nq0A_xqw1E2P-Xz1DaJvhUstf
|
|
|
123
123
|
alita_sdk/runtime/tools/sandbox.py,sha256=WNz-aUMtkGCPg84dDy_0BPkyp-6YjoYB-xjIEFFrtKw,11601
|
|
124
124
|
alita_sdk/runtime/tools/tool.py,sha256=lE1hGi6qOAXG7qxtqxarD_XMQqTghdywf261DZawwno,5631
|
|
125
125
|
alita_sdk/runtime/tools/vectorstore.py,sha256=8vRhi1lGFEs3unvnflEi2p59U2MfV32lStpEizpDms0,34467
|
|
126
|
-
alita_sdk/runtime/tools/vectorstore_base.py,sha256=
|
|
126
|
+
alita_sdk/runtime/tools/vectorstore_base.py,sha256=WF-v3sGQKo9q8D8ULyuBo5dPdFcx79X0DCRjyoOd7DI,27844
|
|
127
127
|
alita_sdk/runtime/utils/AlitaCallback.py,sha256=E4LlSBuCHWiUq6W7IZExERHZY0qcmdjzc_rJlF2iQIw,7356
|
|
128
128
|
alita_sdk/runtime/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
129
129
|
alita_sdk/runtime/utils/constants.py,sha256=Xntx1b_uxUzT4clwqHA_U6K8y5bBqf_4lSQwXdcWrp4,13586
|
|
@@ -135,7 +135,7 @@ alita_sdk/runtime/utils/toolkit_runtime.py,sha256=MU63Fpxj0b5_r1IUUc0Q3-PN9VwL7r
|
|
|
135
135
|
alita_sdk/runtime/utils/toolkit_utils.py,sha256=I9QFqnaqfVgN26LUr6s3XlBlG6y0CoHURnCzG7XcwVs,5311
|
|
136
136
|
alita_sdk/runtime/utils/utils.py,sha256=BVEVLkYiiotcUD0XsHyx-wACpHfALsQg7PLZpObqvK8,1008
|
|
137
137
|
alita_sdk/tools/__init__.py,sha256=jUj1ztC2FbkIUB-YYmiqaz_rqW7Il5kWzDPn1mJmj5w,10545
|
|
138
|
-
alita_sdk/tools/base_indexer_toolkit.py,sha256=
|
|
138
|
+
alita_sdk/tools/base_indexer_toolkit.py,sha256=GpeIYY7kJZCjv0-gGcHNStY0uCtEBRl-I5XiISM0Tdo,26900
|
|
139
139
|
alita_sdk/tools/code_indexer_toolkit.py,sha256=6QvI1by0OFdnKTx5TfNoDJjnMrvnTi9T56xaDxzeleU,7306
|
|
140
140
|
alita_sdk/tools/elitea_base.py,sha256=up3HshASSDfjlHV_HPrs1aD4JIwwX0Ug26WGTzgIYvY,34724
|
|
141
141
|
alita_sdk/tools/non_code_indexer_toolkit.py,sha256=B3QvhpT1F9QidkCcsOi3J_QrTOaNlTxqWFwe90VivQQ,1329
|
|
@@ -331,7 +331,7 @@ alita_sdk/tools/testrail/api_wrapper.py,sha256=tQcGlFJmftvs5ZiO4tsP19fCo4CrJeq_U
|
|
|
331
331
|
alita_sdk/tools/utils/__init__.py,sha256=W9rCCUPtHCP5nGAbWp0n5jaNA84572aiRoqKneBnaS4,3330
|
|
332
332
|
alita_sdk/tools/utils/available_tools_decorator.py,sha256=IbrdfeQkswxUFgvvN7-dyLMZMyXLiwvX7kgi3phciCk,273
|
|
333
333
|
alita_sdk/tools/utils/content_parser.py,sha256=TuKAPUzIZx9F-pzHiVyrCFpI5emrGaOF8DgWHJP2cM4,15235
|
|
334
|
-
alita_sdk/tools/vector_adapters/VectorStoreAdapter.py,sha256=
|
|
334
|
+
alita_sdk/tools/vector_adapters/VectorStoreAdapter.py,sha256=p_9Cu5eausnfiKNsitbVxwu5eimZHRv3R-OMw7lBrts,19173
|
|
335
335
|
alita_sdk/tools/vector_adapters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
336
336
|
alita_sdk/tools/xray/__init__.py,sha256=eOMWP8VamFbbJgt1xrGpGPqB9ByOTA0Cd3LCaETzGk4,4376
|
|
337
337
|
alita_sdk/tools/xray/api_wrapper.py,sha256=uj5kzUgPdo_Oct9WCNMOpkb6o_3L7J4LZrEGtrwYMmc,30157
|
|
@@ -352,8 +352,8 @@ alita_sdk/tools/zephyr_scale/api_wrapper.py,sha256=kT0TbmMvuKhDUZc0i7KO18O38JM9S
|
|
|
352
352
|
alita_sdk/tools/zephyr_squad/__init__.py,sha256=0ne8XLJEQSLOWfzd2HdnqOYmQlUliKHbBED5kW_Vias,2895
|
|
353
353
|
alita_sdk/tools/zephyr_squad/api_wrapper.py,sha256=kmw_xol8YIYFplBLWTqP_VKPRhL_1ItDD0_vXTe_UuI,14906
|
|
354
354
|
alita_sdk/tools/zephyr_squad/zephyr_squad_cloud_client.py,sha256=R371waHsms4sllHCbijKYs90C-9Yu0sSR3N4SUfQOgU,5066
|
|
355
|
-
alita_sdk-0.3.370.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
356
|
-
alita_sdk-0.3.
|
|
357
|
-
alita_sdk-0.3.370.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
358
|
-
alita_sdk-0.3.370.dist-info/top_level.txt,sha256=0vJYy5p_jK6AwVb1aqXr7Kgqgk3WDtQ6t5C-XI9zkmg,10
|
|
359
|
-
alita_sdk-0.3.370.dist-info/RECORD,,
|
|
355
|
+
alita_sdk-0.3.372.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
356
|
+
alita_sdk-0.3.372.dist-info/METADATA,sha256=A3hNTePpqTE8uzQhDG7RbgX5Iv7MoyOYgkucgkqKEpI,19071
|
|
357
|
+
alita_sdk-0.3.372.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
358
|
+
alita_sdk-0.3.372.dist-info/top_level.txt,sha256=0vJYy5p_jK6AwVb1aqXr7Kgqgk3WDtQ6t5C-XI9zkmg,10
|
|
359
|
+
alita_sdk-0.3.372.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|