alita-sdk 0.3.217__py3-none-any.whl → 0.3.218__py3-none-any.whl
This diff shows the content of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- alita_sdk/runtime/tools/vectorstore.py +71 -10
- {alita_sdk-0.3.217.dist-info → alita_sdk-0.3.218.dist-info}/METADATA +1 -1
- {alita_sdk-0.3.217.dist-info → alita_sdk-0.3.218.dist-info}/RECORD +6 -6
- {alita_sdk-0.3.217.dist-info → alita_sdk-0.3.218.dist-info}/WHEEL +0 -0
- {alita_sdk-0.3.217.dist-info → alita_sdk-0.3.218.dist-info}/licenses/LICENSE +0 -0
- {alita_sdk-0.3.217.dist-info → alita_sdk-0.3.218.dist-info}/top_level.txt +0 -0

alita_sdk/runtime/tools/vectorstore.py

@@ -196,35 +196,95 @@ class VectorStoreWrapper(BaseToolApiWrapper):
             f"Cleaning collection '{self.dataset}'",
             tool_name="_clean_collection"
         )
-
-
-
+        # This logic deletes the entire collection
+        # Works for PGVector and Chroma
+        self.vectoradapter.vectorstore.delete_collection()
+        # This logic deletes all data from the vectorstore collection without removal of collection.
+        # data = self.vectoradapter.vectorstore.get(include=['metadatas'])
+        # if data['ids']:
+        #     self.vectoradapter.vectorstore.delete(ids=data['ids'])
         self._log_data(
             f"Collection '{self.dataset}' has been cleaned. ",
             tool_name="_clean_collection"
         )
 
+    # TODO: refactor to use common method for different vectorstores in a separate vectorstore wrappers
     def _get_indexed_data(self, store):
         """ Get all indexed data from vectorstore for non-code content """
 
-        #
+        # Check if this is a PGVector store
+        if hasattr(store, 'session_maker') and hasattr(store, 'EmbeddingStore'):
+            return self._get_pgvector_indexed_data(store)
+        else:
+            # Fall back to original Chroma implementation
+            return self._get_chroma_indexed_data(store)
+
+    def _get_pgvector_indexed_data(self, store):
+        """ Get all indexed data from PGVector for non-code content """
+        from sqlalchemy.orm import Session
+
         result = {}
         try:
-            self._log_data("Retrieving already indexed data from vectorstore",
+            self._log_data("Retrieving already indexed data from PGVector vectorstore",
+                           tool_name="index_documents")
+
+            with Session(store.session_maker.bind) as session:
+                docs = session.query(
+                    store.EmbeddingStore.id,
+                    store.EmbeddingStore.document,
+                    store.EmbeddingStore.cmetadata
+                ).all()
+
+                # Process the retrieved data
+                for doc in docs:
+                    db_id = doc.id
+                    meta = doc.cmetadata or {}
+
+                    # Get document id from metadata
+                    doc_id = str(meta.get('id', db_id))
+                    dependent_docs = meta.get(IndexerKeywords.DEPENDENT_DOCS.value, [])
+                    if dependent_docs:
+                        dependent_docs = [d.strip() for d in dependent_docs.split(';') if d.strip()]
+                    parent_id = meta.get(IndexerKeywords.PARENT.value, -1)
+
+                    chunk_id = meta.get('chunk_id')
+                    if doc_id in result and chunk_id:
+                        # If document with the same id already saved, add db_id for current one as chunk
+                        result[doc_id]['all_chunks'].append(db_id)
+                    else:
+                        result[doc_id] = {
+                            'metadata': meta,
+                            'id': db_id,
+                            'all_chunks': [db_id],
+                            IndexerKeywords.DEPENDENT_DOCS.value: dependent_docs,
+                            IndexerKeywords.PARENT.value: parent_id
+                        }
+
+        except Exception as e:
+            logger.error(f"Failed to get indexed data from PGVector: {str(e)}. Continuing with empty index.")
+
+        return result
+
+    def _get_chroma_indexed_data(self, store):
+        """ Get all indexed data from Chroma for non-code content """
+        result = {}
+        try:
+            self._log_data("Retrieving already indexed data from Chroma vectorstore",
                            tool_name="index_documents")
             data = store.get(include=['metadatas'])
-
+
+            # Re-structure data to be more usable
             for meta, db_id in zip(data['metadatas'], data['ids']):
-                #
+                # Get document id from metadata
                 doc_id = str(meta['id'])
                 dependent_docs = meta.get(IndexerKeywords.DEPENDENT_DOCS.value, [])
                 if dependent_docs:
                     dependent_docs = [d.strip() for d in dependent_docs.split(';') if d.strip()]
                 parent_id = meta.get(IndexerKeywords.PARENT.value, -1)
-
+
                 chunk_id = meta.get('chunk_id')
                 if doc_id in result and chunk_id:
-                    #
+                    # If document with the same id already saved, add db_id for current one as chunk
                     result[doc_id]['all_chunks'].append(db_id)
                 else:
                     result[doc_id] = {
@@ -235,7 +295,8 @@ class VectorStoreWrapper(BaseToolApiWrapper):
                         IndexerKeywords.PARENT.value: parent_id
                     }
         except Exception as e:
-            logger.error(f"Failed to get indexed data from
+            logger.error(f"Failed to get indexed data from Chroma: {str(e)}. Continuing with empty index.")
+
         return result
 
     def _get_code_indexed_data(self, store) -> Dict[str, Dict[str, Any]]:
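
For context on the change above: the new _get_indexed_data dispatch is duck-typed (a store exposing both session_maker and EmbeddingStore attributes is treated as PGVector, anything else falls back to the Chroma reader), and _get_pgvector_indexed_data queries (id, document, cmetadata) rows through a SQLAlchemy session before folding chunks of the same logical document into a single entry. The following is a minimal, self-contained sketch of that pattern against an in-memory SQLite table; EmbeddingRow, is_pgvector and read_indexed_data are illustrative stand-ins rather than SDK code, and the dependent-docs/parent bookkeeping is omitted for brevity.

from sqlalchemy import JSON, Column, String, create_engine
from sqlalchemy.orm import Session, declarative_base

Base = declarative_base()

class EmbeddingRow(Base):
    # Illustrative stand-in for langchain PGVector's EmbeddingStore model
    # (id, document, cmetadata are the three columns the wrapper queries).
    __tablename__ = "langchain_pg_embedding"
    id = Column(String, primary_key=True)
    document = Column(String)
    cmetadata = Column(JSON)

def is_pgvector(store) -> bool:
    # Same duck-typing check the wrapper now performs in _get_indexed_data.
    return hasattr(store, "session_maker") and hasattr(store, "EmbeddingStore")

def read_indexed_data(engine) -> dict:
    # Mirrors the shape of _get_pgvector_indexed_data: query (id, document,
    # cmetadata) rows, then fold chunks of the same logical document into one
    # entry keyed by the 'id' stored in metadata.
    result = {}
    with Session(engine) as session:
        rows = session.query(EmbeddingRow.id, EmbeddingRow.document,
                             EmbeddingRow.cmetadata).all()
    for row in rows:
        meta = row.cmetadata or {}
        doc_id = str(meta.get("id", row.id))
        if doc_id in result and meta.get("chunk_id"):
            result[doc_id]["all_chunks"].append(row.id)
        else:
            result[doc_id] = {"metadata": meta, "id": row.id, "all_chunks": [row.id]}
    return result

class _FakePGVectorStore:
    # Exposes only the two attributes the duck-typing check looks for.
    session_maker = object()
    EmbeddingStore = EmbeddingRow

engine = create_engine("sqlite://")  # in-memory stand-in for the PostgreSQL bind
Base.metadata.create_all(engine)
with Session(engine) as session:
    session.add_all([
        EmbeddingRow(id="db-1", document="chunk 1", cmetadata={"id": "doc-1", "chunk_id": 1}),
        EmbeddingRow(id="db-2", document="chunk 2", cmetadata={"id": "doc-1", "chunk_id": 2}),
    ])
    session.commit()

print(is_pgvector(_FakePGVectorStore()))  # True  -> PGVector branch
print(is_pgvector(object()))              # False -> Chroma fallback
print(read_indexed_data(engine))          # one 'doc-1' entry collecting both db ids

Checking attributes instead of importing the PGVector class keeps the dispatch cheap and, presumably, avoids a hard dependency on the PGVector backend for Chroma-only setups.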

{alita_sdk-0.3.217.dist-info → alita_sdk-0.3.218.dist-info}/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: alita_sdk
-Version: 0.3.217
+Version: 0.3.218
 Summary: SDK for building langchain agents using resources from Alita
 Author-email: Artem Rozumenko <artyom.rozumenko@gmail.com>, Mikalai Biazruchka <mikalai_biazruchka@epam.com>, Roman Mitusov <roman_mitusov@epam.com>, Ivan Krakhmaliuk <lifedjik@gmail.com>, Artem Dubrovskiy <ad13box@gmail.com>
 License-Expression: Apache-2.0

{alita_sdk-0.3.217.dist-info → alita_sdk-0.3.218.dist-info}/RECORD

@@ -82,7 +82,7 @@ alita_sdk/runtime/tools/pgvector_search.py,sha256=NN2BGAnq4SsDHIhUcFZ8d_dbEOM8Qw
 alita_sdk/runtime/tools/prompt.py,sha256=nJafb_e5aOM1Rr3qGFCR-SKziU9uCsiP2okIMs9PppM,741
 alita_sdk/runtime/tools/router.py,sha256=wCvZjVkdXK9dMMeEerrgKf5M790RudH68pDortnHSz0,1517
 alita_sdk/runtime/tools/tool.py,sha256=lE1hGi6qOAXG7qxtqxarD_XMQqTghdywf261DZawwno,5631
-alita_sdk/runtime/tools/vectorstore.py,sha256=
+alita_sdk/runtime/tools/vectorstore.py,sha256=w9NbsBFnO-3H9i0U8p5lzJkU-1K30jAlbKDfgFbiIAE,36631
 alita_sdk/runtime/utils/AlitaCallback.py,sha256=E4LlSBuCHWiUq6W7IZExERHZY0qcmdjzc_rJlF2iQIw,7356
 alita_sdk/runtime/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 alita_sdk/runtime/utils/constants.py,sha256=Xntx1b_uxUzT4clwqHA_U6K8y5bBqf_4lSQwXdcWrp4,13586
@@ -305,8 +305,8 @@ alita_sdk/tools/zephyr_scale/api_wrapper.py,sha256=9CzQqQKv45LqZCmwSe4zzEXvBtStI
 alita_sdk/tools/zephyr_squad/__init__.py,sha256=0AI_j27xVO5Gk5HQMFrqPTd4uvuVTpiZUicBrdfEpKg,2796
 alita_sdk/tools/zephyr_squad/api_wrapper.py,sha256=kmw_xol8YIYFplBLWTqP_VKPRhL_1ItDD0_vXTe_UuI,14906
 alita_sdk/tools/zephyr_squad/zephyr_squad_cloud_client.py,sha256=R371waHsms4sllHCbijKYs90C-9Yu0sSR3N4SUfQOgU,5066
-alita_sdk-0.3.
-alita_sdk-0.3.
-alita_sdk-0.3.
-alita_sdk-0.3.
-alita_sdk-0.3.
+alita_sdk-0.3.218.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+alita_sdk-0.3.218.dist-info/METADATA,sha256=9JpdmaYNFhDeebVRZjzWjDTB0PU1eo-JkZn4i-XqhDw,18917
+alita_sdk-0.3.218.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+alita_sdk-0.3.218.dist-info/top_level.txt,sha256=0vJYy5p_jK6AwVb1aqXr7Kgqgk3WDtQ6t5C-XI9zkmg,10
+alita_sdk-0.3.218.dist-info/RECORD,,

{alita_sdk-0.3.217.dist-info → alita_sdk-0.3.218.dist-info}/WHEEL: file without changes
{alita_sdk-0.3.217.dist-info → alita_sdk-0.3.218.dist-info}/licenses/LICENSE: file without changes
{alita_sdk-0.3.217.dist-info → alita_sdk-0.3.218.dist-info}/top_level.txt: file without changes
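
A note on the RECORD hunks above: per the wheel format, each RECORD line is path,sha256=<urlsafe base64 digest with padding stripped>,size-in-bytes, which is why vectorstore.py gets both a new digest and a new size (36631) in 0.3.218. A short sketch of how such an entry can be recomputed locally; record_entry is a hypothetical helper, not part of the SDK.

import base64
import hashlib
from pathlib import Path

def record_entry(path: str) -> str:
    # Build a wheel RECORD-style line: path,sha256=<urlsafe b64, '=' stripped>,size.
    data = Path(path).read_bytes()
    digest = base64.urlsafe_b64encode(hashlib.sha256(data).digest()).rstrip(b"=").decode()
    return f"{path},sha256={digest},{len(data)}"

# Example (run against a file from an unpacked wheel):
# print(record_entry("alita_sdk/runtime/tools/vectorstore.py"))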