alita-sdk 0.3.217__py3-none-any.whl → 0.3.219__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- alita_sdk/runtime/tools/vectorstore.py +121 -10
- alita_sdk/tools/elitea_base.py +19 -1
- {alita_sdk-0.3.217.dist-info → alita_sdk-0.3.219.dist-info}/METADATA +1 -1
- {alita_sdk-0.3.217.dist-info → alita_sdk-0.3.219.dist-info}/RECORD +7 -7
- {alita_sdk-0.3.217.dist-info → alita_sdk-0.3.219.dist-info}/WHEEL +0 -0
- {alita_sdk-0.3.217.dist-info → alita_sdk-0.3.219.dist-info}/licenses/LICENSE +0 -0
- {alita_sdk-0.3.217.dist-info → alita_sdk-0.3.219.dist-info}/top_level.txt +0 -0
@@ -188,6 +188,59 @@ class VectorStoreWrapper(BaseToolApiWrapper):
|
|
188
188
|
except Exception as e:
|
189
189
|
logger.error(f"Failed to initialize PGVectorSearch: {str(e)}")
|
190
190
|
|
191
|
+
def _remove_collection(self):
|
192
|
+
"""
|
193
|
+
Remove the vectorstore collection entirely.
|
194
|
+
"""
|
195
|
+
self._log_data(
|
196
|
+
f"Remove collection '{self.dataset}'",
|
197
|
+
tool_name="_remove_collection"
|
198
|
+
)
|
199
|
+
from sqlalchemy import text
|
200
|
+
from sqlalchemy.orm import Session
|
201
|
+
|
202
|
+
schema_name = self.vectorstore.collection_name
|
203
|
+
with Session(self.vectorstore.session_maker.bind) as session:
|
204
|
+
drop_schema_query = text(f"DROP SCHEMA IF EXISTS {schema_name} CASCADE;")
|
205
|
+
session.execute(drop_schema_query)
|
206
|
+
session.commit()
|
207
|
+
logger.info(f"Schema '{schema_name}' has been dropped.")
|
208
|
+
self._log_data(
|
209
|
+
f"Collection '{self.dataset}' has been removed. ",
|
210
|
+
tool_name="_remove_collection"
|
211
|
+
)
|
212
|
+
|
213
|
+
def _get_indexed_ids(self, store):
|
214
|
+
"""Get all indexed document IDs from vectorstore"""
|
215
|
+
|
216
|
+
# Check if this is a PGVector store
|
217
|
+
if hasattr(store, 'session_maker') and hasattr(store, 'EmbeddingStore'):
|
218
|
+
return self._get_pgvector_indexed_ids(store)
|
219
|
+
else:
|
220
|
+
# Fall back to Chroma implementation
|
221
|
+
return self._get_chroma_indexed_ids(store)
|
222
|
+
|
223
|
+
def _get_pgvector_indexed_ids(self, store):
|
224
|
+
"""Get all indexed document IDs from PGVector"""
|
225
|
+
from sqlalchemy.orm import Session
|
226
|
+
|
227
|
+
try:
|
228
|
+
with Session(store.session_maker.bind) as session:
|
229
|
+
ids = session.query(store.EmbeddingStore.id).all()
|
230
|
+
return [str(id_tuple[0]) for id_tuple in ids]
|
231
|
+
except Exception as e:
|
232
|
+
logger.error(f"Failed to get indexed IDs from PGVector: {str(e)}")
|
233
|
+
return []
|
234
|
+
|
235
|
+
def _get_chroma_indexed_ids(self, store):
|
236
|
+
"""Get all indexed document IDs from Chroma"""
|
237
|
+
try:
|
238
|
+
data = store.get(include=[]) # Only get IDs, no metadata
|
239
|
+
return data.get('ids', [])
|
240
|
+
except Exception as e:
|
241
|
+
logger.error(f"Failed to get indexed IDs from Chroma: {str(e)}")
|
242
|
+
return []
|
243
|
+
|
191
244
|
def _clean_collection(self):
|
192
245
|
"""
|
193
246
|
Clean the vectorstore collection by deleting all indexed data.
|
@@ -196,35 +249,92 @@ class VectorStoreWrapper(BaseToolApiWrapper):
|
|
196
249
|
f"Cleaning collection '{self.dataset}'",
|
197
250
|
tool_name="_clean_collection"
|
198
251
|
)
|
199
|
-
data
|
200
|
-
|
201
|
-
|
252
|
+
# This logic deletes all data from the vectorstore collection without removal of collection.
|
253
|
+
# Collection itself remains available for future indexing.
|
254
|
+
self.vectoradapter.vectorstore.delete(ids=self._get_indexed_ids(self.vectoradapter.vectorstore))
|
255
|
+
|
202
256
|
self._log_data(
|
203
257
|
f"Collection '{self.dataset}' has been cleaned. ",
|
204
258
|
tool_name="_clean_collection"
|
205
259
|
)
|
206
260
|
|
261
|
+
# TODO: refactor to use common method for different vectorstores in a separate vectorstore wrappers
|
207
262
|
def _get_indexed_data(self, store):
|
208
263
|
""" Get all indexed data from vectorstore for non-code content """
|
209
264
|
|
210
|
-
#
|
265
|
+
# Check if this is a PGVector store
|
266
|
+
if hasattr(store, 'session_maker') and hasattr(store, 'EmbeddingStore'):
|
267
|
+
return self._get_pgvector_indexed_data(store)
|
268
|
+
else:
|
269
|
+
# Fall back to original Chroma implementation
|
270
|
+
return self._get_chroma_indexed_data(store)
|
271
|
+
|
272
|
+
def _get_pgvector_indexed_data(self, store):
|
273
|
+
""" Get all indexed data from PGVector for non-code content """
|
274
|
+
from sqlalchemy.orm import Session
|
275
|
+
|
211
276
|
result = {}
|
212
277
|
try:
|
213
|
-
self._log_data("Retrieving already indexed data from vectorstore",
|
278
|
+
self._log_data("Retrieving already indexed data from PGVector vectorstore",
|
279
|
+
tool_name="index_documents")
|
280
|
+
|
281
|
+
with Session(store.session_maker.bind) as session:
|
282
|
+
docs = session.query(
|
283
|
+
store.EmbeddingStore.id,
|
284
|
+
store.EmbeddingStore.document,
|
285
|
+
store.EmbeddingStore.cmetadata
|
286
|
+
).all()
|
287
|
+
|
288
|
+
# Process the retrieved data
|
289
|
+
for doc in docs:
|
290
|
+
db_id = doc.id
|
291
|
+
meta = doc.cmetadata or {}
|
292
|
+
|
293
|
+
# Get document id from metadata
|
294
|
+
doc_id = str(meta.get('id', db_id))
|
295
|
+
dependent_docs = meta.get(IndexerKeywords.DEPENDENT_DOCS.value, [])
|
296
|
+
if dependent_docs:
|
297
|
+
dependent_docs = [d.strip() for d in dependent_docs.split(';') if d.strip()]
|
298
|
+
parent_id = meta.get(IndexerKeywords.PARENT.value, -1)
|
299
|
+
|
300
|
+
chunk_id = meta.get('chunk_id')
|
301
|
+
if doc_id in result and chunk_id:
|
302
|
+
# If document with the same id already saved, add db_id for current one as chunk
|
303
|
+
result[doc_id]['all_chunks'].append(db_id)
|
304
|
+
else:
|
305
|
+
result[doc_id] = {
|
306
|
+
'metadata': meta,
|
307
|
+
'id': db_id,
|
308
|
+
'all_chunks': [db_id],
|
309
|
+
IndexerKeywords.DEPENDENT_DOCS.value: dependent_docs,
|
310
|
+
IndexerKeywords.PARENT.value: parent_id
|
311
|
+
}
|
312
|
+
|
313
|
+
except Exception as e:
|
314
|
+
logger.error(f"Failed to get indexed data from PGVector: {str(e)}. Continuing with empty index.")
|
315
|
+
|
316
|
+
return result
|
317
|
+
|
318
|
+
def _get_chroma_indexed_data(self, store):
|
319
|
+
""" Get all indexed data from Chroma for non-code content """
|
320
|
+
result = {}
|
321
|
+
try:
|
322
|
+
self._log_data("Retrieving already indexed data from Chroma vectorstore",
|
214
323
|
tool_name="index_documents")
|
215
324
|
data = store.get(include=['metadatas'])
|
216
|
-
|
325
|
+
|
326
|
+
# Re-structure data to be more usable
|
217
327
|
for meta, db_id in zip(data['metadatas'], data['ids']):
|
218
|
-
#
|
328
|
+
# Get document id from metadata
|
219
329
|
doc_id = str(meta['id'])
|
220
330
|
dependent_docs = meta.get(IndexerKeywords.DEPENDENT_DOCS.value, [])
|
221
331
|
if dependent_docs:
|
222
332
|
dependent_docs = [d.strip() for d in dependent_docs.split(';') if d.strip()]
|
223
333
|
parent_id = meta.get(IndexerKeywords.PARENT.value, -1)
|
224
|
-
|
334
|
+
|
225
335
|
chunk_id = meta.get('chunk_id')
|
226
336
|
if doc_id in result and chunk_id:
|
227
|
-
#
|
337
|
+
# If document with the same id already saved, add db_id for current one as chunk
|
228
338
|
result[doc_id]['all_chunks'].append(db_id)
|
229
339
|
else:
|
230
340
|
result[doc_id] = {
|
@@ -235,7 +345,8 @@ class VectorStoreWrapper(BaseToolApiWrapper):
|
|
235
345
|
IndexerKeywords.PARENT.value: parent_id
|
236
346
|
}
|
237
347
|
except Exception as e:
|
238
|
-
logger.error(f"Failed to get indexed data from
|
348
|
+
logger.error(f"Failed to get indexed data from Chroma: {str(e)}. Continuing with empty index.")
|
349
|
+
|
239
350
|
return result
|
240
351
|
|
241
352
|
def _get_code_indexed_data(self, store) -> Dict[str, Dict[str, Any]]:
|
alita_sdk/tools/elitea_base.py
CHANGED
@@ -375,12 +375,30 @@ class BaseVectorStoreToolApiWrapper(BaseToolApiWrapper):
|
|
375
375
|
Cleans the indexed data in the collection
|
376
376
|
"""
|
377
377
|
|
378
|
-
self._init_vector_store(collection_suffix).
|
378
|
+
self._init_vector_store(collection_suffix)._remove_collection()
|
379
379
|
|
380
380
|
def list_collections(self):
|
381
381
|
"""
|
382
382
|
Lists all collections in the vector store
|
383
383
|
"""
|
384
|
+
if self.vectorstore_type == 'PGVector'.lower():
|
385
|
+
from sqlalchemy import text
|
386
|
+
from sqlalchemy.orm import Session
|
387
|
+
|
388
|
+
# schema_name = self.vectorstore.collection_name
|
389
|
+
with Session(self._init_vector_store().vectorstore.session_maker.bind) as session:
|
390
|
+
get_collections = text("""
|
391
|
+
SELECT table_schema
|
392
|
+
FROM information_schema.columns
|
393
|
+
WHERE udt_name = 'vector';
|
394
|
+
""")
|
395
|
+
|
396
|
+
# Execute the raw SQL query
|
397
|
+
result = session.execute(get_collections)
|
398
|
+
|
399
|
+
# Fetch all rows from the result
|
400
|
+
docs = result.fetchall()
|
401
|
+
return str(docs)
|
384
402
|
vector_client = self._init_vector_store().vectoradapter.vectorstore._client
|
385
403
|
return ','.join([collection.name for collection in vector_client.list_collections()])
|
386
404
|
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: alita_sdk
|
3
|
-
Version: 0.3.217
|
3
|
+
Version: 0.3.219
|
4
4
|
Summary: SDK for building langchain agents using resources from Alita
|
5
5
|
Author-email: Artem Rozumenko <artyom.rozumenko@gmail.com>, Mikalai Biazruchka <mikalai_biazruchka@epam.com>, Roman Mitusov <roman_mitusov@epam.com>, Ivan Krakhmaliuk <lifedjik@gmail.com>, Artem Dubrovskiy <ad13box@gmail.com>
|
6
6
|
License-Expression: Apache-2.0
|
@@ -82,7 +82,7 @@ alita_sdk/runtime/tools/pgvector_search.py,sha256=NN2BGAnq4SsDHIhUcFZ8d_dbEOM8Qw
|
|
82
82
|
alita_sdk/runtime/tools/prompt.py,sha256=nJafb_e5aOM1Rr3qGFCR-SKziU9uCsiP2okIMs9PppM,741
|
83
83
|
alita_sdk/runtime/tools/router.py,sha256=wCvZjVkdXK9dMMeEerrgKf5M790RudH68pDortnHSz0,1517
|
84
84
|
alita_sdk/runtime/tools/tool.py,sha256=lE1hGi6qOAXG7qxtqxarD_XMQqTghdywf261DZawwno,5631
|
85
|
-
alita_sdk/runtime/tools/vectorstore.py,sha256=
|
85
|
+
alita_sdk/runtime/tools/vectorstore.py,sha256=lpbpS2yukyT9xRkT2tNQl9YqnlO5F0rNCyj0nU7OJDE,38537
|
86
86
|
alita_sdk/runtime/utils/AlitaCallback.py,sha256=E4LlSBuCHWiUq6W7IZExERHZY0qcmdjzc_rJlF2iQIw,7356
|
87
87
|
alita_sdk/runtime/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
88
88
|
alita_sdk/runtime/utils/constants.py,sha256=Xntx1b_uxUzT4clwqHA_U6K8y5bBqf_4lSQwXdcWrp4,13586
|
@@ -94,7 +94,7 @@ alita_sdk/runtime/utils/toolkit_runtime.py,sha256=MU63Fpxj0b5_r1IUUc0Q3-PN9VwL7r
|
|
94
94
|
alita_sdk/runtime/utils/toolkit_utils.py,sha256=I9QFqnaqfVgN26LUr6s3XlBlG6y0CoHURnCzG7XcwVs,5311
|
95
95
|
alita_sdk/runtime/utils/utils.py,sha256=CpEl3LCeLbhzQySz08lkKPm7Auac6IiLF7WB8wmArMI,589
|
96
96
|
alita_sdk/tools/__init__.py,sha256=1AHqP2xyLjn92xVm70l9XIke6FkfHkLo5OoQVe4BuP8,10421
|
97
|
-
alita_sdk/tools/elitea_base.py,sha256=
|
97
|
+
alita_sdk/tools/elitea_base.py,sha256=7mi-Bg3DHkaCCfldovroy8zqp5IIROVfWGHG5k-Zl7o,31083
|
98
98
|
alita_sdk/tools/ado/__init__.py,sha256=2NMQwt2pjIukSC9nSZ7CLocdGpK7002x7ixKr_wunxk,1313
|
99
99
|
alita_sdk/tools/ado/utils.py,sha256=PTCludvaQmPLakF2EbCGy66Mro4-rjDtavVP-xcB2Wc,1252
|
100
100
|
alita_sdk/tools/ado/repos/__init__.py,sha256=guYY95Gtyb0S4Jj0V1qO0x2jlRoH0H1cKjHXNwmShow,6388
|
@@ -305,8 +305,8 @@ alita_sdk/tools/zephyr_scale/api_wrapper.py,sha256=9CzQqQKv45LqZCmwSe4zzEXvBtStI
|
|
305
305
|
alita_sdk/tools/zephyr_squad/__init__.py,sha256=0AI_j27xVO5Gk5HQMFrqPTd4uvuVTpiZUicBrdfEpKg,2796
|
306
306
|
alita_sdk/tools/zephyr_squad/api_wrapper.py,sha256=kmw_xol8YIYFplBLWTqP_VKPRhL_1ItDD0_vXTe_UuI,14906
|
307
307
|
alita_sdk/tools/zephyr_squad/zephyr_squad_cloud_client.py,sha256=R371waHsms4sllHCbijKYs90C-9Yu0sSR3N4SUfQOgU,5066
|
308
|
-
alita_sdk-0.3.217.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
309
|
-
alita_sdk-0.3.217.dist-info/METADATA,sha256=
|
310
|
-
alita_sdk-0.3.217.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
311
|
-
alita_sdk-0.3.217.dist-info/top_level.txt,sha256=0vJYy5p_jK6AwVb1aqXr7Kgqgk3WDtQ6t5C-XI9zkmg,10
|
312
|
-
alita_sdk-0.3.217.dist-info/RECORD,,
|
308
|
+
alita_sdk-0.3.219.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
309
|
+
alita_sdk-0.3.219.dist-info/METADATA,sha256=8OiIgT_-Gr1auGCJvupch3DVJFcQ3bGTgU59Wj43Idk,18917
|
310
|
+
alita_sdk-0.3.219.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
311
|
+
alita_sdk-0.3.219.dist-info/top_level.txt,sha256=0vJYy5p_jK6AwVb1aqXr7Kgqgk3WDtQ6t5C-XI9zkmg,10
|
312
|
+
alita_sdk-0.3.219.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|