alita-sdk 0.3.217__py3-none-any.whl → 0.3.218__py3-none-any.whl

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -196,35 +196,95 @@ class VectorStoreWrapper(BaseToolApiWrapper):
196
196
  f"Cleaning collection '{self.dataset}'",
197
197
  tool_name="_clean_collection"
198
198
  )
199
- data = self.vectoradapter.vectorstore.get(include=['metadatas'])
200
- if data['ids']:
201
- self.vectoradapter.vectorstore.delete(ids=data['ids'])
199
+ # This logic deletes the entire collection
200
+ # Works for PGVector and Chroma
201
+ self.vectoradapter.vectorstore.delete_collection()
202
+ # This logic deletes all data from the vectorstore collection without removal of collection.
203
+ # data = self.vectoradapter.vectorstore.get(include=['metadatas'])
204
+ # if data['ids']:
205
+ # self.vectoradapter.vectorstore.delete(ids=data['ids'])
202
206
  self._log_data(
203
207
  f"Collection '{self.dataset}' has been cleaned. ",
204
208
  tool_name="_clean_collection"
205
209
  )
206
210
 
211
+ # TODO: refactor to use common method for different vectorstores in a separate vectorstore wrappers
207
212
  def _get_indexed_data(self, store):
208
213
  """ Get all indexed data from vectorstore for non-code content """
209
214
 
210
- # get already indexed data
215
+ # Check if this is a PGVector store
216
+ if hasattr(store, 'session_maker') and hasattr(store, 'EmbeddingStore'):
217
+ return self._get_pgvector_indexed_data(store)
218
+ else:
219
+ # Fall back to original Chroma implementation
220
+ return self._get_chroma_indexed_data(store)
221
+
222
+ def _get_pgvector_indexed_data(self, store):
223
+ """ Get all indexed data from PGVector for non-code content """
224
+ from sqlalchemy.orm import Session
225
+
211
226
  result = {}
212
227
  try:
213
- self._log_data("Retrieving already indexed data from vectorstore",
228
+ self._log_data("Retrieving already indexed data from PGVector vectorstore",
229
+ tool_name="index_documents")
230
+
231
+ with Session(store.session_maker.bind) as session:
232
+ docs = session.query(
233
+ store.EmbeddingStore.id,
234
+ store.EmbeddingStore.document,
235
+ store.EmbeddingStore.cmetadata
236
+ ).all()
237
+
238
+ # Process the retrieved data
239
+ for doc in docs:
240
+ db_id = doc.id
241
+ meta = doc.cmetadata or {}
242
+
243
+ # Get document id from metadata
244
+ doc_id = str(meta.get('id', db_id))
245
+ dependent_docs = meta.get(IndexerKeywords.DEPENDENT_DOCS.value, [])
246
+ if dependent_docs:
247
+ dependent_docs = [d.strip() for d in dependent_docs.split(';') if d.strip()]
248
+ parent_id = meta.get(IndexerKeywords.PARENT.value, -1)
249
+
250
+ chunk_id = meta.get('chunk_id')
251
+ if doc_id in result and chunk_id:
252
+ # If document with the same id already saved, add db_id for current one as chunk
253
+ result[doc_id]['all_chunks'].append(db_id)
254
+ else:
255
+ result[doc_id] = {
256
+ 'metadata': meta,
257
+ 'id': db_id,
258
+ 'all_chunks': [db_id],
259
+ IndexerKeywords.DEPENDENT_DOCS.value: dependent_docs,
260
+ IndexerKeywords.PARENT.value: parent_id
261
+ }
262
+
263
+ except Exception as e:
264
+ logger.error(f"Failed to get indexed data from PGVector: {str(e)}. Continuing with empty index.")
265
+
266
+ return result
267
+
268
+ def _get_chroma_indexed_data(self, store):
269
+ """ Get all indexed data from Chroma for non-code content """
270
+ result = {}
271
+ try:
272
+ self._log_data("Retrieving already indexed data from Chroma vectorstore",
214
273
  tool_name="index_documents")
215
274
  data = store.get(include=['metadatas'])
216
- # re-structure data to be more usable
275
+
276
+ # Re-structure data to be more usable
217
277
  for meta, db_id in zip(data['metadatas'], data['ids']):
218
- # get document id from metadata
278
+ # Get document id from metadata
219
279
  doc_id = str(meta['id'])
220
280
  dependent_docs = meta.get(IndexerKeywords.DEPENDENT_DOCS.value, [])
221
281
  if dependent_docs:
222
282
  dependent_docs = [d.strip() for d in dependent_docs.split(';') if d.strip()]
223
283
  parent_id = meta.get(IndexerKeywords.PARENT.value, -1)
224
- #
284
+
225
285
  chunk_id = meta.get('chunk_id')
226
286
  if doc_id in result and chunk_id:
227
- # if document with the same id already saved, add db_id fof current one as chunk
287
+ # If document with the same id already saved, add db_id for current one as chunk
228
288
  result[doc_id]['all_chunks'].append(db_id)
229
289
  else:
230
290
  result[doc_id] = {
@@ -235,7 +295,8 @@ class VectorStoreWrapper(BaseToolApiWrapper):
235
295
  IndexerKeywords.PARENT.value: parent_id
236
296
  }
237
297
  except Exception as e:
238
- logger.error(f"Failed to get indexed data from vectorstore: {str(e)}. Continuing with empty index.")
298
+ logger.error(f"Failed to get indexed data from Chroma: {str(e)}. Continuing with empty index.")
299
+
239
300
  return result
240
301
 
241
302
  def _get_code_indexed_data(self, store) -> Dict[str, Dict[str, Any]]:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: alita_sdk
3
- Version: 0.3.217
3
+ Version: 0.3.218
4
4
  Summary: SDK for building langchain agents using resources from Alita
5
5
  Author-email: Artem Rozumenko <artyom.rozumenko@gmail.com>, Mikalai Biazruchka <mikalai_biazruchka@epam.com>, Roman Mitusov <roman_mitusov@epam.com>, Ivan Krakhmaliuk <lifedjik@gmail.com>, Artem Dubrovskiy <ad13box@gmail.com>
6
6
  License-Expression: Apache-2.0
@@ -82,7 +82,7 @@ alita_sdk/runtime/tools/pgvector_search.py,sha256=NN2BGAnq4SsDHIhUcFZ8d_dbEOM8Qw
82
82
  alita_sdk/runtime/tools/prompt.py,sha256=nJafb_e5aOM1Rr3qGFCR-SKziU9uCsiP2okIMs9PppM,741
83
83
  alita_sdk/runtime/tools/router.py,sha256=wCvZjVkdXK9dMMeEerrgKf5M790RudH68pDortnHSz0,1517
84
84
  alita_sdk/runtime/tools/tool.py,sha256=lE1hGi6qOAXG7qxtqxarD_XMQqTghdywf261DZawwno,5631
85
- alita_sdk/runtime/tools/vectorstore.py,sha256=R7Xy2HMIcXSoJ3exvPH_BYbzZfTSnRY23Tn46tqKIiU,33961
85
+ alita_sdk/runtime/tools/vectorstore.py,sha256=w9NbsBFnO-3H9i0U8p5lzJkU-1K30jAlbKDfgFbiIAE,36631
86
86
  alita_sdk/runtime/utils/AlitaCallback.py,sha256=E4LlSBuCHWiUq6W7IZExERHZY0qcmdjzc_rJlF2iQIw,7356
87
87
  alita_sdk/runtime/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
88
88
  alita_sdk/runtime/utils/constants.py,sha256=Xntx1b_uxUzT4clwqHA_U6K8y5bBqf_4lSQwXdcWrp4,13586
@@ -305,8 +305,8 @@ alita_sdk/tools/zephyr_scale/api_wrapper.py,sha256=9CzQqQKv45LqZCmwSe4zzEXvBtStI
305
305
  alita_sdk/tools/zephyr_squad/__init__.py,sha256=0AI_j27xVO5Gk5HQMFrqPTd4uvuVTpiZUicBrdfEpKg,2796
306
306
  alita_sdk/tools/zephyr_squad/api_wrapper.py,sha256=kmw_xol8YIYFplBLWTqP_VKPRhL_1ItDD0_vXTe_UuI,14906
307
307
  alita_sdk/tools/zephyr_squad/zephyr_squad_cloud_client.py,sha256=R371waHsms4sllHCbijKYs90C-9Yu0sSR3N4SUfQOgU,5066
308
- alita_sdk-0.3.217.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
309
- alita_sdk-0.3.217.dist-info/METADATA,sha256=Bdw9AsoShDiMxiNJlCg-PegsjzUFDeryEMKuwkH66lQ,18917
310
- alita_sdk-0.3.217.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
311
- alita_sdk-0.3.217.dist-info/top_level.txt,sha256=0vJYy5p_jK6AwVb1aqXr7Kgqgk3WDtQ6t5C-XI9zkmg,10
312
- alita_sdk-0.3.217.dist-info/RECORD,,
308
+ alita_sdk-0.3.218.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
309
+ alita_sdk-0.3.218.dist-info/METADATA,sha256=9JpdmaYNFhDeebVRZjzWjDTB0PU1eo-JkZn4i-XqhDw,18917
310
+ alita_sdk-0.3.218.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
311
+ alita_sdk-0.3.218.dist-info/top_level.txt,sha256=0vJYy5p_jK6AwVb1aqXr7Kgqgk3WDtQ6t5C-XI9zkmg,10
312
+ alita_sdk-0.3.218.dist-info/RECORD,,