alita-sdk 0.3.216__py3-none-any.whl → 0.3.218__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
alita_sdk/runtime/tools/vectorstore.py

@@ -196,35 +196,95 @@ class VectorStoreWrapper(BaseToolApiWrapper):
             f"Cleaning collection '{self.dataset}'",
             tool_name="_clean_collection"
         )
-        data = self.vectoradapter.vectorstore.get(include=['metadatas'])
-        if data['ids']:
-            self.vectoradapter.vectorstore.delete(ids=data['ids'])
+        # This logic deletes the entire collection
+        # Works for PGVector and Chroma
+        self.vectoradapter.vectorstore.delete_collection()
+        # This logic deletes all data from the vectorstore collection without removal of collection.
+        # data = self.vectoradapter.vectorstore.get(include=['metadatas'])
+        # if data['ids']:
+        #     self.vectoradapter.vectorstore.delete(ids=data['ids'])
         self._log_data(
             f"Collection '{self.dataset}' has been cleaned. ",
             tool_name="_clean_collection"
         )
 
+    # TODO: refactor to use common method for different vectorstores in a separate vectorstore wrappers
     def _get_indexed_data(self, store):
         """ Get all indexed data from vectorstore for non-code content """
 
-        # get already indexed data
+        # Check if this is a PGVector store
+        if hasattr(store, 'session_maker') and hasattr(store, 'EmbeddingStore'):
+            return self._get_pgvector_indexed_data(store)
+        else:
+            # Fall back to original Chroma implementation
+            return self._get_chroma_indexed_data(store)
+
+    def _get_pgvector_indexed_data(self, store):
+        """ Get all indexed data from PGVector for non-code content """
+        from sqlalchemy.orm import Session
+
         result = {}
         try:
-            self._log_data("Retrieving already indexed data from vectorstore",
+            self._log_data("Retrieving already indexed data from PGVector vectorstore",
+                           tool_name="index_documents")
+
+            with Session(store.session_maker.bind) as session:
+                docs = session.query(
+                    store.EmbeddingStore.id,
+                    store.EmbeddingStore.document,
+                    store.EmbeddingStore.cmetadata
+                ).all()
+
+            # Process the retrieved data
+            for doc in docs:
+                db_id = doc.id
+                meta = doc.cmetadata or {}
+
+                # Get document id from metadata
+                doc_id = str(meta.get('id', db_id))
+                dependent_docs = meta.get(IndexerKeywords.DEPENDENT_DOCS.value, [])
+                if dependent_docs:
+                    dependent_docs = [d.strip() for d in dependent_docs.split(';') if d.strip()]
+                parent_id = meta.get(IndexerKeywords.PARENT.value, -1)
+
+                chunk_id = meta.get('chunk_id')
+                if doc_id in result and chunk_id:
+                    # If document with the same id already saved, add db_id for current one as chunk
+                    result[doc_id]['all_chunks'].append(db_id)
+                else:
+                    result[doc_id] = {
+                        'metadata': meta,
+                        'id': db_id,
+                        'all_chunks': [db_id],
+                        IndexerKeywords.DEPENDENT_DOCS.value: dependent_docs,
+                        IndexerKeywords.PARENT.value: parent_id
+                    }
+
+        except Exception as e:
+            logger.error(f"Failed to get indexed data from PGVector: {str(e)}. Continuing with empty index.")
+
+        return result
+
+    def _get_chroma_indexed_data(self, store):
+        """ Get all indexed data from Chroma for non-code content """
+        result = {}
+        try:
+            self._log_data("Retrieving already indexed data from Chroma vectorstore",
                            tool_name="index_documents")
             data = store.get(include=['metadatas'])
-            # re-structure data to be more usable
+
+            # Re-structure data to be more usable
             for meta, db_id in zip(data['metadatas'], data['ids']):
-                # get document id from metadata
+                # Get document id from metadata
                 doc_id = str(meta['id'])
                 dependent_docs = meta.get(IndexerKeywords.DEPENDENT_DOCS.value, [])
                 if dependent_docs:
                     dependent_docs = [d.strip() for d in dependent_docs.split(';') if d.strip()]
                 parent_id = meta.get(IndexerKeywords.PARENT.value, -1)
-                #
+
                 chunk_id = meta.get('chunk_id')
                 if doc_id in result and chunk_id:
-                    # if document with the same id already saved, add db_id fof current one as chunk
+                    # If document with the same id already saved, add db_id for current one as chunk
                     result[doc_id]['all_chunks'].append(db_id)
                 else:
                     result[doc_id] = {

@@ -235,7 +295,8 @@ class VectorStoreWrapper(BaseToolApiWrapper):
                         IndexerKeywords.PARENT.value: parent_id
                     }
         except Exception as e:
-            logger.error(f"Failed to get indexed data from vectorstore: {str(e)}. Continuing with empty index.")
+            logger.error(f"Failed to get indexed data from Chroma: {str(e)}. Continuing with empty index.")
+
         return result
 
     def _get_code_indexed_data(self, store) -> Dict[str, Dict[str, Any]]:
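
For readers following the change above: the new `_get_indexed_data` does not inspect concrete types, it duck-types the store, treating anything that exposes `session_maker` and `EmbeddingStore` attributes as a PGVector-backed store and falling back to the Chroma path otherwise. A minimal sketch of that dispatch, using hypothetical stand-in store classes rather than the real library types:

```python
# Minimal sketch of the duck-typing dispatch used by the new _get_indexed_data.
# Both store classes below are hypothetical stand-ins, not real library types.

class FakePGVectorStore:
    session_maker = object()   # the attribute the wrapper probes for
    EmbeddingStore = object()  # ORM row class on a real PGVector store


class FakeChromaStore:
    def get(self, include):
        # Chroma-style .get() returning ids plus requested metadata
        return {"ids": ["db-1"], "metadatas": [{"id": "doc-1"}]}


def get_indexed_data(store):
    """Route to the backend-specific retrieval, mirroring the diff above."""
    if hasattr(store, "session_maker") and hasattr(store, "EmbeddingStore"):
        return "pgvector path"  # the SDK would call _get_pgvector_indexed_data(store)
    return "chroma path"        # the SDK would call _get_chroma_indexed_data(store)


print(get_indexed_data(FakePGVectorStore()))  # -> pgvector path
print(get_indexed_data(FakeChromaStore()))    # -> chroma path
```

Either backend ends up producing the same per-document shape (metadata, primary db id, `all_chunks`, dependent docs, parent id), which is what the later indexing steps consume.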
alita_sdk/tools/elitea_base.py

@@ -327,6 +327,7 @@ class BaseVectorStoreToolApiWrapper(BaseToolApiWrapper):
             yield processed_doc
 
 
+    # TODO: init store once and re-use the instance
     def _init_vector_store(self, collection_suffix: str = "", embeddings: Optional[Any] = None):
         """ Initializes the vector store wrapper with the provided parameters."""
         try:

@@ -380,7 +381,8 @@ class BaseVectorStoreToolApiWrapper(BaseToolApiWrapper):
         """
         Lists all collections in the vector store
         """
-        return ','.join([collection.name for collection in self.vectoradapter.vectorstore._client.list_collections()])
+        vector_client = self._init_vector_store().vectoradapter.vectorstore._client
+        return ','.join([collection.name for collection in vector_client.list_collections()])
 
     def search_index(self,
                      query: str,
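
The new `list_collections` body calls `_init_vector_store()` before touching the private `_client`, and the TODO added above it notes that this initialization should eventually happen once and be reused. A possible shape for that caching, sketched with illustrative names rather than the SDK's actual implementation (the real `_init_vector_store` also accepts `collection_suffix` and `embeddings`, which a production cache would have to key on):

```python
from typing import Any, Optional


class LazyVectorStoreMixin:
    """Illustrative 'init once, re-use' pattern for the TODO above."""

    _vectorstore_cache: Optional[Any] = None

    def _build_vector_store(self) -> Any:
        # Placeholder for the comparatively expensive construction
        # (adapter, embeddings, DB connection) the real method performs.
        return object()

    def _init_vector_store(self) -> Any:
        # Build on first use, then hand back the cached instance.
        if self._vectorstore_cache is None:
            self._vectorstore_cache = self._build_vector_store()
        return self._vectorstore_cache
```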
alita_sdk/tools/zephyr_enterprise/__init__.py

@@ -13,7 +13,15 @@ def get_tools(tool):
         selected_tools=tool['settings'].get('selected_tools', []),
         base_url=tool['settings']['base_url'],
         token=tool['settings']['token'],
-        toolkit_name=tool.get('toolkit_name')
+        toolkit_name=tool.get('toolkit_name'),
+        llm=tool['settings'].get('llm', None),
+
+        # indexer settings
+        connection_string=tool['settings'].get('connection_string', None),
+        collection_name=f"{tool.get('toolkit_name')}_{str(tool['id'])}",
+        embedding_model="HuggingFaceEmbeddings",
+        embedding_model_params={"model_name": "sentence-transformers/all-MiniLM-L6-v2"},
+        vectorstore_type="PGVector"
     ).get_tools()
 
 class ZephyrEnterpriseToolkit(BaseToolkit):
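
The updated `get_tools` above now wires indexer settings into the toolkit. A hypothetical `tool` payload that would exercise those new keys (every value here is a placeholder, not taken from the package):

```python
# Hypothetical tool payload; keys mirror what get_tools() reads above,
# values are placeholders only.
tool = {
    "id": 42,
    "toolkit_name": "zephyr_ent",
    "settings": {
        "selected_tools": [],
        "base_url": "https://zephyr.example.com",
        "token": "<api-token>",
        "llm": None,  # optional LLM client
        "connection_string": "postgresql+psycopg://user:pass@host:5432/vectors",
    },
}

# With this payload the toolkit would be built with
#   collection_name   -> "zephyr_ent_42"
#   embedding_model   -> "HuggingFaceEmbeddings" (sentence-transformers/all-MiniLM-L6-v2)
#   vectorstore_type  -> "PGVector"
```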
@@ -29,6 +37,10 @@ class ZephyrEnterpriseToolkit(BaseToolkit):
             name,
             base_url=(str, Field(description="Zephyr Enterprise base URL", json_schema_extra={'toolkit_name': True, 'max_toolkit_length': ZephyrEnterpriseToolkit.toolkit_max_length })),
             token=(SecretStr, Field(description="API token", json_schema_extra={'secret': True})),
+            # indexer settings
+            connection_string=(Optional[SecretStr], Field(description="Connection string for vectorstore",
+                                                          default=None,
+                                                          json_schema_extra={'secret': True})),
             selected_tools=(List[Literal[tuple(selected_tools)]], []),
             __config__=ConfigDict(json_schema_extra={
                 'metadata': {
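
The `connection_string` option added above is declared as an `Optional[SecretStr]` marked with `{'secret': True}` in its schema extras, so it defaults to `None` and is masked when echoed. A small pydantic illustration of that behaviour, using a stand-in model rather than the generated toolkit schema:

```python
from typing import Optional

from pydantic import BaseModel, Field, SecretStr


class IndexerSettings(BaseModel):
    # Stand-in for the generated toolkit field shown in the diff above.
    connection_string: Optional[SecretStr] = Field(
        default=None,
        description="Connection string for vectorstore",
        json_schema_extra={"secret": True},
    )


settings = IndexerSettings(connection_string="postgresql+psycopg://user:pass@host/db")
print(settings.connection_string)                     # masked: **********
print(settings.connection_string.get_secret_value())  # raw value, e.g. for PGVector
```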
alita_sdk/tools/zephyr_essential/__init__.py

@@ -13,7 +13,15 @@ def get_tools(tool):
     return ZephyrEssentialToolkit().get_toolkit(
         selected_tools=tool['settings'].get('selected_tools', []),
         token=tool['settings']["token"],
-        toolkit_name=tool.get('toolkit_name')
+        toolkit_name=tool.get('toolkit_name'),
+        llm = tool['settings'].get('llm', None),
+
+        # indexer settings
+        connection_string = tool['settings'].get('connection_string', None),
+        collection_name = f"{tool.get('toolkit_name')}_{str(tool['id'])}",
+        embedding_model = "HuggingFaceEmbeddings",
+        embedding_model_params = {"model_name": "sentence-transformers/all-MiniLM-L6-v2"},
+        vectorstore_type = "PGVector"
     ).get_tools()
 
 class ZephyrEssentialToolkit(BaseToolkit):

@@ -29,6 +37,10 @@ class ZephyrEssentialToolkit(BaseToolkit):
             token=(str, Field(description="Bearer api token")),
             base_url=(Optional[str], Field(description="Zephyr Essential base url", default=None)),
             selected_tools=(List[Literal[tuple(selected_tools)]], Field(default=[], json_schema_extra={'args_schemas': selected_tools})),
+            # indexer settings
+            connection_string=(Optional[SecretStr], Field(description="Connection string for vectorstore",
+                                                          default=None,
+                                                          json_schema_extra={'secret': True})),
             __config__={'json_schema_extra': {'metadata': {"label": "Zephyr Essential", "icon_url": "zephyr.svg",
                                                            "categories": ["test management"],
                                                            "extra_categories": ["test automation", "test case management", "test planning"]
{alita_sdk-0.3.216.dist-info → alita_sdk-0.3.218.dist-info}/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: alita_sdk
-Version: 0.3.216
+Version: 0.3.218
 Summary: SDK for building langchain agents using resources from Alita
 Author-email: Artem Rozumenko <artyom.rozumenko@gmail.com>, Mikalai Biazruchka <mikalai_biazruchka@epam.com>, Roman Mitusov <roman_mitusov@epam.com>, Ivan Krakhmaliuk <lifedjik@gmail.com>, Artem Dubrovskiy <ad13box@gmail.com>
 License-Expression: Apache-2.0
{alita_sdk-0.3.216.dist-info → alita_sdk-0.3.218.dist-info}/RECORD

@@ -82,7 +82,7 @@ alita_sdk/runtime/tools/pgvector_search.py,sha256=NN2BGAnq4SsDHIhUcFZ8d_dbEOM8Qw
 alita_sdk/runtime/tools/prompt.py,sha256=nJafb_e5aOM1Rr3qGFCR-SKziU9uCsiP2okIMs9PppM,741
 alita_sdk/runtime/tools/router.py,sha256=wCvZjVkdXK9dMMeEerrgKf5M790RudH68pDortnHSz0,1517
 alita_sdk/runtime/tools/tool.py,sha256=lE1hGi6qOAXG7qxtqxarD_XMQqTghdywf261DZawwno,5631
-alita_sdk/runtime/tools/vectorstore.py,sha256=R7Xy2HMIcXSoJ3exvPH_BYbzZfTSnRY23Tn46tqKIiU,33961
+alita_sdk/runtime/tools/vectorstore.py,sha256=w9NbsBFnO-3H9i0U8p5lzJkU-1K30jAlbKDfgFbiIAE,36631
 alita_sdk/runtime/utils/AlitaCallback.py,sha256=E4LlSBuCHWiUq6W7IZExERHZY0qcmdjzc_rJlF2iQIw,7356
 alita_sdk/runtime/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 alita_sdk/runtime/utils/constants.py,sha256=Xntx1b_uxUzT4clwqHA_U6K8y5bBqf_4lSQwXdcWrp4,13586

@@ -94,7 +94,7 @@ alita_sdk/runtime/utils/toolkit_runtime.py,sha256=MU63Fpxj0b5_r1IUUc0Q3-PN9VwL7r
 alita_sdk/runtime/utils/toolkit_utils.py,sha256=I9QFqnaqfVgN26LUr6s3XlBlG6y0CoHURnCzG7XcwVs,5311
 alita_sdk/runtime/utils/utils.py,sha256=CpEl3LCeLbhzQySz08lkKPm7Auac6IiLF7WB8wmArMI,589
 alita_sdk/tools/__init__.py,sha256=1AHqP2xyLjn92xVm70l9XIke6FkfHkLo5OoQVe4BuP8,10421
-alita_sdk/tools/elitea_base.py,sha256=4_OWvTlvJwEZaRC1rB5NpD6BcTbKSXsutGrb4BBsR1A,30245
+alita_sdk/tools/elitea_base.py,sha256=iGWoskj7mUCMKz7yubcyrLYEHr1YJQMGwsuTGxJyrv8,30356
 alita_sdk/tools/ado/__init__.py,sha256=2NMQwt2pjIukSC9nSZ7CLocdGpK7002x7ixKr_wunxk,1313
 alita_sdk/tools/ado/utils.py,sha256=PTCludvaQmPLakF2EbCGy66Mro4-rjDtavVP-xcB2Wc,1252
 alita_sdk/tools/ado/repos/__init__.py,sha256=guYY95Gtyb0S4Jj0V1qO0x2jlRoH0H1cKjHXNwmShow,6388

@@ -294,10 +294,10 @@ alita_sdk/tools/zephyr/Zephyr.py,sha256=ODZbg9Aw0H0Rbv-HcDXLI4KHbPiLDHoteDofshw9
 alita_sdk/tools/zephyr/__init__.py,sha256=8B2Ibz5QTmB5WkV0q8Sq4kuj92FFaFWZLrT877zRRLg,2897
 alita_sdk/tools/zephyr/api_wrapper.py,sha256=lJCYPG03ej0qgdpLflnS7LFB4HSAfGzIvTjAJt07CQs,6244
 alita_sdk/tools/zephyr/rest_client.py,sha256=7vSD3oYIX-3KbAFed-mphSQif_VRuXrq5O07ryNQ7Pk,6208
-alita_sdk/tools/zephyr_enterprise/__init__.py,sha256=y9KDJS3E3D22xc0l08AUuhmGSjS6195XKrSlrDrJ-Zs,2807
+alita_sdk/tools/zephyr_enterprise/__init__.py,sha256=lWnOuVmva8vWBSlnk-wv40oBowxJDXa7iumsiXACcA0,3511
 alita_sdk/tools/zephyr_enterprise/api_wrapper.py,sha256=Ir3zHljhbZQJRJJQOBzS_GL5xvxb3-Vq5VF8XIMkxck,9348
 alita_sdk/tools/zephyr_enterprise/zephyr_enterprise.py,sha256=hV9LIrYfJT6oYp-ZfQR0YHflqBFPsUw2Oc55HwK0H48,6809
-alita_sdk/tools/zephyr_essential/__init__.py,sha256=3_v1F31YM1VovDe6rzeLtPbZ6fropmrcPXddbPMkSzc,2540
+alita_sdk/tools/zephyr_essential/__init__.py,sha256=LYLF9imlfeuW8KZDGLeENWGXT71x7WWDv-Ss8rG2v8Q,3256
 alita_sdk/tools/zephyr_essential/api_wrapper.py,sha256=ksg-2j_w74pt2pdoWuuSU-gF3E6IlNtv4wxPf8sJMWg,36812
 alita_sdk/tools/zephyr_essential/client.py,sha256=bfNcUKNqj9MFWTludGbbqD4qZlxrBaC2JtWsCfZMqSY,9722
 alita_sdk/tools/zephyr_scale/__init__.py,sha256=2NTcdrfkx4GSegqyXhsPLsEpc4FlACuDy85b0fk6cAo,4572

@@ -305,8 +305,8 @@ alita_sdk/tools/zephyr_scale/api_wrapper.py,sha256=9CzQqQKv45LqZCmwSe4zzEXvBtStI
 alita_sdk/tools/zephyr_squad/__init__.py,sha256=0AI_j27xVO5Gk5HQMFrqPTd4uvuVTpiZUicBrdfEpKg,2796
 alita_sdk/tools/zephyr_squad/api_wrapper.py,sha256=kmw_xol8YIYFplBLWTqP_VKPRhL_1ItDD0_vXTe_UuI,14906
 alita_sdk/tools/zephyr_squad/zephyr_squad_cloud_client.py,sha256=R371waHsms4sllHCbijKYs90C-9Yu0sSR3N4SUfQOgU,5066
-alita_sdk-0.3.216.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-alita_sdk-0.3.216.dist-info/METADATA,sha256=UtT_lnvEsUcDTZTITervx85djwHpVdoLnJmhwuQjZYw,18917
-alita_sdk-0.3.216.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-alita_sdk-0.3.216.dist-info/top_level.txt,sha256=0vJYy5p_jK6AwVb1aqXr7Kgqgk3WDtQ6t5C-XI9zkmg,10
-alita_sdk-0.3.216.dist-info/RECORD,,
+alita_sdk-0.3.218.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+alita_sdk-0.3.218.dist-info/METADATA,sha256=9JpdmaYNFhDeebVRZjzWjDTB0PU1eo-JkZn4i-XqhDw,18917
+alita_sdk-0.3.218.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+alita_sdk-0.3.218.dist-info/top_level.txt,sha256=0vJYy5p_jK6AwVb1aqXr7Kgqgk3WDtQ6t5C-XI9zkmg,10
+alita_sdk-0.3.218.dist-info/RECORD,,