alita-sdk 0.3.205__py3-none-any.whl → 0.3.207__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. alita_sdk/runtime/clients/client.py +314 -11
  2. alita_sdk/runtime/langchain/assistant.py +22 -21
  3. alita_sdk/runtime/langchain/interfaces/llm_processor.py +1 -4
  4. alita_sdk/runtime/langchain/langraph_agent.py +6 -1
  5. alita_sdk/runtime/langchain/store_manager.py +4 -4
  6. alita_sdk/runtime/toolkits/application.py +5 -10
  7. alita_sdk/runtime/toolkits/tools.py +11 -21
  8. alita_sdk/runtime/tools/vectorstore.py +25 -11
  9. alita_sdk/runtime/utils/streamlit.py +505 -222
  10. alita_sdk/runtime/utils/toolkit_runtime.py +147 -0
  11. alita_sdk/runtime/utils/toolkit_utils.py +157 -0
  12. alita_sdk/runtime/utils/utils.py +5 -0
  13. alita_sdk/tools/__init__.py +2 -0
  14. alita_sdk/tools/ado/repos/repos_wrapper.py +20 -13
  15. alita_sdk/tools/bitbucket/api_wrapper.py +5 -5
  16. alita_sdk/tools/bitbucket/cloud_api_wrapper.py +54 -29
  17. alita_sdk/tools/elitea_base.py +9 -4
  18. alita_sdk/tools/gitlab/__init__.py +22 -10
  19. alita_sdk/tools/gitlab/api_wrapper.py +278 -253
  20. alita_sdk/tools/gitlab/tools.py +354 -376
  21. alita_sdk/tools/llm/llm_utils.py +0 -6
  22. alita_sdk/tools/memory/__init__.py +54 -10
  23. alita_sdk/tools/openapi/__init__.py +14 -3
  24. alita_sdk/tools/sharepoint/__init__.py +2 -1
  25. alita_sdk/tools/sharepoint/api_wrapper.py +11 -3
  26. alita_sdk/tools/testrail/api_wrapper.py +39 -16
  27. alita_sdk/tools/utils/content_parser.py +77 -13
  28. {alita_sdk-0.3.205.dist-info → alita_sdk-0.3.207.dist-info}/METADATA +1 -1
  29. {alita_sdk-0.3.205.dist-info → alita_sdk-0.3.207.dist-info}/RECORD +32 -40
  30. alita_sdk/community/analysis/__init__.py +0 -0
  31. alita_sdk/community/analysis/ado_analyse/__init__.py +0 -103
  32. alita_sdk/community/analysis/ado_analyse/api_wrapper.py +0 -261
  33. alita_sdk/community/analysis/github_analyse/__init__.py +0 -98
  34. alita_sdk/community/analysis/github_analyse/api_wrapper.py +0 -166
  35. alita_sdk/community/analysis/gitlab_analyse/__init__.py +0 -110
  36. alita_sdk/community/analysis/gitlab_analyse/api_wrapper.py +0 -172
  37. alita_sdk/community/analysis/jira_analyse/__init__.py +0 -141
  38. alita_sdk/community/analysis/jira_analyse/api_wrapper.py +0 -252
  39. alita_sdk/runtime/llms/alita.py +0 -259
  40. {alita_sdk-0.3.205.dist-info → alita_sdk-0.3.207.dist-info}/WHEEL +0 -0
  41. {alita_sdk-0.3.205.dist-info → alita_sdk-0.3.207.dist-info}/licenses/LICENSE +0 -0
  42. {alita_sdk-0.3.205.dist-info → alita_sdk-0.3.207.dist-info}/top_level.txt +0 -0
alita_sdk/runtime/tools/vectorstore.py
@@ -1,6 +1,9 @@
 import json
 import math
-from typing import Any, Optional, List, Dict, Callable
+import types
+from typing import Any, Optional, List, Dict, Callable, Generator
+
+from langchain_core.documents import Document
 from pydantic import BaseModel, model_validator, Field
 from ..langchain.tools.vector import VectorAdapter
 from langchain_core.messages import HumanMessage
@@ -8,6 +11,7 @@ from alita_sdk.tools.elitea_base import BaseToolApiWrapper
 from logging import getLogger

 from ..utils.logging import dispatch_custom_event
+from ..utils.utils import IndexerKeywords

 logger = getLogger(__name__)

@@ -197,25 +201,29 @@ class VectorStoreWrapper(BaseToolApiWrapper):
             for doc_str, meta, db_id in zip(data['documents'], data['metadatas'], data['ids']):
                 doc = json.loads(doc_str)
                 doc_id = str(meta['id'])
+                dependent_docs = meta.get(IndexerKeywords.DEPENDENT_DOCS.value, [])
+                parent_id = meta.get(IndexerKeywords.PARENT.value, -1)
                 result[doc_id] = {
                     'metadata': meta,
                     'document': doc,
-                    'id': db_id
+                    'id': db_id,
+                    IndexerKeywords.DEPENDENT_DOCS.value: dependent_docs,
+                    IndexerKeywords.PARENT.value: parent_id
                 }
         except Exception as e:
             logger.error(f"Failed to get indexed data from vectorstore: {str(e)}. Continuing with empty index.")
         return result

-    def _reduce_duplicates(self, documents, store) -> List[Any]:
+    def _reduce_duplicates(self, documents: Generator[Document, None, None], store) -> List[Any]:
         """Remove documents already indexed in the vectorstore based on metadata 'id' and 'updated_on' fields."""

         self._log_data("Verification of documents to index started", tool_name="index_documents")

-        data = self._get_indexed_data(store)
-        indexed_ids = set(data.keys())
+        indexed_data = self._get_indexed_data(store)
+        indexed_ids = set(indexed_data.keys())
         if not indexed_ids:
             self._log_data("Vectorstore is empty, indexing all incoming documents", tool_name="index_documents")
-            return documents
+            return list(documents)

         final_docs = []
         docs_to_remove = []
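
The new metadata keys come from the IndexerKeywords enum now imported from ..utils.utils (entry 12 in the file list, +5 lines). Its definition is not shown in this diff; below is a minimal sketch consistent with the .value accesses above — the member names are taken from the diff, the string values are assumptions:

# Sketch only: the real IndexerKeywords lives in alita_sdk/runtime/utils/utils.py.
from enum import Enum

class IndexerKeywords(Enum):
    DEPENDENT_DOCS = "dependent_docs"  # assumed value: ids of docs derived from this one
    PARENT = "parent_id"               # assumed value: id of the doc this one came from

# Each entry returned by _get_indexed_data then carries the dependency links:
meta = {"id": "42", IndexerKeywords.DEPENDENT_DOCS.value: ["42_1", "42_2"]}
dependent_docs = meta.get(IndexerKeywords.DEPENDENT_DOCS.value, [])  # ["42_1", "42_2"]
parent_id = meta.get(IndexerKeywords.PARENT.value, -1)               # -1 (default)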
@@ -226,13 +234,17 @@ class VectorStoreWrapper(BaseToolApiWrapper):
             if doc_id in indexed_ids:
                 # document has been indexed already, then verify `updated_on`
                 to_index_updated_on = document.metadata.get('updated_on')
-                indexed_meta = data[doc_id]['metadata']
+                indexed_meta = indexed_data[doc_id]['metadata']
                 indexed_updated_on = indexed_meta.get('updated_on')
                 if to_index_updated_on and indexed_updated_on and to_index_updated_on == indexed_updated_on:
                     # same updated_on, skip indexing
                     continue
                 # if updated_on is missing or different, we will re-index the document and remove old one
-                docs_to_remove.append(data[doc_id]['id'])
+                # parent doc removal
+                docs_to_remove.append(indexed_data[doc_id]['id'])
+                # mark dependent docs for removal
+                for dependent_doc_id in indexed_data[doc_id][IndexerKeywords.DEPENDENT_DOCS.value]:
+                    docs_to_remove.append(indexed_data[dependent_doc_id]['id'])
             else:
                 final_docs.append(document)

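Why the extra loop matters: when a parent document is re-indexed, any documents derived from it are now purged as well, so stale chunks cannot linger in the store. A standalone sketch of this cascading removal — plain-string keys stand in for the IndexerKeywords values, and all ids are invented:

# indexed_data mirrors the structure built by _get_indexed_data above.
indexed_data = {
    "doc-1":       {"id": "db-101", "dependent_docs": ["doc-1-chunk"], "parent_id": -1},
    "doc-1-chunk": {"id": "db-102", "dependent_docs": [], "parent_id": "doc-1"},
}

docs_to_remove = []
doc_id = "doc-1"  # incoming document whose `updated_on` changed
docs_to_remove.append(indexed_data[doc_id]["id"])      # parent entry
for dep_id in indexed_data[doc_id]["dependent_docs"]:  # cascade to derived docs
    docs_to_remove.append(indexed_data[dep_id]["id"])

print(docs_to_remove)  # ['db-101', 'db-102']

Note that indexed_data[dependent_doc_id] assumes every dependent id is itself present in the index; a .get() guard would make the cascade tolerant of partially indexed dependents.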
@@ -245,7 +257,7 @@ class VectorStoreWrapper(BaseToolApiWrapper):

         return final_docs

-    def index_documents(self, documents, progress_step: int = 20, clean_index: bool = True):
+    def index_documents(self, documents: Generator[Document, None, None], progress_step: int = 20, clean_index: bool = True):
         """ Index documents in the vectorstore.

         Args:
@@ -269,6 +281,8 @@ class VectorStoreWrapper(BaseToolApiWrapper):
                                tool_name="index_documents")
             except Exception as e:
                 logger.warning(f"Failed to clean index: {str(e)}. Continuing with re-indexing.")
+            if isinstance(documents, types.GeneratorType):
+                documents = list(documents)
         else:
             # remove duplicates based on metadata 'id' and 'updated_on' fields
             documents = self._reduce_duplicates(documents, self.vectoradapter.vectorstore)
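
The new isinstance guard exists because index_documents may now receive a generator (per the new type hint), while the code after this branch needs len(documents) and list concatenation — and a generator can only be consumed once. A small sketch of the pitfall and the fix, using nothing beyond the standard library:

import types

def stream_docs():
    yield "a"
    yield "b"

documents = stream_docs()
# len(documents) would raise TypeError, and a second pass would see an
# exhausted generator, so materialize it once up front:
if isinstance(documents, types.GeneratorType):
    documents = list(documents)

assert len(documents) == 2
assert documents + ["c"] == ["a", "b", "c"]  # concatenation now works too

The else branch needs no guard because _reduce_duplicates already returns a list.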
@@ -281,7 +295,7 @@ class VectorStoreWrapper(BaseToolApiWrapper):
         # if func is provided, apply it to documents
         # used for processing of documents before indexing,
         # e.g. to avoid time-consuming operations for documents that are already indexed
-        self.process_document_func(documents) if self.process_document_func else None
+        dependent_docs_generator = self.process_document_func(documents) if self.process_document_func else []

         # notify user about missed required metadata fields: id, updated_on
         # it is not required to have them, but it is recommended to have them for proper re-indexing and duplicate detection
@@ -292,7 +306,7 @@ class VectorStoreWrapper(BaseToolApiWrapper):
         logger.debug(f"Indexing documents: {documents}")
         logger.debug(self.vectoradapter)

-        documents = list(documents)
+        documents = documents + list(dependent_docs_generator)
         total_docs = len(documents)
         documents_count = 0
         _documents = []
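
Taken together, the last two hunks promote process_document_func from a fire-and-forget side effect into a producer of dependent documents, which are appended to the parent batch so total_docs counts both. A sketch with a hypothetical chunking callback — fake_process is invented for illustration; the real callback is supplied by the caller:

from typing import Iterable, List
from langchain_core.documents import Document

def fake_process(docs: List[Document]) -> Iterable[Document]:
    # Invented stand-in for process_document_func: one derived "chunk" per parent.
    for d in docs:
        yield Document(page_content=d.page_content[:5],
                       metadata={"parent_id": d.metadata.get("id")})

documents = [Document(page_content="hello world", metadata={"id": "1"})]
dependent_docs_generator = fake_process(documents)

# Mirrors the final hunk: parents first, then derived docs.
documents = documents + list(dependent_docs_generator)
total_docs = len(documents)  # 2 — progress reporting now covers both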