alita-sdk: alita_sdk-0.3.205-py3-none-any.whl → alita_sdk-0.3.207-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- alita_sdk/runtime/clients/client.py +314 -11
- alita_sdk/runtime/langchain/assistant.py +22 -21
- alita_sdk/runtime/langchain/interfaces/llm_processor.py +1 -4
- alita_sdk/runtime/langchain/langraph_agent.py +6 -1
- alita_sdk/runtime/langchain/store_manager.py +4 -4
- alita_sdk/runtime/toolkits/application.py +5 -10
- alita_sdk/runtime/toolkits/tools.py +11 -21
- alita_sdk/runtime/tools/vectorstore.py +25 -11
- alita_sdk/runtime/utils/streamlit.py +505 -222
- alita_sdk/runtime/utils/toolkit_runtime.py +147 -0
- alita_sdk/runtime/utils/toolkit_utils.py +157 -0
- alita_sdk/runtime/utils/utils.py +5 -0
- alita_sdk/tools/__init__.py +2 -0
- alita_sdk/tools/ado/repos/repos_wrapper.py +20 -13
- alita_sdk/tools/bitbucket/api_wrapper.py +5 -5
- alita_sdk/tools/bitbucket/cloud_api_wrapper.py +54 -29
- alita_sdk/tools/elitea_base.py +9 -4
- alita_sdk/tools/gitlab/__init__.py +22 -10
- alita_sdk/tools/gitlab/api_wrapper.py +278 -253
- alita_sdk/tools/gitlab/tools.py +354 -376
- alita_sdk/tools/llm/llm_utils.py +0 -6
- alita_sdk/tools/memory/__init__.py +54 -10
- alita_sdk/tools/openapi/__init__.py +14 -3
- alita_sdk/tools/sharepoint/__init__.py +2 -1
- alita_sdk/tools/sharepoint/api_wrapper.py +11 -3
- alita_sdk/tools/testrail/api_wrapper.py +39 -16
- alita_sdk/tools/utils/content_parser.py +77 -13
- {alita_sdk-0.3.205.dist-info → alita_sdk-0.3.207.dist-info}/METADATA +1 -1
- {alita_sdk-0.3.205.dist-info → alita_sdk-0.3.207.dist-info}/RECORD +32 -40
- alita_sdk/community/analysis/__init__.py +0 -0
- alita_sdk/community/analysis/ado_analyse/__init__.py +0 -103
- alita_sdk/community/analysis/ado_analyse/api_wrapper.py +0 -261
- alita_sdk/community/analysis/github_analyse/__init__.py +0 -98
- alita_sdk/community/analysis/github_analyse/api_wrapper.py +0 -166
- alita_sdk/community/analysis/gitlab_analyse/__init__.py +0 -110
- alita_sdk/community/analysis/gitlab_analyse/api_wrapper.py +0 -172
- alita_sdk/community/analysis/jira_analyse/__init__.py +0 -141
- alita_sdk/community/analysis/jira_analyse/api_wrapper.py +0 -252
- alita_sdk/runtime/llms/alita.py +0 -259
- {alita_sdk-0.3.205.dist-info → alita_sdk-0.3.207.dist-info}/WHEEL +0 -0
- {alita_sdk-0.3.205.dist-info → alita_sdk-0.3.207.dist-info}/licenses/LICENSE +0 -0
- {alita_sdk-0.3.205.dist-info → alita_sdk-0.3.207.dist-info}/top_level.txt +0 -0
@@ -1,6 +1,9 @@
|
|
1
1
|
import json
|
2
2
|
import math
|
3
|
-
|
3
|
+
import types
|
4
|
+
from typing import Any, Optional, List, Dict, Callable, Generator
|
5
|
+
|
6
|
+
from langchain_core.documents import Document
|
4
7
|
from pydantic import BaseModel, model_validator, Field
|
5
8
|
from ..langchain.tools.vector import VectorAdapter
|
6
9
|
from langchain_core.messages import HumanMessage
|
@@ -8,6 +11,7 @@ from alita_sdk.tools.elitea_base import BaseToolApiWrapper
|
|
8
11
|
from logging import getLogger
|
9
12
|
|
10
13
|
from ..utils.logging import dispatch_custom_event
|
14
|
+
from ..utils.utils import IndexerKeywords
|
11
15
|
|
12
16
|
logger = getLogger(__name__)
|
13
17
|
|
@@ -197,25 +201,29 @@ class VectorStoreWrapper(BaseToolApiWrapper):
|
|
197
201
|
for doc_str, meta, db_id in zip(data['documents'], data['metadatas'], data['ids']):
|
198
202
|
doc = json.loads(doc_str)
|
199
203
|
doc_id = str(meta['id'])
|
204
|
+
dependent_docs = meta.get(IndexerKeywords.DEPENDENT_DOCS.value, [])
|
205
|
+
parent_id = meta.get(IndexerKeywords.PARENT.value, -1)
|
200
206
|
result[doc_id] = {
|
201
207
|
'metadata': meta,
|
202
208
|
'document': doc,
|
203
|
-
'id': db_id
|
209
|
+
'id': db_id,
|
210
|
+
IndexerKeywords.DEPENDENT_DOCS.value: dependent_docs,
|
211
|
+
IndexerKeywords.PARENT.value: parent_id
|
204
212
|
}
|
205
213
|
except Exception as e:
|
206
214
|
logger.error(f"Failed to get indexed data from vectorstore: {str(e)}. Continuing with empty index.")
|
207
215
|
return result
|
208
216
|
|
209
|
-
def _reduce_duplicates(self, documents, store) -> List[Any]:
|
217
|
+
def _reduce_duplicates(self, documents: Generator[Document, None, None], store) -> List[Any]:
|
210
218
|
"""Remove documents already indexed in the vectorstore based on metadata 'id' and 'updated_on' fields."""
|
211
219
|
|
212
220
|
self._log_data("Verification of documents to index started", tool_name="index_documents")
|
213
221
|
|
214
|
-
|
215
|
-
indexed_ids = set(
|
222
|
+
indexed_data = self._get_indexed_data(store)
|
223
|
+
indexed_ids = set(indexed_data.keys())
|
216
224
|
if not indexed_ids:
|
217
225
|
self._log_data("Vectorstore is empty, indexing all incoming documents", tool_name="index_documents")
|
218
|
-
return documents
|
226
|
+
return list(documents)
|
219
227
|
|
220
228
|
final_docs = []
|
221
229
|
docs_to_remove = []
|
@@ -226,13 +234,17 @@ class VectorStoreWrapper(BaseToolApiWrapper):
|
|
226
234
|
if doc_id in indexed_ids:
|
227
235
|
# document has been indexed already, then verify `updated_on`
|
228
236
|
to_index_updated_on = document.metadata.get('updated_on')
|
229
|
-
indexed_meta =
|
237
|
+
indexed_meta = indexed_data[doc_id]['metadata']
|
230
238
|
indexed_updated_on = indexed_meta.get('updated_on')
|
231
239
|
if to_index_updated_on and indexed_updated_on and to_index_updated_on == indexed_updated_on:
|
232
240
|
# same updated_on, skip indexing
|
233
241
|
continue
|
234
242
|
# if updated_on is missing or different, we will re-index the document and remove old one
|
235
|
-
|
243
|
+
# parent doc removal
|
244
|
+
docs_to_remove.append(indexed_data[doc_id]['id'])
|
245
|
+
# mark dependent docs for removal
|
246
|
+
for dependent_doc_id in indexed_data[doc_id][IndexerKeywords.DEPENDENT_DOCS.value]:
|
247
|
+
docs_to_remove.append(indexed_data[dependent_doc_id]['id'])
|
236
248
|
else:
|
237
249
|
final_docs.append(document)
|
238
250
|
|
@@ -245,7 +257,7 @@ class VectorStoreWrapper(BaseToolApiWrapper):
|
|
245
257
|
|
246
258
|
return final_docs
|
247
259
|
|
248
|
-
def index_documents(self, documents, progress_step: int = 20, clean_index: bool = True):
|
260
|
+
def index_documents(self, documents: Generator[Document, None, None], progress_step: int = 20, clean_index: bool = True):
|
249
261
|
""" Index documents in the vectorstore.
|
250
262
|
|
251
263
|
Args:
|
@@ -269,6 +281,8 @@ class VectorStoreWrapper(BaseToolApiWrapper):
|
|
269
281
|
tool_name="index_documents")
|
270
282
|
except Exception as e:
|
271
283
|
logger.warning(f"Failed to clean index: {str(e)}. Continuing with re-indexing.")
|
284
|
+
if isinstance(documents, types.GeneratorType):
|
285
|
+
documents = list(documents)
|
272
286
|
else:
|
273
287
|
# remove duplicates based on metadata 'id' and 'updated_on' fields
|
274
288
|
documents = self._reduce_duplicates(documents, self.vectoradapter.vectorstore)
|
@@ -281,7 +295,7 @@ class VectorStoreWrapper(BaseToolApiWrapper):
|
|
281
295
|
# if func is provided, apply it to documents
|
282
296
|
# used for processing of documents before indexing,
|
283
297
|
# e.g. to avoid time-consuming operations for documents that are already indexed
|
284
|
-
self.process_document_func(documents) if self.process_document_func else
|
298
|
+
dependent_docs_generator = self.process_document_func(documents) if self.process_document_func else []
|
285
299
|
|
286
300
|
# notify user about missed required metadata fields: id, updated_on
|
287
301
|
# it is not required to have them, but it is recommended to have them for proper re-indexing and duplicate detection
|
@@ -292,7 +306,7 @@ class VectorStoreWrapper(BaseToolApiWrapper):
|
|
292
306
|
logger.debug(f"Indexing documents: {documents}")
|
293
307
|
logger.debug(self.vectoradapter)
|
294
308
|
|
295
|
-
documents = list(
|
309
|
+
documents = documents + list(dependent_docs_generator)
|
296
310
|
total_docs = len(documents)
|
297
311
|
documents_count = 0
|
298
312
|
_documents = []
|