alita-sdk 0.3.365__py3-none-any.whl → 0.3.462__py3-none-any.whl

This diff compares two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only and reflects the changes between those published versions.

Note: this release has been flagged as potentially problematic.

Files changed (118)
  1. alita_sdk/cli/__init__.py +10 -0
  2. alita_sdk/cli/__main__.py +17 -0
  3. alita_sdk/cli/agent_executor.py +144 -0
  4. alita_sdk/cli/agent_loader.py +197 -0
  5. alita_sdk/cli/agent_ui.py +166 -0
  6. alita_sdk/cli/agents.py +1069 -0
  7. alita_sdk/cli/callbacks.py +576 -0
  8. alita_sdk/cli/cli.py +159 -0
  9. alita_sdk/cli/config.py +153 -0
  10. alita_sdk/cli/formatting.py +182 -0
  11. alita_sdk/cli/mcp_loader.py +315 -0
  12. alita_sdk/cli/toolkit.py +330 -0
  13. alita_sdk/cli/toolkit_loader.py +55 -0
  14. alita_sdk/cli/tools/__init__.py +9 -0
  15. alita_sdk/cli/tools/filesystem.py +905 -0
  16. alita_sdk/configurations/bitbucket.py +95 -0
  17. alita_sdk/configurations/confluence.py +96 -1
  18. alita_sdk/configurations/gitlab.py +79 -0
  19. alita_sdk/configurations/jira.py +103 -0
  20. alita_sdk/configurations/testrail.py +88 -0
  21. alita_sdk/configurations/xray.py +93 -0
  22. alita_sdk/configurations/zephyr_enterprise.py +93 -0
  23. alita_sdk/configurations/zephyr_essential.py +75 -0
  24. alita_sdk/runtime/clients/artifact.py +1 -1
  25. alita_sdk/runtime/clients/client.py +47 -10
  26. alita_sdk/runtime/clients/mcp_discovery.py +342 -0
  27. alita_sdk/runtime/clients/mcp_manager.py +262 -0
  28. alita_sdk/runtime/clients/sandbox_client.py +373 -0
  29. alita_sdk/runtime/langchain/assistant.py +70 -41
  30. alita_sdk/runtime/langchain/constants.py +6 -1
  31. alita_sdk/runtime/langchain/document_loaders/AlitaDocxMammothLoader.py +315 -3
  32. alita_sdk/runtime/langchain/document_loaders/AlitaJSONLoader.py +4 -1
  33. alita_sdk/runtime/langchain/document_loaders/constants.py +73 -100
  34. alita_sdk/runtime/langchain/langraph_agent.py +164 -38
  35. alita_sdk/runtime/langchain/utils.py +43 -7
  36. alita_sdk/runtime/models/mcp_models.py +61 -0
  37. alita_sdk/runtime/toolkits/__init__.py +24 -0
  38. alita_sdk/runtime/toolkits/application.py +8 -1
  39. alita_sdk/runtime/toolkits/artifact.py +5 -6
  40. alita_sdk/runtime/toolkits/mcp.py +895 -0
  41. alita_sdk/runtime/toolkits/tools.py +140 -50
  42. alita_sdk/runtime/tools/__init__.py +7 -2
  43. alita_sdk/runtime/tools/application.py +7 -0
  44. alita_sdk/runtime/tools/function.py +94 -5
  45. alita_sdk/runtime/tools/graph.py +10 -4
  46. alita_sdk/runtime/tools/image_generation.py +104 -8
  47. alita_sdk/runtime/tools/llm.py +204 -114
  48. alita_sdk/runtime/tools/mcp_inspect_tool.py +284 -0
  49. alita_sdk/runtime/tools/mcp_remote_tool.py +166 -0
  50. alita_sdk/runtime/tools/mcp_server_tool.py +3 -1
  51. alita_sdk/runtime/tools/sandbox.py +180 -79
  52. alita_sdk/runtime/tools/vectorstore.py +22 -21
  53. alita_sdk/runtime/tools/vectorstore_base.py +79 -26
  54. alita_sdk/runtime/utils/mcp_oauth.py +164 -0
  55. alita_sdk/runtime/utils/mcp_sse_client.py +405 -0
  56. alita_sdk/runtime/utils/streamlit.py +34 -3
  57. alita_sdk/runtime/utils/toolkit_utils.py +14 -4
  58. alita_sdk/runtime/utils/utils.py +1 -0
  59. alita_sdk/tools/__init__.py +48 -31
  60. alita_sdk/tools/ado/repos/__init__.py +1 -0
  61. alita_sdk/tools/ado/test_plan/__init__.py +1 -1
  62. alita_sdk/tools/ado/wiki/__init__.py +1 -5
  63. alita_sdk/tools/ado/work_item/__init__.py +1 -5
  64. alita_sdk/tools/ado/work_item/ado_wrapper.py +17 -8
  65. alita_sdk/tools/base_indexer_toolkit.py +194 -112
  66. alita_sdk/tools/bitbucket/__init__.py +1 -0
  67. alita_sdk/tools/chunkers/sematic/proposal_chunker.py +1 -1
  68. alita_sdk/tools/code/sonar/__init__.py +1 -1
  69. alita_sdk/tools/code_indexer_toolkit.py +15 -5
  70. alita_sdk/tools/confluence/__init__.py +2 -2
  71. alita_sdk/tools/confluence/api_wrapper.py +110 -63
  72. alita_sdk/tools/confluence/loader.py +10 -0
  73. alita_sdk/tools/elitea_base.py +22 -22
  74. alita_sdk/tools/github/__init__.py +2 -2
  75. alita_sdk/tools/gitlab/__init__.py +2 -1
  76. alita_sdk/tools/gitlab/api_wrapper.py +11 -7
  77. alita_sdk/tools/gitlab_org/__init__.py +1 -2
  78. alita_sdk/tools/google_places/__init__.py +2 -1
  79. alita_sdk/tools/jira/__init__.py +1 -0
  80. alita_sdk/tools/jira/api_wrapper.py +1 -1
  81. alita_sdk/tools/memory/__init__.py +1 -1
  82. alita_sdk/tools/non_code_indexer_toolkit.py +2 -2
  83. alita_sdk/tools/openapi/__init__.py +10 -1
  84. alita_sdk/tools/pandas/__init__.py +1 -1
  85. alita_sdk/tools/postman/__init__.py +2 -1
  86. alita_sdk/tools/postman/api_wrapper.py +18 -8
  87. alita_sdk/tools/postman/postman_analysis.py +8 -1
  88. alita_sdk/tools/pptx/__init__.py +2 -2
  89. alita_sdk/tools/qtest/__init__.py +3 -3
  90. alita_sdk/tools/qtest/api_wrapper.py +1708 -76
  91. alita_sdk/tools/rally/__init__.py +1 -2
  92. alita_sdk/tools/report_portal/__init__.py +1 -0
  93. alita_sdk/tools/salesforce/__init__.py +1 -0
  94. alita_sdk/tools/servicenow/__init__.py +2 -3
  95. alita_sdk/tools/sharepoint/__init__.py +1 -0
  96. alita_sdk/tools/sharepoint/api_wrapper.py +125 -34
  97. alita_sdk/tools/sharepoint/authorization_helper.py +191 -1
  98. alita_sdk/tools/sharepoint/utils.py +8 -2
  99. alita_sdk/tools/slack/__init__.py +1 -0
  100. alita_sdk/tools/sql/__init__.py +2 -1
  101. alita_sdk/tools/sql/api_wrapper.py +71 -23
  102. alita_sdk/tools/testio/__init__.py +1 -0
  103. alita_sdk/tools/testrail/__init__.py +1 -3
  104. alita_sdk/tools/utils/__init__.py +17 -0
  105. alita_sdk/tools/utils/content_parser.py +35 -24
  106. alita_sdk/tools/vector_adapters/VectorStoreAdapter.py +67 -21
  107. alita_sdk/tools/xray/__init__.py +2 -1
  108. alita_sdk/tools/zephyr/__init__.py +2 -1
  109. alita_sdk/tools/zephyr_enterprise/__init__.py +1 -0
  110. alita_sdk/tools/zephyr_essential/__init__.py +1 -0
  111. alita_sdk/tools/zephyr_scale/__init__.py +1 -0
  112. alita_sdk/tools/zephyr_squad/__init__.py +1 -0
  113. {alita_sdk-0.3.365.dist-info → alita_sdk-0.3.462.dist-info}/METADATA +8 -2
  114. {alita_sdk-0.3.365.dist-info → alita_sdk-0.3.462.dist-info}/RECORD +118 -93
  115. alita_sdk-0.3.462.dist-info/entry_points.txt +2 -0
  116. {alita_sdk-0.3.365.dist-info → alita_sdk-0.3.462.dist-info}/WHEEL +0 -0
  117. {alita_sdk-0.3.365.dist-info → alita_sdk-0.3.462.dist-info}/licenses/LICENSE +0 -0
  118. {alita_sdk-0.3.365.dist-info → alita_sdk-0.3.462.dist-info}/top_level.txt +0 -0
@@ -24,11 +24,6 @@ class AzureDevOpsWikiToolkit(BaseToolkit):
         AzureDevOpsWikiToolkit.toolkit_max_length = get_max_toolkit_length(selected_tools)
         m = create_model(
             name_alias,
-            name=(str, Field(description="Toolkit name",
-                             json_schema_extra={
-                                 'toolkit_name': True,
-                                 'max_toolkit_length': AzureDevOpsWikiToolkit.toolkit_max_length})
-                 ),
             ado_configuration=(AdoConfiguration, Field(description="Ado configuration", json_schema_extra={'configuration_types': ['ado']})),
             # indexer settings
             pgvector_configuration=(Optional[PgVectorConfiguration], Field(default=None,
@@ -42,6 +37,7 @@ class AzureDevOpsWikiToolkit(BaseToolkit):
                 'metadata': {
                     "label": "ADO wiki",
                     "icon_url": "ado-wiki-icon.svg",
+                    "max_length": AzureDevOpsWikiToolkit.toolkit_max_length,
                     "categories": ["documentation"],
                     "extra_categories": ["knowledge base", "documentation management", "wiki"],
                     "sections": {
@@ -23,11 +23,6 @@ class AzureDevOpsWorkItemsToolkit(BaseToolkit):
         AzureDevOpsWorkItemsToolkit.toolkit_max_length = get_max_toolkit_length(selected_tools)
         m = create_model(
             name,
-            name=(str, Field(description="Toolkit name",
-                             json_schema_extra={
-                                 'toolkit_name': True,
-                                 'max_toolkit_length': AzureDevOpsWorkItemsToolkit.toolkit_max_length})
-                 ),
             ado_configuration=(AdoConfiguration, Field(description="Ado Work Item configuration", json_schema_extra={'configuration_types': ['ado']})),
             limit=(Optional[int], Field(description="ADO plans limit used for limitation of the list with results", default=5)),
             selected_tools=(List[Literal[tuple(selected_tools)]], Field(default=[], json_schema_extra={'args_schemas': selected_tools})),
@@ -42,6 +37,7 @@ class AzureDevOpsWorkItemsToolkit(BaseToolkit):
                 'metadata': {
                     "label": "ADO boards",
                     "icon_url": "ado-boards-icon.svg",
+                    "max_length": AzureDevOpsWorkItemsToolkit.toolkit_max_length,
                     "categories": ["project management"],
                     "extra_categories": ["work item management", "issue tracking", "agile boards"],
                     "sections": {
@@ -329,11 +329,14 @@ class AzureDevOpsApiWrapper(NonCodeIndexerToolkit):
             parsed_item.update(fields_data)

             # extract relations if any
-            relations_data = work_item.relations
+            relations_data = None
+            if expand and str(expand).lower() in ("relations", "all"):
+                try:
+                    relations_data = getattr(work_item, 'relations', None)
+                except KeyError:
+                    relations_data = None
             if relations_data:
-                parsed_item['relations'] = []
-                for relation in relations_data:
-                    parsed_item['relations'].append(relation.as_dict())
+                parsed_item['relations'] = [relation.as_dict() for relation in relations_data]

             if parse_attachments:
                 # describe images in work item fields if present
@@ -344,13 +347,19 @@ class AzureDevOpsApiWrapper(NonCodeIndexerToolkit):
                 for img in images:
                     src = img.get('src')
                     if src:
-                        description = self.parse_attachment_by_url(src, image_description_prompt)
+                        description = self.parse_attachment_by_url(src, image_description_prompt=image_description_prompt)
                         img['image-description'] = description
                 parsed_item[field_name] = str(soup)
                 # parse attached documents if present
-                if parsed_item['relations']:
-                    for attachment in parsed_item['relations']:
-                        attachment['content'] = self.parse_attachment_by_url(attachment['url'], attachment['attributes']['name'], image_description_prompt)
+                for relation in parsed_item.get('relations', []):
+                    # Only process actual file attachments
+                    if relation.get('rel') == 'AttachedFile':
+                        file_name = relation.get('attributes', {}).get('name')
+                        if file_name:
+                            try:
+                                relation['content'] = self.parse_attachment_by_url(relation['url'], file_name, image_description_prompt=image_description_prompt)
+                            except Exception as att_e:
+                                logger.warning(f"Failed to parse attachment {file_name}: {att_e}")


             return parsed_item
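
The rewritten attachment handling only loads relations when the caller asked for them via `expand`, and only downloads content for relations whose `rel` is `AttachedFile`, skipping parent/child and other link types. A standalone sketch of that filtering step; the sample relation dicts below are invented and only mimic the shape of `relation.as_dict()`:

    # Invented payloads shaped like work item relations serialized with as_dict()
    relations = [
        {"rel": "AttachedFile", "url": "https://dev.azure.com/_apis/wit/attachments/1",
         "attributes": {"name": "design.docx"}},
        {"rel": "System.LinkTypes.Hierarchy-Forward", "url": "https://dev.azure.com/_apis/wit/workItems/42",
         "attributes": {}},
    ]

    def pick_file_attachments(relations: list[dict]) -> list[tuple[str, str]]:
        """Return (url, file_name) pairs for real file attachments only."""
        picked = []
        for relation in relations:
            if relation.get("rel") != "AttachedFile":
                continue  # skip hierarchy/link relations
            file_name = relation.get("attributes", {}).get("name")
            if file_name:
                picked.append((relation["url"], file_name))
        return picked

    print(pick_file_attachments(relations))  # [('https://dev.azure.com/_apis/wit/attachments/1', 'design.docx')]
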
@@ -1,41 +1,46 @@
+import copy
 import json
 import logging
 import time
 from typing import Any, Optional, List, Dict, Generator

+from langchain_core.callbacks import dispatch_custom_event
 from langchain_core.documents import Document
 from pydantic import create_model, Field, SecretStr

 from .utils.content_parser import file_extension_by_chunker, process_document_by_type
 from .vector_adapters.VectorStoreAdapter import VectorStoreAdapterFactory
+from ..runtime.langchain.document_loaders.constants import loaders_allowed_to_override
 from ..runtime.tools.vectorstore_base import VectorStoreWrapperBase
 from ..runtime.utils.utils import IndexerKeywords

 logger = logging.getLogger(__name__)

+DEFAULT_CUT_OFF = 0.2
+
 # Base Vector Store Schema Models
 BaseIndexParams = create_model(
     "BaseIndexParams",
-    collection_suffix=(str, Field(description="Suffix for collection name (max 7 characters) used to separate datasets", min_length=1, max_length=7)),
+    index_name=(str, Field(description="Index name (max 7 characters)", min_length=1, max_length=7)),
 )

 RemoveIndexParams = create_model(
     "RemoveIndexParams",
-    collection_suffix=(Optional[str], Field(description="Optional suffix for collection name (max 7 characters)", default="", max_length=7)),
+    index_name=(Optional[str], Field(description="Optional index name (max 7 characters)", default="", max_length=7)),
 )

 BaseSearchParams = create_model(
     "BaseSearchParams",
     query=(str, Field(description="Query text to search in the index")),
-    collection_suffix=(Optional[str], Field(
-        description="Optional suffix for collection name (max 7 characters). Leave empty to search across all datasets",
+    index_name=(Optional[str], Field(
+        description="Optional index name (max 7 characters). Leave empty to search across all datasets",
         default="", max_length=7)),
     filter=(Optional[dict | str], Field(
         description="Filter to apply to the search results. Can be a dictionary or a JSON string.",
         default={},
         examples=["{\"key\": \"value\"}", "{\"status\": \"active\"}"]
     )),
-    cut_off=(Optional[float], Field(description="Cut-off score for search results", default=0.5, ge=0, le=1)),
+    cut_off=(Optional[float], Field(description="Cut-off score for search results", default=DEFAULT_CUT_OFF, ge=0, le=1)),
     search_top=(Optional[int], Field(description="Number of top results to return", default=10)),
     full_text_search=(Optional[Dict[str, Any]], Field(
         description="Full text search parameters. Can be a dictionary with search options.",
@@ -58,14 +63,14 @@ BaseSearchParams = create_model(
 BaseStepbackSearchParams = create_model(
     "BaseStepbackSearchParams",
     query=(str, Field(description="Query text to search in the index")),
-    collection_suffix=(Optional[str], Field(description="Optional suffix for collection name (max 7 characters)", default="", max_length=7)),
+    index_name=(Optional[str], Field(description="Optional index name (max 7 characters)", default="", max_length=7)),
     messages=(Optional[List], Field(description="Chat messages for stepback search context", default=[])),
     filter=(Optional[dict | str], Field(
         description="Filter to apply to the search results. Can be a dictionary or a JSON string.",
         default={},
         examples=["{\"key\": \"value\"}", "{\"status\": \"active\"}"]
     )),
-    cut_off=(Optional[float], Field(description="Cut-off score for search results", default=0.5, ge=0, le=1)),
+    cut_off=(Optional[float], Field(description="Cut-off score for search results", default=DEFAULT_CUT_OFF, ge=0, le=1)),
     search_top=(Optional[int], Field(description="Number of top results to return", default=10)),
     full_text_search=(Optional[Dict[str, Any]], Field(
         description="Full text search parameters. Can be a dictionary with search options.",
@@ -92,7 +97,7 @@ BaseIndexDataParams = create_model(
                                        description="Optional flag to enforce clean existing index before indexing new data")),
     progress_step=(Optional[int], Field(default=10, ge=0, le=100,
                                         description="Optional step size for progress reporting during indexing")),
-    chunking_config=(Optional[dict], Field(description="Chunking tool configuration", default_factory=dict)),
+    chunking_config=(Optional[dict], Field(description="Chunking tool configuration", default=loaders_allowed_to_override)),
 )


@@ -108,7 +113,7 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
     def __init__(self, **kwargs):
         conn = kwargs.get('connection_string', None)
         connection_string = conn.get_secret_value() if isinstance(conn, SecretStr) else conn
-        collection_name = kwargs.get('collection_name')
+        collection_name = kwargs.get('collection_schema')

         if 'vectorstore_type' not in kwargs:
             kwargs['vectorstore_type'] = 'PGVector'
@@ -148,55 +153,48 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
         yield from ()

     def index_data(self, **kwargs):
-        from ..runtime.langchain.interfaces.llm_processor import add_documents
-        collection_suffix = kwargs.get("collection_suffix")
-        progress_step = kwargs.get("progress_step")
+        index_name = kwargs.get("index_name")
         clean_index = kwargs.get("clean_index")
         chunking_tool = kwargs.get("chunking_tool")
         chunking_config = kwargs.get("chunking_config")
+        result = {"count": 0}
         #
-        if clean_index:
-            self._clean_index(collection_suffix)
-        #
-        # create and add initial index meta document
-        index_meta_doc = Document(page_content=f"{IndexerKeywords.INDEX_META_TYPE.value}_{collection_suffix}", metadata={
-            "collection": collection_suffix,
-            "type": IndexerKeywords.INDEX_META_TYPE.value,
-            "indexed": 0,
-            "state": IndexerKeywords.INDEX_META_IN_PROGRESS.value,
-            "index_configuration": kwargs,
-            "created_on": time.time(),
-            "updated_on": time.time(),
-        })
-        index_meta_ids = add_documents(vectorstore=self.vectorstore, documents=[index_meta_doc])
-        #
-        self._log_tool_event(f"Indexing data into collection with suffix '{collection_suffix}'. It can take some time...")
-        self._log_tool_event(f"Loading the documents to index...{kwargs}")
-        documents = self._base_loader(**kwargs)
-        documents = list(documents) # consume/exhaust generator to count items
-        documents_count = len(documents)
-        documents = (doc for doc in documents)
-        self._log_tool_event(f"Base documents were pre-loaded. "
-                             f"Search for possible document duplicates and remove them from the indexing list...")
-        documents = self._reduce_duplicates(documents, collection_suffix)
-        self._log_tool_event(f"Duplicates were removed. "
-                             f"Processing documents to collect dependencies and prepare them for indexing...")
-        result = self._save_index_generator(documents, documents_count, chunking_tool, chunking_config, collection_suffix=collection_suffix, progress_step=progress_step)
-        #
-        # update index meta document
-        index_meta_doc.metadata["indexed"] = result
-        index_meta_doc.metadata["state"] = IndexerKeywords.INDEX_META_COMPLETED.value
-        index_meta_doc.metadata["updated_on"] = time.time()
-        add_documents(vectorstore=self.vectorstore, documents=[index_meta_doc], ids=index_meta_ids)
-        #
-        return {"status": "ok", "message": f"successfully indexed {result} documents"}
+        try:
+            if clean_index:
+                self._clean_index(index_name)
+            #
+            self.index_meta_init(index_name, kwargs)
+            #
+            self._log_tool_event(f"Indexing data into collection with suffix '{index_name}'. It can take some time...")
+            self._log_tool_event(f"Loading the documents to index...{kwargs}")
+            documents = self._base_loader(**kwargs)
+            documents = list(documents) # consume/exhaust generator to count items
+            documents_count = len(documents)
+            documents = (doc for doc in documents)
+            self._log_tool_event(f"Base documents were pre-loaded. "
+                                 f"Search for possible document duplicates and remove them from the indexing list...")
+            documents = self._reduce_duplicates(documents, index_name)
+            self._log_tool_event(f"Duplicates were removed. "
+                                 f"Processing documents to collect dependencies and prepare them for indexing...")
+            self._save_index_generator(documents, documents_count, chunking_tool, chunking_config, index_name=index_name, result=result)
+            #
+            results_count = result["count"]
+            self.index_meta_update(index_name, IndexerKeywords.INDEX_META_COMPLETED.value, results_count)
+            self._emit_index_event(index_name)
+            #
+            return {"status": "ok", "message": f"successfully indexed {results_count} documents" if results_count > 0
+                    else "no new documents to index"}
+        except Exception as e:
+            self.index_meta_update(index_name, IndexerKeywords.INDEX_META_FAILED.value, result["count"])
+            self._emit_index_event(index_name, error=str(e))
+            raise e
+

-    def _save_index_generator(self, base_documents: Generator[Document, None, None], base_total: int, chunking_tool, chunking_config, collection_suffix: Optional[str] = None, progress_step: int = 20):
+    def _save_index_generator(self, base_documents: Generator[Document, None, None], base_total: int, chunking_tool, chunking_config, result, index_name: Optional[str] = None):
         self._log_tool_event(f"Base documents are ready for indexing. {base_total} base documents in total to index.")
         from ..runtime.langchain.interfaces.llm_processor import add_documents
         #
         base_doc_counter = 0
-        total_counter = 0
         pg_vector_add_docs_chunk = []
         for base_doc in base_documents:
             base_doc_counter += 1
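
The refactored `index_data` wraps the whole pipeline in a try/except, counts indexed documents through a shared `result` dict that `_save_index_generator` mutates in place, records start and finish in the index-meta document, and emits a status event on both success and failure. A compressed, runnable sketch of that control flow with the collaborators injected as plain callables (the "completed"/"failed" strings stand in for the `IndexerKeywords` state values):

    def index_data_flow(index_name, load_documents, save_documents,
                        meta_init, meta_update, emit_event) -> dict:
        """Mirror of the new index_data control flow, with collaborators injected."""
        result = {"count": 0}              # mutated in place by save_documents, as in the diff
        try:
            meta_init(index_name)          # create/refresh the in-progress index-meta record
            docs = list(load_documents())  # materialize to know the total up front
            save_documents(docs, result)   # indexes documents and bumps result["count"]
            meta_update(index_name, "completed", result["count"])
            emit_event(index_name, error=None)
            message = (f"successfully indexed {result['count']} documents"
                       if result["count"] > 0 else "no new documents to index")
            return {"status": "ok", "message": message}
        except Exception as exc:
            meta_update(index_name, "failed", result["count"])
            emit_event(index_name, error=str(exc))
            raise

    # smoke test with stub collaborators
    def _save(docs, result):
        result["count"] += len(docs)

    print(index_data_flow("wiki", lambda: ["a", "b"], _save,
                          lambda name: None,
                          lambda name, state, count: None,
                          lambda name, error: None))
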
@@ -223,12 +221,12 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
                 if 'id' not in doc.metadata or 'updated_on' not in doc.metadata:
                     logger.warning(f"Document is missing required metadata field 'id' or 'updated_on': {doc.metadata}")
                 #
-                # if collection_suffix is provided, add it to metadata of each document
-                if collection_suffix:
+                # if index_name is provided, add it to metadata of each document
+                if index_name:
                     if not doc.metadata.get('collection'):
-                        doc.metadata['collection'] = collection_suffix
+                        doc.metadata['collection'] = index_name
                     else:
-                        doc.metadata['collection'] += f";{collection_suffix}"
+                        doc.metadata['collection'] += f";{index_name}"
                 #
                 try:
                     pg_vector_add_docs_chunk.append(doc)
@@ -244,10 +242,9 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
             msg = f"Indexed base document #{base_doc_counter} out of {base_total} (with {dependent_docs_counter} dependencies)."
             logger.debug(msg)
             self._log_tool_event(msg)
-            total_counter += dependent_docs_counter
+            result["count"] += dependent_docs_counter
         if pg_vector_add_docs_chunk:
             add_documents(vectorstore=self.vectorstore, documents=pg_vector_add_docs_chunk)
-        return total_counter

     def _apply_loaders_chunkers(self, documents: Generator[Document, None, None], chunking_tool: str=None, chunking_config=None) -> Generator[Document, None, None]:
         from ..tools.chunkers import __all__ as chunkers
@@ -307,12 +304,12 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
     def _reduce_duplicates(
             self,
             documents: Generator[Any, None, None],
-            collection_suffix: str,
+            index_name: str,
             log_msg: str = "Verification of documents to index started"
     ) -> Generator[Document, None, None]:
         """Generic duplicate reduction logic for documents."""
         self._log_tool_event(log_msg, tool_name="index_documents")
-        indexed_data = self._get_indexed_data(collection_suffix)
+        indexed_data = self._get_indexed_data(index_name)
         indexed_keys = set(indexed_data.keys())
         if not indexed_keys:
             self._log_tool_event("Vectorstore is empty, indexing all incoming documents", tool_name="index_documents")
@@ -324,7 +321,7 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
         for document in documents:
             key = self.key_fn(document)
             key = key if isinstance(key, str) else str(key)
-            if key in indexed_keys and collection_suffix == indexed_data[key]['metadata'].get('collection'):
+            if key in indexed_keys and index_name == indexed_data[key]['metadata'].get('collection'):
                 if self.compare_fn(document, indexed_data[key]):
                     continue
             yield document
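
Deduplication keeps the same shape, only keyed by `index_name` now: a document is skipped when its key is already indexed in the same collection and the comparison hook says it is unchanged. A small self-contained sketch of that predicate (the key and compare functions here are trivial stand-ins for the subclass hooks):

    def reduce_duplicates(documents, indexed_data, index_name, key_fn, compare_fn):
        """Yield only documents that are new or changed for this index."""
        indexed_keys = set(indexed_data)
        for document in documents:
            key = str(key_fn(document))
            if key in indexed_keys and index_name == indexed_data[key]["metadata"].get("collection"):
                if compare_fn(document, indexed_data[key]):
                    continue  # already indexed and unchanged -> skip
            yield document

    indexed = {"p1": {"metadata": {"collection": "wiki", "updated_on": 100}}}
    incoming = [{"id": "p1", "updated_on": 100}, {"id": "p2", "updated_on": 101}]
    fresh = reduce_duplicates(incoming, indexed, "wiki",
                              key_fn=lambda d: d["id"],
                              compare_fn=lambda doc, idx: doc["updated_on"] == idx["metadata"]["updated_on"])
    print([d["id"] for d in fresh])  # ['p2'] -- p1 is unchanged, p2 is new
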
@@ -339,7 +336,7 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
         )
         self.vectorstore.delete(ids=list(docs_to_remove))

-    def _get_indexed_data(self, collection_suffix: str):
+    def _get_indexed_data(self, index_name: str):
         raise NotImplementedError("Subclasses must implement this method")

     def key_fn(self, document: Document):
@@ -351,73 +348,57 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
     def remove_ids_fn(self, idx_data, key: str):
         raise NotImplementedError("Subclasses must implement this method")

-    def remove_index(self, collection_suffix: str = ""):
+    def remove_index(self, index_name: str = ""):
         """Cleans the indexed data in the collection."""
-        super()._clean_collection(collection_suffix=collection_suffix)
-        return (f"Collection '{collection_suffix}' has been removed from the vector store.\n"
-                f"Available collections: {self.list_collections()}") if collection_suffix \
+        super()._clean_collection(index_name=index_name)
+        return (f"Collection '{index_name}' has been removed from the vector store.\n"
+                f"Available collections: {self.list_collections()}") if index_name \
             else "All collections have been removed from the vector store."

-    def _build_collection_filter(self, filter: dict | str, collection_suffix: str = "") -> dict:
+    def _build_collection_filter(self, filter: dict | str, index_name: str = "") -> dict:
         """Builds a filter for the collection based on the provided suffix."""

         filter = filter if isinstance(filter, dict) else json.loads(filter)
-        if collection_suffix:
+        if index_name:
             filter.update({"collection": {
-                "$eq": collection_suffix.strip()
+                "$eq": index_name.strip()
             }})
-        filter = {
-            "$and": [
-                filter,
-                {"$or": [
-                    {"type": {"$exists": False}},
-                    {"type": {"$ne": IndexerKeywords.INDEX_META_TYPE.value}}
-                ]},
-            ]
-        }
-        return filter

-    def index_meta_read(self):
-        from sqlalchemy import func
-        from sqlalchemy.orm import Session
-
-        store = self.vectorstore
-        try:
-            with Session(store.session_maker.bind) as session:
-                meta = session.query(
-                    store.EmbeddingStore.id,
-                    store.EmbeddingStore.cmetadata
-                ).filter(
-                    func.jsonb_extract_path_text(store.EmbeddingStore.cmetadata, 'type') == IndexerKeywords.INDEX_META_TYPE.value
-                ).all()
-                return [
-                    {"id": id_, "metadata": cmetadata}
-                    for id_, cmetadata in meta
+        if filter:
+            # Exclude index meta documents from search results
+            filter = {
+                "$and": [
+                    filter,
+                    {"$or": [
+                        {"type": {"$exists": False}},
+                        {"type": {"$ne": IndexerKeywords.INDEX_META_TYPE.value}}
+                    ]},
                 ]
-        except Exception as e:
-            logger.error(f"Failed to get index_meta from PGVector: {str(e)}")
-            return []
-
-    def index_meta_delete(self, index_meta_ids: list[str]):
-        self.vectorstore.delete(ids=index_meta_ids)
+            }
+        else:
+            filter = {"$or": [
+                {"type": {"$exists": False}},
+                {"type": {"$ne": IndexerKeywords.INDEX_META_TYPE.value}}
+            ]}
+        return filter

     def search_index(self,
                      query: str,
-                     collection_suffix: str = "",
-                     filter: dict | str = {}, cut_off: float = 0.5,
+                     index_name: str = "",
+                     filter: dict | str = {}, cut_off: float = DEFAULT_CUT_OFF,
                      search_top: int = 10, reranker: dict = {},
                      full_text_search: Optional[Dict[str, Any]] = None,
                      reranking_config: Optional[Dict[str, Dict[str, Any]]] = None,
                      extended_search: Optional[List[str]] = None,
                      **kwargs):
         """ Searches indexed documents in the vector store."""
-        # build filter on top of collection_suffix
+        # build filter on top of index_name

         available_collections = super().list_collections()
-        if collection_suffix and collection_suffix not in available_collections:
-            return f"Collection '{collection_suffix}' not found. Available collections: {available_collections}"
+        if index_name and index_name not in available_collections:
+            return f"Collection '{index_name}' not found. Available collections: {available_collections}"

-        filter = self._build_collection_filter(filter, collection_suffix)
+        filter = self._build_collection_filter(filter, index_name)
         found_docs = super().search_documents(
             query,
             doctype=self.doctype,
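
The rewritten `_build_collection_filter` now always excludes index-meta documents from search: a caller-supplied filter (and the `index_name` restriction) is AND-ed with the exclusion clause, and with no filter at all the exclusion clause alone is returned. A standalone sketch of the two output shapes; the `'index_meta'` string is an assumed stand-in for `IndexerKeywords.INDEX_META_TYPE.value`:

    import json

    INDEX_META_TYPE = "index_meta"  # assumed value of IndexerKeywords.INDEX_META_TYPE.value

    def build_collection_filter(filter: dict | str, index_name: str = "") -> dict:
        exclude_meta = {"$or": [{"type": {"$exists": False}},
                                {"type": {"$ne": INDEX_META_TYPE}}]}
        filter = filter if isinstance(filter, dict) else json.loads(filter)
        if index_name:
            filter.update({"collection": {"$eq": index_name.strip()}})
        # AND the caller's filter with the meta exclusion, or return the exclusion alone
        return {"$and": [filter, exclude_meta]} if filter else exclude_meta

    print(build_collection_filter({}, "wiki"))  # {'$and': [{'collection': {'$eq': 'wiki'}}, {'$or': [...]}]}
    print(build_collection_filter({}))          # {'$or': [...]} -- search across all collections
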
@@ -434,15 +415,15 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
     def stepback_search_index(self,
                               query: str,
                               messages: List[Dict[str, Any]] = [],
-                              collection_suffix: str = "",
-                              filter: dict | str = {}, cut_off: float = 0.5,
+                              index_name: str = "",
+                              filter: dict | str = {}, cut_off: float = DEFAULT_CUT_OFF,
                               search_top: int = 10, reranker: dict = {},
                               full_text_search: Optional[Dict[str, Any]] = None,
                               reranking_config: Optional[Dict[str, Dict[str, Any]]] = None,
                               extended_search: Optional[List[str]] = None,
                               **kwargs):
         """ Searches indexed documents in the vector store."""
-        filter = self._build_collection_filter(filter, collection_suffix)
+        filter = self._build_collection_filter(filter, index_name)
         found_docs = super().stepback_search(
             query,
             messages,
@@ -459,8 +440,8 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
     def stepback_summary_index(self,
                                query: str,
                                messages: List[Dict[str, Any]] = [],
-                               collection_suffix: str = "",
-                               filter: dict | str = {}, cut_off: float = 0.5,
+                               index_name: str = "",
+                               filter: dict | str = {}, cut_off: float = DEFAULT_CUT_OFF,
                                search_top: int = 10, reranker: dict = {},
                                full_text_search: Optional[Dict[str, Any]] = None,
                                reranking_config: Optional[Dict[str, Dict[str, Any]]] = None,
@@ -468,7 +449,7 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
                                **kwargs):
         """ Generates a summary of indexed documents using stepback technique."""

-        filter = self._build_collection_filter(filter, collection_suffix)
+        filter = self._build_collection_filter(filter, index_name)
         return super().stepback_summary(
             query,
             messages,
@@ -480,6 +461,106 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
             reranking_config=reranking_config,
             extended_search=extended_search
         )
+
+    def index_meta_init(self, index_name: str, index_configuration: dict[str, Any]):
+        index_meta = super().get_index_meta(index_name)
+        if not index_meta:
+            self._log_tool_event(
+                f"There is no existing index_meta for collection '{index_name}'. Initializing it.",
+                tool_name="index_data"
+            )
+            from ..runtime.langchain.interfaces.llm_processor import add_documents
+            created_on = time.time()
+            metadata = {
+                "collection": index_name,
+                "type": IndexerKeywords.INDEX_META_TYPE.value,
+                "indexed": 0,
+                "updated": 0,
+                "state": IndexerKeywords.INDEX_META_IN_PROGRESS.value,
+                "index_configuration": index_configuration,
+                "created_on": created_on,
+                "updated_on": created_on,
+                "task_id": None,
+                "conversation_id": None,
+            }
+            metadata["history"] = json.dumps([metadata])
+            index_meta_doc = Document(page_content=f"{IndexerKeywords.INDEX_META_TYPE.value}_{index_name}", metadata=metadata)
+            add_documents(vectorstore=self.vectorstore, documents=[index_meta_doc])
+
+    def index_meta_update(self, index_name: str, state: str, result: int):
+        index_meta_raw = super().get_index_meta(index_name)
+        from ..runtime.langchain.interfaces.llm_processor import add_documents
+        #
+        if index_meta_raw:
+            metadata = copy.deepcopy(index_meta_raw.get("metadata", {}))
+            metadata["indexed"] = self.get_indexed_count(index_name)
+            metadata["updated"] = result
+            metadata["state"] = state
+            metadata["updated_on"] = time.time()
+            #
+            history_raw = metadata.pop("history", "[]")
+            try:
+                history = json.loads(history_raw) if history_raw.strip() else []
+                # replace the last history item with updated metadata
+                if history and isinstance(history, list):
+                    history[-1] = metadata
+                else:
+                    history = [metadata]
+            except (json.JSONDecodeError, TypeError):
+                logger.warning(f"Failed to load index history: {history_raw}. Create new with only current item.")
+                history = [metadata]
+            #
+            metadata["history"] = json.dumps(history)
+            index_meta_doc = Document(page_content=index_meta_raw.get("content", ""), metadata=metadata)
+            add_documents(vectorstore=self.vectorstore, documents=[index_meta_doc], ids=[index_meta_raw.get("id")])
+
+    def _emit_index_event(self, index_name: str, error: Optional[str] = None):
+        """
+        Emit custom event for index data operation.
+
+        Args:
+            index_name: The name of the index
+            error: Error message if the operation failed, None otherwise
+        """
+        index_meta = super().get_index_meta(index_name)
+
+        if not index_meta:
+            logger.warning(
+                f"No index_meta found for index '{index_name}'. "
+                "Cannot emit index event."
+            )
+            return
+
+        metadata = index_meta.get("metadata", {})
+
+        # Determine if this is a reindex operation
+        history_raw = metadata.get("history", "[]")
+        try:
+            history = json.loads(history_raw) if history_raw.strip() else []
+            is_reindex = len(history) > 1
+        except (json.JSONDecodeError, TypeError):
+            is_reindex = False
+
+        # Build event message
+        event_data = {
+            "id": index_meta.get("id"),
+            "index_name": index_name,
+            "state": metadata.get("state"),
+            "error": error,
+            "reindex": is_reindex,
+            "indexed": metadata.get("indexed", 0),
+            "updated": metadata.get("updated", 0),
+        }
+
+        # Emit the event
+        try:
+            dispatch_custom_event("index_data_status", event_data)
+            logger.debug(
+                f"Emitted index_data_status event for index "
+                f"'{index_name}': {event_data}"
+            )
+        except Exception as e:
+            logger.warning(f"Failed to emit index_data_status event: {e}")

     def get_available_tools(self):
         """
@@ -534,6 +615,7 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
                 "mode": "list_collections",
                 "ref": self.list_collections,
                 "description": self.list_collections.__doc__,
-                "args_schema": create_model("ListCollectionsParams") # No parameters
+                # No parameters
+                "args_schema": create_model("ListCollectionsParams")
             },
-        ]
+        ]
@@ -61,6 +61,7 @@ class AlitaBitbucketToolkit(BaseToolkit):
             'metadata':
                 {
                     "label": "Bitbucket", "icon_url": "bitbucket-icon.svg",
+                    "max_length": AlitaBitbucketToolkit.toolkit_max_length,
                     "categories": ["code repositories"],
                     "extra_categories": ["bitbucket", "git", "repository", "code", "version control"],
                 }
@@ -6,7 +6,7 @@ from langchain_core.prompts import ChatPromptTemplate
 from langchain.text_splitter import TokenTextSplitter

 from typing import Optional, List
-from langchain_core.pydantic_v1 import BaseModel
+from pydantic import BaseModel
 from ..utils import tiktoken_length

 logger = getLogger(__name__)
@@ -29,7 +29,7 @@ class SonarToolkit(BaseToolkit):
         SonarToolkit.toolkit_max_length = get_max_toolkit_length(selected_tools)
         return create_model(
             name,
-            sonar_project_name=(str, Field(description="Project name of the desired repository", json_schema_extra={'toolkit_name': True, 'max_toolkit_length': SonarToolkit.toolkit_max_length})),
+            sonar_project_name=(str, Field(description="Project name of the desired repository")),
            sonar_configuration=(SonarConfiguration, Field(description="Sonar Configuration", json_schema_extra={'configuration_types': ['sonar']})),
             selected_tools=(List[Literal[tuple(selected_tools)]], Field(default=[], json_schema_extra={'args_schemas': selected_tools})),
             __config__=ConfigDict(json_schema_extra=