alita-sdk 0.3.257__py3-none-any.whl → 0.3.584__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of alita-sdk might be problematic. Click here for more details.

Files changed (281)
  1. alita_sdk/cli/__init__.py +10 -0
  2. alita_sdk/cli/__main__.py +17 -0
  3. alita_sdk/cli/agent/__init__.py +5 -0
  4. alita_sdk/cli/agent/default.py +258 -0
  5. alita_sdk/cli/agent_executor.py +155 -0
  6. alita_sdk/cli/agent_loader.py +215 -0
  7. alita_sdk/cli/agent_ui.py +228 -0
  8. alita_sdk/cli/agents.py +3794 -0
  9. alita_sdk/cli/callbacks.py +647 -0
  10. alita_sdk/cli/cli.py +168 -0
  11. alita_sdk/cli/config.py +306 -0
  12. alita_sdk/cli/context/__init__.py +30 -0
  13. alita_sdk/cli/context/cleanup.py +198 -0
  14. alita_sdk/cli/context/manager.py +731 -0
  15. alita_sdk/cli/context/message.py +285 -0
  16. alita_sdk/cli/context/strategies.py +289 -0
  17. alita_sdk/cli/context/token_estimation.py +127 -0
  18. alita_sdk/cli/formatting.py +182 -0
  19. alita_sdk/cli/input_handler.py +419 -0
  20. alita_sdk/cli/inventory.py +1073 -0
  21. alita_sdk/cli/mcp_loader.py +315 -0
  22. alita_sdk/cli/toolkit.py +327 -0
  23. alita_sdk/cli/toolkit_loader.py +85 -0
  24. alita_sdk/cli/tools/__init__.py +43 -0
  25. alita_sdk/cli/tools/approval.py +224 -0
  26. alita_sdk/cli/tools/filesystem.py +1751 -0
  27. alita_sdk/cli/tools/planning.py +389 -0
  28. alita_sdk/cli/tools/terminal.py +414 -0
  29. alita_sdk/community/__init__.py +72 -12
  30. alita_sdk/community/inventory/__init__.py +236 -0
  31. alita_sdk/community/inventory/config.py +257 -0
  32. alita_sdk/community/inventory/enrichment.py +2137 -0
  33. alita_sdk/community/inventory/extractors.py +1469 -0
  34. alita_sdk/community/inventory/ingestion.py +3172 -0
  35. alita_sdk/community/inventory/knowledge_graph.py +1457 -0
  36. alita_sdk/community/inventory/parsers/__init__.py +218 -0
  37. alita_sdk/community/inventory/parsers/base.py +295 -0
  38. alita_sdk/community/inventory/parsers/csharp_parser.py +907 -0
  39. alita_sdk/community/inventory/parsers/go_parser.py +851 -0
  40. alita_sdk/community/inventory/parsers/html_parser.py +389 -0
  41. alita_sdk/community/inventory/parsers/java_parser.py +593 -0
  42. alita_sdk/community/inventory/parsers/javascript_parser.py +629 -0
  43. alita_sdk/community/inventory/parsers/kotlin_parser.py +768 -0
  44. alita_sdk/community/inventory/parsers/markdown_parser.py +362 -0
  45. alita_sdk/community/inventory/parsers/python_parser.py +604 -0
  46. alita_sdk/community/inventory/parsers/rust_parser.py +858 -0
  47. alita_sdk/community/inventory/parsers/swift_parser.py +832 -0
  48. alita_sdk/community/inventory/parsers/text_parser.py +322 -0
  49. alita_sdk/community/inventory/parsers/yaml_parser.py +370 -0
  50. alita_sdk/community/inventory/patterns/__init__.py +61 -0
  51. alita_sdk/community/inventory/patterns/ast_adapter.py +380 -0
  52. alita_sdk/community/inventory/patterns/loader.py +348 -0
  53. alita_sdk/community/inventory/patterns/registry.py +198 -0
  54. alita_sdk/community/inventory/presets.py +535 -0
  55. alita_sdk/community/inventory/retrieval.py +1403 -0
  56. alita_sdk/community/inventory/toolkit.py +173 -0
  57. alita_sdk/community/inventory/toolkit_utils.py +176 -0
  58. alita_sdk/community/inventory/visualize.py +1370 -0
  59. alita_sdk/configurations/__init__.py +11 -0
  60. alita_sdk/configurations/ado.py +148 -2
  61. alita_sdk/configurations/azure_search.py +1 -1
  62. alita_sdk/configurations/bigquery.py +1 -1
  63. alita_sdk/configurations/bitbucket.py +94 -2
  64. alita_sdk/configurations/browser.py +18 -0
  65. alita_sdk/configurations/carrier.py +19 -0
  66. alita_sdk/configurations/confluence.py +130 -1
  67. alita_sdk/configurations/delta_lake.py +1 -1
  68. alita_sdk/configurations/figma.py +76 -5
  69. alita_sdk/configurations/github.py +65 -1
  70. alita_sdk/configurations/gitlab.py +81 -0
  71. alita_sdk/configurations/google_places.py +17 -0
  72. alita_sdk/configurations/jira.py +103 -0
  73. alita_sdk/configurations/openapi.py +323 -0
  74. alita_sdk/configurations/postman.py +1 -1
  75. alita_sdk/configurations/qtest.py +72 -3
  76. alita_sdk/configurations/report_portal.py +115 -0
  77. alita_sdk/configurations/salesforce.py +19 -0
  78. alita_sdk/configurations/service_now.py +1 -12
  79. alita_sdk/configurations/sharepoint.py +167 -0
  80. alita_sdk/configurations/sonar.py +18 -0
  81. alita_sdk/configurations/sql.py +20 -0
  82. alita_sdk/configurations/testio.py +101 -0
  83. alita_sdk/configurations/testrail.py +88 -0
  84. alita_sdk/configurations/xray.py +94 -1
  85. alita_sdk/configurations/zephyr_enterprise.py +94 -1
  86. alita_sdk/configurations/zephyr_essential.py +95 -0
  87. alita_sdk/runtime/clients/artifact.py +21 -4
  88. alita_sdk/runtime/clients/client.py +458 -67
  89. alita_sdk/runtime/clients/mcp_discovery.py +342 -0
  90. alita_sdk/runtime/clients/mcp_manager.py +262 -0
  91. alita_sdk/runtime/clients/sandbox_client.py +352 -0
  92. alita_sdk/runtime/langchain/_constants_bkup.py +1318 -0
  93. alita_sdk/runtime/langchain/assistant.py +183 -43
  94. alita_sdk/runtime/langchain/constants.py +647 -1
  95. alita_sdk/runtime/langchain/document_loaders/AlitaDocxMammothLoader.py +315 -3
  96. alita_sdk/runtime/langchain/document_loaders/AlitaExcelLoader.py +209 -31
  97. alita_sdk/runtime/langchain/document_loaders/AlitaImageLoader.py +1 -1
  98. alita_sdk/runtime/langchain/document_loaders/AlitaJSONLinesLoader.py +77 -0
  99. alita_sdk/runtime/langchain/document_loaders/AlitaJSONLoader.py +10 -3
  100. alita_sdk/runtime/langchain/document_loaders/AlitaMarkdownLoader.py +66 -0
  101. alita_sdk/runtime/langchain/document_loaders/AlitaPDFLoader.py +79 -10
  102. alita_sdk/runtime/langchain/document_loaders/AlitaPowerPointLoader.py +52 -15
  103. alita_sdk/runtime/langchain/document_loaders/AlitaPythonLoader.py +9 -0
  104. alita_sdk/runtime/langchain/document_loaders/AlitaTableLoader.py +1 -4
  105. alita_sdk/runtime/langchain/document_loaders/AlitaTextLoader.py +15 -2
  106. alita_sdk/runtime/langchain/document_loaders/ImageParser.py +30 -0
  107. alita_sdk/runtime/langchain/document_loaders/constants.py +189 -41
  108. alita_sdk/runtime/langchain/interfaces/llm_processor.py +4 -2
  109. alita_sdk/runtime/langchain/langraph_agent.py +493 -105
  110. alita_sdk/runtime/langchain/utils.py +118 -8
  111. alita_sdk/runtime/llms/preloaded.py +2 -6
  112. alita_sdk/runtime/models/mcp_models.py +61 -0
  113. alita_sdk/runtime/skills/__init__.py +91 -0
  114. alita_sdk/runtime/skills/callbacks.py +498 -0
  115. alita_sdk/runtime/skills/discovery.py +540 -0
  116. alita_sdk/runtime/skills/executor.py +610 -0
  117. alita_sdk/runtime/skills/input_builder.py +371 -0
  118. alita_sdk/runtime/skills/models.py +330 -0
  119. alita_sdk/runtime/skills/registry.py +355 -0
  120. alita_sdk/runtime/skills/skill_runner.py +330 -0
  121. alita_sdk/runtime/toolkits/__init__.py +28 -0
  122. alita_sdk/runtime/toolkits/application.py +14 -4
  123. alita_sdk/runtime/toolkits/artifact.py +25 -9
  124. alita_sdk/runtime/toolkits/datasource.py +13 -6
  125. alita_sdk/runtime/toolkits/mcp.py +782 -0
  126. alita_sdk/runtime/toolkits/planning.py +178 -0
  127. alita_sdk/runtime/toolkits/skill_router.py +238 -0
  128. alita_sdk/runtime/toolkits/subgraph.py +11 -6
  129. alita_sdk/runtime/toolkits/tools.py +314 -70
  130. alita_sdk/runtime/toolkits/vectorstore.py +11 -5
  131. alita_sdk/runtime/tools/__init__.py +24 -0
  132. alita_sdk/runtime/tools/application.py +16 -4
  133. alita_sdk/runtime/tools/artifact.py +367 -33
  134. alita_sdk/runtime/tools/data_analysis.py +183 -0
  135. alita_sdk/runtime/tools/function.py +100 -4
  136. alita_sdk/runtime/tools/graph.py +81 -0
  137. alita_sdk/runtime/tools/image_generation.py +218 -0
  138. alita_sdk/runtime/tools/llm.py +1032 -177
  139. alita_sdk/runtime/tools/loop.py +3 -1
  140. alita_sdk/runtime/tools/loop_output.py +3 -1
  141. alita_sdk/runtime/tools/mcp_inspect_tool.py +284 -0
  142. alita_sdk/runtime/tools/mcp_remote_tool.py +181 -0
  143. alita_sdk/runtime/tools/mcp_server_tool.py +3 -1
  144. alita_sdk/runtime/tools/planning/__init__.py +36 -0
  145. alita_sdk/runtime/tools/planning/models.py +246 -0
  146. alita_sdk/runtime/tools/planning/wrapper.py +607 -0
  147. alita_sdk/runtime/tools/router.py +2 -1
  148. alita_sdk/runtime/tools/sandbox.py +375 -0
  149. alita_sdk/runtime/tools/skill_router.py +776 -0
  150. alita_sdk/runtime/tools/tool.py +3 -1
  151. alita_sdk/runtime/tools/vectorstore.py +69 -65
  152. alita_sdk/runtime/tools/vectorstore_base.py +163 -90
  153. alita_sdk/runtime/utils/AlitaCallback.py +137 -21
  154. alita_sdk/runtime/utils/constants.py +5 -1
  155. alita_sdk/runtime/utils/mcp_client.py +492 -0
  156. alita_sdk/runtime/utils/mcp_oauth.py +361 -0
  157. alita_sdk/runtime/utils/mcp_sse_client.py +434 -0
  158. alita_sdk/runtime/utils/mcp_tools_discovery.py +124 -0
  159. alita_sdk/runtime/utils/streamlit.py +41 -14
  160. alita_sdk/runtime/utils/toolkit_utils.py +28 -9
  161. alita_sdk/runtime/utils/utils.py +48 -0
  162. alita_sdk/tools/__init__.py +135 -37
  163. alita_sdk/tools/ado/__init__.py +2 -2
  164. alita_sdk/tools/ado/repos/__init__.py +16 -19
  165. alita_sdk/tools/ado/repos/repos_wrapper.py +12 -20
  166. alita_sdk/tools/ado/test_plan/__init__.py +27 -8
  167. alita_sdk/tools/ado/test_plan/test_plan_wrapper.py +56 -28
  168. alita_sdk/tools/ado/wiki/__init__.py +28 -12
  169. alita_sdk/tools/ado/wiki/ado_wrapper.py +114 -40
  170. alita_sdk/tools/ado/work_item/__init__.py +28 -12
  171. alita_sdk/tools/ado/work_item/ado_wrapper.py +95 -11
  172. alita_sdk/tools/advanced_jira_mining/__init__.py +13 -8
  173. alita_sdk/tools/aws/delta_lake/__init__.py +15 -11
  174. alita_sdk/tools/aws/delta_lake/tool.py +5 -1
  175. alita_sdk/tools/azure_ai/search/__init__.py +14 -8
  176. alita_sdk/tools/base/tool.py +5 -1
  177. alita_sdk/tools/base_indexer_toolkit.py +454 -110
  178. alita_sdk/tools/bitbucket/__init__.py +28 -19
  179. alita_sdk/tools/bitbucket/api_wrapper.py +285 -27
  180. alita_sdk/tools/bitbucket/cloud_api_wrapper.py +5 -5
  181. alita_sdk/tools/browser/__init__.py +41 -16
  182. alita_sdk/tools/browser/crawler.py +3 -1
  183. alita_sdk/tools/browser/utils.py +15 -6
  184. alita_sdk/tools/carrier/__init__.py +18 -17
  185. alita_sdk/tools/carrier/backend_reports_tool.py +8 -4
  186. alita_sdk/tools/carrier/excel_reporter.py +8 -4
  187. alita_sdk/tools/chunkers/__init__.py +3 -1
  188. alita_sdk/tools/chunkers/code/codeparser.py +1 -1
  189. alita_sdk/tools/chunkers/sematic/json_chunker.py +2 -1
  190. alita_sdk/tools/chunkers/sematic/markdown_chunker.py +97 -6
  191. alita_sdk/tools/chunkers/sematic/proposal_chunker.py +1 -1
  192. alita_sdk/tools/chunkers/universal_chunker.py +270 -0
  193. alita_sdk/tools/cloud/aws/__init__.py +12 -7
  194. alita_sdk/tools/cloud/azure/__init__.py +12 -7
  195. alita_sdk/tools/cloud/gcp/__init__.py +12 -7
  196. alita_sdk/tools/cloud/k8s/__init__.py +12 -7
  197. alita_sdk/tools/code/linter/__init__.py +10 -8
  198. alita_sdk/tools/code/loaders/codesearcher.py +3 -2
  199. alita_sdk/tools/code/sonar/__init__.py +21 -13
  200. alita_sdk/tools/code_indexer_toolkit.py +199 -0
  201. alita_sdk/tools/confluence/__init__.py +22 -14
  202. alita_sdk/tools/confluence/api_wrapper.py +197 -58
  203. alita_sdk/tools/confluence/loader.py +14 -2
  204. alita_sdk/tools/custom_open_api/__init__.py +12 -5
  205. alita_sdk/tools/elastic/__init__.py +11 -8
  206. alita_sdk/tools/elitea_base.py +546 -64
  207. alita_sdk/tools/figma/__init__.py +60 -11
  208. alita_sdk/tools/figma/api_wrapper.py +1400 -167
  209. alita_sdk/tools/figma/figma_client.py +73 -0
  210. alita_sdk/tools/figma/toon_tools.py +2748 -0
  211. alita_sdk/tools/github/__init__.py +18 -17
  212. alita_sdk/tools/github/api_wrapper.py +9 -26
  213. alita_sdk/tools/github/github_client.py +81 -12
  214. alita_sdk/tools/github/schemas.py +2 -1
  215. alita_sdk/tools/github/tool.py +5 -1
  216. alita_sdk/tools/gitlab/__init__.py +19 -13
  217. alita_sdk/tools/gitlab/api_wrapper.py +256 -80
  218. alita_sdk/tools/gitlab_org/__init__.py +14 -10
  219. alita_sdk/tools/google/bigquery/__init__.py +14 -13
  220. alita_sdk/tools/google/bigquery/tool.py +5 -1
  221. alita_sdk/tools/google_places/__init__.py +21 -11
  222. alita_sdk/tools/jira/__init__.py +22 -11
  223. alita_sdk/tools/jira/api_wrapper.py +315 -168
  224. alita_sdk/tools/keycloak/__init__.py +11 -8
  225. alita_sdk/tools/localgit/__init__.py +9 -3
  226. alita_sdk/tools/localgit/local_git.py +62 -54
  227. alita_sdk/tools/localgit/tool.py +5 -1
  228. alita_sdk/tools/memory/__init__.py +38 -14
  229. alita_sdk/tools/non_code_indexer_toolkit.py +7 -2
  230. alita_sdk/tools/ocr/__init__.py +11 -8
  231. alita_sdk/tools/openapi/__init__.py +491 -106
  232. alita_sdk/tools/openapi/api_wrapper.py +1357 -0
  233. alita_sdk/tools/openapi/tool.py +20 -0
  234. alita_sdk/tools/pandas/__init__.py +20 -12
  235. alita_sdk/tools/pandas/api_wrapper.py +40 -45
  236. alita_sdk/tools/pandas/dataframe/generator/base.py +3 -1
  237. alita_sdk/tools/postman/__init__.py +11 -11
  238. alita_sdk/tools/postman/api_wrapper.py +19 -8
  239. alita_sdk/tools/postman/postman_analysis.py +8 -1
  240. alita_sdk/tools/pptx/__init__.py +11 -10
  241. alita_sdk/tools/qtest/__init__.py +22 -14
  242. alita_sdk/tools/qtest/api_wrapper.py +1784 -88
  243. alita_sdk/tools/rally/__init__.py +13 -10
  244. alita_sdk/tools/report_portal/__init__.py +23 -16
  245. alita_sdk/tools/salesforce/__init__.py +22 -16
  246. alita_sdk/tools/servicenow/__init__.py +21 -16
  247. alita_sdk/tools/servicenow/api_wrapper.py +1 -1
  248. alita_sdk/tools/sharepoint/__init__.py +17 -14
  249. alita_sdk/tools/sharepoint/api_wrapper.py +179 -39
  250. alita_sdk/tools/sharepoint/authorization_helper.py +191 -1
  251. alita_sdk/tools/sharepoint/utils.py +8 -2
  252. alita_sdk/tools/slack/__init__.py +13 -8
  253. alita_sdk/tools/sql/__init__.py +22 -19
  254. alita_sdk/tools/sql/api_wrapper.py +71 -23
  255. alita_sdk/tools/testio/__init__.py +21 -13
  256. alita_sdk/tools/testrail/__init__.py +13 -11
  257. alita_sdk/tools/testrail/api_wrapper.py +214 -46
  258. alita_sdk/tools/utils/__init__.py +28 -4
  259. alita_sdk/tools/utils/content_parser.py +241 -55
  260. alita_sdk/tools/utils/text_operations.py +254 -0
  261. alita_sdk/tools/vector_adapters/VectorStoreAdapter.py +83 -27
  262. alita_sdk/tools/xray/__init__.py +18 -14
  263. alita_sdk/tools/xray/api_wrapper.py +58 -113
  264. alita_sdk/tools/yagmail/__init__.py +9 -3
  265. alita_sdk/tools/zephyr/__init__.py +12 -7
  266. alita_sdk/tools/zephyr_enterprise/__init__.py +16 -9
  267. alita_sdk/tools/zephyr_enterprise/api_wrapper.py +30 -15
  268. alita_sdk/tools/zephyr_essential/__init__.py +16 -10
  269. alita_sdk/tools/zephyr_essential/api_wrapper.py +297 -54
  270. alita_sdk/tools/zephyr_essential/client.py +6 -4
  271. alita_sdk/tools/zephyr_scale/__init__.py +13 -8
  272. alita_sdk/tools/zephyr_scale/api_wrapper.py +39 -31
  273. alita_sdk/tools/zephyr_squad/__init__.py +12 -7
  274. {alita_sdk-0.3.257.dist-info → alita_sdk-0.3.584.dist-info}/METADATA +184 -37
  275. alita_sdk-0.3.584.dist-info/RECORD +452 -0
  276. alita_sdk-0.3.584.dist-info/entry_points.txt +2 -0
  277. alita_sdk/tools/bitbucket/tools.py +0 -304
  278. alita_sdk-0.3.257.dist-info/RECORD +0 -343
  279. {alita_sdk-0.3.257.dist-info → alita_sdk-0.3.584.dist-info}/WHEEL +0 -0
  280. {alita_sdk-0.3.257.dist-info → alita_sdk-0.3.584.dist-info}/licenses/LICENSE +0 -0
  281. {alita_sdk-0.3.257.dist-info → alita_sdk-0.3.584.dist-info}/top_level.txt +0 -0
@@ -86,7 +86,9 @@ Answer must be JSON only extractable by JSON.LOADS."""
86
86
  else:
87
87
  input_[-1].content += self.unstructured_output
88
88
  completion = self.client.invoke(input_, config=config)
89
- result = _extract_json(completion.content.strip())
89
+ from ..langchain.utils import extract_text_from_completion
90
+ content_text = extract_text_from_completion(completion)
91
+ result = _extract_json(content_text.strip())
90
92
  logger.info(f"ToolNode tool params: {result}")
91
93
  try:
92
94
  # handler for application added as a tool
@@ -1,7 +1,7 @@
1
1
  import json
2
2
  import math
3
3
  import types
4
- from typing import Any, Optional, List, Dict, Callable, Generator
4
+ from typing import Any, Optional, List, Dict, Callable, Generator, OrderedDict
5
5
 
6
6
  from langchain_core.documents import Document
7
7
  from pydantic import BaseModel, model_validator, Field
@@ -12,10 +12,11 @@ from alita_sdk.tools.vector_adapters.VectorStoreAdapter import VectorStoreAdapte
12
12
  from logging import getLogger
13
13
 
14
14
  from ..utils.logging import dispatch_custom_event
15
- from ..utils.utils import IndexerKeywords
15
+ from ..langchain.utils import extract_text_from_completion
16
16
 
17
17
  logger = getLogger(__name__)
18
18
 
19
+
19
20
  class IndexDocumentsModel(BaseModel):
20
21
  documents: Any = Field(description="Generator of documents to index")
21
22
 
@@ -73,6 +74,10 @@ class StepBackSearchDocumentsModel(BaseModel):
73
74
  }""",
74
75
  default=None
75
76
  )
77
+ extended_search: Optional[List[str]] = Field(
78
+ description="List of chunk types to search for (title, summary, propositions, keywords, documents)",
79
+ default=None
80
+ )
76
81
  reranking_config: Optional[Dict[str, Dict[str, Any]]] = Field(
77
82
  description="""Reranking configuration. Example:
78
83
  {
@@ -87,10 +92,6 @@ class StepBackSearchDocumentsModel(BaseModel):
87
92
  }""",
88
93
  default=None
89
94
  )
90
- extended_search: Optional[List[str]] = Field(
91
- description="List of chunk types to search for (title, summary, propositions, keywords, documents)",
92
- default=None
93
- )
94
95
 
95
96
  STEPBACK_PROMPT = """Your task is to convert provided question into a more generic question that will be used for similarity search.
96
97
  Remove all not important words, question words, but save all names, dates and acronym as in original question.
@@ -138,7 +139,7 @@ class VectorStoreWrapper(BaseToolApiWrapper):
138
139
  embedding_model_params: dict
139
140
  vectorstore_type: str
140
141
  vectorstore_params: dict
141
- max_docs_per_add: int = 100
142
+ max_docs_per_add: int = 20
142
143
  dataset: str = None
143
144
  embedding: Any = None
144
145
  vectorstore: Any = None
@@ -208,16 +209,33 @@ class VectorStoreWrapper(BaseToolApiWrapper):
208
209
  tool_name="_remove_collection"
209
210
  )
210
211
 
211
- def _get_indexed_ids(self, collection_suffix: Optional[str] = '') -> List[str]:
212
+ def _get_indexed_ids(self, index_name: Optional[str] = '') -> List[str]:
212
213
  """Get all indexed document IDs from vectorstore"""
213
- return self.vector_adapter.get_indexed_ids(self, collection_suffix)
214
-
215
- def list_collections(self) -> List[str]:
216
- """List all collections in the vectorstore."""
217
-
218
- return self.vector_adapter.list_collections(self)
214
+ return self.vector_adapter.get_indexed_ids(self, index_name)
215
+
216
+ def list_collections(self) -> Any:
217
+ """List all collections in the vectorstore.
218
+ Returns a list of collection names, or if no collections exist,
219
+ returns a dict with an empty list and a message."""
220
+ raw = self.vector_adapter.list_collections(self)
221
+ # Normalize raw result to a list of names
222
+ if not raw:
223
+ # No collections found
224
+ return {"collections": [], "message": "No indexed collections"}
225
+ if isinstance(raw, str):
226
+ # e.g., Chroma adapter returns comma-separated string
227
+ cols = [c for c in raw.split(',') if c]
228
+ else:
229
+ try:
230
+ cols = list(raw)
231
+ except Exception:
232
+ # Unexpected type, return raw directly
233
+ return raw
234
+ if not cols:
235
+ return {"collections": [], "message": "No indexed collections"}
236
+ return cols
219
237
 
220
- def _clean_collection(self, collection_suffix: str = ''):
238
+ def _clean_collection(self, index_name: str = ''):
221
239
  """
222
240
  Clean the vectorstore collection by deleting all indexed data.
223
241
  """
@@ -225,19 +243,15 @@ class VectorStoreWrapper(BaseToolApiWrapper):
225
243
  f"Cleaning collection '{self.dataset}'",
226
244
  tool_name="_clean_collection"
227
245
  )
228
- self.vector_adapter.clean_collection(self, collection_suffix)
246
+ self.vector_adapter.clean_collection(self, index_name)
229
247
  self._log_data(
230
248
  f"Collection '{self.dataset}' has been cleaned. ",
231
249
  tool_name="_clean_collection"
232
250
  )
233
251
 
234
- def _get_indexed_data(self, collection_name: str):
235
- """ Get all indexed data from vectorstore for non-code content """
236
- return self.vector_adapter.get_indexed_data(self, collection_name)
237
-
238
- def _get_code_indexed_data(self, collection_suffix: str) -> Dict[str, Dict[str, Any]]:
252
+ def _get_code_indexed_data(self, index_name: str) -> Dict[str, Dict[str, Any]]:
239
253
  """ Get all indexed data from vectorstore for code content """
240
- return self.vector_adapter.get_code_indexed_data(self, collection_suffix)
254
+ return self.vector_adapter.get_code_indexed_data(self, index_name)
241
255
 
242
256
  def _add_to_collection(self, entry_id, new_collection_value):
243
257
  """Add a new collection name to the `collection` key in the `metadata` column."""
@@ -246,7 +260,7 @@ class VectorStoreWrapper(BaseToolApiWrapper):
246
260
  def _reduce_duplicates(
247
261
  self,
248
262
  documents: Generator[Any, None, None],
249
- collection_suffix: str,
263
+ index_name: str,
250
264
  get_indexed_data: Callable,
251
265
  key_fn: Callable,
252
266
  compare_fn: Callable,
@@ -255,7 +269,7 @@ class VectorStoreWrapper(BaseToolApiWrapper):
255
269
  ) -> List[Any]:
256
270
  """Generic duplicate reduction logic for documents."""
257
271
  self._log_data(log_msg, tool_name="index_documents")
258
- indexed_data = get_indexed_data(collection_suffix)
272
+ indexed_data = get_indexed_data(index_name)
259
273
  indexed_keys = set(indexed_data.keys())
260
274
  if not indexed_keys:
261
275
  self._log_data("Vectorstore is empty, indexing all incoming documents", tool_name="index_documents")
@@ -266,14 +280,15 @@ class VectorStoreWrapper(BaseToolApiWrapper):
266
280
 
267
281
  for document in documents:
268
282
  key = key_fn(document)
269
- if key in indexed_keys and collection_suffix == indexed_data[key]['metadata'].get('collection'):
283
+ key = key if isinstance(key, str) else str(key)
284
+ if key in indexed_keys and index_name == indexed_data[key]['metadata'].get('collection'):
270
285
  if compare_fn(document, indexed_data[key]):
271
286
  # Disabled addition of new collection to already indexed documents
272
287
  # # check metadata.collection and update if needed
273
288
  # for update_collection_id in remove_ids_fn(indexed_data, key):
274
289
  # self._add_to_collection(
275
290
  # update_collection_id,
276
- # collection_suffix
291
+ # index_name
277
292
  # )
278
293
  continue
279
294
  final_docs.append(document)
@@ -290,30 +305,10 @@ class VectorStoreWrapper(BaseToolApiWrapper):
290
305
 
291
306
  return final_docs
292
307
 
293
- def _reduce_non_code_duplicates(self, documents: Generator[Any, None, None], collection_suffix: str) -> List[Any]:
294
- return self._reduce_duplicates(
295
- documents,
296
- collection_suffix,
297
- self._get_indexed_data,
298
- lambda doc: doc.metadata.get('id'),
299
- lambda doc, idx: (
300
- doc.metadata.get('updated_on') and
301
- idx['metadata'].get('updated_on') and
302
- doc.metadata.get('updated_on') == idx['metadata'].get('updated_on')
303
- ),
304
- lambda idx_data, key: (
305
- idx_data[key]['all_chunks'] +
306
- [idx_data[dep_id]['id'] for dep_id in idx_data[key][IndexerKeywords.DEPENDENT_DOCS.value]] +
307
- [chunk_db_id for dep_id in idx_data[key][IndexerKeywords.DEPENDENT_DOCS.value]
308
- for chunk_db_id in idx_data[dep_id]['all_chunks']]
309
- ),
310
- log_msg="Verification of documents to index started"
311
- )
312
-
313
- def _reduce_code_duplicates(self, documents: Generator[Any, None, None], collection_suffix: str) -> List[Any]:
308
+ def _reduce_code_duplicates(self, documents: Generator[Any, None, None], index_name: str) -> List[Any]:
314
309
  return self._reduce_duplicates(
315
310
  documents,
316
- collection_suffix,
311
+ index_name,
317
312
  self._get_code_indexed_data,
318
313
  lambda doc: doc.metadata.get('filename'),
319
314
  lambda doc, idx: (
@@ -325,7 +320,7 @@ class VectorStoreWrapper(BaseToolApiWrapper):
325
320
  log_msg="Verification of code documents to index started"
326
321
  )
327
322
 
328
- def index_documents(self, documents: Generator[Document, None, None], collection_suffix: str, progress_step: int = 20, clean_index: bool = True, is_code: bool = False):
323
+ def index_documents(self, documents: Generator[Document, None, None], index_name: str, progress_step: int = 20, clean_index: bool = True, is_code: bool = True):
329
324
  """ Index documents in the vectorstore.
330
325
 
331
326
  Args:
@@ -336,13 +331,13 @@ class VectorStoreWrapper(BaseToolApiWrapper):
336
331
 
337
332
  from ..langchain.interfaces.llm_processor import add_documents
338
333
 
339
- self._log_tool_event(message=f"Starting the indexing... Parameters: {collection_suffix=}, {clean_index=}, {is_code}", tool_name="index_documents")
334
+ self._log_tool_event(message=f"Starting the indexing... Parameters: {index_name=}, {clean_index=}, {is_code}", tool_name="index_documents")
340
335
  # pre-process documents if needed (find duplicates, etc.)
341
336
  if clean_index:
342
337
  logger.info("Cleaning index before re-indexing all documents.")
343
338
  self._log_data("Cleaning index before re-indexing all documents. Previous index will be removed", tool_name="index_documents")
344
339
  try:
345
- self._clean_collection(collection_suffix)
340
+ self._clean_collection(index_name)
346
341
  self.vectoradapter.persist()
347
342
  self.vectoradapter.vacuum()
348
343
  self._log_data("Previous index has been removed",
@@ -356,8 +351,7 @@ class VectorStoreWrapper(BaseToolApiWrapper):
356
351
  message="Filter for duplicates",
357
352
  tool_name="index_documents")
358
353
  # remove duplicates based on metadata 'id' and 'updated_on' or 'commit_hash' fields
359
- documents = self._reduce_code_duplicates(documents, collection_suffix) if is_code \
360
- else self._reduce_non_code_duplicates(documents, collection_suffix)
354
+ documents = self._reduce_code_duplicates(documents, index_name)
361
355
  self._log_tool_event(
362
356
  message="All the duplicates were filtered out. Proceeding with indexing.",
363
357
  tool_name="index_documents")
@@ -385,13 +379,13 @@ class VectorStoreWrapper(BaseToolApiWrapper):
385
379
  self._log_tool_event(message=f"Documents for indexing were processed. Total documents: {len(documents)}",
386
380
  tool_name="index_documents")
387
381
 
388
- # if collection_suffix is provided, add it to metadata of each document
389
- if collection_suffix:
382
+ # if index_name is provided, add it to metadata of each document
383
+ if index_name:
390
384
  for doc in documents:
391
385
  if not doc.metadata.get('collection'):
392
- doc.metadata['collection'] = collection_suffix
386
+ doc.metadata['collection'] = index_name
393
387
  else:
394
- doc.metadata['collection'] += f";{collection_suffix}"
388
+ doc.metadata['collection'] += f";{index_name}"
395
389
 
396
390
  total_docs = len(documents)
397
391
  documents_count = 0
@@ -422,7 +416,8 @@ class VectorStoreWrapper(BaseToolApiWrapper):
422
416
  return {"status": "error", "message": f"Error: {format_exc()}"}
423
417
  if _documents:
424
418
  add_documents(vectorstore=self.vectorstore, documents=_documents)
425
- return {"status": "ok", "message": f"successfully indexed {documents_count} documents"}
419
+ return {"status": "ok", "message": f"successfully indexed {documents_count} documents" if documents_count > 0
420
+ else "No new documents to index."}
426
421
 
427
422
  def search_documents(self, query:str, doctype: str = 'code',
428
423
  filter:dict|str={}, cut_off: float=0.5,
@@ -542,11 +537,18 @@ class VectorStoreWrapper(BaseToolApiWrapper):
542
537
 
543
538
  # Initialize document map for tracking by ID
544
539
  doc_map = {
545
- f"{doc.metadata.get('id', f'idx_{i}')}_{doc.metadata['chunk_id']}"
546
- if 'chunk_id' in doc.metadata
547
- else doc.metadata.get('id', f"idx_{i}"): (doc, score)
540
+ (
541
+ f"{doc.metadata.get('id', f'idx_{i}')}_{doc.metadata['chunk_id']}"
542
+ if 'chunk_id' in doc.metadata
543
+ else doc.metadata.get('id', f"idx_{i}")
544
+ ): (doc, 1 - score)
548
545
  for i, (doc, score) in enumerate(vector_items)
549
546
  }
547
+
548
+ # Sort the items by the new score in descending order
549
+ doc_map = OrderedDict(
550
+ sorted(doc_map.items(), key=lambda x: x[1][1], reverse=True)
551
+ )
550
552
 
551
553
  # Process full-text search if configured
552
554
  if full_text_search and full_text_search.get('enabled') and full_text_search.get('fields'):
@@ -597,7 +599,7 @@ class VectorStoreWrapper(BaseToolApiWrapper):
597
599
  # Apply cutoff filter
598
600
  if cut_off:
599
601
  # Filter out items above the cutoff score (since the lower the score, the better)
600
- combined_items = [item for item in combined_items if abs(item[1]) <= cut_off]
602
+ combined_items = [item for item in combined_items if abs(item[1]) >= cut_off]
601
603
 
602
604
  # Sort by score and limit results
603
605
  # DISABLED: for chroma we want ascending order (lower score is better), for others descending
@@ -684,8 +686,10 @@ class VectorStoreWrapper(BaseToolApiWrapper):
684
686
  ]
685
687
  )
686
688
  ])
689
+ # Extract text content safely (handles both string and list content from thinking models)
690
+ search_query = extract_text_from_completion(result)
687
691
  search_results = self.search_documents(
688
- result.content, doctype, filter, cut_off, search_top,
692
+ search_query, doctype, filter, cut_off, search_top,
689
693
  full_text_search=full_text_search,
690
694
  reranking_config=reranking_config,
691
695
  extended_search=extended_search
@@ -714,7 +718,8 @@ class VectorStoreWrapper(BaseToolApiWrapper):
714
718
  ]
715
719
  )
716
720
  ])
717
- return result.content
721
+ # Extract text content safely (handles both string and list content from thinking models)
722
+ return extract_text_from_completion(result)
718
723
 
719
724
  def _log_data(self, message: str, tool_name: str = "index_data"):
720
725
  """Log data and dispatch custom event for indexing progress"""
@@ -758,4 +763,3 @@ class VectorStoreWrapper(BaseToolApiWrapper):
758
763
  "args_schema": StepBackSearchDocumentsModel
759
764
  }
760
765
  ]
761
-