alita-sdk 0.3.263__py3-none-any.whl → 0.3.499__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (248) hide show
  1. alita_sdk/cli/__init__.py +10 -0
  2. alita_sdk/cli/__main__.py +17 -0
  3. alita_sdk/cli/agent/__init__.py +5 -0
  4. alita_sdk/cli/agent/default.py +258 -0
  5. alita_sdk/cli/agent_executor.py +155 -0
  6. alita_sdk/cli/agent_loader.py +215 -0
  7. alita_sdk/cli/agent_ui.py +228 -0
  8. alita_sdk/cli/agents.py +3601 -0
  9. alita_sdk/cli/callbacks.py +647 -0
  10. alita_sdk/cli/cli.py +168 -0
  11. alita_sdk/cli/config.py +306 -0
  12. alita_sdk/cli/context/__init__.py +30 -0
  13. alita_sdk/cli/context/cleanup.py +198 -0
  14. alita_sdk/cli/context/manager.py +731 -0
  15. alita_sdk/cli/context/message.py +285 -0
  16. alita_sdk/cli/context/strategies.py +289 -0
  17. alita_sdk/cli/context/token_estimation.py +127 -0
  18. alita_sdk/cli/formatting.py +182 -0
  19. alita_sdk/cli/input_handler.py +419 -0
  20. alita_sdk/cli/inventory.py +1256 -0
  21. alita_sdk/cli/mcp_loader.py +315 -0
  22. alita_sdk/cli/toolkit.py +327 -0
  23. alita_sdk/cli/toolkit_loader.py +85 -0
  24. alita_sdk/cli/tools/__init__.py +43 -0
  25. alita_sdk/cli/tools/approval.py +224 -0
  26. alita_sdk/cli/tools/filesystem.py +1751 -0
  27. alita_sdk/cli/tools/planning.py +389 -0
  28. alita_sdk/cli/tools/terminal.py +414 -0
  29. alita_sdk/community/__init__.py +64 -8
  30. alita_sdk/community/inventory/__init__.py +224 -0
  31. alita_sdk/community/inventory/config.py +257 -0
  32. alita_sdk/community/inventory/enrichment.py +2137 -0
  33. alita_sdk/community/inventory/extractors.py +1469 -0
  34. alita_sdk/community/inventory/ingestion.py +3172 -0
  35. alita_sdk/community/inventory/knowledge_graph.py +1457 -0
  36. alita_sdk/community/inventory/parsers/__init__.py +218 -0
  37. alita_sdk/community/inventory/parsers/base.py +295 -0
  38. alita_sdk/community/inventory/parsers/csharp_parser.py +907 -0
  39. alita_sdk/community/inventory/parsers/go_parser.py +851 -0
  40. alita_sdk/community/inventory/parsers/html_parser.py +389 -0
  41. alita_sdk/community/inventory/parsers/java_parser.py +593 -0
  42. alita_sdk/community/inventory/parsers/javascript_parser.py +629 -0
  43. alita_sdk/community/inventory/parsers/kotlin_parser.py +768 -0
  44. alita_sdk/community/inventory/parsers/markdown_parser.py +362 -0
  45. alita_sdk/community/inventory/parsers/python_parser.py +604 -0
  46. alita_sdk/community/inventory/parsers/rust_parser.py +858 -0
  47. alita_sdk/community/inventory/parsers/swift_parser.py +832 -0
  48. alita_sdk/community/inventory/parsers/text_parser.py +322 -0
  49. alita_sdk/community/inventory/parsers/yaml_parser.py +370 -0
  50. alita_sdk/community/inventory/patterns/__init__.py +61 -0
  51. alita_sdk/community/inventory/patterns/ast_adapter.py +380 -0
  52. alita_sdk/community/inventory/patterns/loader.py +348 -0
  53. alita_sdk/community/inventory/patterns/registry.py +198 -0
  54. alita_sdk/community/inventory/presets.py +535 -0
  55. alita_sdk/community/inventory/retrieval.py +1403 -0
  56. alita_sdk/community/inventory/toolkit.py +173 -0
  57. alita_sdk/community/inventory/visualize.py +1370 -0
  58. alita_sdk/configurations/__init__.py +10 -0
  59. alita_sdk/configurations/ado.py +4 -2
  60. alita_sdk/configurations/azure_search.py +1 -1
  61. alita_sdk/configurations/bigquery.py +1 -1
  62. alita_sdk/configurations/bitbucket.py +94 -2
  63. alita_sdk/configurations/browser.py +18 -0
  64. alita_sdk/configurations/carrier.py +19 -0
  65. alita_sdk/configurations/confluence.py +96 -1
  66. alita_sdk/configurations/delta_lake.py +1 -1
  67. alita_sdk/configurations/figma.py +0 -5
  68. alita_sdk/configurations/github.py +65 -1
  69. alita_sdk/configurations/gitlab.py +79 -0
  70. alita_sdk/configurations/google_places.py +17 -0
  71. alita_sdk/configurations/jira.py +103 -0
  72. alita_sdk/configurations/postman.py +1 -1
  73. alita_sdk/configurations/qtest.py +1 -3
  74. alita_sdk/configurations/report_portal.py +19 -0
  75. alita_sdk/configurations/salesforce.py +19 -0
  76. alita_sdk/configurations/service_now.py +1 -12
  77. alita_sdk/configurations/sharepoint.py +19 -0
  78. alita_sdk/configurations/sonar.py +18 -0
  79. alita_sdk/configurations/sql.py +20 -0
  80. alita_sdk/configurations/testio.py +18 -0
  81. alita_sdk/configurations/testrail.py +88 -0
  82. alita_sdk/configurations/xray.py +94 -1
  83. alita_sdk/configurations/zephyr_enterprise.py +94 -1
  84. alita_sdk/configurations/zephyr_essential.py +95 -0
  85. alita_sdk/runtime/clients/artifact.py +12 -2
  86. alita_sdk/runtime/clients/client.py +235 -66
  87. alita_sdk/runtime/clients/mcp_discovery.py +342 -0
  88. alita_sdk/runtime/clients/mcp_manager.py +262 -0
  89. alita_sdk/runtime/clients/sandbox_client.py +373 -0
  90. alita_sdk/runtime/langchain/assistant.py +123 -17
  91. alita_sdk/runtime/langchain/constants.py +8 -1
  92. alita_sdk/runtime/langchain/document_loaders/AlitaDocxMammothLoader.py +315 -3
  93. alita_sdk/runtime/langchain/document_loaders/AlitaExcelLoader.py +209 -31
  94. alita_sdk/runtime/langchain/document_loaders/AlitaImageLoader.py +1 -1
  95. alita_sdk/runtime/langchain/document_loaders/AlitaJSONLoader.py +8 -2
  96. alita_sdk/runtime/langchain/document_loaders/AlitaMarkdownLoader.py +66 -0
  97. alita_sdk/runtime/langchain/document_loaders/AlitaPDFLoader.py +79 -10
  98. alita_sdk/runtime/langchain/document_loaders/AlitaPowerPointLoader.py +52 -15
  99. alita_sdk/runtime/langchain/document_loaders/AlitaPythonLoader.py +9 -0
  100. alita_sdk/runtime/langchain/document_loaders/AlitaTableLoader.py +1 -4
  101. alita_sdk/runtime/langchain/document_loaders/AlitaTextLoader.py +15 -2
  102. alita_sdk/runtime/langchain/document_loaders/ImageParser.py +30 -0
  103. alita_sdk/runtime/langchain/document_loaders/constants.py +187 -40
  104. alita_sdk/runtime/langchain/interfaces/llm_processor.py +4 -2
  105. alita_sdk/runtime/langchain/langraph_agent.py +406 -91
  106. alita_sdk/runtime/langchain/utils.py +51 -8
  107. alita_sdk/runtime/llms/preloaded.py +2 -6
  108. alita_sdk/runtime/models/mcp_models.py +61 -0
  109. alita_sdk/runtime/toolkits/__init__.py +26 -0
  110. alita_sdk/runtime/toolkits/application.py +9 -2
  111. alita_sdk/runtime/toolkits/artifact.py +19 -7
  112. alita_sdk/runtime/toolkits/datasource.py +13 -6
  113. alita_sdk/runtime/toolkits/mcp.py +780 -0
  114. alita_sdk/runtime/toolkits/planning.py +178 -0
  115. alita_sdk/runtime/toolkits/subgraph.py +11 -6
  116. alita_sdk/runtime/toolkits/tools.py +214 -60
  117. alita_sdk/runtime/toolkits/vectorstore.py +9 -4
  118. alita_sdk/runtime/tools/__init__.py +22 -0
  119. alita_sdk/runtime/tools/application.py +16 -4
  120. alita_sdk/runtime/tools/artifact.py +312 -19
  121. alita_sdk/runtime/tools/function.py +100 -4
  122. alita_sdk/runtime/tools/graph.py +81 -0
  123. alita_sdk/runtime/tools/image_generation.py +212 -0
  124. alita_sdk/runtime/tools/llm.py +539 -180
  125. alita_sdk/runtime/tools/mcp_inspect_tool.py +284 -0
  126. alita_sdk/runtime/tools/mcp_remote_tool.py +181 -0
  127. alita_sdk/runtime/tools/mcp_server_tool.py +3 -1
  128. alita_sdk/runtime/tools/planning/__init__.py +36 -0
  129. alita_sdk/runtime/tools/planning/models.py +246 -0
  130. alita_sdk/runtime/tools/planning/wrapper.py +607 -0
  131. alita_sdk/runtime/tools/router.py +2 -1
  132. alita_sdk/runtime/tools/sandbox.py +375 -0
  133. alita_sdk/runtime/tools/vectorstore.py +62 -63
  134. alita_sdk/runtime/tools/vectorstore_base.py +156 -85
  135. alita_sdk/runtime/utils/AlitaCallback.py +106 -20
  136. alita_sdk/runtime/utils/mcp_client.py +465 -0
  137. alita_sdk/runtime/utils/mcp_oauth.py +244 -0
  138. alita_sdk/runtime/utils/mcp_sse_client.py +405 -0
  139. alita_sdk/runtime/utils/mcp_tools_discovery.py +124 -0
  140. alita_sdk/runtime/utils/streamlit.py +41 -14
  141. alita_sdk/runtime/utils/toolkit_utils.py +28 -9
  142. alita_sdk/runtime/utils/utils.py +14 -0
  143. alita_sdk/tools/__init__.py +78 -35
  144. alita_sdk/tools/ado/__init__.py +0 -1
  145. alita_sdk/tools/ado/repos/__init__.py +10 -6
  146. alita_sdk/tools/ado/repos/repos_wrapper.py +12 -11
  147. alita_sdk/tools/ado/test_plan/__init__.py +10 -7
  148. alita_sdk/tools/ado/test_plan/test_plan_wrapper.py +56 -23
  149. alita_sdk/tools/ado/wiki/__init__.py +10 -11
  150. alita_sdk/tools/ado/wiki/ado_wrapper.py +114 -28
  151. alita_sdk/tools/ado/work_item/__init__.py +10 -11
  152. alita_sdk/tools/ado/work_item/ado_wrapper.py +63 -10
  153. alita_sdk/tools/advanced_jira_mining/__init__.py +10 -7
  154. alita_sdk/tools/aws/delta_lake/__init__.py +13 -11
  155. alita_sdk/tools/azure_ai/search/__init__.py +11 -7
  156. alita_sdk/tools/base_indexer_toolkit.py +392 -86
  157. alita_sdk/tools/bitbucket/__init__.py +18 -11
  158. alita_sdk/tools/bitbucket/api_wrapper.py +52 -9
  159. alita_sdk/tools/bitbucket/cloud_api_wrapper.py +5 -5
  160. alita_sdk/tools/browser/__init__.py +40 -16
  161. alita_sdk/tools/browser/crawler.py +3 -1
  162. alita_sdk/tools/browser/utils.py +15 -6
  163. alita_sdk/tools/carrier/__init__.py +17 -17
  164. alita_sdk/tools/carrier/backend_reports_tool.py +8 -4
  165. alita_sdk/tools/carrier/excel_reporter.py +8 -4
  166. alita_sdk/tools/chunkers/__init__.py +3 -1
  167. alita_sdk/tools/chunkers/code/codeparser.py +1 -1
  168. alita_sdk/tools/chunkers/sematic/json_chunker.py +1 -0
  169. alita_sdk/tools/chunkers/sematic/markdown_chunker.py +97 -6
  170. alita_sdk/tools/chunkers/sematic/proposal_chunker.py +1 -1
  171. alita_sdk/tools/chunkers/universal_chunker.py +270 -0
  172. alita_sdk/tools/cloud/aws/__init__.py +9 -6
  173. alita_sdk/tools/cloud/azure/__init__.py +9 -6
  174. alita_sdk/tools/cloud/gcp/__init__.py +9 -6
  175. alita_sdk/tools/cloud/k8s/__init__.py +9 -6
  176. alita_sdk/tools/code/linter/__init__.py +7 -7
  177. alita_sdk/tools/code/loaders/codesearcher.py +3 -2
  178. alita_sdk/tools/code/sonar/__init__.py +18 -12
  179. alita_sdk/tools/code_indexer_toolkit.py +199 -0
  180. alita_sdk/tools/confluence/__init__.py +14 -11
  181. alita_sdk/tools/confluence/api_wrapper.py +198 -58
  182. alita_sdk/tools/confluence/loader.py +10 -0
  183. alita_sdk/tools/custom_open_api/__init__.py +9 -4
  184. alita_sdk/tools/elastic/__init__.py +8 -7
  185. alita_sdk/tools/elitea_base.py +543 -64
  186. alita_sdk/tools/figma/__init__.py +10 -8
  187. alita_sdk/tools/figma/api_wrapper.py +352 -153
  188. alita_sdk/tools/github/__init__.py +13 -11
  189. alita_sdk/tools/github/api_wrapper.py +9 -26
  190. alita_sdk/tools/github/github_client.py +75 -12
  191. alita_sdk/tools/github/schemas.py +2 -1
  192. alita_sdk/tools/gitlab/__init__.py +11 -10
  193. alita_sdk/tools/gitlab/api_wrapper.py +135 -45
  194. alita_sdk/tools/gitlab_org/__init__.py +11 -9
  195. alita_sdk/tools/google/bigquery/__init__.py +12 -13
  196. alita_sdk/tools/google_places/__init__.py +18 -10
  197. alita_sdk/tools/jira/__init__.py +14 -8
  198. alita_sdk/tools/jira/api_wrapper.py +315 -168
  199. alita_sdk/tools/keycloak/__init__.py +8 -7
  200. alita_sdk/tools/localgit/local_git.py +56 -54
  201. alita_sdk/tools/memory/__init__.py +27 -11
  202. alita_sdk/tools/non_code_indexer_toolkit.py +7 -2
  203. alita_sdk/tools/ocr/__init__.py +8 -7
  204. alita_sdk/tools/openapi/__init__.py +10 -1
  205. alita_sdk/tools/pandas/__init__.py +8 -7
  206. alita_sdk/tools/pandas/api_wrapper.py +7 -25
  207. alita_sdk/tools/postman/__init__.py +8 -10
  208. alita_sdk/tools/postman/api_wrapper.py +19 -8
  209. alita_sdk/tools/postman/postman_analysis.py +8 -1
  210. alita_sdk/tools/pptx/__init__.py +8 -9
  211. alita_sdk/tools/qtest/__init__.py +19 -13
  212. alita_sdk/tools/qtest/api_wrapper.py +1784 -88
  213. alita_sdk/tools/rally/__init__.py +10 -9
  214. alita_sdk/tools/report_portal/__init__.py +20 -15
  215. alita_sdk/tools/salesforce/__init__.py +19 -15
  216. alita_sdk/tools/servicenow/__init__.py +14 -11
  217. alita_sdk/tools/sharepoint/__init__.py +14 -13
  218. alita_sdk/tools/sharepoint/api_wrapper.py +179 -39
  219. alita_sdk/tools/sharepoint/authorization_helper.py +191 -1
  220. alita_sdk/tools/sharepoint/utils.py +8 -2
  221. alita_sdk/tools/slack/__init__.py +10 -7
  222. alita_sdk/tools/sql/__init__.py +19 -18
  223. alita_sdk/tools/sql/api_wrapper.py +71 -23
  224. alita_sdk/tools/testio/__init__.py +18 -12
  225. alita_sdk/tools/testrail/__init__.py +10 -10
  226. alita_sdk/tools/testrail/api_wrapper.py +213 -45
  227. alita_sdk/tools/utils/__init__.py +28 -4
  228. alita_sdk/tools/utils/content_parser.py +181 -61
  229. alita_sdk/tools/utils/text_operations.py +254 -0
  230. alita_sdk/tools/vector_adapters/VectorStoreAdapter.py +83 -27
  231. alita_sdk/tools/xray/__init__.py +12 -7
  232. alita_sdk/tools/xray/api_wrapper.py +58 -113
  233. alita_sdk/tools/zephyr/__init__.py +9 -6
  234. alita_sdk/tools/zephyr_enterprise/__init__.py +13 -8
  235. alita_sdk/tools/zephyr_enterprise/api_wrapper.py +17 -7
  236. alita_sdk/tools/zephyr_essential/__init__.py +13 -9
  237. alita_sdk/tools/zephyr_essential/api_wrapper.py +289 -47
  238. alita_sdk/tools/zephyr_essential/client.py +6 -4
  239. alita_sdk/tools/zephyr_scale/__init__.py +10 -7
  240. alita_sdk/tools/zephyr_scale/api_wrapper.py +6 -2
  241. alita_sdk/tools/zephyr_squad/__init__.py +9 -6
  242. {alita_sdk-0.3.263.dist-info → alita_sdk-0.3.499.dist-info}/METADATA +180 -33
  243. alita_sdk-0.3.499.dist-info/RECORD +433 -0
  244. alita_sdk-0.3.499.dist-info/entry_points.txt +2 -0
  245. alita_sdk-0.3.263.dist-info/RECORD +0 -342
  246. {alita_sdk-0.3.263.dist-info → alita_sdk-0.3.499.dist-info}/WHEEL +0 -0
  247. {alita_sdk-0.3.263.dist-info → alita_sdk-0.3.499.dist-info}/licenses/LICENSE +0 -0
  248. {alita_sdk-0.3.263.dist-info → alita_sdk-0.3.499.dist-info}/top_level.txt +0 -0
@@ -1,16 +1,18 @@
1
1
  import json
2
- import math
2
+ from collections import OrderedDict
3
3
  from logging import getLogger
4
4
  from typing import Any, Optional, List, Dict, Generator
5
5
 
6
+ import math
6
7
  from langchain_core.documents import Document
7
8
  from langchain_core.messages import HumanMessage
9
+ from langchain_core.tools import ToolException
10
+ from psycopg.errors import DataException
8
11
  from pydantic import BaseModel, model_validator, Field
9
12
 
10
13
  from alita_sdk.tools.elitea_base import BaseToolApiWrapper
11
14
  from alita_sdk.tools.vector_adapters.VectorStoreAdapter import VectorStoreAdapterFactory
12
- from ..langchain.tools.vector import VectorAdapter
13
- from ..utils.logging import dispatch_custom_event
15
+ from ...runtime.utils.utils import IndexerKeywords
14
16
 
15
17
  logger = getLogger(__name__)
16
18
 
@@ -132,15 +134,12 @@ How did you come up with the answer?
132
134
 
133
135
  class VectorStoreWrapperBase(BaseToolApiWrapper):
134
136
  llm: Any
135
- embedding_model: str
136
- vectorstore_type: str
137
- vectorstore_params: dict
138
- max_docs_per_add: int = 100
139
- dataset: str = None
140
- embedding: Any = None
137
+ embedding_model: Optional[str] = None
138
+ vectorstore_type: Optional[str] = None
139
+ vectorstore_params: Optional[dict] = None
140
+ max_docs_per_add: int = 20
141
+ dataset: Optional[str] = None
141
142
  vectorstore: Any = None
142
- # Review usage of old adapter
143
- vectoradapter: Any = None
144
143
  pg_helper: Any = None
145
144
  embeddings: Any = None
146
145
  # New adapter for vector database operations
@@ -149,32 +148,52 @@ class VectorStoreWrapperBase(BaseToolApiWrapper):
149
148
  @model_validator(mode='before')
150
149
  @classmethod
151
150
  def validate_toolkit(cls, values):
152
- from ..langchain.interfaces.llm_processor import get_embeddings, get_vectorstore
151
+ from ..langchain.interfaces.llm_processor import get_vectorstore
153
152
  logger.debug(f"Validating toolkit: {values}")
154
- if not values.get('vectorstore_type'):
155
- raise ValueError("Vectorstore type is required.")
156
- if not values.get('embedding_model'):
157
- raise ValueError("Embedding model is required.")
158
- if not values.get('vectorstore_params'):
159
- raise ValueError("Vectorstore parameters are required.")
160
- values["dataset"] = values.get('vectorstore_params').get('collection_name')
161
- if not values["dataset"]:
162
- raise ValueError("Collection name is required.")
163
- if not values.get('embeddings'):
164
- values['embeddings'] = values['alita'].get_embeddings(values['embedding_model'])
165
- values['vectorstore'] = get_vectorstore(values['vectorstore_type'], values['vectorstore_params'], embedding_func=values['embeddings'])
166
- values['vectoradapter'] = VectorAdapter(
167
- vectorstore=values['vectorstore'],
168
- embeddings=values['embeddings'],
169
- quota_params=None,
170
- )
171
- # Initialize the new vector adapter
172
- values['vector_adapter'] = VectorStoreAdapterFactory.create_adapter(values['vectorstore_type'])
173
- logger.debug(f"Vectorstore wrapper initialized: {values}")
153
+ values["dataset"] = values.get('collection_name')
154
+
155
+ if values.get('alita') and values.get('embedding_model'):
156
+ values['embeddings'] = values.get('alita').get_embeddings(values.get('embedding_model'))
157
+
158
+ # Lazy initialization: vectorstore and vector_adapter are initialized on-demand
159
+ # This prevents errors when using non-index tools with broken/missing vector DB
174
160
  return values
175
161
 
162
+ def _ensure_vectorstore_initialized(self):
163
+ """Lazily initialize vectorstore and vector_adapter when needed for index operations."""
164
+ if self.vectorstore is None:
165
+ if not self.vectorstore_type or not self.vectorstore_params:
166
+ raise ToolException(
167
+ "Vector store is not configured. "
168
+ "Please ensure embedding_model and pgvector_configuration are provided."
169
+ )
170
+
171
+ from ..langchain.interfaces.llm_processor import get_vectorstore
172
+ try:
173
+ self.vectorstore = get_vectorstore(
174
+ self.vectorstore_type,
175
+ self.vectorstore_params,
176
+ embedding_func=self.embeddings
177
+ )
178
+ logger.debug(f"Vectorstore initialized: {self.vectorstore_type}")
179
+ except Exception as e:
180
+ raise ToolException(
181
+ f"Failed to initialize vector store: {str(e)}. "
182
+ "Check your vector database configuration and connection."
183
+ )
184
+
185
+ if self.vector_adapter is None:
186
+ try:
187
+ self.vector_adapter = VectorStoreAdapterFactory.create_adapter(self.vectorstore_type)
188
+ logger.debug(f"Vector adapter initialized: {self.vectorstore_type}")
189
+ except Exception as e:
190
+ raise ToolException(
191
+ f"Failed to initialize vector adapter: {str(e)}"
192
+ )
193
+
176
194
  def _init_pg_helper(self, language='english'):
177
195
  """Initialize PGVector helper if needed and not already initialized"""
196
+ self._ensure_vectorstore_initialized()
178
197
  if self.pg_helper is None and hasattr(self.vectorstore, 'connection_string') and hasattr(self.vectorstore, 'collection_name'):
179
198
  try:
180
199
  from .pgvector_search import PGVectorSearch
@@ -188,26 +207,85 @@ class VectorStoreWrapperBase(BaseToolApiWrapper):
188
207
  except Exception as e:
189
208
  logger.error(f"Failed to initialize PGVectorSearch: {str(e)}")
190
209
 
210
+ def _similarity_search_with_score(self, query: str, filter: dict = None, k: int = 10):
211
+ """
212
+ Perform similarity search with proper exception handling for DataException.
213
+
214
+ Args:
215
+ query: Search query string
216
+ filter: Optional filter dictionary
217
+ k: Number of results to return
218
+
219
+ Returns:
220
+ List of (Document, score) tuples
221
+
222
+ Raises:
223
+ ToolException: When DataException occurs or other search errors
224
+ """
225
+ self._ensure_vectorstore_initialized()
226
+ try:
227
+ return self.vectorstore.similarity_search_with_score(
228
+ query, filter=filter, k=k
229
+ )
230
+ except DataException as dimException:
231
+ exception_str = str(dimException)
232
+ if 'different vector dimensions' in exception_str:
233
+ logger.error(f"Data exception: {exception_str}")
234
+ raise ToolException(f"Global search cannot be completed since collections were indexed using "
235
+ f"different embedding models. Use search within a single collection."
236
+ f"\nDetails: {exception_str}")
237
+ raise ToolException(f"Data exception during search. Possibly invalid filter: {exception_str}")
238
+ except Exception as e:
239
+ logger.error(f"Error during similarity search: {str(e)}")
240
+ raise ToolException(f"Search failed: {str(e)}")
241
+
191
242
  def list_collections(self) -> List[str]:
192
243
  """List all collections in the vectorstore."""
244
+ self._ensure_vectorstore_initialized()
245
+ collections = self.vector_adapter.list_collections(self)
246
+ if not collections:
247
+ return "No indexed collections"
248
+ return collections
249
+
250
+ def get_index_meta(self, index_name: str):
251
+ self._ensure_vectorstore_initialized()
252
+ index_metas = self.vector_adapter.get_index_meta(self, index_name)
253
+ if len(index_metas) > 1:
254
+ raise RuntimeError(f"Multiple index_meta documents found: {index_metas}")
255
+ return index_metas[0] if index_metas else None
256
+
257
+ def get_indexed_count(self, index_name: str) -> int:
258
+ self._ensure_vectorstore_initialized()
259
+ from sqlalchemy.orm import Session
260
+ from sqlalchemy import func, or_
261
+
262
+ with Session(self.vectorstore.session_maker.bind) as session:
263
+ return session.query(
264
+ self.vectorstore.EmbeddingStore.id,
265
+ ).filter(
266
+ func.jsonb_extract_path_text(self.vectorstore.EmbeddingStore.cmetadata, 'collection') == index_name,
267
+ or_(
268
+ func.jsonb_extract_path_text(self.vectorstore.EmbeddingStore.cmetadata, 'type').is_(None),
269
+ func.jsonb_extract_path_text(self.vectorstore.EmbeddingStore.cmetadata, 'type') != IndexerKeywords.INDEX_META_TYPE.value
270
+ )
271
+ ).count()
193
272
 
194
- return self.vector_adapter.list_collections(self)
195
-
196
- def _clean_collection(self, collection_suffix: str = ''):
273
+ def _clean_collection(self, index_name: str = '', including_index_meta: bool = False):
197
274
  """
198
275
  Clean the vectorstore collection by deleting all indexed data.
199
276
  """
200
- self._log_data(
277
+ self._ensure_vectorstore_initialized()
278
+ self._log_tool_event(
201
279
  f"Cleaning collection '{self.dataset}'",
202
280
  tool_name="_clean_collection"
203
281
  )
204
- self.vector_adapter.clean_collection(self, collection_suffix)
205
- self._log_data(
282
+ self.vector_adapter.clean_collection(self, index_name, including_index_meta)
283
+ self._log_tool_event(
206
284
  f"Collection '{self.dataset}' has been cleaned. ",
207
285
  tool_name="_clean_collection"
208
286
  )
209
287
 
210
- def index_documents(self, documents: Generator[Document, None, None], collection_suffix: str, progress_step: int = 20, clean_index: bool = True):
288
+ def index_documents(self, documents: Generator[Document, None, None], index_name: str, progress_step: int = 20, clean_index: bool = True):
211
289
  """ Index documents in the vectorstore.
212
290
 
213
291
  Args:
@@ -215,24 +293,23 @@ class VectorStoreWrapperBase(BaseToolApiWrapper):
215
293
  progress_step (int): Step for progress reporting, default is 20.
216
294
  clean_index (bool): If True, clean the index before re-indexing all documents.
217
295
  """
296
+ self._ensure_vectorstore_initialized()
218
297
  if clean_index:
219
- self._clean_index(collection_suffix)
298
+ self._clean_index(index_name)
220
299
 
221
- return self._save_index(list(documents), collection_suffix, progress_step)
300
+ return self._save_index(list(documents), index_name, progress_step)
222
301
 
223
- def _clean_index(self, collection_suffix: str):
302
+ def _clean_index(self, index_name: str):
224
303
  logger.info("Cleaning index before re-indexing all documents.")
225
- self._log_data("Cleaning index before re-indexing all documents. Previous index will be removed", tool_name="index_documents")
304
+ self._log_tool_event("Cleaning index before re-indexing all documents. Previous index will be removed", tool_name="index_documents")
226
305
  try:
227
- self._clean_collection(collection_suffix)
228
- self.vectoradapter.persist()
229
- self.vectoradapter.vacuum()
230
- self._log_data("Previous index has been removed",
306
+ self._clean_collection(index_name, including_index_meta=False)
307
+ self._log_tool_event("Previous index has been removed",
231
308
  tool_name="index_documents")
232
309
  except Exception as e:
233
310
  logger.warning(f"Failed to clean index: {str(e)}. Continuing with re-indexing.")
234
311
 
235
- def _save_index(self, documents: list[Document], collection_suffix: Optional[str] = None, progress_step: int = 20):
312
+ def _save_index(self, documents: list[Document], index_name: Optional[str] = None, progress_step: int = 20):
236
313
  from ..langchain.interfaces.llm_processor import add_documents
237
314
  #
238
315
  for doc in documents:
@@ -240,15 +317,14 @@ class VectorStoreWrapperBase(BaseToolApiWrapper):
240
317
  logger.warning(f"Document is missing required metadata field 'id' or 'updated_on': {doc.metadata}")
241
318
 
242
319
  logger.debug(f"Indexing documents: {documents}")
243
- logger.debug(self.vectoradapter)
244
320
 
245
- # if collection_suffix is provided, add it to metadata of each document
246
- if collection_suffix:
321
+ # if index_name is provided, add it to metadata of each document
322
+ if index_name:
247
323
  for doc in documents:
248
324
  if not doc.metadata.get('collection'):
249
- doc.metadata['collection'] = collection_suffix
325
+ doc.metadata['collection'] = index_name
250
326
  else:
251
- doc.metadata['collection'] += f";{collection_suffix}"
327
+ doc.metadata['collection'] += f";{index_name}"
252
328
 
253
329
  total_docs = len(documents)
254
330
  documents_count = 0
@@ -258,6 +334,10 @@ class VectorStoreWrapperBase(BaseToolApiWrapper):
258
334
  progress_step = 20 if progress_step not in range(0, 100) else progress_step
259
335
  next_progress_point = progress_step
260
336
  for document in documents:
337
+ if not document.page_content:
338
+ # To avoid case when all documents have empty content
339
+ # See llm_processor.add_documents which exclude metadata of docs with empty content
340
+ continue
261
341
  documents_count += 1
262
342
  # logger.debug(f"Indexing document: {document}")
263
343
  try:
@@ -270,7 +350,7 @@ class VectorStoreWrapperBase(BaseToolApiWrapper):
270
350
  if percent >= next_progress_point:
271
351
  msg = f"Indexing progress: {percent}%. Processed {documents_count} of {total_docs} documents."
272
352
  logger.debug(msg)
273
- self._log_data(msg)
353
+ self._log_tool_event(msg)
274
354
  next_progress_point += progress_step
275
355
  except Exception:
276
356
  from traceback import format_exc
@@ -278,7 +358,8 @@ class VectorStoreWrapperBase(BaseToolApiWrapper):
278
358
  return {"status": "error", "message": f"Error: {format_exc()}"}
279
359
  if _documents:
280
360
  add_documents(vectorstore=self.vectorstore, documents=_documents)
281
- return {"status": "ok", "message": f"successfully indexed {documents_count} documents"}
361
+ return {"status": "ok", "message": f"successfully indexed {documents_count} documents" if documents_count > 0
362
+ else "no documents to index"}
282
363
 
283
364
  def search_documents(self, query:str, doctype: str = 'code',
284
365
  filter:dict|str={}, cut_off: float=0.5,
@@ -312,7 +393,7 @@ class VectorStoreWrapperBase(BaseToolApiWrapper):
312
393
  }
313
394
 
314
395
  try:
315
- document_items = self.vectorstore.similarity_search_with_score(
396
+ document_items = self._similarity_search_with_score(
316
397
  query, filter=document_filter, k=search_top
317
398
  )
318
399
  # Add document results to unique docs
@@ -345,18 +426,16 @@ class VectorStoreWrapperBase(BaseToolApiWrapper):
345
426
  }
346
427
 
347
428
  try:
348
- chunk_items = self.vectorstore.similarity_search_with_score(
429
+ chunk_items = self._similarity_search_with_score(
349
430
  query, filter=chunk_filter, k=search_top
350
431
  )
351
-
352
- logger.debug(f"Chunk items for {chunk_type}: {chunk_items[0]}")
353
-
432
+
354
433
  for doc, score in chunk_items:
355
434
  # Create unique identifier for document
356
435
  source = doc.metadata.get('source')
357
436
  chunk_id = doc.metadata.get('chunk_id')
358
437
  doc_id = f"{source}_{chunk_id}" if source and chunk_id else str(doc.metadata.get('id', id(doc)))
359
-
438
+
360
439
  # Store document and its score
361
440
  if doc_id not in unique_docs:
362
441
  unique_docs[doc_id] = doc
@@ -376,9 +455,9 @@ class VectorStoreWrapperBase(BaseToolApiWrapper):
376
455
  doc_filter = {
377
456
  "$and": doc_filter_parts
378
457
  }
379
-
458
+
380
459
  try:
381
- fetch_items = self.vectorstore.similarity_search_with_score(
460
+ fetch_items = self._similarity_search_with_score(
382
461
  query, filter=doc_filter, k=1
383
462
  )
384
463
  if fetch_items:
@@ -392,18 +471,25 @@ class VectorStoreWrapperBase(BaseToolApiWrapper):
392
471
  else:
393
472
  # Default search behavior (unchanged)
394
473
  max_search_results = 30 if search_top * 3 > 30 else search_top * 3
395
- vector_items = self.vectorstore.similarity_search_with_score(
474
+ vector_items = self._similarity_search_with_score(
396
475
  query, filter=filter, k=max_search_results
397
476
  )
398
477
 
399
478
  # Initialize document map for tracking by ID
400
479
  doc_map = {
401
- f"{doc.metadata.get('id', f'idx_{i}')}_{doc.metadata['chunk_id']}"
402
- if 'chunk_id' in doc.metadata
403
- else doc.metadata.get('id', f"idx_{i}"): (doc, score)
480
+ (
481
+ f"{doc.metadata.get('id', f'idx_{i}')}_{doc.metadata['chunk_id']}"
482
+ if 'chunk_id' in doc.metadata
483
+ else doc.metadata.get('id', f"idx_{i}")
484
+ ): (doc, 1 - score)
404
485
  for i, (doc, score) in enumerate(vector_items)
405
486
  }
406
-
487
+
488
+ # Sort the items by the new score in descending order
489
+ doc_map = OrderedDict(
490
+ sorted(doc_map.items(), key=lambda x: x[1][1], reverse=True)
491
+ )
492
+
407
493
  # Process full-text search if configured
408
494
  if full_text_search and full_text_search.get('enabled') and full_text_search.get('fields'):
409
495
  language = full_text_search.get('language', 'english')
@@ -416,7 +502,7 @@ class VectorStoreWrapperBase(BaseToolApiWrapper):
416
502
  for field_name in full_text_search.get('fields', []):
417
503
  try:
418
504
  text_results = self.pg_helper.full_text_search(field_name, query)
419
-
505
+
420
506
  # Combine text search results with vector results
421
507
  for result in text_results:
422
508
  doc_id = result['id']
@@ -452,7 +538,7 @@ class VectorStoreWrapperBase(BaseToolApiWrapper):
452
538
 
453
539
  # Apply cutoff filter
454
540
  if cut_off:
455
- combined_items = [item for item in combined_items if abs(item[1]) <= cut_off]
541
+ combined_items = [item for item in combined_items if abs(item[1]) >= cut_off]
456
542
 
457
543
  # Sort by score and limit results
458
544
  # DISABLED: for chroma we want ascending order (lower score is better), for others descending
@@ -571,21 +657,6 @@ class VectorStoreWrapperBase(BaseToolApiWrapper):
571
657
  ])
572
658
  return result.content
573
659
 
574
- def _log_data(self, message: str, tool_name: str = "index_data"):
575
- """Log data and dispatch custom event for indexing progress"""
576
-
577
- try:
578
- dispatch_custom_event(
579
- name="thinking_step",
580
- data={
581
- "message": message,
582
- "tool_name": tool_name,
583
- "toolkit": "vectorstore",
584
- },
585
- )
586
- except Exception as e:
587
- logger.warning(f"Failed to dispatch progress event: {str(e)}")
588
-
589
660
  def get_available_tools(self):
590
661
  return [
591
662
  {
@@ -23,9 +23,45 @@ class AlitaStreamlitCallback(BaseCallbackHandler):
23
23
  self.tokens_out = 0
24
24
  self.pending_llm_requests = defaultdict(int)
25
25
  self.current_model_name = 'gpt-4'
26
+ self._event_queue = [] # Queue for events when context is unavailable
26
27
  #
27
28
  super().__init__()
28
29
 
30
+ def _has_streamlit_context(self) -> bool:
31
+ """Check if Streamlit context is available in the current thread."""
32
+ try:
33
+ # Try to import streamlit runtime context checker
34
+ from streamlit.runtime.scriptrunner import get_script_run_ctx
35
+ ctx = get_script_run_ctx()
36
+ return ctx is not None
37
+ except (ImportError, Exception) as e:
38
+ if self.debug:
39
+ log.debug(f"Streamlit context check failed: {e}")
40
+ return False
41
+
42
+ def _safe_streamlit_call(self, func, *args, **kwargs):
43
+ """Safely execute a Streamlit UI operation, handling missing context gracefully."""
44
+ if not self._has_streamlit_context():
45
+ func_name = getattr(func, '__name__', str(func))
46
+ if self.debug:
47
+ log.warning(f"Streamlit context not available for {func_name}, queueing event")
48
+ # Store the event for potential replay when context is available
49
+ self._event_queue.append({
50
+ 'func': func_name,
51
+ 'args': args,
52
+ 'kwargs': kwargs,
53
+ 'timestamp': datetime.now(tz=timezone.utc)
54
+ })
55
+ return None
56
+
57
+ try:
58
+ return func(*args, **kwargs)
59
+ except Exception as e:
60
+ func_name = getattr(func, '__name__', str(func))
61
+ # Handle any Streamlit-specific exceptions gracefully
62
+ log.warning(f"Streamlit operation {func_name} failed: {e}")
63
+ return None
64
+
29
65
  #
30
66
  # Chain
31
67
  #
@@ -76,10 +112,14 @@ class AlitaStreamlitCallback(BaseCallbackHandler):
76
112
  json.dumps(payload, ensure_ascii=False, default=lambda o: str(o))
77
113
  )
78
114
 
79
- self.callback_state[str(run_id)] = self.st.status(
80
- f"Running {payload.get('tool_name')}...", expanded=True
115
+ status_widget = self._safe_streamlit_call(
116
+ self.st.status,
117
+ f"Running {payload.get('tool_name')}...",
118
+ expanded=True
81
119
  )
82
- self.callback_state[str(run_id)].write(f"Tool inputs: {payload}")
120
+ if status_widget:
121
+ self.callback_state[str(run_id)] = status_widget
122
+ self._safe_streamlit_call(status_widget.write, f"Tool inputs: {payload}")
83
123
 
84
124
  def on_tool_start(self, *args, run_id: UUID, **kwargs):
85
125
  """ Callback """
@@ -95,8 +135,15 @@ class AlitaStreamlitCallback(BaseCallbackHandler):
95
135
  "tool_inputs": kwargs.get('inputs')
96
136
  }
97
137
  payload = json.loads(json.dumps(payload, ensure_ascii=False, default=lambda o: str(o)))
98
- self.callback_state[tool_run_id] = self.st.status(f"Running {tool_name}...", expanded=True)
99
- self.callback_state[tool_run_id].write(f"Tool inputs: {kwargs.get('inputs')}")
138
+
139
+ status_widget = self._safe_streamlit_call(
140
+ self.st.status,
141
+ f"Running {tool_name}...",
142
+ expanded=True
143
+ )
144
+ if status_widget:
145
+ self.callback_state[tool_run_id] = status_widget
146
+ self._safe_streamlit_call(status_widget.write, f"Tool inputs: {kwargs.get('inputs')}")
100
147
 
101
148
  def on_tool_end(self, *args, run_id: UUID, **kwargs):
102
149
  """ Callback """
@@ -104,11 +151,16 @@ class AlitaStreamlitCallback(BaseCallbackHandler):
104
151
  log.info("on_tool_end(%s, %s)", args, kwargs)
105
152
  tool_run_id = str(run_id)
106
153
  tool_output = args[0]
107
- if self.callback_state[tool_run_id]:
108
- self.callback_state[tool_run_id].write(f"Tool output: {tool_output}")
109
- self.callback_state[tool_run_id].update(label=f"Completed {kwargs.get('name')}", state="complete", expanded=False)
154
+ if self.callback_state.get(tool_run_id):
155
+ status_widget = self.callback_state[tool_run_id]
156
+ self._safe_streamlit_call(status_widget.write, f"Tool output: {tool_output}")
157
+ self._safe_streamlit_call(
158
+ status_widget.update,
159
+ label=f"Completed {kwargs.get('name')}",
160
+ state="complete",
161
+ expanded=False
162
+ )
110
163
  self.callback_state.pop(tool_run_id, None)
111
- del self.callback_state[run_id]
112
164
 
113
165
  def on_tool_error(self, *args, run_id: UUID, **kwargs):
114
166
  """ Callback """
@@ -116,9 +168,19 @@ class AlitaStreamlitCallback(BaseCallbackHandler):
116
168
  log.info("on_tool_error(%s, %s)", args, kwargs)
117
169
  tool_run_id = str(run_id)
118
170
  tool_exception = args[0]
119
- self.callback_state[tool_run_id].write(f"{traceback.format_exception(tool_exception)}")
120
- self.callback_state[tool_run_id].update(label=f"Error {kwargs.get('name')}", state="error", expanded=False)
121
- self.callback_state.pop(tool_run_id, None)
171
+ if self.callback_state.get(tool_run_id):
172
+ status_widget = self.callback_state[tool_run_id]
173
+ self._safe_streamlit_call(
174
+ status_widget.write,
175
+ f"{traceback.format_exception(tool_exception)}"
176
+ )
177
+ self._safe_streamlit_call(
178
+ status_widget.update,
179
+ label=f"Error {kwargs.get('name')}",
180
+ state="error",
181
+ expanded=False
182
+ )
183
+ self.callback_state.pop(tool_run_id, None)
122
184
 
123
185
  #
124
186
  # Agent
@@ -156,8 +218,14 @@ class AlitaStreamlitCallback(BaseCallbackHandler):
156
218
  self.current_model_name = metadata.get('ls_model_name', self.current_model_name)
157
219
  llm_run_id = str(run_id)
158
220
 
159
- self.callback_state[llm_run_id] = self.st.status(f"Running LLM ...", expanded=True)
160
- self.callback_state[llm_run_id].write(f"LLM inputs: {messages}")
221
+ status_widget = self._safe_streamlit_call(
222
+ self.st.status,
223
+ f"Running LLM ...",
224
+ expanded=True
225
+ )
226
+ if status_widget:
227
+ self.callback_state[llm_run_id] = status_widget
228
+ self._safe_streamlit_call(status_widget.write, f"LLM inputs: {messages}")
161
229
 
162
230
  def on_llm_start(self, *args, **kwargs):
163
231
  """ Callback """
@@ -178,16 +246,27 @@ class AlitaStreamlitCallback(BaseCallbackHandler):
178
246
  content = None
179
247
  if chunk:
180
248
  content = chunk.text
181
- self.callback_state[str(run_id)].write(content)
249
+
250
+ llm_run_id = str(run_id)
251
+ if self.callback_state.get(llm_run_id):
252
+ status_widget = self.callback_state[llm_run_id]
253
+ self._safe_streamlit_call(status_widget.write, content)
182
254
 
183
255
  def on_llm_error(self, *args, run_id: UUID, **kwargs):
184
256
  """ Callback """
185
257
  if self.debug:
186
258
  log.error("on_llm_error(%s, %s)", args, kwargs)
187
259
  llm_run_id = str(run_id)
188
- self.callback_state[llm_run_id].write(f"on_llm_error({args}, {kwargs})")
189
- self.callback_state[llm_run_id].update(label=f"Error {kwargs.get('name')}", state="error", expanded=False)
190
- self.callback_state.pop(llm_run_id, None)
260
+ if self.callback_state.get(llm_run_id):
261
+ status_widget = self.callback_state[llm_run_id]
262
+ self._safe_streamlit_call(status_widget.write, f"on_llm_error({args}, {kwargs})")
263
+ self._safe_streamlit_call(
264
+ status_widget.update,
265
+ label=f"Error {kwargs.get('name')}",
266
+ state="error",
267
+ expanded=False
268
+ )
269
+ self.callback_state.pop(llm_run_id, None)
191
270
  #
192
271
  # exception = args[0]
193
272
  # FIXME: should we emit an error here too?
@@ -205,5 +284,12 @@ class AlitaStreamlitCallback(BaseCallbackHandler):
205
284
  if self.debug:
206
285
  log.debug("on_llm_end(%s, %s)", response, kwargs)
207
286
  llm_run_id = str(run_id)
208
- self.callback_state[llm_run_id].update(label=f"Completed LLM call", state="complete", expanded=False)
209
- self.callback_state.pop(llm_run_id, None)
287
+ if self.callback_state.get(llm_run_id):
288
+ status_widget = self.callback_state[llm_run_id]
289
+ self._safe_streamlit_call(
290
+ status_widget.update,
291
+ label=f"Completed LLM call",
292
+ state="complete",
293
+ expanded=False
294
+ )
295
+ self.callback_state.pop(llm_run_id, None)