alita-sdk 0.3.351__py3-none-any.whl → 0.3.499__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- alita_sdk/cli/__init__.py +10 -0
- alita_sdk/cli/__main__.py +17 -0
- alita_sdk/cli/agent/__init__.py +5 -0
- alita_sdk/cli/agent/default.py +258 -0
- alita_sdk/cli/agent_executor.py +155 -0
- alita_sdk/cli/agent_loader.py +215 -0
- alita_sdk/cli/agent_ui.py +228 -0
- alita_sdk/cli/agents.py +3601 -0
- alita_sdk/cli/callbacks.py +647 -0
- alita_sdk/cli/cli.py +168 -0
- alita_sdk/cli/config.py +306 -0
- alita_sdk/cli/context/__init__.py +30 -0
- alita_sdk/cli/context/cleanup.py +198 -0
- alita_sdk/cli/context/manager.py +731 -0
- alita_sdk/cli/context/message.py +285 -0
- alita_sdk/cli/context/strategies.py +289 -0
- alita_sdk/cli/context/token_estimation.py +127 -0
- alita_sdk/cli/formatting.py +182 -0
- alita_sdk/cli/input_handler.py +419 -0
- alita_sdk/cli/inventory.py +1256 -0
- alita_sdk/cli/mcp_loader.py +315 -0
- alita_sdk/cli/toolkit.py +327 -0
- alita_sdk/cli/toolkit_loader.py +85 -0
- alita_sdk/cli/tools/__init__.py +43 -0
- alita_sdk/cli/tools/approval.py +224 -0
- alita_sdk/cli/tools/filesystem.py +1751 -0
- alita_sdk/cli/tools/planning.py +389 -0
- alita_sdk/cli/tools/terminal.py +414 -0
- alita_sdk/community/__init__.py +64 -8
- alita_sdk/community/inventory/__init__.py +224 -0
- alita_sdk/community/inventory/config.py +257 -0
- alita_sdk/community/inventory/enrichment.py +2137 -0
- alita_sdk/community/inventory/extractors.py +1469 -0
- alita_sdk/community/inventory/ingestion.py +3172 -0
- alita_sdk/community/inventory/knowledge_graph.py +1457 -0
- alita_sdk/community/inventory/parsers/__init__.py +218 -0
- alita_sdk/community/inventory/parsers/base.py +295 -0
- alita_sdk/community/inventory/parsers/csharp_parser.py +907 -0
- alita_sdk/community/inventory/parsers/go_parser.py +851 -0
- alita_sdk/community/inventory/parsers/html_parser.py +389 -0
- alita_sdk/community/inventory/parsers/java_parser.py +593 -0
- alita_sdk/community/inventory/parsers/javascript_parser.py +629 -0
- alita_sdk/community/inventory/parsers/kotlin_parser.py +768 -0
- alita_sdk/community/inventory/parsers/markdown_parser.py +362 -0
- alita_sdk/community/inventory/parsers/python_parser.py +604 -0
- alita_sdk/community/inventory/parsers/rust_parser.py +858 -0
- alita_sdk/community/inventory/parsers/swift_parser.py +832 -0
- alita_sdk/community/inventory/parsers/text_parser.py +322 -0
- alita_sdk/community/inventory/parsers/yaml_parser.py +370 -0
- alita_sdk/community/inventory/patterns/__init__.py +61 -0
- alita_sdk/community/inventory/patterns/ast_adapter.py +380 -0
- alita_sdk/community/inventory/patterns/loader.py +348 -0
- alita_sdk/community/inventory/patterns/registry.py +198 -0
- alita_sdk/community/inventory/presets.py +535 -0
- alita_sdk/community/inventory/retrieval.py +1403 -0
- alita_sdk/community/inventory/toolkit.py +173 -0
- alita_sdk/community/inventory/visualize.py +1370 -0
- alita_sdk/configurations/bitbucket.py +94 -2
- alita_sdk/configurations/confluence.py +96 -1
- alita_sdk/configurations/gitlab.py +79 -0
- alita_sdk/configurations/jira.py +103 -0
- alita_sdk/configurations/testrail.py +88 -0
- alita_sdk/configurations/xray.py +93 -0
- alita_sdk/configurations/zephyr_enterprise.py +93 -0
- alita_sdk/configurations/zephyr_essential.py +75 -0
- alita_sdk/runtime/clients/artifact.py +1 -1
- alita_sdk/runtime/clients/client.py +214 -42
- alita_sdk/runtime/clients/mcp_discovery.py +342 -0
- alita_sdk/runtime/clients/mcp_manager.py +262 -0
- alita_sdk/runtime/clients/sandbox_client.py +373 -0
- alita_sdk/runtime/langchain/assistant.py +118 -30
- alita_sdk/runtime/langchain/constants.py +8 -1
- alita_sdk/runtime/langchain/document_loaders/AlitaDocxMammothLoader.py +315 -3
- alita_sdk/runtime/langchain/document_loaders/AlitaExcelLoader.py +103 -60
- alita_sdk/runtime/langchain/document_loaders/AlitaJSONLoader.py +4 -1
- alita_sdk/runtime/langchain/document_loaders/AlitaPowerPointLoader.py +41 -12
- alita_sdk/runtime/langchain/document_loaders/AlitaTableLoader.py +1 -1
- alita_sdk/runtime/langchain/document_loaders/constants.py +116 -99
- alita_sdk/runtime/langchain/interfaces/llm_processor.py +2 -2
- alita_sdk/runtime/langchain/langraph_agent.py +307 -71
- alita_sdk/runtime/langchain/utils.py +48 -8
- alita_sdk/runtime/llms/preloaded.py +2 -6
- alita_sdk/runtime/models/mcp_models.py +61 -0
- alita_sdk/runtime/toolkits/__init__.py +26 -0
- alita_sdk/runtime/toolkits/application.py +9 -2
- alita_sdk/runtime/toolkits/artifact.py +18 -6
- alita_sdk/runtime/toolkits/datasource.py +13 -6
- alita_sdk/runtime/toolkits/mcp.py +780 -0
- alita_sdk/runtime/toolkits/planning.py +178 -0
- alita_sdk/runtime/toolkits/tools.py +205 -55
- alita_sdk/runtime/toolkits/vectorstore.py +9 -4
- alita_sdk/runtime/tools/__init__.py +11 -3
- alita_sdk/runtime/tools/application.py +7 -0
- alita_sdk/runtime/tools/artifact.py +225 -12
- alita_sdk/runtime/tools/function.py +95 -5
- alita_sdk/runtime/tools/graph.py +10 -4
- alita_sdk/runtime/tools/image_generation.py +212 -0
- alita_sdk/runtime/tools/llm.py +494 -102
- alita_sdk/runtime/tools/mcp_inspect_tool.py +284 -0
- alita_sdk/runtime/tools/mcp_remote_tool.py +181 -0
- alita_sdk/runtime/tools/mcp_server_tool.py +4 -4
- alita_sdk/runtime/tools/planning/__init__.py +36 -0
- alita_sdk/runtime/tools/planning/models.py +246 -0
- alita_sdk/runtime/tools/planning/wrapper.py +607 -0
- alita_sdk/runtime/tools/router.py +2 -1
- alita_sdk/runtime/tools/sandbox.py +180 -79
- alita_sdk/runtime/tools/vectorstore.py +22 -21
- alita_sdk/runtime/tools/vectorstore_base.py +125 -52
- alita_sdk/runtime/utils/AlitaCallback.py +106 -20
- alita_sdk/runtime/utils/mcp_client.py +465 -0
- alita_sdk/runtime/utils/mcp_oauth.py +244 -0
- alita_sdk/runtime/utils/mcp_sse_client.py +405 -0
- alita_sdk/runtime/utils/mcp_tools_discovery.py +124 -0
- alita_sdk/runtime/utils/streamlit.py +40 -13
- alita_sdk/runtime/utils/toolkit_utils.py +28 -9
- alita_sdk/runtime/utils/utils.py +12 -0
- alita_sdk/tools/__init__.py +77 -33
- alita_sdk/tools/ado/repos/__init__.py +7 -6
- alita_sdk/tools/ado/repos/repos_wrapper.py +11 -11
- alita_sdk/tools/ado/test_plan/__init__.py +7 -7
- alita_sdk/tools/ado/wiki/__init__.py +7 -11
- alita_sdk/tools/ado/wiki/ado_wrapper.py +89 -15
- alita_sdk/tools/ado/work_item/__init__.py +7 -11
- alita_sdk/tools/ado/work_item/ado_wrapper.py +17 -8
- alita_sdk/tools/advanced_jira_mining/__init__.py +8 -7
- alita_sdk/tools/aws/delta_lake/__init__.py +11 -9
- alita_sdk/tools/azure_ai/search/__init__.py +7 -6
- alita_sdk/tools/base_indexer_toolkit.py +345 -70
- alita_sdk/tools/bitbucket/__init__.py +9 -8
- alita_sdk/tools/bitbucket/api_wrapper.py +50 -6
- alita_sdk/tools/browser/__init__.py +4 -4
- alita_sdk/tools/carrier/__init__.py +4 -6
- alita_sdk/tools/chunkers/__init__.py +3 -1
- alita_sdk/tools/chunkers/sematic/json_chunker.py +1 -0
- alita_sdk/tools/chunkers/sematic/markdown_chunker.py +97 -6
- alita_sdk/tools/chunkers/sematic/proposal_chunker.py +1 -1
- alita_sdk/tools/chunkers/universal_chunker.py +270 -0
- alita_sdk/tools/cloud/aws/__init__.py +7 -6
- alita_sdk/tools/cloud/azure/__init__.py +7 -6
- alita_sdk/tools/cloud/gcp/__init__.py +7 -6
- alita_sdk/tools/cloud/k8s/__init__.py +7 -6
- alita_sdk/tools/code/linter/__init__.py +7 -7
- alita_sdk/tools/code/loaders/codesearcher.py +3 -2
- alita_sdk/tools/code/sonar/__init__.py +8 -7
- alita_sdk/tools/code_indexer_toolkit.py +199 -0
- alita_sdk/tools/confluence/__init__.py +9 -8
- alita_sdk/tools/confluence/api_wrapper.py +171 -75
- alita_sdk/tools/confluence/loader.py +10 -0
- alita_sdk/tools/custom_open_api/__init__.py +9 -4
- alita_sdk/tools/elastic/__init__.py +8 -7
- alita_sdk/tools/elitea_base.py +492 -52
- alita_sdk/tools/figma/__init__.py +7 -7
- alita_sdk/tools/figma/api_wrapper.py +2 -1
- alita_sdk/tools/github/__init__.py +9 -9
- alita_sdk/tools/github/api_wrapper.py +9 -26
- alita_sdk/tools/github/github_client.py +62 -2
- alita_sdk/tools/gitlab/__init__.py +8 -8
- alita_sdk/tools/gitlab/api_wrapper.py +135 -33
- alita_sdk/tools/gitlab_org/__init__.py +7 -8
- alita_sdk/tools/google/bigquery/__init__.py +11 -12
- alita_sdk/tools/google_places/__init__.py +8 -7
- alita_sdk/tools/jira/__init__.py +9 -7
- alita_sdk/tools/jira/api_wrapper.py +100 -52
- alita_sdk/tools/keycloak/__init__.py +8 -7
- alita_sdk/tools/localgit/local_git.py +56 -54
- alita_sdk/tools/memory/__init__.py +1 -1
- alita_sdk/tools/non_code_indexer_toolkit.py +3 -2
- alita_sdk/tools/ocr/__init__.py +8 -7
- alita_sdk/tools/openapi/__init__.py +10 -1
- alita_sdk/tools/pandas/__init__.py +8 -7
- alita_sdk/tools/postman/__init__.py +7 -8
- alita_sdk/tools/postman/api_wrapper.py +19 -8
- alita_sdk/tools/postman/postman_analysis.py +8 -1
- alita_sdk/tools/pptx/__init__.py +8 -9
- alita_sdk/tools/qtest/__init__.py +16 -11
- alita_sdk/tools/qtest/api_wrapper.py +1784 -88
- alita_sdk/tools/rally/__init__.py +7 -8
- alita_sdk/tools/report_portal/__init__.py +9 -7
- alita_sdk/tools/salesforce/__init__.py +7 -7
- alita_sdk/tools/servicenow/__init__.py +10 -10
- alita_sdk/tools/sharepoint/__init__.py +7 -6
- alita_sdk/tools/sharepoint/api_wrapper.py +127 -36
- alita_sdk/tools/sharepoint/authorization_helper.py +191 -1
- alita_sdk/tools/sharepoint/utils.py +8 -2
- alita_sdk/tools/slack/__init__.py +7 -6
- alita_sdk/tools/sql/__init__.py +8 -7
- alita_sdk/tools/sql/api_wrapper.py +71 -23
- alita_sdk/tools/testio/__init__.py +7 -6
- alita_sdk/tools/testrail/__init__.py +8 -9
- alita_sdk/tools/utils/__init__.py +26 -4
- alita_sdk/tools/utils/content_parser.py +88 -60
- alita_sdk/tools/utils/text_operations.py +254 -0
- alita_sdk/tools/vector_adapters/VectorStoreAdapter.py +76 -26
- alita_sdk/tools/xray/__init__.py +9 -7
- alita_sdk/tools/zephyr/__init__.py +7 -6
- alita_sdk/tools/zephyr_enterprise/__init__.py +8 -6
- alita_sdk/tools/zephyr_essential/__init__.py +7 -6
- alita_sdk/tools/zephyr_essential/api_wrapper.py +12 -13
- alita_sdk/tools/zephyr_scale/__init__.py +7 -6
- alita_sdk/tools/zephyr_squad/__init__.py +7 -6
- {alita_sdk-0.3.351.dist-info → alita_sdk-0.3.499.dist-info}/METADATA +147 -2
- {alita_sdk-0.3.351.dist-info → alita_sdk-0.3.499.dist-info}/RECORD +206 -130
- alita_sdk-0.3.499.dist-info/entry_points.txt +2 -0
- {alita_sdk-0.3.351.dist-info → alita_sdk-0.3.499.dist-info}/WHEEL +0 -0
- {alita_sdk-0.3.351.dist-info → alita_sdk-0.3.499.dist-info}/licenses/LICENSE +0 -0
- {alita_sdk-0.3.351.dist-info → alita_sdk-0.3.499.dist-info}/top_level.txt +0 -0
|
@@ -1,16 +1,18 @@
|
|
|
1
1
|
import json
|
|
2
|
-
import math
|
|
3
2
|
from collections import OrderedDict
|
|
4
3
|
from logging import getLogger
|
|
5
4
|
from typing import Any, Optional, List, Dict, Generator
|
|
6
5
|
|
|
6
|
+
import math
|
|
7
7
|
from langchain_core.documents import Document
|
|
8
8
|
from langchain_core.messages import HumanMessage
|
|
9
|
+
from langchain_core.tools import ToolException
|
|
10
|
+
from psycopg.errors import DataException
|
|
9
11
|
from pydantic import BaseModel, model_validator, Field
|
|
10
12
|
|
|
11
13
|
from alita_sdk.tools.elitea_base import BaseToolApiWrapper
|
|
12
14
|
from alita_sdk.tools.vector_adapters.VectorStoreAdapter import VectorStoreAdapterFactory
|
|
13
|
-
from
|
|
15
|
+
from ...runtime.utils.utils import IndexerKeywords
|
|
14
16
|
|
|
15
17
|
logger = getLogger(__name__)
|
|
16
18
|
|
|
@@ -153,15 +155,45 @@ class VectorStoreWrapperBase(BaseToolApiWrapper):
|
|
|
153
155
|
if values.get('alita') and values.get('embedding_model'):
|
|
154
156
|
values['embeddings'] = values.get('alita').get_embeddings(values.get('embedding_model'))
|
|
155
157
|
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
# Initialize the new vector adapter
|
|
159
|
-
values['vector_adapter'] = VectorStoreAdapterFactory.create_adapter(values['vectorstore_type'])
|
|
160
|
-
logger.debug(f"Vectorstore wrapper initialized: {values}")
|
|
158
|
+
# Lazy initialization: vectorstore and vector_adapter are initialized on-demand
|
|
159
|
+
# This prevents errors when using non-index tools with broken/missing vector DB
|
|
161
160
|
return values
|
|
162
161
|
|
|
162
|
+
def _ensure_vectorstore_initialized(self):
|
|
163
|
+
"""Lazily initialize vectorstore and vector_adapter when needed for index operations."""
|
|
164
|
+
if self.vectorstore is None:
|
|
165
|
+
if not self.vectorstore_type or not self.vectorstore_params:
|
|
166
|
+
raise ToolException(
|
|
167
|
+
"Vector store is not configured. "
|
|
168
|
+
"Please ensure embedding_model and pgvector_configuration are provided."
|
|
169
|
+
)
|
|
170
|
+
|
|
171
|
+
from ..langchain.interfaces.llm_processor import get_vectorstore
|
|
172
|
+
try:
|
|
173
|
+
self.vectorstore = get_vectorstore(
|
|
174
|
+
self.vectorstore_type,
|
|
175
|
+
self.vectorstore_params,
|
|
176
|
+
embedding_func=self.embeddings
|
|
177
|
+
)
|
|
178
|
+
logger.debug(f"Vectorstore initialized: {self.vectorstore_type}")
|
|
179
|
+
except Exception as e:
|
|
180
|
+
raise ToolException(
|
|
181
|
+
f"Failed to initialize vector store: {str(e)}. "
|
|
182
|
+
"Check your vector database configuration and connection."
|
|
183
|
+
)
|
|
184
|
+
|
|
185
|
+
if self.vector_adapter is None:
|
|
186
|
+
try:
|
|
187
|
+
self.vector_adapter = VectorStoreAdapterFactory.create_adapter(self.vectorstore_type)
|
|
188
|
+
logger.debug(f"Vector adapter initialized: {self.vectorstore_type}")
|
|
189
|
+
except Exception as e:
|
|
190
|
+
raise ToolException(
|
|
191
|
+
f"Failed to initialize vector adapter: {str(e)}"
|
|
192
|
+
)
|
|
193
|
+
|
|
163
194
|
def _init_pg_helper(self, language='english'):
|
|
164
195
|
"""Initialize PGVector helper if needed and not already initialized"""
|
|
196
|
+
self._ensure_vectorstore_initialized()
|
|
165
197
|
if self.pg_helper is None and hasattr(self.vectorstore, 'connection_string') and hasattr(self.vectorstore, 'collection_name'):
|
|
166
198
|
try:
|
|
167
199
|
from .pgvector_search import PGVectorSearch
|
|
@@ -175,29 +207,85 @@ class VectorStoreWrapperBase(BaseToolApiWrapper):
|
|
|
175
207
|
except Exception as e:
|
|
176
208
|
logger.error(f"Failed to initialize PGVectorSearch: {str(e)}")
|
|
177
209
|
|
|
210
|
+
def _similarity_search_with_score(self, query: str, filter: dict = None, k: int = 10):
|
|
211
|
+
"""
|
|
212
|
+
Perform similarity search with proper exception handling for DataException.
|
|
213
|
+
|
|
214
|
+
Args:
|
|
215
|
+
query: Search query string
|
|
216
|
+
filter: Optional filter dictionary
|
|
217
|
+
k: Number of results to return
|
|
218
|
+
|
|
219
|
+
Returns:
|
|
220
|
+
List of (Document, score) tuples
|
|
221
|
+
|
|
222
|
+
Raises:
|
|
223
|
+
ToolException: When DataException occurs or other search errors
|
|
224
|
+
"""
|
|
225
|
+
self._ensure_vectorstore_initialized()
|
|
226
|
+
try:
|
|
227
|
+
return self.vectorstore.similarity_search_with_score(
|
|
228
|
+
query, filter=filter, k=k
|
|
229
|
+
)
|
|
230
|
+
except DataException as dimException:
|
|
231
|
+
exception_str = str(dimException)
|
|
232
|
+
if 'different vector dimensions' in exception_str:
|
|
233
|
+
logger.error(f"Data exception: {exception_str}")
|
|
234
|
+
raise ToolException(f"Global search cannot be completed since collections were indexed using "
|
|
235
|
+
f"different embedding models. Use search within a single collection."
|
|
236
|
+
f"\nDetails: {exception_str}")
|
|
237
|
+
raise ToolException(f"Data exception during search. Possibly invalid filter: {exception_str}")
|
|
238
|
+
except Exception as e:
|
|
239
|
+
logger.error(f"Error during similarity search: {str(e)}")
|
|
240
|
+
raise ToolException(f"Search failed: {str(e)}")
|
|
241
|
+
|
|
178
242
|
def list_collections(self) -> List[str]:
|
|
179
243
|
"""List all collections in the vectorstore."""
|
|
180
|
-
|
|
244
|
+
self._ensure_vectorstore_initialized()
|
|
181
245
|
collections = self.vector_adapter.list_collections(self)
|
|
182
246
|
if not collections:
|
|
183
247
|
return "No indexed collections"
|
|
184
248
|
return collections
|
|
185
249
|
|
|
186
|
-
def
|
|
250
|
+
def get_index_meta(self, index_name: str):
|
|
251
|
+
self._ensure_vectorstore_initialized()
|
|
252
|
+
index_metas = self.vector_adapter.get_index_meta(self, index_name)
|
|
253
|
+
if len(index_metas) > 1:
|
|
254
|
+
raise RuntimeError(f"Multiple index_meta documents found: {index_metas}")
|
|
255
|
+
return index_metas[0] if index_metas else None
|
|
256
|
+
|
|
257
|
+
def get_indexed_count(self, index_name: str) -> int:
|
|
258
|
+
self._ensure_vectorstore_initialized()
|
|
259
|
+
from sqlalchemy.orm import Session
|
|
260
|
+
from sqlalchemy import func, or_
|
|
261
|
+
|
|
262
|
+
with Session(self.vectorstore.session_maker.bind) as session:
|
|
263
|
+
return session.query(
|
|
264
|
+
self.vectorstore.EmbeddingStore.id,
|
|
265
|
+
).filter(
|
|
266
|
+
func.jsonb_extract_path_text(self.vectorstore.EmbeddingStore.cmetadata, 'collection') == index_name,
|
|
267
|
+
or_(
|
|
268
|
+
func.jsonb_extract_path_text(self.vectorstore.EmbeddingStore.cmetadata, 'type').is_(None),
|
|
269
|
+
func.jsonb_extract_path_text(self.vectorstore.EmbeddingStore.cmetadata, 'type') != IndexerKeywords.INDEX_META_TYPE.value
|
|
270
|
+
)
|
|
271
|
+
).count()
|
|
272
|
+
|
|
273
|
+
def _clean_collection(self, index_name: str = '', including_index_meta: bool = False):
|
|
187
274
|
"""
|
|
188
275
|
Clean the vectorstore collection by deleting all indexed data.
|
|
189
276
|
"""
|
|
190
|
-
self.
|
|
277
|
+
self._ensure_vectorstore_initialized()
|
|
278
|
+
self._log_tool_event(
|
|
191
279
|
f"Cleaning collection '{self.dataset}'",
|
|
192
280
|
tool_name="_clean_collection"
|
|
193
281
|
)
|
|
194
|
-
self.vector_adapter.clean_collection(self,
|
|
195
|
-
self.
|
|
282
|
+
self.vector_adapter.clean_collection(self, index_name, including_index_meta)
|
|
283
|
+
self._log_tool_event(
|
|
196
284
|
f"Collection '{self.dataset}' has been cleaned. ",
|
|
197
285
|
tool_name="_clean_collection"
|
|
198
286
|
)
|
|
199
287
|
|
|
200
|
-
def index_documents(self, documents: Generator[Document, None, None],
|
|
288
|
+
def index_documents(self, documents: Generator[Document, None, None], index_name: str, progress_step: int = 20, clean_index: bool = True):
|
|
201
289
|
""" Index documents in the vectorstore.
|
|
202
290
|
|
|
203
291
|
Args:
|
|
@@ -205,22 +293,23 @@ class VectorStoreWrapperBase(BaseToolApiWrapper):
|
|
|
205
293
|
progress_step (int): Step for progress reporting, default is 20.
|
|
206
294
|
clean_index (bool): If True, clean the index before re-indexing all documents.
|
|
207
295
|
"""
|
|
296
|
+
self._ensure_vectorstore_initialized()
|
|
208
297
|
if clean_index:
|
|
209
|
-
self._clean_index(
|
|
298
|
+
self._clean_index(index_name)
|
|
210
299
|
|
|
211
|
-
return self._save_index(list(documents),
|
|
300
|
+
return self._save_index(list(documents), index_name, progress_step)
|
|
212
301
|
|
|
213
|
-
def _clean_index(self,
|
|
302
|
+
def _clean_index(self, index_name: str):
|
|
214
303
|
logger.info("Cleaning index before re-indexing all documents.")
|
|
215
|
-
self.
|
|
304
|
+
self._log_tool_event("Cleaning index before re-indexing all documents. Previous index will be removed", tool_name="index_documents")
|
|
216
305
|
try:
|
|
217
|
-
self._clean_collection(
|
|
218
|
-
self.
|
|
306
|
+
self._clean_collection(index_name, including_index_meta=False)
|
|
307
|
+
self._log_tool_event("Previous index has been removed",
|
|
219
308
|
tool_name="index_documents")
|
|
220
309
|
except Exception as e:
|
|
221
310
|
logger.warning(f"Failed to clean index: {str(e)}. Continuing with re-indexing.")
|
|
222
311
|
|
|
223
|
-
def _save_index(self, documents: list[Document],
|
|
312
|
+
def _save_index(self, documents: list[Document], index_name: Optional[str] = None, progress_step: int = 20):
|
|
224
313
|
from ..langchain.interfaces.llm_processor import add_documents
|
|
225
314
|
#
|
|
226
315
|
for doc in documents:
|
|
@@ -229,13 +318,13 @@ class VectorStoreWrapperBase(BaseToolApiWrapper):
|
|
|
229
318
|
|
|
230
319
|
logger.debug(f"Indexing documents: {documents}")
|
|
231
320
|
|
|
232
|
-
# if
|
|
233
|
-
if
|
|
321
|
+
# if index_name is provided, add it to metadata of each document
|
|
322
|
+
if index_name:
|
|
234
323
|
for doc in documents:
|
|
235
324
|
if not doc.metadata.get('collection'):
|
|
236
|
-
doc.metadata['collection'] =
|
|
325
|
+
doc.metadata['collection'] = index_name
|
|
237
326
|
else:
|
|
238
|
-
doc.metadata['collection'] += f";{
|
|
327
|
+
doc.metadata['collection'] += f";{index_name}"
|
|
239
328
|
|
|
240
329
|
total_docs = len(documents)
|
|
241
330
|
documents_count = 0
|
|
@@ -261,7 +350,7 @@ class VectorStoreWrapperBase(BaseToolApiWrapper):
|
|
|
261
350
|
if percent >= next_progress_point:
|
|
262
351
|
msg = f"Indexing progress: {percent}%. Processed {documents_count} of {total_docs} documents."
|
|
263
352
|
logger.debug(msg)
|
|
264
|
-
self.
|
|
353
|
+
self._log_tool_event(msg)
|
|
265
354
|
next_progress_point += progress_step
|
|
266
355
|
except Exception:
|
|
267
356
|
from traceback import format_exc
|
|
@@ -269,7 +358,8 @@ class VectorStoreWrapperBase(BaseToolApiWrapper):
|
|
|
269
358
|
return {"status": "error", "message": f"Error: {format_exc()}"}
|
|
270
359
|
if _documents:
|
|
271
360
|
add_documents(vectorstore=self.vectorstore, documents=_documents)
|
|
272
|
-
return {"status": "ok", "message": f"successfully indexed {documents_count} documents"
|
|
361
|
+
return {"status": "ok", "message": f"successfully indexed {documents_count} documents" if documents_count > 0
|
|
362
|
+
else "no documents to index"}
|
|
273
363
|
|
|
274
364
|
def search_documents(self, query:str, doctype: str = 'code',
|
|
275
365
|
filter:dict|str={}, cut_off: float=0.5,
|
|
@@ -303,7 +393,7 @@ class VectorStoreWrapperBase(BaseToolApiWrapper):
|
|
|
303
393
|
}
|
|
304
394
|
|
|
305
395
|
try:
|
|
306
|
-
document_items = self.
|
|
396
|
+
document_items = self._similarity_search_with_score(
|
|
307
397
|
query, filter=document_filter, k=search_top
|
|
308
398
|
)
|
|
309
399
|
# Add document results to unique docs
|
|
@@ -336,18 +426,16 @@ class VectorStoreWrapperBase(BaseToolApiWrapper):
|
|
|
336
426
|
}
|
|
337
427
|
|
|
338
428
|
try:
|
|
339
|
-
chunk_items = self.
|
|
429
|
+
chunk_items = self._similarity_search_with_score(
|
|
340
430
|
query, filter=chunk_filter, k=search_top
|
|
341
431
|
)
|
|
342
|
-
|
|
343
|
-
logger.debug(f"Chunk items for {chunk_type}: {chunk_items[0]}")
|
|
344
|
-
|
|
432
|
+
|
|
345
433
|
for doc, score in chunk_items:
|
|
346
434
|
# Create unique identifier for document
|
|
347
435
|
source = doc.metadata.get('source')
|
|
348
436
|
chunk_id = doc.metadata.get('chunk_id')
|
|
349
437
|
doc_id = f"{source}_{chunk_id}" if source and chunk_id else str(doc.metadata.get('id', id(doc)))
|
|
350
|
-
|
|
438
|
+
|
|
351
439
|
# Store document and its score
|
|
352
440
|
if doc_id not in unique_docs:
|
|
353
441
|
unique_docs[doc_id] = doc
|
|
@@ -367,9 +455,9 @@ class VectorStoreWrapperBase(BaseToolApiWrapper):
|
|
|
367
455
|
doc_filter = {
|
|
368
456
|
"$and": doc_filter_parts
|
|
369
457
|
}
|
|
370
|
-
|
|
458
|
+
|
|
371
459
|
try:
|
|
372
|
-
fetch_items = self.
|
|
460
|
+
fetch_items = self._similarity_search_with_score(
|
|
373
461
|
query, filter=doc_filter, k=1
|
|
374
462
|
)
|
|
375
463
|
if fetch_items:
|
|
@@ -383,7 +471,7 @@ class VectorStoreWrapperBase(BaseToolApiWrapper):
|
|
|
383
471
|
else:
|
|
384
472
|
# Default search behavior (unchanged)
|
|
385
473
|
max_search_results = 30 if search_top * 3 > 30 else search_top * 3
|
|
386
|
-
vector_items = self.
|
|
474
|
+
vector_items = self._similarity_search_with_score(
|
|
387
475
|
query, filter=filter, k=max_search_results
|
|
388
476
|
)
|
|
389
477
|
|
|
@@ -401,7 +489,7 @@ class VectorStoreWrapperBase(BaseToolApiWrapper):
|
|
|
401
489
|
doc_map = OrderedDict(
|
|
402
490
|
sorted(doc_map.items(), key=lambda x: x[1][1], reverse=True)
|
|
403
491
|
)
|
|
404
|
-
|
|
492
|
+
|
|
405
493
|
# Process full-text search if configured
|
|
406
494
|
if full_text_search and full_text_search.get('enabled') and full_text_search.get('fields'):
|
|
407
495
|
language = full_text_search.get('language', 'english')
|
|
@@ -414,7 +502,7 @@ class VectorStoreWrapperBase(BaseToolApiWrapper):
|
|
|
414
502
|
for field_name in full_text_search.get('fields', []):
|
|
415
503
|
try:
|
|
416
504
|
text_results = self.pg_helper.full_text_search(field_name, query)
|
|
417
|
-
|
|
505
|
+
|
|
418
506
|
# Combine text search results with vector results
|
|
419
507
|
for result in text_results:
|
|
420
508
|
doc_id = result['id']
|
|
@@ -569,21 +657,6 @@ class VectorStoreWrapperBase(BaseToolApiWrapper):
|
|
|
569
657
|
])
|
|
570
658
|
return result.content
|
|
571
659
|
|
|
572
|
-
def _log_data(self, message: str, tool_name: str = "index_data"):
|
|
573
|
-
"""Log data and dispatch custom event for indexing progress"""
|
|
574
|
-
|
|
575
|
-
try:
|
|
576
|
-
dispatch_custom_event(
|
|
577
|
-
name="thinking_step",
|
|
578
|
-
data={
|
|
579
|
-
"message": message,
|
|
580
|
-
"tool_name": tool_name,
|
|
581
|
-
"toolkit": "vectorstore",
|
|
582
|
-
},
|
|
583
|
-
)
|
|
584
|
-
except Exception as e:
|
|
585
|
-
logger.warning(f"Failed to dispatch progress event: {str(e)}")
|
|
586
|
-
|
|
587
660
|
def get_available_tools(self):
|
|
588
661
|
return [
|
|
589
662
|
{
|
|
@@ -23,9 +23,45 @@ class AlitaStreamlitCallback(BaseCallbackHandler):
|
|
|
23
23
|
self.tokens_out = 0
|
|
24
24
|
self.pending_llm_requests = defaultdict(int)
|
|
25
25
|
self.current_model_name = 'gpt-4'
|
|
26
|
+
self._event_queue = [] # Queue for events when context is unavailable
|
|
26
27
|
#
|
|
27
28
|
super().__init__()
|
|
28
29
|
|
|
30
|
+
def _has_streamlit_context(self) -> bool:
|
|
31
|
+
"""Check if Streamlit context is available in the current thread."""
|
|
32
|
+
try:
|
|
33
|
+
# Try to import streamlit runtime context checker
|
|
34
|
+
from streamlit.runtime.scriptrunner import get_script_run_ctx
|
|
35
|
+
ctx = get_script_run_ctx()
|
|
36
|
+
return ctx is not None
|
|
37
|
+
except (ImportError, Exception) as e:
|
|
38
|
+
if self.debug:
|
|
39
|
+
log.debug(f"Streamlit context check failed: {e}")
|
|
40
|
+
return False
|
|
41
|
+
|
|
42
|
+
def _safe_streamlit_call(self, func, *args, **kwargs):
|
|
43
|
+
"""Safely execute a Streamlit UI operation, handling missing context gracefully."""
|
|
44
|
+
if not self._has_streamlit_context():
|
|
45
|
+
func_name = getattr(func, '__name__', str(func))
|
|
46
|
+
if self.debug:
|
|
47
|
+
log.warning(f"Streamlit context not available for {func_name}, queueing event")
|
|
48
|
+
# Store the event for potential replay when context is available
|
|
49
|
+
self._event_queue.append({
|
|
50
|
+
'func': func_name,
|
|
51
|
+
'args': args,
|
|
52
|
+
'kwargs': kwargs,
|
|
53
|
+
'timestamp': datetime.now(tz=timezone.utc)
|
|
54
|
+
})
|
|
55
|
+
return None
|
|
56
|
+
|
|
57
|
+
try:
|
|
58
|
+
return func(*args, **kwargs)
|
|
59
|
+
except Exception as e:
|
|
60
|
+
func_name = getattr(func, '__name__', str(func))
|
|
61
|
+
# Handle any Streamlit-specific exceptions gracefully
|
|
62
|
+
log.warning(f"Streamlit operation {func_name} failed: {e}")
|
|
63
|
+
return None
|
|
64
|
+
|
|
29
65
|
#
|
|
30
66
|
# Chain
|
|
31
67
|
#
|
|
@@ -76,10 +112,14 @@ class AlitaStreamlitCallback(BaseCallbackHandler):
|
|
|
76
112
|
json.dumps(payload, ensure_ascii=False, default=lambda o: str(o))
|
|
77
113
|
)
|
|
78
114
|
|
|
79
|
-
|
|
80
|
-
|
|
115
|
+
status_widget = self._safe_streamlit_call(
|
|
116
|
+
self.st.status,
|
|
117
|
+
f"Running {payload.get('tool_name')}...",
|
|
118
|
+
expanded=True
|
|
81
119
|
)
|
|
82
|
-
|
|
120
|
+
if status_widget:
|
|
121
|
+
self.callback_state[str(run_id)] = status_widget
|
|
122
|
+
self._safe_streamlit_call(status_widget.write, f"Tool inputs: {payload}")
|
|
83
123
|
|
|
84
124
|
def on_tool_start(self, *args, run_id: UUID, **kwargs):
|
|
85
125
|
""" Callback """
|
|
@@ -95,8 +135,15 @@ class AlitaStreamlitCallback(BaseCallbackHandler):
|
|
|
95
135
|
"tool_inputs": kwargs.get('inputs')
|
|
96
136
|
}
|
|
97
137
|
payload = json.loads(json.dumps(payload, ensure_ascii=False, default=lambda o: str(o)))
|
|
98
|
-
|
|
99
|
-
|
|
138
|
+
|
|
139
|
+
status_widget = self._safe_streamlit_call(
|
|
140
|
+
self.st.status,
|
|
141
|
+
f"Running {tool_name}...",
|
|
142
|
+
expanded=True
|
|
143
|
+
)
|
|
144
|
+
if status_widget:
|
|
145
|
+
self.callback_state[tool_run_id] = status_widget
|
|
146
|
+
self._safe_streamlit_call(status_widget.write, f"Tool inputs: {kwargs.get('inputs')}")
|
|
100
147
|
|
|
101
148
|
def on_tool_end(self, *args, run_id: UUID, **kwargs):
|
|
102
149
|
""" Callback """
|
|
@@ -104,11 +151,16 @@ class AlitaStreamlitCallback(BaseCallbackHandler):
|
|
|
104
151
|
log.info("on_tool_end(%s, %s)", args, kwargs)
|
|
105
152
|
tool_run_id = str(run_id)
|
|
106
153
|
tool_output = args[0]
|
|
107
|
-
if self.callback_state
|
|
108
|
-
self.callback_state[tool_run_id]
|
|
109
|
-
self.
|
|
154
|
+
if self.callback_state.get(tool_run_id):
|
|
155
|
+
status_widget = self.callback_state[tool_run_id]
|
|
156
|
+
self._safe_streamlit_call(status_widget.write, f"Tool output: {tool_output}")
|
|
157
|
+
self._safe_streamlit_call(
|
|
158
|
+
status_widget.update,
|
|
159
|
+
label=f"Completed {kwargs.get('name')}",
|
|
160
|
+
state="complete",
|
|
161
|
+
expanded=False
|
|
162
|
+
)
|
|
110
163
|
self.callback_state.pop(tool_run_id, None)
|
|
111
|
-
del self.callback_state[run_id]
|
|
112
164
|
|
|
113
165
|
def on_tool_error(self, *args, run_id: UUID, **kwargs):
|
|
114
166
|
""" Callback """
|
|
@@ -116,9 +168,19 @@ class AlitaStreamlitCallback(BaseCallbackHandler):
|
|
|
116
168
|
log.info("on_tool_error(%s, %s)", args, kwargs)
|
|
117
169
|
tool_run_id = str(run_id)
|
|
118
170
|
tool_exception = args[0]
|
|
119
|
-
self.callback_state
|
|
120
|
-
|
|
121
|
-
|
|
171
|
+
if self.callback_state.get(tool_run_id):
|
|
172
|
+
status_widget = self.callback_state[tool_run_id]
|
|
173
|
+
self._safe_streamlit_call(
|
|
174
|
+
status_widget.write,
|
|
175
|
+
f"{traceback.format_exception(tool_exception)}"
|
|
176
|
+
)
|
|
177
|
+
self._safe_streamlit_call(
|
|
178
|
+
status_widget.update,
|
|
179
|
+
label=f"Error {kwargs.get('name')}",
|
|
180
|
+
state="error",
|
|
181
|
+
expanded=False
|
|
182
|
+
)
|
|
183
|
+
self.callback_state.pop(tool_run_id, None)
|
|
122
184
|
|
|
123
185
|
#
|
|
124
186
|
# Agent
|
|
@@ -156,8 +218,14 @@ class AlitaStreamlitCallback(BaseCallbackHandler):
|
|
|
156
218
|
self.current_model_name = metadata.get('ls_model_name', self.current_model_name)
|
|
157
219
|
llm_run_id = str(run_id)
|
|
158
220
|
|
|
159
|
-
|
|
160
|
-
|
|
221
|
+
status_widget = self._safe_streamlit_call(
|
|
222
|
+
self.st.status,
|
|
223
|
+
f"Running LLM ...",
|
|
224
|
+
expanded=True
|
|
225
|
+
)
|
|
226
|
+
if status_widget:
|
|
227
|
+
self.callback_state[llm_run_id] = status_widget
|
|
228
|
+
self._safe_streamlit_call(status_widget.write, f"LLM inputs: {messages}")
|
|
161
229
|
|
|
162
230
|
def on_llm_start(self, *args, **kwargs):
|
|
163
231
|
""" Callback """
|
|
@@ -178,16 +246,27 @@ class AlitaStreamlitCallback(BaseCallbackHandler):
|
|
|
178
246
|
content = None
|
|
179
247
|
if chunk:
|
|
180
248
|
content = chunk.text
|
|
181
|
-
|
|
249
|
+
|
|
250
|
+
llm_run_id = str(run_id)
|
|
251
|
+
if self.callback_state.get(llm_run_id):
|
|
252
|
+
status_widget = self.callback_state[llm_run_id]
|
|
253
|
+
self._safe_streamlit_call(status_widget.write, content)
|
|
182
254
|
|
|
183
255
|
def on_llm_error(self, *args, run_id: UUID, **kwargs):
|
|
184
256
|
""" Callback """
|
|
185
257
|
if self.debug:
|
|
186
258
|
log.error("on_llm_error(%s, %s)", args, kwargs)
|
|
187
259
|
llm_run_id = str(run_id)
|
|
188
|
-
self.callback_state
|
|
189
|
-
|
|
190
|
-
|
|
260
|
+
if self.callback_state.get(llm_run_id):
|
|
261
|
+
status_widget = self.callback_state[llm_run_id]
|
|
262
|
+
self._safe_streamlit_call(status_widget.write, f"on_llm_error({args}, {kwargs})")
|
|
263
|
+
self._safe_streamlit_call(
|
|
264
|
+
status_widget.update,
|
|
265
|
+
label=f"Error {kwargs.get('name')}",
|
|
266
|
+
state="error",
|
|
267
|
+
expanded=False
|
|
268
|
+
)
|
|
269
|
+
self.callback_state.pop(llm_run_id, None)
|
|
191
270
|
#
|
|
192
271
|
# exception = args[0]
|
|
193
272
|
# FIXME: should we emit an error here too?
|
|
@@ -205,5 +284,12 @@ class AlitaStreamlitCallback(BaseCallbackHandler):
|
|
|
205
284
|
if self.debug:
|
|
206
285
|
log.debug("on_llm_end(%s, %s)", response, kwargs)
|
|
207
286
|
llm_run_id = str(run_id)
|
|
208
|
-
self.callback_state
|
|
209
|
-
|
|
287
|
+
if self.callback_state.get(llm_run_id):
|
|
288
|
+
status_widget = self.callback_state[llm_run_id]
|
|
289
|
+
self._safe_streamlit_call(
|
|
290
|
+
status_widget.update,
|
|
291
|
+
label=f"Completed LLM call",
|
|
292
|
+
state="complete",
|
|
293
|
+
expanded=False
|
|
294
|
+
)
|
|
295
|
+
self.callback_state.pop(llm_run_id, None)
|