alita-sdk 0.3.257__py3-none-any.whl → 0.3.584__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of alita-sdk might be problematic. Click here for more details.
- alita_sdk/cli/__init__.py +10 -0
- alita_sdk/cli/__main__.py +17 -0
- alita_sdk/cli/agent/__init__.py +5 -0
- alita_sdk/cli/agent/default.py +258 -0
- alita_sdk/cli/agent_executor.py +155 -0
- alita_sdk/cli/agent_loader.py +215 -0
- alita_sdk/cli/agent_ui.py +228 -0
- alita_sdk/cli/agents.py +3794 -0
- alita_sdk/cli/callbacks.py +647 -0
- alita_sdk/cli/cli.py +168 -0
- alita_sdk/cli/config.py +306 -0
- alita_sdk/cli/context/__init__.py +30 -0
- alita_sdk/cli/context/cleanup.py +198 -0
- alita_sdk/cli/context/manager.py +731 -0
- alita_sdk/cli/context/message.py +285 -0
- alita_sdk/cli/context/strategies.py +289 -0
- alita_sdk/cli/context/token_estimation.py +127 -0
- alita_sdk/cli/formatting.py +182 -0
- alita_sdk/cli/input_handler.py +419 -0
- alita_sdk/cli/inventory.py +1073 -0
- alita_sdk/cli/mcp_loader.py +315 -0
- alita_sdk/cli/toolkit.py +327 -0
- alita_sdk/cli/toolkit_loader.py +85 -0
- alita_sdk/cli/tools/__init__.py +43 -0
- alita_sdk/cli/tools/approval.py +224 -0
- alita_sdk/cli/tools/filesystem.py +1751 -0
- alita_sdk/cli/tools/planning.py +389 -0
- alita_sdk/cli/tools/terminal.py +414 -0
- alita_sdk/community/__init__.py +72 -12
- alita_sdk/community/inventory/__init__.py +236 -0
- alita_sdk/community/inventory/config.py +257 -0
- alita_sdk/community/inventory/enrichment.py +2137 -0
- alita_sdk/community/inventory/extractors.py +1469 -0
- alita_sdk/community/inventory/ingestion.py +3172 -0
- alita_sdk/community/inventory/knowledge_graph.py +1457 -0
- alita_sdk/community/inventory/parsers/__init__.py +218 -0
- alita_sdk/community/inventory/parsers/base.py +295 -0
- alita_sdk/community/inventory/parsers/csharp_parser.py +907 -0
- alita_sdk/community/inventory/parsers/go_parser.py +851 -0
- alita_sdk/community/inventory/parsers/html_parser.py +389 -0
- alita_sdk/community/inventory/parsers/java_parser.py +593 -0
- alita_sdk/community/inventory/parsers/javascript_parser.py +629 -0
- alita_sdk/community/inventory/parsers/kotlin_parser.py +768 -0
- alita_sdk/community/inventory/parsers/markdown_parser.py +362 -0
- alita_sdk/community/inventory/parsers/python_parser.py +604 -0
- alita_sdk/community/inventory/parsers/rust_parser.py +858 -0
- alita_sdk/community/inventory/parsers/swift_parser.py +832 -0
- alita_sdk/community/inventory/parsers/text_parser.py +322 -0
- alita_sdk/community/inventory/parsers/yaml_parser.py +370 -0
- alita_sdk/community/inventory/patterns/__init__.py +61 -0
- alita_sdk/community/inventory/patterns/ast_adapter.py +380 -0
- alita_sdk/community/inventory/patterns/loader.py +348 -0
- alita_sdk/community/inventory/patterns/registry.py +198 -0
- alita_sdk/community/inventory/presets.py +535 -0
- alita_sdk/community/inventory/retrieval.py +1403 -0
- alita_sdk/community/inventory/toolkit.py +173 -0
- alita_sdk/community/inventory/toolkit_utils.py +176 -0
- alita_sdk/community/inventory/visualize.py +1370 -0
- alita_sdk/configurations/__init__.py +11 -0
- alita_sdk/configurations/ado.py +148 -2
- alita_sdk/configurations/azure_search.py +1 -1
- alita_sdk/configurations/bigquery.py +1 -1
- alita_sdk/configurations/bitbucket.py +94 -2
- alita_sdk/configurations/browser.py +18 -0
- alita_sdk/configurations/carrier.py +19 -0
- alita_sdk/configurations/confluence.py +130 -1
- alita_sdk/configurations/delta_lake.py +1 -1
- alita_sdk/configurations/figma.py +76 -5
- alita_sdk/configurations/github.py +65 -1
- alita_sdk/configurations/gitlab.py +81 -0
- alita_sdk/configurations/google_places.py +17 -0
- alita_sdk/configurations/jira.py +103 -0
- alita_sdk/configurations/openapi.py +323 -0
- alita_sdk/configurations/postman.py +1 -1
- alita_sdk/configurations/qtest.py +72 -3
- alita_sdk/configurations/report_portal.py +115 -0
- alita_sdk/configurations/salesforce.py +19 -0
- alita_sdk/configurations/service_now.py +1 -12
- alita_sdk/configurations/sharepoint.py +167 -0
- alita_sdk/configurations/sonar.py +18 -0
- alita_sdk/configurations/sql.py +20 -0
- alita_sdk/configurations/testio.py +101 -0
- alita_sdk/configurations/testrail.py +88 -0
- alita_sdk/configurations/xray.py +94 -1
- alita_sdk/configurations/zephyr_enterprise.py +94 -1
- alita_sdk/configurations/zephyr_essential.py +95 -0
- alita_sdk/runtime/clients/artifact.py +21 -4
- alita_sdk/runtime/clients/client.py +458 -67
- alita_sdk/runtime/clients/mcp_discovery.py +342 -0
- alita_sdk/runtime/clients/mcp_manager.py +262 -0
- alita_sdk/runtime/clients/sandbox_client.py +352 -0
- alita_sdk/runtime/langchain/_constants_bkup.py +1318 -0
- alita_sdk/runtime/langchain/assistant.py +183 -43
- alita_sdk/runtime/langchain/constants.py +647 -1
- alita_sdk/runtime/langchain/document_loaders/AlitaDocxMammothLoader.py +315 -3
- alita_sdk/runtime/langchain/document_loaders/AlitaExcelLoader.py +209 -31
- alita_sdk/runtime/langchain/document_loaders/AlitaImageLoader.py +1 -1
- alita_sdk/runtime/langchain/document_loaders/AlitaJSONLinesLoader.py +77 -0
- alita_sdk/runtime/langchain/document_loaders/AlitaJSONLoader.py +10 -3
- alita_sdk/runtime/langchain/document_loaders/AlitaMarkdownLoader.py +66 -0
- alita_sdk/runtime/langchain/document_loaders/AlitaPDFLoader.py +79 -10
- alita_sdk/runtime/langchain/document_loaders/AlitaPowerPointLoader.py +52 -15
- alita_sdk/runtime/langchain/document_loaders/AlitaPythonLoader.py +9 -0
- alita_sdk/runtime/langchain/document_loaders/AlitaTableLoader.py +1 -4
- alita_sdk/runtime/langchain/document_loaders/AlitaTextLoader.py +15 -2
- alita_sdk/runtime/langchain/document_loaders/ImageParser.py +30 -0
- alita_sdk/runtime/langchain/document_loaders/constants.py +189 -41
- alita_sdk/runtime/langchain/interfaces/llm_processor.py +4 -2
- alita_sdk/runtime/langchain/langraph_agent.py +493 -105
- alita_sdk/runtime/langchain/utils.py +118 -8
- alita_sdk/runtime/llms/preloaded.py +2 -6
- alita_sdk/runtime/models/mcp_models.py +61 -0
- alita_sdk/runtime/skills/__init__.py +91 -0
- alita_sdk/runtime/skills/callbacks.py +498 -0
- alita_sdk/runtime/skills/discovery.py +540 -0
- alita_sdk/runtime/skills/executor.py +610 -0
- alita_sdk/runtime/skills/input_builder.py +371 -0
- alita_sdk/runtime/skills/models.py +330 -0
- alita_sdk/runtime/skills/registry.py +355 -0
- alita_sdk/runtime/skills/skill_runner.py +330 -0
- alita_sdk/runtime/toolkits/__init__.py +28 -0
- alita_sdk/runtime/toolkits/application.py +14 -4
- alita_sdk/runtime/toolkits/artifact.py +25 -9
- alita_sdk/runtime/toolkits/datasource.py +13 -6
- alita_sdk/runtime/toolkits/mcp.py +782 -0
- alita_sdk/runtime/toolkits/planning.py +178 -0
- alita_sdk/runtime/toolkits/skill_router.py +238 -0
- alita_sdk/runtime/toolkits/subgraph.py +11 -6
- alita_sdk/runtime/toolkits/tools.py +314 -70
- alita_sdk/runtime/toolkits/vectorstore.py +11 -5
- alita_sdk/runtime/tools/__init__.py +24 -0
- alita_sdk/runtime/tools/application.py +16 -4
- alita_sdk/runtime/tools/artifact.py +367 -33
- alita_sdk/runtime/tools/data_analysis.py +183 -0
- alita_sdk/runtime/tools/function.py +100 -4
- alita_sdk/runtime/tools/graph.py +81 -0
- alita_sdk/runtime/tools/image_generation.py +218 -0
- alita_sdk/runtime/tools/llm.py +1032 -177
- alita_sdk/runtime/tools/loop.py +3 -1
- alita_sdk/runtime/tools/loop_output.py +3 -1
- alita_sdk/runtime/tools/mcp_inspect_tool.py +284 -0
- alita_sdk/runtime/tools/mcp_remote_tool.py +181 -0
- alita_sdk/runtime/tools/mcp_server_tool.py +3 -1
- alita_sdk/runtime/tools/planning/__init__.py +36 -0
- alita_sdk/runtime/tools/planning/models.py +246 -0
- alita_sdk/runtime/tools/planning/wrapper.py +607 -0
- alita_sdk/runtime/tools/router.py +2 -1
- alita_sdk/runtime/tools/sandbox.py +375 -0
- alita_sdk/runtime/tools/skill_router.py +776 -0
- alita_sdk/runtime/tools/tool.py +3 -1
- alita_sdk/runtime/tools/vectorstore.py +69 -65
- alita_sdk/runtime/tools/vectorstore_base.py +163 -90
- alita_sdk/runtime/utils/AlitaCallback.py +137 -21
- alita_sdk/runtime/utils/constants.py +5 -1
- alita_sdk/runtime/utils/mcp_client.py +492 -0
- alita_sdk/runtime/utils/mcp_oauth.py +361 -0
- alita_sdk/runtime/utils/mcp_sse_client.py +434 -0
- alita_sdk/runtime/utils/mcp_tools_discovery.py +124 -0
- alita_sdk/runtime/utils/streamlit.py +41 -14
- alita_sdk/runtime/utils/toolkit_utils.py +28 -9
- alita_sdk/runtime/utils/utils.py +48 -0
- alita_sdk/tools/__init__.py +135 -37
- alita_sdk/tools/ado/__init__.py +2 -2
- alita_sdk/tools/ado/repos/__init__.py +16 -19
- alita_sdk/tools/ado/repos/repos_wrapper.py +12 -20
- alita_sdk/tools/ado/test_plan/__init__.py +27 -8
- alita_sdk/tools/ado/test_plan/test_plan_wrapper.py +56 -28
- alita_sdk/tools/ado/wiki/__init__.py +28 -12
- alita_sdk/tools/ado/wiki/ado_wrapper.py +114 -40
- alita_sdk/tools/ado/work_item/__init__.py +28 -12
- alita_sdk/tools/ado/work_item/ado_wrapper.py +95 -11
- alita_sdk/tools/advanced_jira_mining/__init__.py +13 -8
- alita_sdk/tools/aws/delta_lake/__init__.py +15 -11
- alita_sdk/tools/aws/delta_lake/tool.py +5 -1
- alita_sdk/tools/azure_ai/search/__init__.py +14 -8
- alita_sdk/tools/base/tool.py +5 -1
- alita_sdk/tools/base_indexer_toolkit.py +454 -110
- alita_sdk/tools/bitbucket/__init__.py +28 -19
- alita_sdk/tools/bitbucket/api_wrapper.py +285 -27
- alita_sdk/tools/bitbucket/cloud_api_wrapper.py +5 -5
- alita_sdk/tools/browser/__init__.py +41 -16
- alita_sdk/tools/browser/crawler.py +3 -1
- alita_sdk/tools/browser/utils.py +15 -6
- alita_sdk/tools/carrier/__init__.py +18 -17
- alita_sdk/tools/carrier/backend_reports_tool.py +8 -4
- alita_sdk/tools/carrier/excel_reporter.py +8 -4
- alita_sdk/tools/chunkers/__init__.py +3 -1
- alita_sdk/tools/chunkers/code/codeparser.py +1 -1
- alita_sdk/tools/chunkers/sematic/json_chunker.py +2 -1
- alita_sdk/tools/chunkers/sematic/markdown_chunker.py +97 -6
- alita_sdk/tools/chunkers/sematic/proposal_chunker.py +1 -1
- alita_sdk/tools/chunkers/universal_chunker.py +270 -0
- alita_sdk/tools/cloud/aws/__init__.py +12 -7
- alita_sdk/tools/cloud/azure/__init__.py +12 -7
- alita_sdk/tools/cloud/gcp/__init__.py +12 -7
- alita_sdk/tools/cloud/k8s/__init__.py +12 -7
- alita_sdk/tools/code/linter/__init__.py +10 -8
- alita_sdk/tools/code/loaders/codesearcher.py +3 -2
- alita_sdk/tools/code/sonar/__init__.py +21 -13
- alita_sdk/tools/code_indexer_toolkit.py +199 -0
- alita_sdk/tools/confluence/__init__.py +22 -14
- alita_sdk/tools/confluence/api_wrapper.py +197 -58
- alita_sdk/tools/confluence/loader.py +14 -2
- alita_sdk/tools/custom_open_api/__init__.py +12 -5
- alita_sdk/tools/elastic/__init__.py +11 -8
- alita_sdk/tools/elitea_base.py +546 -64
- alita_sdk/tools/figma/__init__.py +60 -11
- alita_sdk/tools/figma/api_wrapper.py +1400 -167
- alita_sdk/tools/figma/figma_client.py +73 -0
- alita_sdk/tools/figma/toon_tools.py +2748 -0
- alita_sdk/tools/github/__init__.py +18 -17
- alita_sdk/tools/github/api_wrapper.py +9 -26
- alita_sdk/tools/github/github_client.py +81 -12
- alita_sdk/tools/github/schemas.py +2 -1
- alita_sdk/tools/github/tool.py +5 -1
- alita_sdk/tools/gitlab/__init__.py +19 -13
- alita_sdk/tools/gitlab/api_wrapper.py +256 -80
- alita_sdk/tools/gitlab_org/__init__.py +14 -10
- alita_sdk/tools/google/bigquery/__init__.py +14 -13
- alita_sdk/tools/google/bigquery/tool.py +5 -1
- alita_sdk/tools/google_places/__init__.py +21 -11
- alita_sdk/tools/jira/__init__.py +22 -11
- alita_sdk/tools/jira/api_wrapper.py +315 -168
- alita_sdk/tools/keycloak/__init__.py +11 -8
- alita_sdk/tools/localgit/__init__.py +9 -3
- alita_sdk/tools/localgit/local_git.py +62 -54
- alita_sdk/tools/localgit/tool.py +5 -1
- alita_sdk/tools/memory/__init__.py +38 -14
- alita_sdk/tools/non_code_indexer_toolkit.py +7 -2
- alita_sdk/tools/ocr/__init__.py +11 -8
- alita_sdk/tools/openapi/__init__.py +491 -106
- alita_sdk/tools/openapi/api_wrapper.py +1357 -0
- alita_sdk/tools/openapi/tool.py +20 -0
- alita_sdk/tools/pandas/__init__.py +20 -12
- alita_sdk/tools/pandas/api_wrapper.py +40 -45
- alita_sdk/tools/pandas/dataframe/generator/base.py +3 -1
- alita_sdk/tools/postman/__init__.py +11 -11
- alita_sdk/tools/postman/api_wrapper.py +19 -8
- alita_sdk/tools/postman/postman_analysis.py +8 -1
- alita_sdk/tools/pptx/__init__.py +11 -10
- alita_sdk/tools/qtest/__init__.py +22 -14
- alita_sdk/tools/qtest/api_wrapper.py +1784 -88
- alita_sdk/tools/rally/__init__.py +13 -10
- alita_sdk/tools/report_portal/__init__.py +23 -16
- alita_sdk/tools/salesforce/__init__.py +22 -16
- alita_sdk/tools/servicenow/__init__.py +21 -16
- alita_sdk/tools/servicenow/api_wrapper.py +1 -1
- alita_sdk/tools/sharepoint/__init__.py +17 -14
- alita_sdk/tools/sharepoint/api_wrapper.py +179 -39
- alita_sdk/tools/sharepoint/authorization_helper.py +191 -1
- alita_sdk/tools/sharepoint/utils.py +8 -2
- alita_sdk/tools/slack/__init__.py +13 -8
- alita_sdk/tools/sql/__init__.py +22 -19
- alita_sdk/tools/sql/api_wrapper.py +71 -23
- alita_sdk/tools/testio/__init__.py +21 -13
- alita_sdk/tools/testrail/__init__.py +13 -11
- alita_sdk/tools/testrail/api_wrapper.py +214 -46
- alita_sdk/tools/utils/__init__.py +28 -4
- alita_sdk/tools/utils/content_parser.py +241 -55
- alita_sdk/tools/utils/text_operations.py +254 -0
- alita_sdk/tools/vector_adapters/VectorStoreAdapter.py +83 -27
- alita_sdk/tools/xray/__init__.py +18 -14
- alita_sdk/tools/xray/api_wrapper.py +58 -113
- alita_sdk/tools/yagmail/__init__.py +9 -3
- alita_sdk/tools/zephyr/__init__.py +12 -7
- alita_sdk/tools/zephyr_enterprise/__init__.py +16 -9
- alita_sdk/tools/zephyr_enterprise/api_wrapper.py +30 -15
- alita_sdk/tools/zephyr_essential/__init__.py +16 -10
- alita_sdk/tools/zephyr_essential/api_wrapper.py +297 -54
- alita_sdk/tools/zephyr_essential/client.py +6 -4
- alita_sdk/tools/zephyr_scale/__init__.py +13 -8
- alita_sdk/tools/zephyr_scale/api_wrapper.py +39 -31
- alita_sdk/tools/zephyr_squad/__init__.py +12 -7
- {alita_sdk-0.3.257.dist-info → alita_sdk-0.3.584.dist-info}/METADATA +184 -37
- alita_sdk-0.3.584.dist-info/RECORD +452 -0
- alita_sdk-0.3.584.dist-info/entry_points.txt +2 -0
- alita_sdk/tools/bitbucket/tools.py +0 -304
- alita_sdk-0.3.257.dist-info/RECORD +0 -343
- {alita_sdk-0.3.257.dist-info → alita_sdk-0.3.584.dist-info}/WHEEL +0 -0
- {alita_sdk-0.3.257.dist-info → alita_sdk-0.3.584.dist-info}/licenses/LICENSE +0 -0
- {alita_sdk-0.3.257.dist-info → alita_sdk-0.3.584.dist-info}/top_level.txt +0 -0
alita_sdk/runtime/tools/tool.py
CHANGED
|
@@ -86,7 +86,9 @@ Answer must be JSON only extractable by JSON.LOADS."""
|
|
|
86
86
|
else:
|
|
87
87
|
input_[-1].content += self.unstructured_output
|
|
88
88
|
completion = self.client.invoke(input_, config=config)
|
|
89
|
-
|
|
89
|
+
from ..langchain.utils import extract_text_from_completion
|
|
90
|
+
content_text = extract_text_from_completion(completion)
|
|
91
|
+
result = _extract_json(content_text.strip())
|
|
90
92
|
logger.info(f"ToolNode tool params: {result}")
|
|
91
93
|
try:
|
|
92
94
|
# handler for application added as a tool
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import json
|
|
2
2
|
import math
|
|
3
3
|
import types
|
|
4
|
-
from typing import Any, Optional, List, Dict, Callable, Generator
|
|
4
|
+
from typing import Any, Optional, List, Dict, Callable, Generator, OrderedDict
|
|
5
5
|
|
|
6
6
|
from langchain_core.documents import Document
|
|
7
7
|
from pydantic import BaseModel, model_validator, Field
|
|
@@ -12,10 +12,11 @@ from alita_sdk.tools.vector_adapters.VectorStoreAdapter import VectorStoreAdapte
|
|
|
12
12
|
from logging import getLogger
|
|
13
13
|
|
|
14
14
|
from ..utils.logging import dispatch_custom_event
|
|
15
|
-
from ..
|
|
15
|
+
from ..langchain.utils import extract_text_from_completion
|
|
16
16
|
|
|
17
17
|
logger = getLogger(__name__)
|
|
18
18
|
|
|
19
|
+
|
|
19
20
|
class IndexDocumentsModel(BaseModel):
|
|
20
21
|
documents: Any = Field(description="Generator of documents to index")
|
|
21
22
|
|
|
@@ -73,6 +74,10 @@ class StepBackSearchDocumentsModel(BaseModel):
|
|
|
73
74
|
}""",
|
|
74
75
|
default=None
|
|
75
76
|
)
|
|
77
|
+
extended_search: Optional[List[str]] = Field(
|
|
78
|
+
description="List of chunk types to search for (title, summary, propositions, keywords, documents)",
|
|
79
|
+
default=None
|
|
80
|
+
)
|
|
76
81
|
reranking_config: Optional[Dict[str, Dict[str, Any]]] = Field(
|
|
77
82
|
description="""Reranking configuration. Example:
|
|
78
83
|
{
|
|
@@ -87,10 +92,6 @@ class StepBackSearchDocumentsModel(BaseModel):
|
|
|
87
92
|
}""",
|
|
88
93
|
default=None
|
|
89
94
|
)
|
|
90
|
-
extended_search: Optional[List[str]] = Field(
|
|
91
|
-
description="List of chunk types to search for (title, summary, propositions, keywords, documents)",
|
|
92
|
-
default=None
|
|
93
|
-
)
|
|
94
95
|
|
|
95
96
|
STEPBACK_PROMPT = """Your task is to convert provided question into a more generic question that will be used for similarity search.
|
|
96
97
|
Remove all not important words, question words, but save all names, dates and acronym as in original question.
|
|
@@ -138,7 +139,7 @@ class VectorStoreWrapper(BaseToolApiWrapper):
|
|
|
138
139
|
embedding_model_params: dict
|
|
139
140
|
vectorstore_type: str
|
|
140
141
|
vectorstore_params: dict
|
|
141
|
-
max_docs_per_add: int =
|
|
142
|
+
max_docs_per_add: int = 20
|
|
142
143
|
dataset: str = None
|
|
143
144
|
embedding: Any = None
|
|
144
145
|
vectorstore: Any = None
|
|
@@ -208,16 +209,33 @@ class VectorStoreWrapper(BaseToolApiWrapper):
|
|
|
208
209
|
tool_name="_remove_collection"
|
|
209
210
|
)
|
|
210
211
|
|
|
211
|
-
def _get_indexed_ids(self,
|
|
212
|
+
def _get_indexed_ids(self, index_name: Optional[str] = '') -> List[str]:
|
|
212
213
|
"""Get all indexed document IDs from vectorstore"""
|
|
213
|
-
return self.vector_adapter.get_indexed_ids(self,
|
|
214
|
-
|
|
215
|
-
def list_collections(self) ->
|
|
216
|
-
"""List all collections in the vectorstore.
|
|
217
|
-
|
|
218
|
-
|
|
214
|
+
return self.vector_adapter.get_indexed_ids(self, index_name)
|
|
215
|
+
|
|
216
|
+
def list_collections(self) -> Any:
|
|
217
|
+
"""List all collections in the vectorstore.
|
|
218
|
+
Returns a list of collection names, or if no collections exist,
|
|
219
|
+
returns a dict with an empty list and a message."""
|
|
220
|
+
raw = self.vector_adapter.list_collections(self)
|
|
221
|
+
# Normalize raw result to a list of names
|
|
222
|
+
if not raw:
|
|
223
|
+
# No collections found
|
|
224
|
+
return {"collections": [], "message": "No indexed collections"}
|
|
225
|
+
if isinstance(raw, str):
|
|
226
|
+
# e.g., Chroma adapter returns comma-separated string
|
|
227
|
+
cols = [c for c in raw.split(',') if c]
|
|
228
|
+
else:
|
|
229
|
+
try:
|
|
230
|
+
cols = list(raw)
|
|
231
|
+
except Exception:
|
|
232
|
+
# Unexpected type, return raw directly
|
|
233
|
+
return raw
|
|
234
|
+
if not cols:
|
|
235
|
+
return {"collections": [], "message": "No indexed collections"}
|
|
236
|
+
return cols
|
|
219
237
|
|
|
220
|
-
def _clean_collection(self,
|
|
238
|
+
def _clean_collection(self, index_name: str = ''):
|
|
221
239
|
"""
|
|
222
240
|
Clean the vectorstore collection by deleting all indexed data.
|
|
223
241
|
"""
|
|
@@ -225,19 +243,15 @@ class VectorStoreWrapper(BaseToolApiWrapper):
|
|
|
225
243
|
f"Cleaning collection '{self.dataset}'",
|
|
226
244
|
tool_name="_clean_collection"
|
|
227
245
|
)
|
|
228
|
-
self.vector_adapter.clean_collection(self,
|
|
246
|
+
self.vector_adapter.clean_collection(self, index_name)
|
|
229
247
|
self._log_data(
|
|
230
248
|
f"Collection '{self.dataset}' has been cleaned. ",
|
|
231
249
|
tool_name="_clean_collection"
|
|
232
250
|
)
|
|
233
251
|
|
|
234
|
-
def
|
|
235
|
-
""" Get all indexed data from vectorstore for non-code content """
|
|
236
|
-
return self.vector_adapter.get_indexed_data(self, collection_name)
|
|
237
|
-
|
|
238
|
-
def _get_code_indexed_data(self, collection_suffix: str) -> Dict[str, Dict[str, Any]]:
|
|
252
|
+
def _get_code_indexed_data(self, index_name: str) -> Dict[str, Dict[str, Any]]:
|
|
239
253
|
""" Get all indexed data from vectorstore for code content """
|
|
240
|
-
return self.vector_adapter.get_code_indexed_data(self,
|
|
254
|
+
return self.vector_adapter.get_code_indexed_data(self, index_name)
|
|
241
255
|
|
|
242
256
|
def _add_to_collection(self, entry_id, new_collection_value):
|
|
243
257
|
"""Add a new collection name to the `collection` key in the `metadata` column."""
|
|
@@ -246,7 +260,7 @@ class VectorStoreWrapper(BaseToolApiWrapper):
|
|
|
246
260
|
def _reduce_duplicates(
|
|
247
261
|
self,
|
|
248
262
|
documents: Generator[Any, None, None],
|
|
249
|
-
|
|
263
|
+
index_name: str,
|
|
250
264
|
get_indexed_data: Callable,
|
|
251
265
|
key_fn: Callable,
|
|
252
266
|
compare_fn: Callable,
|
|
@@ -255,7 +269,7 @@ class VectorStoreWrapper(BaseToolApiWrapper):
|
|
|
255
269
|
) -> List[Any]:
|
|
256
270
|
"""Generic duplicate reduction logic for documents."""
|
|
257
271
|
self._log_data(log_msg, tool_name="index_documents")
|
|
258
|
-
indexed_data = get_indexed_data(
|
|
272
|
+
indexed_data = get_indexed_data(index_name)
|
|
259
273
|
indexed_keys = set(indexed_data.keys())
|
|
260
274
|
if not indexed_keys:
|
|
261
275
|
self._log_data("Vectorstore is empty, indexing all incoming documents", tool_name="index_documents")
|
|
@@ -266,14 +280,15 @@ class VectorStoreWrapper(BaseToolApiWrapper):
|
|
|
266
280
|
|
|
267
281
|
for document in documents:
|
|
268
282
|
key = key_fn(document)
|
|
269
|
-
|
|
283
|
+
key = key if isinstance(key, str) else str(key)
|
|
284
|
+
if key in indexed_keys and index_name == indexed_data[key]['metadata'].get('collection'):
|
|
270
285
|
if compare_fn(document, indexed_data[key]):
|
|
271
286
|
# Disabled addition of new collection to already indexed documents
|
|
272
287
|
# # check metadata.collection and update if needed
|
|
273
288
|
# for update_collection_id in remove_ids_fn(indexed_data, key):
|
|
274
289
|
# self._add_to_collection(
|
|
275
290
|
# update_collection_id,
|
|
276
|
-
#
|
|
291
|
+
# index_name
|
|
277
292
|
# )
|
|
278
293
|
continue
|
|
279
294
|
final_docs.append(document)
|
|
@@ -290,30 +305,10 @@ class VectorStoreWrapper(BaseToolApiWrapper):
|
|
|
290
305
|
|
|
291
306
|
return final_docs
|
|
292
307
|
|
|
293
|
-
def
|
|
294
|
-
return self._reduce_duplicates(
|
|
295
|
-
documents,
|
|
296
|
-
collection_suffix,
|
|
297
|
-
self._get_indexed_data,
|
|
298
|
-
lambda doc: doc.metadata.get('id'),
|
|
299
|
-
lambda doc, idx: (
|
|
300
|
-
doc.metadata.get('updated_on') and
|
|
301
|
-
idx['metadata'].get('updated_on') and
|
|
302
|
-
doc.metadata.get('updated_on') == idx['metadata'].get('updated_on')
|
|
303
|
-
),
|
|
304
|
-
lambda idx_data, key: (
|
|
305
|
-
idx_data[key]['all_chunks'] +
|
|
306
|
-
[idx_data[dep_id]['id'] for dep_id in idx_data[key][IndexerKeywords.DEPENDENT_DOCS.value]] +
|
|
307
|
-
[chunk_db_id for dep_id in idx_data[key][IndexerKeywords.DEPENDENT_DOCS.value]
|
|
308
|
-
for chunk_db_id in idx_data[dep_id]['all_chunks']]
|
|
309
|
-
),
|
|
310
|
-
log_msg="Verification of documents to index started"
|
|
311
|
-
)
|
|
312
|
-
|
|
313
|
-
def _reduce_code_duplicates(self, documents: Generator[Any, None, None], collection_suffix: str) -> List[Any]:
|
|
308
|
+
def _reduce_code_duplicates(self, documents: Generator[Any, None, None], index_name: str) -> List[Any]:
|
|
314
309
|
return self._reduce_duplicates(
|
|
315
310
|
documents,
|
|
316
|
-
|
|
311
|
+
index_name,
|
|
317
312
|
self._get_code_indexed_data,
|
|
318
313
|
lambda doc: doc.metadata.get('filename'),
|
|
319
314
|
lambda doc, idx: (
|
|
@@ -325,7 +320,7 @@ class VectorStoreWrapper(BaseToolApiWrapper):
|
|
|
325
320
|
log_msg="Verification of code documents to index started"
|
|
326
321
|
)
|
|
327
322
|
|
|
328
|
-
def index_documents(self, documents: Generator[Document, None, None],
|
|
323
|
+
def index_documents(self, documents: Generator[Document, None, None], index_name: str, progress_step: int = 20, clean_index: bool = True, is_code: bool = True):
|
|
329
324
|
""" Index documents in the vectorstore.
|
|
330
325
|
|
|
331
326
|
Args:
|
|
@@ -336,13 +331,13 @@ class VectorStoreWrapper(BaseToolApiWrapper):
|
|
|
336
331
|
|
|
337
332
|
from ..langchain.interfaces.llm_processor import add_documents
|
|
338
333
|
|
|
339
|
-
self._log_tool_event(message=f"Starting the indexing... Parameters: {
|
|
334
|
+
self._log_tool_event(message=f"Starting the indexing... Parameters: {index_name=}, {clean_index=}, {is_code}", tool_name="index_documents")
|
|
340
335
|
# pre-process documents if needed (find duplicates, etc.)
|
|
341
336
|
if clean_index:
|
|
342
337
|
logger.info("Cleaning index before re-indexing all documents.")
|
|
343
338
|
self._log_data("Cleaning index before re-indexing all documents. Previous index will be removed", tool_name="index_documents")
|
|
344
339
|
try:
|
|
345
|
-
self._clean_collection(
|
|
340
|
+
self._clean_collection(index_name)
|
|
346
341
|
self.vectoradapter.persist()
|
|
347
342
|
self.vectoradapter.vacuum()
|
|
348
343
|
self._log_data("Previous index has been removed",
|
|
@@ -356,8 +351,7 @@ class VectorStoreWrapper(BaseToolApiWrapper):
|
|
|
356
351
|
message="Filter for duplicates",
|
|
357
352
|
tool_name="index_documents")
|
|
358
353
|
# remove duplicates based on metadata 'id' and 'updated_on' or 'commit_hash' fields
|
|
359
|
-
documents = self._reduce_code_duplicates(documents,
|
|
360
|
-
else self._reduce_non_code_duplicates(documents, collection_suffix)
|
|
354
|
+
documents = self._reduce_code_duplicates(documents, index_name)
|
|
361
355
|
self._log_tool_event(
|
|
362
356
|
message="All the duplicates were filtered out. Proceeding with indexing.",
|
|
363
357
|
tool_name="index_documents")
|
|
@@ -385,13 +379,13 @@ class VectorStoreWrapper(BaseToolApiWrapper):
|
|
|
385
379
|
self._log_tool_event(message=f"Documents for indexing were processed. Total documents: {len(documents)}",
|
|
386
380
|
tool_name="index_documents")
|
|
387
381
|
|
|
388
|
-
# if
|
|
389
|
-
if
|
|
382
|
+
# if index_name is provided, add it to metadata of each document
|
|
383
|
+
if index_name:
|
|
390
384
|
for doc in documents:
|
|
391
385
|
if not doc.metadata.get('collection'):
|
|
392
|
-
doc.metadata['collection'] =
|
|
386
|
+
doc.metadata['collection'] = index_name
|
|
393
387
|
else:
|
|
394
|
-
doc.metadata['collection'] += f";{
|
|
388
|
+
doc.metadata['collection'] += f";{index_name}"
|
|
395
389
|
|
|
396
390
|
total_docs = len(documents)
|
|
397
391
|
documents_count = 0
|
|
@@ -422,7 +416,8 @@ class VectorStoreWrapper(BaseToolApiWrapper):
|
|
|
422
416
|
return {"status": "error", "message": f"Error: {format_exc()}"}
|
|
423
417
|
if _documents:
|
|
424
418
|
add_documents(vectorstore=self.vectorstore, documents=_documents)
|
|
425
|
-
return {"status": "ok", "message": f"successfully indexed {documents_count} documents"
|
|
419
|
+
return {"status": "ok", "message": f"successfully indexed {documents_count} documents" if documents_count > 0
|
|
420
|
+
else "No new documents to index."}
|
|
426
421
|
|
|
427
422
|
def search_documents(self, query:str, doctype: str = 'code',
|
|
428
423
|
filter:dict|str={}, cut_off: float=0.5,
|
|
@@ -542,11 +537,18 @@ class VectorStoreWrapper(BaseToolApiWrapper):
|
|
|
542
537
|
|
|
543
538
|
# Initialize document map for tracking by ID
|
|
544
539
|
doc_map = {
|
|
545
|
-
|
|
546
|
-
|
|
547
|
-
|
|
540
|
+
(
|
|
541
|
+
f"{doc.metadata.get('id', f'idx_{i}')}_{doc.metadata['chunk_id']}"
|
|
542
|
+
if 'chunk_id' in doc.metadata
|
|
543
|
+
else doc.metadata.get('id', f"idx_{i}")
|
|
544
|
+
): (doc, 1 - score)
|
|
548
545
|
for i, (doc, score) in enumerate(vector_items)
|
|
549
546
|
}
|
|
547
|
+
|
|
548
|
+
# Sort the items by the new score in descending order
|
|
549
|
+
doc_map = OrderedDict(
|
|
550
|
+
sorted(doc_map.items(), key=lambda x: x[1][1], reverse=True)
|
|
551
|
+
)
|
|
550
552
|
|
|
551
553
|
# Process full-text search if configured
|
|
552
554
|
if full_text_search and full_text_search.get('enabled') and full_text_search.get('fields'):
|
|
@@ -597,7 +599,7 @@ class VectorStoreWrapper(BaseToolApiWrapper):
|
|
|
597
599
|
# Apply cutoff filter
|
|
598
600
|
if cut_off:
|
|
599
601
|
# Filter out items above the cutoff score (since the lower the score, the better)
|
|
600
|
-
combined_items = [item for item in combined_items if abs(item[1])
|
|
602
|
+
combined_items = [item for item in combined_items if abs(item[1]) >= cut_off]
|
|
601
603
|
|
|
602
604
|
# Sort by score and limit results
|
|
603
605
|
# DISABLED: for chroma we want ascending order (lower score is better), for others descending
|
|
@@ -684,8 +686,10 @@ class VectorStoreWrapper(BaseToolApiWrapper):
|
|
|
684
686
|
]
|
|
685
687
|
)
|
|
686
688
|
])
|
|
689
|
+
# Extract text content safely (handles both string and list content from thinking models)
|
|
690
|
+
search_query = extract_text_from_completion(result)
|
|
687
691
|
search_results = self.search_documents(
|
|
688
|
-
|
|
692
|
+
search_query, doctype, filter, cut_off, search_top,
|
|
689
693
|
full_text_search=full_text_search,
|
|
690
694
|
reranking_config=reranking_config,
|
|
691
695
|
extended_search=extended_search
|
|
@@ -714,7 +718,8 @@ class VectorStoreWrapper(BaseToolApiWrapper):
|
|
|
714
718
|
]
|
|
715
719
|
)
|
|
716
720
|
])
|
|
717
|
-
|
|
721
|
+
# Extract text content safely (handles both string and list content from thinking models)
|
|
722
|
+
return extract_text_from_completion(result)
|
|
718
723
|
|
|
719
724
|
def _log_data(self, message: str, tool_name: str = "index_data"):
|
|
720
725
|
"""Log data and dispatch custom event for indexing progress"""
|
|
@@ -758,4 +763,3 @@ class VectorStoreWrapper(BaseToolApiWrapper):
|
|
|
758
763
|
"args_schema": StepBackSearchDocumentsModel
|
|
759
764
|
}
|
|
760
765
|
]
|
|
761
|
-
|