alita-sdk 0.3.257__py3-none-any.whl → 0.3.584__py3-none-any.whl
This diff represents the content of publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between the two package versions as they appear in their public registries.
Potentially problematic release.
This version of alita-sdk might be problematic.
- alita_sdk/cli/__init__.py +10 -0
- alita_sdk/cli/__main__.py +17 -0
- alita_sdk/cli/agent/__init__.py +5 -0
- alita_sdk/cli/agent/default.py +258 -0
- alita_sdk/cli/agent_executor.py +155 -0
- alita_sdk/cli/agent_loader.py +215 -0
- alita_sdk/cli/agent_ui.py +228 -0
- alita_sdk/cli/agents.py +3794 -0
- alita_sdk/cli/callbacks.py +647 -0
- alita_sdk/cli/cli.py +168 -0
- alita_sdk/cli/config.py +306 -0
- alita_sdk/cli/context/__init__.py +30 -0
- alita_sdk/cli/context/cleanup.py +198 -0
- alita_sdk/cli/context/manager.py +731 -0
- alita_sdk/cli/context/message.py +285 -0
- alita_sdk/cli/context/strategies.py +289 -0
- alita_sdk/cli/context/token_estimation.py +127 -0
- alita_sdk/cli/formatting.py +182 -0
- alita_sdk/cli/input_handler.py +419 -0
- alita_sdk/cli/inventory.py +1073 -0
- alita_sdk/cli/mcp_loader.py +315 -0
- alita_sdk/cli/toolkit.py +327 -0
- alita_sdk/cli/toolkit_loader.py +85 -0
- alita_sdk/cli/tools/__init__.py +43 -0
- alita_sdk/cli/tools/approval.py +224 -0
- alita_sdk/cli/tools/filesystem.py +1751 -0
- alita_sdk/cli/tools/planning.py +389 -0
- alita_sdk/cli/tools/terminal.py +414 -0
- alita_sdk/community/__init__.py +72 -12
- alita_sdk/community/inventory/__init__.py +236 -0
- alita_sdk/community/inventory/config.py +257 -0
- alita_sdk/community/inventory/enrichment.py +2137 -0
- alita_sdk/community/inventory/extractors.py +1469 -0
- alita_sdk/community/inventory/ingestion.py +3172 -0
- alita_sdk/community/inventory/knowledge_graph.py +1457 -0
- alita_sdk/community/inventory/parsers/__init__.py +218 -0
- alita_sdk/community/inventory/parsers/base.py +295 -0
- alita_sdk/community/inventory/parsers/csharp_parser.py +907 -0
- alita_sdk/community/inventory/parsers/go_parser.py +851 -0
- alita_sdk/community/inventory/parsers/html_parser.py +389 -0
- alita_sdk/community/inventory/parsers/java_parser.py +593 -0
- alita_sdk/community/inventory/parsers/javascript_parser.py +629 -0
- alita_sdk/community/inventory/parsers/kotlin_parser.py +768 -0
- alita_sdk/community/inventory/parsers/markdown_parser.py +362 -0
- alita_sdk/community/inventory/parsers/python_parser.py +604 -0
- alita_sdk/community/inventory/parsers/rust_parser.py +858 -0
- alita_sdk/community/inventory/parsers/swift_parser.py +832 -0
- alita_sdk/community/inventory/parsers/text_parser.py +322 -0
- alita_sdk/community/inventory/parsers/yaml_parser.py +370 -0
- alita_sdk/community/inventory/patterns/__init__.py +61 -0
- alita_sdk/community/inventory/patterns/ast_adapter.py +380 -0
- alita_sdk/community/inventory/patterns/loader.py +348 -0
- alita_sdk/community/inventory/patterns/registry.py +198 -0
- alita_sdk/community/inventory/presets.py +535 -0
- alita_sdk/community/inventory/retrieval.py +1403 -0
- alita_sdk/community/inventory/toolkit.py +173 -0
- alita_sdk/community/inventory/toolkit_utils.py +176 -0
- alita_sdk/community/inventory/visualize.py +1370 -0
- alita_sdk/configurations/__init__.py +11 -0
- alita_sdk/configurations/ado.py +148 -2
- alita_sdk/configurations/azure_search.py +1 -1
- alita_sdk/configurations/bigquery.py +1 -1
- alita_sdk/configurations/bitbucket.py +94 -2
- alita_sdk/configurations/browser.py +18 -0
- alita_sdk/configurations/carrier.py +19 -0
- alita_sdk/configurations/confluence.py +130 -1
- alita_sdk/configurations/delta_lake.py +1 -1
- alita_sdk/configurations/figma.py +76 -5
- alita_sdk/configurations/github.py +65 -1
- alita_sdk/configurations/gitlab.py +81 -0
- alita_sdk/configurations/google_places.py +17 -0
- alita_sdk/configurations/jira.py +103 -0
- alita_sdk/configurations/openapi.py +323 -0
- alita_sdk/configurations/postman.py +1 -1
- alita_sdk/configurations/qtest.py +72 -3
- alita_sdk/configurations/report_portal.py +115 -0
- alita_sdk/configurations/salesforce.py +19 -0
- alita_sdk/configurations/service_now.py +1 -12
- alita_sdk/configurations/sharepoint.py +167 -0
- alita_sdk/configurations/sonar.py +18 -0
- alita_sdk/configurations/sql.py +20 -0
- alita_sdk/configurations/testio.py +101 -0
- alita_sdk/configurations/testrail.py +88 -0
- alita_sdk/configurations/xray.py +94 -1
- alita_sdk/configurations/zephyr_enterprise.py +94 -1
- alita_sdk/configurations/zephyr_essential.py +95 -0
- alita_sdk/runtime/clients/artifact.py +21 -4
- alita_sdk/runtime/clients/client.py +458 -67
- alita_sdk/runtime/clients/mcp_discovery.py +342 -0
- alita_sdk/runtime/clients/mcp_manager.py +262 -0
- alita_sdk/runtime/clients/sandbox_client.py +352 -0
- alita_sdk/runtime/langchain/_constants_bkup.py +1318 -0
- alita_sdk/runtime/langchain/assistant.py +183 -43
- alita_sdk/runtime/langchain/constants.py +647 -1
- alita_sdk/runtime/langchain/document_loaders/AlitaDocxMammothLoader.py +315 -3
- alita_sdk/runtime/langchain/document_loaders/AlitaExcelLoader.py +209 -31
- alita_sdk/runtime/langchain/document_loaders/AlitaImageLoader.py +1 -1
- alita_sdk/runtime/langchain/document_loaders/AlitaJSONLinesLoader.py +77 -0
- alita_sdk/runtime/langchain/document_loaders/AlitaJSONLoader.py +10 -3
- alita_sdk/runtime/langchain/document_loaders/AlitaMarkdownLoader.py +66 -0
- alita_sdk/runtime/langchain/document_loaders/AlitaPDFLoader.py +79 -10
- alita_sdk/runtime/langchain/document_loaders/AlitaPowerPointLoader.py +52 -15
- alita_sdk/runtime/langchain/document_loaders/AlitaPythonLoader.py +9 -0
- alita_sdk/runtime/langchain/document_loaders/AlitaTableLoader.py +1 -4
- alita_sdk/runtime/langchain/document_loaders/AlitaTextLoader.py +15 -2
- alita_sdk/runtime/langchain/document_loaders/ImageParser.py +30 -0
- alita_sdk/runtime/langchain/document_loaders/constants.py +189 -41
- alita_sdk/runtime/langchain/interfaces/llm_processor.py +4 -2
- alita_sdk/runtime/langchain/langraph_agent.py +493 -105
- alita_sdk/runtime/langchain/utils.py +118 -8
- alita_sdk/runtime/llms/preloaded.py +2 -6
- alita_sdk/runtime/models/mcp_models.py +61 -0
- alita_sdk/runtime/skills/__init__.py +91 -0
- alita_sdk/runtime/skills/callbacks.py +498 -0
- alita_sdk/runtime/skills/discovery.py +540 -0
- alita_sdk/runtime/skills/executor.py +610 -0
- alita_sdk/runtime/skills/input_builder.py +371 -0
- alita_sdk/runtime/skills/models.py +330 -0
- alita_sdk/runtime/skills/registry.py +355 -0
- alita_sdk/runtime/skills/skill_runner.py +330 -0
- alita_sdk/runtime/toolkits/__init__.py +28 -0
- alita_sdk/runtime/toolkits/application.py +14 -4
- alita_sdk/runtime/toolkits/artifact.py +25 -9
- alita_sdk/runtime/toolkits/datasource.py +13 -6
- alita_sdk/runtime/toolkits/mcp.py +782 -0
- alita_sdk/runtime/toolkits/planning.py +178 -0
- alita_sdk/runtime/toolkits/skill_router.py +238 -0
- alita_sdk/runtime/toolkits/subgraph.py +11 -6
- alita_sdk/runtime/toolkits/tools.py +314 -70
- alita_sdk/runtime/toolkits/vectorstore.py +11 -5
- alita_sdk/runtime/tools/__init__.py +24 -0
- alita_sdk/runtime/tools/application.py +16 -4
- alita_sdk/runtime/tools/artifact.py +367 -33
- alita_sdk/runtime/tools/data_analysis.py +183 -0
- alita_sdk/runtime/tools/function.py +100 -4
- alita_sdk/runtime/tools/graph.py +81 -0
- alita_sdk/runtime/tools/image_generation.py +218 -0
- alita_sdk/runtime/tools/llm.py +1032 -177
- alita_sdk/runtime/tools/loop.py +3 -1
- alita_sdk/runtime/tools/loop_output.py +3 -1
- alita_sdk/runtime/tools/mcp_inspect_tool.py +284 -0
- alita_sdk/runtime/tools/mcp_remote_tool.py +181 -0
- alita_sdk/runtime/tools/mcp_server_tool.py +3 -1
- alita_sdk/runtime/tools/planning/__init__.py +36 -0
- alita_sdk/runtime/tools/planning/models.py +246 -0
- alita_sdk/runtime/tools/planning/wrapper.py +607 -0
- alita_sdk/runtime/tools/router.py +2 -1
- alita_sdk/runtime/tools/sandbox.py +375 -0
- alita_sdk/runtime/tools/skill_router.py +776 -0
- alita_sdk/runtime/tools/tool.py +3 -1
- alita_sdk/runtime/tools/vectorstore.py +69 -65
- alita_sdk/runtime/tools/vectorstore_base.py +163 -90
- alita_sdk/runtime/utils/AlitaCallback.py +137 -21
- alita_sdk/runtime/utils/constants.py +5 -1
- alita_sdk/runtime/utils/mcp_client.py +492 -0
- alita_sdk/runtime/utils/mcp_oauth.py +361 -0
- alita_sdk/runtime/utils/mcp_sse_client.py +434 -0
- alita_sdk/runtime/utils/mcp_tools_discovery.py +124 -0
- alita_sdk/runtime/utils/streamlit.py +41 -14
- alita_sdk/runtime/utils/toolkit_utils.py +28 -9
- alita_sdk/runtime/utils/utils.py +48 -0
- alita_sdk/tools/__init__.py +135 -37
- alita_sdk/tools/ado/__init__.py +2 -2
- alita_sdk/tools/ado/repos/__init__.py +16 -19
- alita_sdk/tools/ado/repos/repos_wrapper.py +12 -20
- alita_sdk/tools/ado/test_plan/__init__.py +27 -8
- alita_sdk/tools/ado/test_plan/test_plan_wrapper.py +56 -28
- alita_sdk/tools/ado/wiki/__init__.py +28 -12
- alita_sdk/tools/ado/wiki/ado_wrapper.py +114 -40
- alita_sdk/tools/ado/work_item/__init__.py +28 -12
- alita_sdk/tools/ado/work_item/ado_wrapper.py +95 -11
- alita_sdk/tools/advanced_jira_mining/__init__.py +13 -8
- alita_sdk/tools/aws/delta_lake/__init__.py +15 -11
- alita_sdk/tools/aws/delta_lake/tool.py +5 -1
- alita_sdk/tools/azure_ai/search/__init__.py +14 -8
- alita_sdk/tools/base/tool.py +5 -1
- alita_sdk/tools/base_indexer_toolkit.py +454 -110
- alita_sdk/tools/bitbucket/__init__.py +28 -19
- alita_sdk/tools/bitbucket/api_wrapper.py +285 -27
- alita_sdk/tools/bitbucket/cloud_api_wrapper.py +5 -5
- alita_sdk/tools/browser/__init__.py +41 -16
- alita_sdk/tools/browser/crawler.py +3 -1
- alita_sdk/tools/browser/utils.py +15 -6
- alita_sdk/tools/carrier/__init__.py +18 -17
- alita_sdk/tools/carrier/backend_reports_tool.py +8 -4
- alita_sdk/tools/carrier/excel_reporter.py +8 -4
- alita_sdk/tools/chunkers/__init__.py +3 -1
- alita_sdk/tools/chunkers/code/codeparser.py +1 -1
- alita_sdk/tools/chunkers/sematic/json_chunker.py +2 -1
- alita_sdk/tools/chunkers/sematic/markdown_chunker.py +97 -6
- alita_sdk/tools/chunkers/sematic/proposal_chunker.py +1 -1
- alita_sdk/tools/chunkers/universal_chunker.py +270 -0
- alita_sdk/tools/cloud/aws/__init__.py +12 -7
- alita_sdk/tools/cloud/azure/__init__.py +12 -7
- alita_sdk/tools/cloud/gcp/__init__.py +12 -7
- alita_sdk/tools/cloud/k8s/__init__.py +12 -7
- alita_sdk/tools/code/linter/__init__.py +10 -8
- alita_sdk/tools/code/loaders/codesearcher.py +3 -2
- alita_sdk/tools/code/sonar/__init__.py +21 -13
- alita_sdk/tools/code_indexer_toolkit.py +199 -0
- alita_sdk/tools/confluence/__init__.py +22 -14
- alita_sdk/tools/confluence/api_wrapper.py +197 -58
- alita_sdk/tools/confluence/loader.py +14 -2
- alita_sdk/tools/custom_open_api/__init__.py +12 -5
- alita_sdk/tools/elastic/__init__.py +11 -8
- alita_sdk/tools/elitea_base.py +546 -64
- alita_sdk/tools/figma/__init__.py +60 -11
- alita_sdk/tools/figma/api_wrapper.py +1400 -167
- alita_sdk/tools/figma/figma_client.py +73 -0
- alita_sdk/tools/figma/toon_tools.py +2748 -0
- alita_sdk/tools/github/__init__.py +18 -17
- alita_sdk/tools/github/api_wrapper.py +9 -26
- alita_sdk/tools/github/github_client.py +81 -12
- alita_sdk/tools/github/schemas.py +2 -1
- alita_sdk/tools/github/tool.py +5 -1
- alita_sdk/tools/gitlab/__init__.py +19 -13
- alita_sdk/tools/gitlab/api_wrapper.py +256 -80
- alita_sdk/tools/gitlab_org/__init__.py +14 -10
- alita_sdk/tools/google/bigquery/__init__.py +14 -13
- alita_sdk/tools/google/bigquery/tool.py +5 -1
- alita_sdk/tools/google_places/__init__.py +21 -11
- alita_sdk/tools/jira/__init__.py +22 -11
- alita_sdk/tools/jira/api_wrapper.py +315 -168
- alita_sdk/tools/keycloak/__init__.py +11 -8
- alita_sdk/tools/localgit/__init__.py +9 -3
- alita_sdk/tools/localgit/local_git.py +62 -54
- alita_sdk/tools/localgit/tool.py +5 -1
- alita_sdk/tools/memory/__init__.py +38 -14
- alita_sdk/tools/non_code_indexer_toolkit.py +7 -2
- alita_sdk/tools/ocr/__init__.py +11 -8
- alita_sdk/tools/openapi/__init__.py +491 -106
- alita_sdk/tools/openapi/api_wrapper.py +1357 -0
- alita_sdk/tools/openapi/tool.py +20 -0
- alita_sdk/tools/pandas/__init__.py +20 -12
- alita_sdk/tools/pandas/api_wrapper.py +40 -45
- alita_sdk/tools/pandas/dataframe/generator/base.py +3 -1
- alita_sdk/tools/postman/__init__.py +11 -11
- alita_sdk/tools/postman/api_wrapper.py +19 -8
- alita_sdk/tools/postman/postman_analysis.py +8 -1
- alita_sdk/tools/pptx/__init__.py +11 -10
- alita_sdk/tools/qtest/__init__.py +22 -14
- alita_sdk/tools/qtest/api_wrapper.py +1784 -88
- alita_sdk/tools/rally/__init__.py +13 -10
- alita_sdk/tools/report_portal/__init__.py +23 -16
- alita_sdk/tools/salesforce/__init__.py +22 -16
- alita_sdk/tools/servicenow/__init__.py +21 -16
- alita_sdk/tools/servicenow/api_wrapper.py +1 -1
- alita_sdk/tools/sharepoint/__init__.py +17 -14
- alita_sdk/tools/sharepoint/api_wrapper.py +179 -39
- alita_sdk/tools/sharepoint/authorization_helper.py +191 -1
- alita_sdk/tools/sharepoint/utils.py +8 -2
- alita_sdk/tools/slack/__init__.py +13 -8
- alita_sdk/tools/sql/__init__.py +22 -19
- alita_sdk/tools/sql/api_wrapper.py +71 -23
- alita_sdk/tools/testio/__init__.py +21 -13
- alita_sdk/tools/testrail/__init__.py +13 -11
- alita_sdk/tools/testrail/api_wrapper.py +214 -46
- alita_sdk/tools/utils/__init__.py +28 -4
- alita_sdk/tools/utils/content_parser.py +241 -55
- alita_sdk/tools/utils/text_operations.py +254 -0
- alita_sdk/tools/vector_adapters/VectorStoreAdapter.py +83 -27
- alita_sdk/tools/xray/__init__.py +18 -14
- alita_sdk/tools/xray/api_wrapper.py +58 -113
- alita_sdk/tools/yagmail/__init__.py +9 -3
- alita_sdk/tools/zephyr/__init__.py +12 -7
- alita_sdk/tools/zephyr_enterprise/__init__.py +16 -9
- alita_sdk/tools/zephyr_enterprise/api_wrapper.py +30 -15
- alita_sdk/tools/zephyr_essential/__init__.py +16 -10
- alita_sdk/tools/zephyr_essential/api_wrapper.py +297 -54
- alita_sdk/tools/zephyr_essential/client.py +6 -4
- alita_sdk/tools/zephyr_scale/__init__.py +13 -8
- alita_sdk/tools/zephyr_scale/api_wrapper.py +39 -31
- alita_sdk/tools/zephyr_squad/__init__.py +12 -7
- {alita_sdk-0.3.257.dist-info → alita_sdk-0.3.584.dist-info}/METADATA +184 -37
- alita_sdk-0.3.584.dist-info/RECORD +452 -0
- alita_sdk-0.3.584.dist-info/entry_points.txt +2 -0
- alita_sdk/tools/bitbucket/tools.py +0 -304
- alita_sdk-0.3.257.dist-info/RECORD +0 -343
- {alita_sdk-0.3.257.dist-info → alita_sdk-0.3.584.dist-info}/WHEEL +0 -0
- {alita_sdk-0.3.257.dist-info → alita_sdk-0.3.584.dist-info}/licenses/LICENSE +0 -0
- {alita_sdk-0.3.257.dist-info → alita_sdk-0.3.584.dist-info}/top_level.txt +0 -0
alita_sdk/tools/elitea_base.py
CHANGED
@@ -11,12 +11,14 @@ from pydantic import BaseModel, create_model, Field, SecretStr
 
 # from alita_sdk.runtime.langchain.interfaces.llm_processor import get_embeddings
 from .chunkers import markdown_chunker
-from .utils import TOOLKIT_SPLITTER
 from .vector_adapters.VectorStoreAdapter import VectorStoreAdapterFactory
 from ..runtime.utils.utils import IndexerKeywords
 
 logger = logging.getLogger(__name__)
 
+INDEX_TOOL_NAMES = ['index_data', 'remove_index', 'list_collections', 'search_index', 'stepback_search_index',
+                    'stepback_summary_index']
+
 LoaderSchema = create_model(
     "LoaderSchema",
     branch=(Optional[str], Field(
@@ -30,36 +32,39 @@ LoaderSchema = create_model(
 # Base Vector Store Schema Models
 BaseIndexParams = create_model(
     "BaseIndexParams",
-
+    index_name=(str, Field(description="Index name (max 7 characters)", min_length=1, max_length=7)),
 )
 
 BaseCodeIndexParams = create_model(
     "BaseCodeIndexParams",
-
+    index_name=(str, Field(description="Index name (max 7 characters)", min_length=1, max_length=7)),
+    clean_index=(Optional[bool], Field(default=False, description="Optional flag to enforce clean existing index before indexing new data")),
+    progress_step=(Optional[int], Field(default=5, ge=0, le=100,
+                                        description="Optional step size for progress reporting during indexing")),
     branch=(Optional[str], Field(description="Branch to index files from. Defaults to active branch if None.", default=None)),
     whitelist=(Optional[List[str]], Field(description='File extensions or paths to include. Defaults to all files if None. Example: ["*.md", "*.java"]', default=None)),
     blacklist=(Optional[List[str]], Field(description='File extensions or paths to exclude. Defaults to no exclusions if None. Example: ["*.md", "*.java"]', default=None)),
-
+
 )
 
 RemoveIndexParams = create_model(
     "RemoveIndexParams",
-
+    index_name=(Optional[str], Field(description="Optional index name (max 7 characters)", default="", max_length=7)),
 )
 
 BaseSearchParams = create_model(
     "BaseSearchParams",
     query=(str, Field(description="Query text to search in the index")),
-
-    description="Optional
+    index_name=(Optional[str], Field(
+        description="Optional index name (max 7 characters). Leave empty to search across all datasets",
         default="", max_length=7)),
     filter=(Optional[dict], Field(
         description="Filter to apply to the search results. Can be a dictionary or a JSON string.",
         default={},
         examples=["{\"key\": \"value\"}", "{\"status\": \"active\"}"]
     )),
-    cut_off=(Optional[float], Field(description="Cut-off score for search results", default=0.5)),
-    search_top=(Optional[int], Field(description="Number of top results to return", default=10)),
+    cut_off=(Optional[float], Field(description="Cut-off score for search results", default=0.5, ge=0, le=1)),
+    search_top=(Optional[int], Field(description="Number of top results to return", default=10, ge=0)),
     full_text_search=(Optional[Dict[str, Any]], Field(
         description="Full text search parameters. Can be a dictionary with search options.",
         default=None
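The schema changes above move validation into the Pydantic fields themselves: the 7-character cap on the new `index_name` and the `ge`/`le` bounds on `cut_off` and `search_top` are now rejected at parse time rather than inside the tool. A minimal sketch of that behavior under Pydantic v2 (the model mirrors `BaseIndexParams` from the diff; the inputs are invented):

```python
# Minimal sketch, assuming Pydantic v2; the model mirrors the diff above,
# the sample inputs are made up for illustration.
from pydantic import Field, ValidationError, create_model

BaseIndexParams = create_model(
    "BaseIndexParams",
    index_name=(str, Field(description="Index name (max 7 characters)",
                           min_length=1, max_length=7)),
)

BaseIndexParams(index_name="docs")               # accepted (4 chars)
try:
    BaseIndexParams(index_name="documentation")  # 13 chars -> rejected
except ValidationError as exc:
    print(exc.errors()[0]["type"])               # string_too_long
```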
@@ -81,52 +86,130 @@ BaseSearchParams = create_model(
 BaseStepbackSearchParams = create_model(
     "BaseStepbackSearchParams",
     query=(str, Field(description="Query text to search in the index")),
-
+    index_name=(Optional[str], Field(description="Optional index name (max 7 characters)", default="", max_length=7)),
     messages=(Optional[List], Field(description="Chat messages for stepback search context", default=[])),
     filter=(Optional[dict], Field(
         description="Filter to apply to the search results. Can be a dictionary or a JSON string.",
         default={},
         examples=["{\"key\": \"value\"}", "{\"status\": \"active\"}"]
     )),
-    cut_off=(Optional[float], Field(description="Cut-off score for search results", default=0.5)),
-    search_top=(Optional[int], Field(description="Number of top results to return", default=10)),
-    reranker=(Optional[dict], Field(
-        description="Reranker configuration. Can be a dictionary with reranking parameters.",
-        default={}
-    )),
+    cut_off=(Optional[float], Field(description="Cut-off score for search results", default=0.5, ge=0, le=1)),
+    search_top=(Optional[int], Field(description="Number of top results to return", default=10, ge=0)),
     full_text_search=(Optional[Dict[str, Any]], Field(
         description="Full text search parameters. Can be a dictionary with search options.",
         default=None
     )),
-    reranking_config=(Optional[Dict[str, Dict[str, Any]]], Field(
-        description="Reranking configuration. Can be a dictionary with reranking settings.",
-        default=None
-    )),
     extended_search=(Optional[List[str]], Field(
         description="List of additional fields to include in the search results.",
         default=None
     )),
+    reranker=(Optional[dict], Field(
+        description="Reranker configuration. Can be a dictionary with reranking parameters.",
+        default={}
+    )),
+    reranking_config=(Optional[Dict[str, Dict[str, Any]]], Field(
+        description="Reranking configuration. Can be a dictionary with reranking settings.",
+        default=None
+    )),
+
 )
 
 BaseIndexDataParams = create_model(
     "indexData",
     __base__=BaseIndexParams,
-    progress_step=(Optional[int], Field(default=5, ge=0, le=100,
-                                        description="Optional step size for progress reporting during indexing")),
     clean_index=(Optional[bool], Field(default=False,
                                        description="Optional flag to enforce clean existing index before indexing new data")),
+    progress_step=(Optional[int], Field(default=5, ge=0, le=100,
+                                        description="Optional step size for progress reporting during indexing")),
     chunking_tool=(Literal[None,'markdown', 'statistical', 'proposal'], Field(description="Name of chunking tool", default=None)),
     chunking_config=(Optional[dict], Field(description="Chunking tool configuration", default_factory=dict)),
 )
 
+# File Operations Schema Models
+ReadFileInput = create_model(
+    "ReadFileInput",
+    file_path=(str, Field(description="Path to the file to read")),
+    branch=(Optional[str], Field(description="Branch name. If None, uses active branch.", default=None)),
+    offset=(Optional[int], Field(description="Starting line number (1-indexed, inclusive). Read from this line onwards.", default=None, ge=1)),
+    limit=(Optional[int], Field(description="Number of lines to read from offset. If None, reads to end.", default=None, ge=1)),
+    head=(Optional[int], Field(description="Read only the first N lines. Alternative to offset/limit.", default=None, ge=1)),
+    tail=(Optional[int], Field(description="Read only the last N lines. Alternative to offset/limit.", default=None, ge=1)),
+)
+
+ReadFileChunkInput = create_model(
+    "ReadFileChunkInput",
+    file_path=(str, Field(description="Path to the file to read")),
+    branch=(Optional[str], Field(description="Branch name. If None, uses active branch.", default=None)),
+    start_line=(int, Field(description="Starting line number (1-indexed, inclusive)", ge=1)),
+    end_line=(Optional[int], Field(description="Ending line number (1-indexed, inclusive). If None, reads to end.", default=None, ge=1)),
+)
+
+ReadMultipleFilesInput = create_model(
+    "ReadMultipleFilesInput",
+    file_paths=(List[str], Field(description="List of file paths to read", min_length=1)),
+    branch=(Optional[str], Field(description="Branch name. If None, uses active branch.", default=None)),
+    offset=(Optional[int], Field(description="Starting line number for all files (1-indexed)", default=None, ge=1)),
+    limit=(Optional[int], Field(description="Number of lines to read from offset for all files", default=None, ge=1)),
+)
+
+EditFileInput = create_model(
+    "EditFileInput",
+    file_path=(str, Field(description="Path to the file to edit. Must be a text file (markdown, txt, csv, json, xml, html, yaml, etc.)")),
+    file_query=(str, Field(description="""Edit instructions with OLD/NEW markers. Format:
+OLD <<<<
+old content to replace
+>>>> OLD
+NEW <<<<
+new content
+>>>> NEW
+
+Multiple OLD/NEW pairs can be provided for multiple edits.""")),
+    branch=(Optional[str], Field(description="Branch name. If None, uses active branch.", default=None)),
+    commit_message=(Optional[str], Field(description="Commit message for the change (VCS toolkits only)", default=None)),
+)
+
+SearchFileInput = create_model(
+    "SearchFileInput",
+    file_path=(str, Field(description="Path to the file to search")),
+    pattern=(str, Field(description="Search pattern. Treated as regex by default unless is_regex=False.")),
+    branch=(Optional[str], Field(description="Branch name. If None, uses active branch.", default=None)),
+    is_regex=(bool, Field(description="Whether pattern is a regex. Default is True for flexible matching.", default=True)),
+    context_lines=(int, Field(description="Number of lines before/after match to include for context", default=2, ge=0)),
+)
 
-class BaseToolApiWrapper(BaseModel):
 
+class BaseToolApiWrapper(BaseModel):
+
+    # Optional RunnableConfig for CLI/standalone usage (allows dispatch_custom_event to work)
+    _runnable_config: Optional[Dict[str, Any]] = None
+    # toolkit id propagated from backend
+    toolkit_id: int = 0
     def get_available_tools(self):
         raise NotImplementedError("Subclasses should implement this method")
 
-    def
-    """
+    def set_runnable_config(self, config: Optional[Dict[str, Any]]) -> None:
+        """
+        Set the RunnableConfig for dispatching custom events.
+
+        This is required when running outside of a LangChain agent context
+        (e.g., from CLI). Without a config containing a run_id,
+        dispatch_custom_event will fail with "Unable to dispatch an adhoc event
+        without a parent run id".
+
+        Args:
+            config: A RunnableConfig dict with at least {'run_id': uuid}
+        """
+        self._runnable_config = config
+
+    def _log_tool_event(self, message: str, tool_name: str = None, config: Optional[Dict[str, Any]] = None):
+        """Log data and dispatch custom event for the tool.
+
+        Args:
+            message: The message to log
+            tool_name: Name of the tool (defaults to 'tool_progress')
+            config: Optional RunnableConfig. If not provided, uses self._runnable_config.
+                Required when running outside a LangChain agent context.
+        """
 
         try:
             from langchain_core.callbacks import dispatch_custom_event
@@ -135,6 +218,10 @@ class BaseToolApiWrapper(BaseModel):
                 tool_name = 'tool_progress'
 
             logger.info(message)
+
+            # Use provided config, fall back to instance config
+            effective_config = config or self._runnable_config
+
             dispatch_custom_event(
                 name="thinking_step",
                 data={
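The `effective_config` fallback exists because `dispatch_custom_event` refuses to fire without a parent run id when no LangChain run is active; a CLI caller can now store one via `set_runnable_config`. A self-contained sketch of that fallback (the class is re-implemented here so it runs standalone; the real method passes the config on to `dispatch_custom_event`):

```python
# Standalone sketch of the set_runnable_config/_log_tool_event fallback
# shown in the diff; the dispatch call itself is stubbed out.
import logging
import uuid
from typing import Any, Dict, Optional
from pydantic import BaseModel

logger = logging.getLogger(__name__)

class ToolWrapper(BaseModel):
    _runnable_config: Optional[Dict[str, Any]] = None  # private attr, as in the diff

    def set_runnable_config(self, config: Optional[Dict[str, Any]]) -> None:
        self._runnable_config = config

    def _log_tool_event(self, message: str, config: Optional[Dict[str, Any]] = None):
        logger.info(message)
        # Use provided config, fall back to instance config
        effective_config = config or self._runnable_config
        # The real implementation forwards this to dispatch_custom_event(...)
        return effective_config

wrapper = ToolWrapper()
wrapper.set_runnable_config({"run_id": uuid.uuid4()})  # parent run id for events
print(wrapper._log_tool_event("indexing started")["run_id"])
```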
@@ -142,14 +229,14 @@ class BaseToolApiWrapper(BaseModel):
                     "tool_name": tool_name,
                     "toolkit": self.__class__.__name__,
                 },
+                config=effective_config,
             )
         except Exception as e:
             logger.warning(f"Failed to dispatch progress event: {str(e)}")
 
 
     def run(self, mode: str, *args: Any, **kwargs: Any):
-
-        mode = mode.rsplit(TOOLKIT_SPLITTER, maxsplit=1)[1]
+        # Mode is now the clean tool name (no prefix to remove)
         for tool in self.get_available_tools():
             if tool["name"] == mode:
                 try:
@@ -158,6 +245,11 @@ class BaseToolApiWrapper(BaseModel):
                     # execution = str(execution)
                     return execution
                 except Exception as e:
+                    # Re-raise McpAuthorizationRequired directly without wrapping
+                    from alita_sdk.runtime.utils.mcp_oauth import McpAuthorizationRequired
+                    if isinstance(e, McpAuthorizationRequired):
+                        raise
+
                     # Catch all tool execution exceptions and provide user-friendly error messages
                     error_type = type(e).__name__
                     error_message = str(e)
@@ -317,12 +409,12 @@ class BaseVectorStoreToolApiWrapper(BaseToolApiWrapper):
         #
         docs = base_chunker(file_content_generator=docs, config=base_chunking_config)
         #
-
+        index_name = kwargs.get("index_name")
         progress_step = kwargs.get("progress_step")
         clean_index = kwargs.get("clean_index")
         vs = self._init_vector_store()
         #
-        return vs.index_documents(docs,
+        return vs.index_documents(docs, index_name=index_name, progress_step=progress_step, clean_index=clean_index)
 
     def _process_documents(self, documents: List[Document]) -> Generator[Document, None, None]:
         """
@@ -392,10 +484,10 @@ class BaseVectorStoreToolApiWrapper(BaseToolApiWrapper):
             )
         return self._vector_store
 
-    def remove_index(self,
+    def remove_index(self, index_name: str = ""):
         """Cleans the indexed data in the collection."""
-        self._init_vector_store()._clean_collection(
-        return (f"Collection '{
+        self._init_vector_store()._clean_collection(index_name=index_name)
+        return (f"Collection '{index_name}' has been removed from the vector store.\n"
                 f"Available collections: {self.list_collections()}")
 
     def list_collections(self):
@@ -403,19 +495,19 @@ class BaseVectorStoreToolApiWrapper(BaseToolApiWrapper):
         vectorstore_wrapper = self._init_vector_store()
         return vectorstore_wrapper.list_collections()
 
-    def _build_collection_filter(self, filter: dict | str,
+    def _build_collection_filter(self, filter: dict | str, index_name: str = "") -> dict:
         """Builds a filter for the collection based on the provided suffix."""
 
         filter = filter if isinstance(filter, dict) else json.loads(filter)
-        if
+        if index_name:
             filter.update({"collection": {
-                "$eq":
+                "$eq": index_name.strip()
             }})
         return filter
 
     def search_index(self,
                      query: str,
-
+                     index_name: str = "",
                      filter: dict | str = {}, cut_off: float = 0.5,
                      search_top: int = 10, reranker: dict = {},
                      full_text_search: Optional[Dict[str, Any]] = None,
@@ -424,7 +516,7 @@ class BaseVectorStoreToolApiWrapper(BaseToolApiWrapper):
                      **kwargs):
         """ Searches indexed documents in the vector store."""
         vectorstore = self._init_vector_store()
-        filter = self._build_collection_filter(filter,
+        filter = self._build_collection_filter(filter, index_name)
         found_docs = vectorstore.search_documents(
             query,
             doctype=self.doctype,
@@ -441,7 +533,7 @@
     def stepback_search_index(self,
                               query: str,
                               messages: List[Dict[str, Any]] = [],
-
+                              index_name: str = "",
                               filter: dict | str = {}, cut_off: float = 0.5,
                               search_top: int = 10, reranker: dict = {},
                               full_text_search: Optional[Dict[str, Any]] = None,
@@ -450,7 +542,7 @@
                               **kwargs):
         """ Searches indexed documents in the vector store."""
 
-        filter = self._build_collection_filter(filter,
+        filter = self._build_collection_filter(filter, index_name)
         vectorstore = self._init_vector_store()
         found_docs = vectorstore.stepback_search(
             query,
@@ -468,7 +560,7 @@
     def stepback_summary_index(self,
                                query: str,
                                messages: List[Dict[str, Any]] = [],
-
+                               index_name: str = "",
                                filter: dict | str = {}, cut_off: float = 0.5,
                                search_top: int = 10, reranker: dict = {},
                                full_text_search: Optional[Dict[str, Any]] = None,
@@ -477,7 +569,7 @@
                                **kwargs):
         """ Generates a summary of indexed documents using stepback technique."""
         vectorstore = self._init_vector_store()
-        filter = self._build_collection_filter(filter,
+        filter = self._build_collection_filter(filter, index_name)
 
         found_docs = vectorstore.stepback_summary(
             query,
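All four search entry points now thread `index_name` into `_build_collection_filter`, which narrows the query with a `collection` equality clause; an empty name leaves the filter untouched so the search spans every dataset. A worked example of the resulting filter (the function body is lifted from the hunk above into a free function so it runs standalone):

```python
# Worked example of the collection filter built above; logic copied from
# _build_collection_filter in the diff.
import json

def build_collection_filter(filter, index_name: str = "") -> dict:
    filter = filter if isinstance(filter, dict) else json.loads(filter)
    if index_name:
        filter.update({"collection": {"$eq": index_name.strip()}})
    return filter

print(build_collection_filter({"status": "active"}, "docs"))
# -> {'status': 'active', 'collection': {'$eq': 'docs'}}

print(build_collection_filter('{"status": "active"}'))
# -> {'status': 'active'}   (empty index_name: search across all datasets)
```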
@@ -547,11 +639,281 @@ class BaseCodeToolApiWrapper(BaseVectorStoreToolApiWrapper):
     def _get_files(self):
         raise NotImplementedError("Subclasses should implement this method")
 
-    def _read_file(
+    def _read_file(
+        self,
+        file_path: str,
+        branch: str = None,
+        offset: Optional[int] = None,
+        limit: Optional[int] = None,
+        head: Optional[int] = None,
+        tail: Optional[int] = None,
+        **kwargs  # Allow subclasses to have additional parameters
+    ) -> str:
+        """
+        Read file content with optional partial read support.
+
+        Subclasses should implement this method. If they don't support partial reads,
+        they can accept **kwargs and ignore offset/limit/head/tail parameters - the base
+        class high-level methods will apply slicing client-side.
+
+        Args:
+            file_path: Path to the file
+            branch: Branch name (None for active branch)
+            offset: Starting line number (1-indexed)
+            limit: Number of lines to read from offset
+            head: Read only first N lines
+            tail: Read only last N lines
+            **kwargs: Additional toolkit-specific parameters (e.g., repo_name for GitHub)
+
+        Returns:
+            File content as string
+        """
         raise NotImplementedError("Subclasses should implement this method")
+
+    def _write_file(
+        self,
+        file_path: str,
+        content: str,
+        branch: str = None,
+        commit_message: str = None
+    ) -> str:
+        """
+        Write content to a file.
+
+        Subclasses should implement this method to enable edit_file functionality.
+        For VCS toolkits, this may involve creating or updating files with commits.
+
+        Args:
+            file_path: Path to the file
+            content: New file content
+            branch: Branch name (None for active branch)
+            commit_message: Commit message (VCS toolkits only)
+
+        Returns:
+            Success message
+        """
+        raise NotImplementedError("Subclasses should implement _write_file to enable editing")
 
     def _file_commit_hash(self, file_path: str, branch: str):
         pass
+
+    def read_file_chunk(
+        self,
+        file_path: str,
+        start_line: int,
+        end_line: Optional[int] = None,
+        branch: str = None
+    ) -> str:
+        """
+        Read a specific range of lines from a file.
+
+        Args:
+            file_path: Path to the file
+            start_line: Starting line number (1-indexed, inclusive)
+            end_line: Ending line number (1-indexed, inclusive). If None, reads to end.
+            branch: Branch name (None for active branch)
+
+        Returns:
+            File content for the specified line range
+        """
+        from .utils.text_operations import apply_line_slice
+
+        # Calculate offset and limit from start_line and end_line
+        offset = start_line
+        limit = (end_line - start_line + 1) if end_line is not None else None
+
+        # Read the file with offset/limit
+        content = self._read_file(file_path, branch, offset=offset, limit=limit)
+
+        # Apply client-side slicing if toolkit doesn't support partial reads
+        # (toolkit's _read_file will return full content if it ignores offset/limit)
+        return apply_line_slice(content, offset=offset, limit=limit)
+
+    def read_multiple_files(
+        self,
+        file_paths: List[str],
+        branch: str = None,
+        offset: Optional[int] = None,
+        limit: Optional[int] = None
+    ) -> Dict[str, str]:
+        """
+        Read multiple files in batch.
+
+        Args:
+            file_paths: List of file paths to read
+            branch: Branch name (None for active branch)
+            offset: Starting line number for all files (1-indexed)
+            limit: Number of lines to read from offset for all files
+
+        Returns:
+            Dictionary mapping file paths to their content (or error messages)
+        """
+        results = {}
+
+        for file_path in file_paths:
+            try:
+                content = self._read_file(
+                    file_path,
+                    branch,
+                    offset=offset,
+                    limit=limit
+                )
+                results[file_path] = content
+            except Exception as e:
+                results[file_path] = f"Error reading file: {str(e)}"
+                logger.error(f"Failed to read {file_path}: {e}")
+
+        return results
+
+    def search_file(
+        self,
+        file_path: str,
+        pattern: str,
+        branch: str = None,
+        is_regex: bool = True,
+        context_lines: int = 2
+    ) -> str:
+        """
+        Search for pattern in file content with context.
+
+        Args:
+            file_path: Path to the file
+            pattern: Search pattern (regex if is_regex=True, else literal)
+            branch: Branch name (None for active branch)
+            is_regex: Whether pattern is regex (default True)
+            context_lines: Lines of context before/after matches (default 2)
+
+        Returns:
+            Formatted string with search results and context
+        """
+        from .utils.text_operations import search_in_content
+
+        # Read full file content
+        content = self._read_file(file_path, branch)
+
+        # Search for pattern
+        matches = search_in_content(content, pattern, is_regex, context_lines)
+
+        if not matches:
+            return f"No matches found for pattern '{pattern}' in {file_path}"
+
+        # Format results
+        result_lines = [f"Found {len(matches)} match(es) for pattern '{pattern}' in {file_path}:\n"]
+
+        for i, match in enumerate(matches, 1):
+            result_lines.append(f"\n--- Match {i} at line {match['line_number']} ---")
+
+            # Context before
+            if match['context_before']:
+                for line in match['context_before']:
+                    result_lines.append(f"  {line}")
+
+            # Matching line (highlighted)
+            result_lines.append(f"> {match['line_content']}")
+
+            # Context after
+            if match['context_after']:
+                for line in match['context_after']:
+                    result_lines.append(f"  {line}")
+
+        return "\n".join(result_lines)
+
+    def edit_file(
+        self,
+        file_path: str,
+        file_query: str,
+        branch: str = None,
+        commit_message: str = None
+    ) -> str:
+        """
+        Edit file using OLD/NEW markers for precise replacements.
+
+        Only works with text files (markdown, txt, csv, json, xml, html, yaml, code files).
+
+        Args:
+            file_path: Path to the file to edit
+            file_query: Edit instructions with OLD/NEW markers
+            branch: Branch name (None for active branch)
+            commit_message: Commit message (VCS toolkits only)
+
+        Returns:
+            Success message or error
+
+        Raises:
+            ToolException: If file is not text-editable or edit fails
+        """
+        from .utils.text_operations import parse_old_new_markers, is_text_editable
+        from langchain_core.callbacks import dispatch_custom_event
+
+        # Validate file is text-editable
+        if not is_text_editable(file_path):
+            raise ToolException(
+                f"Cannot edit binary/document file '{file_path}'. "
+                f"Supported text formats: markdown, txt, csv, json, xml, html, yaml, code files."
+            )
+
+        # Parse OLD/NEW markers
+        edits = parse_old_new_markers(file_query)
+        if not edits:
+            raise ToolException(
+                "No OLD/NEW marker pairs found in file_query. "
+                "Format: OLD <<<< old text >>>> OLD NEW <<<< new text >>>> NEW"
+            )
+
+        # Read current file content
+        try:
+            current_content = self._read_file(file_path, branch)
+        except Exception as e:
+            raise ToolException(f"Failed to read file {file_path}: {e}")
+
+        # Apply all edits
+        updated_content = current_content
+        for old_text, new_text in edits:
+            if not old_text.strip():
+                continue
+
+            if old_text not in updated_content:
+                logger.warning(
+                    f"Old content not found in {file_path}. "
+                    f"Looking for: {old_text[:100]}..."
+                )
+                continue
+
+            updated_content = updated_content.replace(old_text, new_text)
+
+        # Check if any changes were made
+        if current_content == updated_content:
+            return (
+                f"No changes made to {file_path}. "
+                "Old content was not found or is empty. "
+                "Use read_file or search_file to verify current content."
+            )
+
+        # Write updated content
+        try:
+            result = self._write_file(file_path, updated_content, branch, commit_message)
+        except NotImplementedError:
+            raise ToolException(
+                f"Editing not supported for this toolkit. "
+                f"The _write_file method is not implemented."
+            )
+        except Exception as e:
+            raise ToolException(f"Failed to write file {file_path}: {e}")
+
+        # Dispatch file modification event
+        try:
+            dispatch_custom_event("file_modified", {
+                "message": f"File '{file_path}' edited successfully",
+                "filename": file_path,
+                "tool_name": "edit_file",
+                "toolkit": self.__class__.__name__,
+                "operation_type": "modify",
+                "edits_applied": len(edits)
+            })
+        except Exception as e:
+            logger.warning(f"Failed to dispatch file_modified event: {e}")
+
+        return result
 
     def __handle_get_files(self, path: str, branch: str):
         """
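The new `edit_file` applies each OLD/NEW pair as a literal (non-regex) string replacement, skips pairs whose OLD text is absent, and only writes when something actually changed. An example `file_query` in the format `EditFileInput` documents above (the path, file contents, and commit message are invented):

```python
# Example edit_file call; only the OLD/NEW marker format comes from the
# schema above, everything else here is hypothetical.
file_query = """OLD <<<<
timeout_seconds = 30
>>>> OLD
NEW <<<<
timeout_seconds = 60
>>>> NEW
OLD <<<<
retries = 1
>>>> OLD
NEW <<<<
retries = 3
>>>> NEW"""

# On any toolkit that implements _read_file and _write_file this would
# apply both literal replacements (and commit on VCS toolkits):
# wrapper.edit_file("config/settings.py", file_query,
#                   commit_message="Raise timeout and retry budget")
```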
@@ -575,32 +937,46 @@ class BaseCodeToolApiWrapper(BaseVectorStoreToolApiWrapper):
             raise ValueError("Expected a list of strings")
         return _files
 
+    def __get_branch(self, branch):
+        return (branch or getattr(self, 'active_branch', None)
+                or getattr(self, '_active_branch', None) or getattr(self, 'branch', None))
+
     def loader(self,
               branch: Optional[str] = None,
               whitelist: Optional[List[str]] = None,
-              blacklist: Optional[List[str]] = None
+              blacklist: Optional[List[str]] = None,
+              chunked: bool = True) -> Generator[Document, None, None]:
         """
-        Generates
+        Generates Documents from files in a branch, respecting whitelist and blacklist patterns.
 
         Parameters:
         - branch (Optional[str]): Branch for listing files. Defaults to the current branch if None.
         - whitelist (Optional[List[str]]): File extensions or paths to include. Defaults to all files if None.
         - blacklist (Optional[List[str]]): File extensions or paths to exclude. Defaults to no exclusions if None.
+        - chunked (bool): If True (default), applies universal chunker based on file type.
+          If False, returns raw Documents without chunking.
 
         Returns:
-        - generator: Yields
+        - generator: Yields Documents from files matching the whitelist but not the blacklist.
 
         Example:
         # Use 'feature-branch', include '.py' files, exclude 'test_' files
-
+        for doc in loader(branch='feature-branch', whitelist=['*.py'], blacklist=['*test_*']):
+            print(doc.page_content)
 
         Notes:
         - Whitelist and blacklist use Unix shell-style wildcards.
         - Files must match the whitelist and not the blacklist to be included.
+        - When chunked=True:
+          - .md files → markdown chunker (header-based splitting)
+          - .py/.js/.ts/etc → code parser (TreeSitter-based)
+          - .json files → JSON chunker
+          - other files → default text chunker
         """
-        from .
+        from langchain_core.documents import Document
+        import hashlib
 
-        _files = self.__handle_get_files("",
+        _files = self.__handle_get_files("", self.__get_branch(branch))
         self._log_tool_event(message="Listing files in branch", tool_name="loader")
         logger.info(f"Files in branch: {_files}")
 
@@ -616,28 +992,55 @@ class BaseCodeToolApiWrapper(BaseVectorStoreToolApiWrapper):
                     or any(file_path.endswith(f'.{pattern}') for pattern in blacklist))
             return False
 
-        def
+        def raw_document_generator() -> Generator[Document, None, None]:
+            """Yields raw Documents without chunking."""
            self._log_tool_event(message="Reading the files", tool_name="loader")
-            # log the progress of file reading
            total_files = len(_files)
+            processed = 0
+
            for idx, file in enumerate(_files, 1):
                if is_whitelisted(file) and not is_blacklisted(file):
-
-
-
-
+                    try:
+                        file_content = self._read_file(file, self.__get_branch(branch))
+                    except Exception as e:
+                        logger.error(f"Failed to read file {file}: {e}")
+                        continue
+
+                    if not file_content:
+                        continue
+
+                    # Hash the file content for uniqueness tracking
                     file_hash = hashlib.sha256(file_content.encode("utf-8")).hexdigest()
-
-
-
+                    processed += 1
+
+                    yield Document(
+                        page_content=file_content,
+                        metadata={
+                            'file_path': file,
+                            'file_name': file,
+                            'source': file,
+                            'commit_hash': file_hash,
+                        }
+                    )
+
                if idx % 10 == 0 or idx == total_files:
-                    self._log_tool_event(
-
+                    self._log_tool_event(
+                        message=f"{idx} out of {total_files} files checked, {processed} matched",
+                        tool_name="loader"
+                    )
+
+            self._log_tool_event(message=f"{processed} files loaded", tool_name="loader")
 
-
+        if not chunked:
+            # Return raw documents without chunking
+            return raw_document_generator()
+
+        # Apply universal chunker based on file type
+        from .chunkers.universal_chunker import universal_chunker
+        return universal_chunker(raw_document_generator())
 
     def index_data(self,
-
+                   index_name: str,
                    branch: Optional[str] = None,
                    whitelist: Optional[List[str]] = None,
                    blacklist: Optional[List[str]] = None,
@@ -651,8 +1054,9 @@ class BaseCodeToolApiWrapper(BaseVectorStoreToolApiWrapper):
         )
         vectorstore = self._init_vector_store()
         clean_index = kwargs.get('clean_index', False)
-        return vectorstore.index_documents(documents,
-                                           clean_index=clean_index, is_code=True
+        return vectorstore.index_documents(documents, index_name=index_name,
+                                           clean_index=clean_index, is_code=True,
+                                           progress_step=kwargs.get('progress_step', 5))
 
     def _get_vector_search_tools(self):
         """
@@ -686,4 +1090,82 @@ def extend_with_vector_tools(method):
         #
         return tools
 
-    return wrapper
+    return wrapper
+
+
+def extend_with_file_operations(method):
+    """
+    Decorator to automatically add file operation tools to toolkits that implement
+    _read_file and _write_file methods.
+
+    Adds:
+    - read_file_chunk: Read specific line ranges
+    - read_multiple_files: Batch read files
+    - search_file: Search for patterns in files
+    - edit_file: Edit files using OLD/NEW markers
+    """
+    def wrapper(self, *args, **kwargs):
+        tools = method(self, *args, **kwargs)
+
+        # Only add file operations if toolkit has implemented the required methods
+        # Check for both _read_file and _write_file methods
+        has_file_ops = (hasattr(self, '_read_file') and callable(getattr(self, '_read_file')) and
+                        hasattr(self, '_write_file') and callable(getattr(self, '_write_file')))
+
+        if has_file_ops:
+            # Import schemas from elitea_base
+            from . import elitea_base
+
+            file_operation_tools = [
+                {
+                    "name": "read_file_chunk",
+                    "mode": "read_file_chunk",
+                    "ref": self.read_file_chunk,
+                    "description": self.read_file_chunk.__doc__,
+                    "args_schema": elitea_base.ReadFileChunkInput
+                },
+                {
+                    "name": "read_multiple_files",
+                    "mode": "read_multiple_files",
+                    "ref": self.read_multiple_files,
+                    "description": self.read_multiple_files.__doc__,
+                    "args_schema": elitea_base.ReadMultipleFilesInput
+                },
+                {
+                    "name": "search_file",
+                    "mode": "search_file",
+                    "ref": self.search_file,
+                    "description": self.search_file.__doc__,
+                    "args_schema": elitea_base.SearchFileInput
+                },
+                {
+                    "name": "edit_file",
+                    "mode": "edit_file",
+                    "ref": self.edit_file,
+                    "description": self.edit_file.__doc__,
+                    "args_schema": elitea_base.EditFileInput
+                },
+            ]
+
+            tools.extend(file_operation_tools)
+
+        return tools
+
+    return wrapper
+
+
+def filter_missconfigured_index_tools(method):
+    def wrapper(self, *args, **kwargs):
+        toolkit = method(self, *args, **kwargs)
+
+        # Validate index tools misconfiguration and exclude them if necessary
+        is_index_toolkit = any(tool.name in INDEX_TOOL_NAMES for tool in toolkit.tools)
+        is_index_configuration_missing = not (kwargs.get('embedding_model')
+                                              and kwargs.get('pgvector_configuration'))
+
+        if is_index_toolkit and is_index_configuration_missing:
+            toolkit.tools = [tool for tool in toolkit.tools if tool.name not in INDEX_TOOL_NAMES]
+
+        return toolkit
+
+    return wrapper
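The closing `filter_missconfigured_index_tools` decorator drops the six `INDEX_TOOL_NAMES` tools from a toolkit whenever `embedding_model` or `pgvector_configuration` is missing from the call. A self-contained sketch (the decorator body mirrors the diff; `Factory` and the tool objects are stand-ins for the real toolkit machinery):

```python
# Self-contained sketch of filter_missconfigured_index_tools; the decorator
# body mirrors the diff above, SimpleNamespace stands in for real tools.
from types import SimpleNamespace

INDEX_TOOL_NAMES = ['index_data', 'remove_index', 'list_collections',
                    'search_index', 'stepback_search_index',
                    'stepback_summary_index']

def filter_missconfigured_index_tools(method):
    def wrapper(self, *args, **kwargs):
        toolkit = method(self, *args, **kwargs)
        is_index_toolkit = any(t.name in INDEX_TOOL_NAMES for t in toolkit.tools)
        is_index_configuration_missing = not (kwargs.get('embedding_model')
                                              and kwargs.get('pgvector_configuration'))
        if is_index_toolkit and is_index_configuration_missing:
            toolkit.tools = [t for t in toolkit.tools
                             if t.name not in INDEX_TOOL_NAMES]
        return toolkit
    return wrapper

class Factory:
    @filter_missconfigured_index_tools
    def get_toolkit(self, **kwargs):
        return SimpleNamespace(tools=[SimpleNamespace(name='search_index'),
                                      SimpleNamespace(name='edit_file')])

print([t.name for t in Factory().get_toolkit()])
# -> ['edit_file']   (index tools dropped: no embedding/pgvector config)

print([t.name for t in Factory().get_toolkit(
    embedding_model='text-embedding-3-small',
    pgvector_configuration={'dsn': 'postgres://...'})])
# -> ['search_index', 'edit_file']
```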