alita-sdk 0.3.379__py3-none-any.whl → 0.3.627__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- alita_sdk/cli/__init__.py +10 -0
- alita_sdk/cli/__main__.py +17 -0
- alita_sdk/cli/agent/__init__.py +5 -0
- alita_sdk/cli/agent/default.py +258 -0
- alita_sdk/cli/agent_executor.py +156 -0
- alita_sdk/cli/agent_loader.py +245 -0
- alita_sdk/cli/agent_ui.py +228 -0
- alita_sdk/cli/agents.py +3113 -0
- alita_sdk/cli/callbacks.py +647 -0
- alita_sdk/cli/cli.py +168 -0
- alita_sdk/cli/config.py +306 -0
- alita_sdk/cli/context/__init__.py +30 -0
- alita_sdk/cli/context/cleanup.py +198 -0
- alita_sdk/cli/context/manager.py +731 -0
- alita_sdk/cli/context/message.py +285 -0
- alita_sdk/cli/context/strategies.py +289 -0
- alita_sdk/cli/context/token_estimation.py +127 -0
- alita_sdk/cli/formatting.py +182 -0
- alita_sdk/cli/input_handler.py +419 -0
- alita_sdk/cli/inventory.py +1073 -0
- alita_sdk/cli/mcp_loader.py +315 -0
- alita_sdk/cli/testcases/__init__.py +94 -0
- alita_sdk/cli/testcases/data_generation.py +119 -0
- alita_sdk/cli/testcases/discovery.py +96 -0
- alita_sdk/cli/testcases/executor.py +84 -0
- alita_sdk/cli/testcases/logger.py +85 -0
- alita_sdk/cli/testcases/parser.py +172 -0
- alita_sdk/cli/testcases/prompts.py +91 -0
- alita_sdk/cli/testcases/reporting.py +125 -0
- alita_sdk/cli/testcases/setup.py +108 -0
- alita_sdk/cli/testcases/test_runner.py +282 -0
- alita_sdk/cli/testcases/utils.py +39 -0
- alita_sdk/cli/testcases/validation.py +90 -0
- alita_sdk/cli/testcases/workflow.py +196 -0
- alita_sdk/cli/toolkit.py +327 -0
- alita_sdk/cli/toolkit_loader.py +85 -0
- alita_sdk/cli/tools/__init__.py +43 -0
- alita_sdk/cli/tools/approval.py +224 -0
- alita_sdk/cli/tools/filesystem.py +1751 -0
- alita_sdk/cli/tools/planning.py +389 -0
- alita_sdk/cli/tools/terminal.py +414 -0
- alita_sdk/community/__init__.py +72 -12
- alita_sdk/community/inventory/__init__.py +236 -0
- alita_sdk/community/inventory/config.py +257 -0
- alita_sdk/community/inventory/enrichment.py +2137 -0
- alita_sdk/community/inventory/extractors.py +1469 -0
- alita_sdk/community/inventory/ingestion.py +3172 -0
- alita_sdk/community/inventory/knowledge_graph.py +1457 -0
- alita_sdk/community/inventory/parsers/__init__.py +218 -0
- alita_sdk/community/inventory/parsers/base.py +295 -0
- alita_sdk/community/inventory/parsers/csharp_parser.py +907 -0
- alita_sdk/community/inventory/parsers/go_parser.py +851 -0
- alita_sdk/community/inventory/parsers/html_parser.py +389 -0
- alita_sdk/community/inventory/parsers/java_parser.py +593 -0
- alita_sdk/community/inventory/parsers/javascript_parser.py +629 -0
- alita_sdk/community/inventory/parsers/kotlin_parser.py +768 -0
- alita_sdk/community/inventory/parsers/markdown_parser.py +362 -0
- alita_sdk/community/inventory/parsers/python_parser.py +604 -0
- alita_sdk/community/inventory/parsers/rust_parser.py +858 -0
- alita_sdk/community/inventory/parsers/swift_parser.py +832 -0
- alita_sdk/community/inventory/parsers/text_parser.py +322 -0
- alita_sdk/community/inventory/parsers/yaml_parser.py +370 -0
- alita_sdk/community/inventory/patterns/__init__.py +61 -0
- alita_sdk/community/inventory/patterns/ast_adapter.py +380 -0
- alita_sdk/community/inventory/patterns/loader.py +348 -0
- alita_sdk/community/inventory/patterns/registry.py +198 -0
- alita_sdk/community/inventory/presets.py +535 -0
- alita_sdk/community/inventory/retrieval.py +1403 -0
- alita_sdk/community/inventory/toolkit.py +173 -0
- alita_sdk/community/inventory/toolkit_utils.py +176 -0
- alita_sdk/community/inventory/visualize.py +1370 -0
- alita_sdk/configurations/__init__.py +1 -1
- alita_sdk/configurations/ado.py +141 -20
- alita_sdk/configurations/bitbucket.py +94 -2
- alita_sdk/configurations/confluence.py +130 -1
- alita_sdk/configurations/figma.py +76 -0
- alita_sdk/configurations/gitlab.py +91 -0
- alita_sdk/configurations/jira.py +103 -0
- alita_sdk/configurations/openapi.py +329 -0
- alita_sdk/configurations/qtest.py +72 -1
- alita_sdk/configurations/report_portal.py +96 -0
- alita_sdk/configurations/sharepoint.py +148 -0
- alita_sdk/configurations/testio.py +83 -0
- alita_sdk/configurations/testrail.py +88 -0
- alita_sdk/configurations/xray.py +93 -0
- alita_sdk/configurations/zephyr_enterprise.py +93 -0
- alita_sdk/configurations/zephyr_essential.py +75 -0
- alita_sdk/runtime/clients/artifact.py +3 -3
- alita_sdk/runtime/clients/client.py +388 -46
- alita_sdk/runtime/clients/mcp_discovery.py +342 -0
- alita_sdk/runtime/clients/mcp_manager.py +262 -0
- alita_sdk/runtime/clients/sandbox_client.py +8 -21
- alita_sdk/runtime/langchain/_constants_bkup.py +1318 -0
- alita_sdk/runtime/langchain/assistant.py +157 -39
- alita_sdk/runtime/langchain/constants.py +647 -1
- alita_sdk/runtime/langchain/document_loaders/AlitaDocxMammothLoader.py +315 -3
- alita_sdk/runtime/langchain/document_loaders/AlitaExcelLoader.py +103 -60
- alita_sdk/runtime/langchain/document_loaders/AlitaJSONLinesLoader.py +77 -0
- alita_sdk/runtime/langchain/document_loaders/AlitaJSONLoader.py +10 -4
- alita_sdk/runtime/langchain/document_loaders/AlitaPowerPointLoader.py +226 -7
- alita_sdk/runtime/langchain/document_loaders/AlitaTextLoader.py +5 -2
- alita_sdk/runtime/langchain/document_loaders/constants.py +40 -19
- alita_sdk/runtime/langchain/langraph_agent.py +405 -84
- alita_sdk/runtime/langchain/utils.py +106 -7
- alita_sdk/runtime/llms/preloaded.py +2 -6
- alita_sdk/runtime/models/mcp_models.py +61 -0
- alita_sdk/runtime/skills/__init__.py +91 -0
- alita_sdk/runtime/skills/callbacks.py +498 -0
- alita_sdk/runtime/skills/discovery.py +540 -0
- alita_sdk/runtime/skills/executor.py +610 -0
- alita_sdk/runtime/skills/input_builder.py +371 -0
- alita_sdk/runtime/skills/models.py +330 -0
- alita_sdk/runtime/skills/registry.py +355 -0
- alita_sdk/runtime/skills/skill_runner.py +330 -0
- alita_sdk/runtime/toolkits/__init__.py +31 -0
- alita_sdk/runtime/toolkits/application.py +29 -10
- alita_sdk/runtime/toolkits/artifact.py +20 -11
- alita_sdk/runtime/toolkits/datasource.py +13 -6
- alita_sdk/runtime/toolkits/mcp.py +783 -0
- alita_sdk/runtime/toolkits/mcp_config.py +1048 -0
- alita_sdk/runtime/toolkits/planning.py +178 -0
- alita_sdk/runtime/toolkits/skill_router.py +238 -0
- alita_sdk/runtime/toolkits/subgraph.py +251 -6
- alita_sdk/runtime/toolkits/tools.py +356 -69
- alita_sdk/runtime/toolkits/vectorstore.py +11 -5
- alita_sdk/runtime/tools/__init__.py +10 -3
- alita_sdk/runtime/tools/application.py +27 -6
- alita_sdk/runtime/tools/artifact.py +511 -28
- alita_sdk/runtime/tools/data_analysis.py +183 -0
- alita_sdk/runtime/tools/function.py +67 -35
- alita_sdk/runtime/tools/graph.py +10 -4
- alita_sdk/runtime/tools/image_generation.py +148 -46
- alita_sdk/runtime/tools/llm.py +1003 -128
- alita_sdk/runtime/tools/loop.py +3 -1
- alita_sdk/runtime/tools/loop_output.py +3 -1
- alita_sdk/runtime/tools/mcp_inspect_tool.py +284 -0
- alita_sdk/runtime/tools/mcp_remote_tool.py +181 -0
- alita_sdk/runtime/tools/mcp_server_tool.py +8 -5
- alita_sdk/runtime/tools/planning/__init__.py +36 -0
- alita_sdk/runtime/tools/planning/models.py +246 -0
- alita_sdk/runtime/tools/planning/wrapper.py +607 -0
- alita_sdk/runtime/tools/router.py +2 -4
- alita_sdk/runtime/tools/sandbox.py +65 -48
- alita_sdk/runtime/tools/skill_router.py +776 -0
- alita_sdk/runtime/tools/tool.py +3 -1
- alita_sdk/runtime/tools/vectorstore.py +9 -3
- alita_sdk/runtime/tools/vectorstore_base.py +70 -14
- alita_sdk/runtime/utils/AlitaCallback.py +137 -21
- alita_sdk/runtime/utils/constants.py +5 -1
- alita_sdk/runtime/utils/mcp_client.py +492 -0
- alita_sdk/runtime/utils/mcp_oauth.py +361 -0
- alita_sdk/runtime/utils/mcp_sse_client.py +434 -0
- alita_sdk/runtime/utils/mcp_tools_discovery.py +124 -0
- alita_sdk/runtime/utils/serialization.py +155 -0
- alita_sdk/runtime/utils/streamlit.py +40 -13
- alita_sdk/runtime/utils/toolkit_utils.py +30 -9
- alita_sdk/runtime/utils/utils.py +36 -0
- alita_sdk/tools/__init__.py +134 -35
- alita_sdk/tools/ado/repos/__init__.py +51 -32
- alita_sdk/tools/ado/repos/repos_wrapper.py +148 -89
- alita_sdk/tools/ado/test_plan/__init__.py +25 -9
- alita_sdk/tools/ado/test_plan/test_plan_wrapper.py +23 -1
- alita_sdk/tools/ado/utils.py +1 -18
- alita_sdk/tools/ado/wiki/__init__.py +25 -12
- alita_sdk/tools/ado/wiki/ado_wrapper.py +291 -22
- alita_sdk/tools/ado/work_item/__init__.py +26 -13
- alita_sdk/tools/ado/work_item/ado_wrapper.py +73 -11
- alita_sdk/tools/advanced_jira_mining/__init__.py +11 -8
- alita_sdk/tools/aws/delta_lake/__init__.py +13 -9
- alita_sdk/tools/aws/delta_lake/tool.py +5 -1
- alita_sdk/tools/azure_ai/search/__init__.py +11 -8
- alita_sdk/tools/azure_ai/search/api_wrapper.py +1 -1
- alita_sdk/tools/base/tool.py +5 -1
- alita_sdk/tools/base_indexer_toolkit.py +271 -84
- alita_sdk/tools/bitbucket/__init__.py +17 -11
- alita_sdk/tools/bitbucket/api_wrapper.py +59 -11
- alita_sdk/tools/bitbucket/cloud_api_wrapper.py +49 -35
- alita_sdk/tools/browser/__init__.py +5 -4
- alita_sdk/tools/carrier/__init__.py +5 -6
- alita_sdk/tools/carrier/backend_reports_tool.py +6 -6
- alita_sdk/tools/carrier/run_ui_test_tool.py +6 -6
- alita_sdk/tools/carrier/ui_reports_tool.py +5 -5
- alita_sdk/tools/chunkers/__init__.py +3 -1
- alita_sdk/tools/chunkers/code/treesitter/treesitter.py +37 -13
- alita_sdk/tools/chunkers/sematic/json_chunker.py +1 -0
- alita_sdk/tools/chunkers/sematic/markdown_chunker.py +97 -6
- alita_sdk/tools/chunkers/sematic/proposal_chunker.py +1 -1
- alita_sdk/tools/chunkers/universal_chunker.py +270 -0
- alita_sdk/tools/cloud/aws/__init__.py +10 -7
- alita_sdk/tools/cloud/azure/__init__.py +10 -7
- alita_sdk/tools/cloud/gcp/__init__.py +10 -7
- alita_sdk/tools/cloud/k8s/__init__.py +10 -7
- alita_sdk/tools/code/linter/__init__.py +10 -8
- alita_sdk/tools/code/loaders/codesearcher.py +3 -2
- alita_sdk/tools/code/sonar/__init__.py +11 -8
- alita_sdk/tools/code_indexer_toolkit.py +82 -22
- alita_sdk/tools/confluence/__init__.py +22 -16
- alita_sdk/tools/confluence/api_wrapper.py +107 -30
- alita_sdk/tools/confluence/loader.py +14 -2
- alita_sdk/tools/custom_open_api/__init__.py +12 -5
- alita_sdk/tools/elastic/__init__.py +11 -8
- alita_sdk/tools/elitea_base.py +493 -30
- alita_sdk/tools/figma/__init__.py +58 -11
- alita_sdk/tools/figma/api_wrapper.py +1235 -143
- alita_sdk/tools/figma/figma_client.py +73 -0
- alita_sdk/tools/figma/toon_tools.py +2748 -0
- alita_sdk/tools/github/__init__.py +14 -15
- alita_sdk/tools/github/github_client.py +224 -100
- alita_sdk/tools/github/graphql_client_wrapper.py +119 -33
- alita_sdk/tools/github/schemas.py +14 -5
- alita_sdk/tools/github/tool.py +5 -1
- alita_sdk/tools/github/tool_prompts.py +9 -22
- alita_sdk/tools/gitlab/__init__.py +16 -11
- alita_sdk/tools/gitlab/api_wrapper.py +218 -48
- alita_sdk/tools/gitlab_org/__init__.py +10 -9
- alita_sdk/tools/gitlab_org/api_wrapper.py +63 -64
- alita_sdk/tools/google/bigquery/__init__.py +13 -12
- alita_sdk/tools/google/bigquery/tool.py +5 -1
- alita_sdk/tools/google_places/__init__.py +11 -8
- alita_sdk/tools/google_places/api_wrapper.py +1 -1
- alita_sdk/tools/jira/__init__.py +17 -10
- alita_sdk/tools/jira/api_wrapper.py +92 -41
- alita_sdk/tools/keycloak/__init__.py +11 -8
- alita_sdk/tools/localgit/__init__.py +9 -3
- alita_sdk/tools/localgit/local_git.py +62 -54
- alita_sdk/tools/localgit/tool.py +5 -1
- alita_sdk/tools/memory/__init__.py +12 -4
- alita_sdk/tools/non_code_indexer_toolkit.py +1 -0
- alita_sdk/tools/ocr/__init__.py +11 -8
- alita_sdk/tools/openapi/__init__.py +491 -106
- alita_sdk/tools/openapi/api_wrapper.py +1368 -0
- alita_sdk/tools/openapi/tool.py +20 -0
- alita_sdk/tools/pandas/__init__.py +20 -12
- alita_sdk/tools/pandas/api_wrapper.py +38 -25
- alita_sdk/tools/pandas/dataframe/generator/base.py +3 -1
- alita_sdk/tools/postman/__init__.py +10 -9
- alita_sdk/tools/pptx/__init__.py +11 -10
- alita_sdk/tools/pptx/pptx_wrapper.py +1 -1
- alita_sdk/tools/qtest/__init__.py +31 -11
- alita_sdk/tools/qtest/api_wrapper.py +2135 -86
- alita_sdk/tools/rally/__init__.py +10 -9
- alita_sdk/tools/rally/api_wrapper.py +1 -1
- alita_sdk/tools/report_portal/__init__.py +12 -8
- alita_sdk/tools/salesforce/__init__.py +10 -8
- alita_sdk/tools/servicenow/__init__.py +17 -15
- alita_sdk/tools/servicenow/api_wrapper.py +1 -1
- alita_sdk/tools/sharepoint/__init__.py +10 -7
- alita_sdk/tools/sharepoint/api_wrapper.py +129 -38
- alita_sdk/tools/sharepoint/authorization_helper.py +191 -1
- alita_sdk/tools/sharepoint/utils.py +8 -2
- alita_sdk/tools/slack/__init__.py +10 -7
- alita_sdk/tools/slack/api_wrapper.py +2 -2
- alita_sdk/tools/sql/__init__.py +12 -9
- alita_sdk/tools/testio/__init__.py +10 -7
- alita_sdk/tools/testrail/__init__.py +11 -10
- alita_sdk/tools/testrail/api_wrapper.py +1 -1
- alita_sdk/tools/utils/__init__.py +9 -4
- alita_sdk/tools/utils/content_parser.py +103 -18
- alita_sdk/tools/utils/text_operations.py +410 -0
- alita_sdk/tools/utils/tool_prompts.py +79 -0
- alita_sdk/tools/vector_adapters/VectorStoreAdapter.py +30 -13
- alita_sdk/tools/xray/__init__.py +13 -9
- alita_sdk/tools/yagmail/__init__.py +9 -3
- alita_sdk/tools/zephyr/__init__.py +10 -7
- alita_sdk/tools/zephyr_enterprise/__init__.py +11 -7
- alita_sdk/tools/zephyr_essential/__init__.py +10 -7
- alita_sdk/tools/zephyr_essential/api_wrapper.py +30 -13
- alita_sdk/tools/zephyr_essential/client.py +2 -2
- alita_sdk/tools/zephyr_scale/__init__.py +11 -8
- alita_sdk/tools/zephyr_scale/api_wrapper.py +2 -2
- alita_sdk/tools/zephyr_squad/__init__.py +10 -7
- {alita_sdk-0.3.379.dist-info → alita_sdk-0.3.627.dist-info}/METADATA +154 -8
- alita_sdk-0.3.627.dist-info/RECORD +468 -0
- alita_sdk-0.3.627.dist-info/entry_points.txt +2 -0
- alita_sdk-0.3.379.dist-info/RECORD +0 -360
- {alita_sdk-0.3.379.dist-info → alita_sdk-0.3.627.dist-info}/WHEEL +0 -0
- {alita_sdk-0.3.379.dist-info → alita_sdk-0.3.627.dist-info}/licenses/LICENSE +0 -0
- {alita_sdk-0.3.379.dist-info → alita_sdk-0.3.627.dist-info}/top_level.txt +0 -0
|
@@ -5,7 +5,8 @@ from pydantic import BaseModel, ConfigDict, create_model, Field, SecretStr
|
|
|
5
5
|
|
|
6
6
|
from .api_wrapper import ELITEAElasticApiWrapper
|
|
7
7
|
from ..base.tool import BaseAction
|
|
8
|
-
from ..utils import clean_string,
|
|
8
|
+
from ..utils import clean_string, get_max_toolkit_length
|
|
9
|
+
from ...runtime.utils.constants import TOOLKIT_NAME_META, TOOL_NAME_META, TOOLKIT_TYPE_META
|
|
9
10
|
|
|
10
11
|
name = "elastic"
|
|
11
12
|
|
|
@@ -19,15 +20,13 @@ def get_tools(tool):
|
|
|
19
20
|
|
|
20
21
|
class ElasticToolkit(BaseToolkit):
|
|
21
22
|
tools: list[BaseTool] = []
|
|
22
|
-
toolkit_max_length: int = 0
|
|
23
23
|
|
|
24
24
|
@staticmethod
|
|
25
25
|
def toolkit_config_schema() -> BaseModel:
|
|
26
26
|
selected_tools = {x['name']: x['args_schema'].schema() for x in ELITEAElasticApiWrapper.model_construct().get_available_tools()}
|
|
27
|
-
ElasticToolkit.toolkit_max_length = get_max_toolkit_length(selected_tools)
|
|
28
27
|
return create_model(
|
|
29
28
|
name,
|
|
30
|
-
url=(str, Field(default=None, title="Elasticsearch URL", description="Elasticsearch URL", json_schema_extra={'toolkit_name': True
|
|
29
|
+
url=(Optional[str], Field(default=None, title="Elasticsearch URL", description="Elasticsearch URL", json_schema_extra={'toolkit_name': True})),
|
|
31
30
|
api_key=(
|
|
32
31
|
Optional[SecretStr],
|
|
33
32
|
Field(
|
|
@@ -48,15 +47,19 @@ class ElasticToolkit(BaseToolkit):
|
|
|
48
47
|
elastic_api_wrapper = ELITEAElasticApiWrapper(**kwargs)
|
|
49
48
|
available_tools = elastic_api_wrapper.get_available_tools()
|
|
50
49
|
tools = []
|
|
51
|
-
prefix = clean_string(toolkit_name, ElasticToolkit.toolkit_max_length) + TOOLKIT_SPLITTER if toolkit_name else ''
|
|
52
50
|
for tool in available_tools:
|
|
53
51
|
if selected_tools and tool["name"] not in selected_tools:
|
|
54
52
|
continue
|
|
53
|
+
description = tool["description"]
|
|
54
|
+
if toolkit_name:
|
|
55
|
+
description = f"Toolkit: {toolkit_name}\n{description}"
|
|
56
|
+
description = description[:1000]
|
|
55
57
|
tools.append(BaseAction(
|
|
56
58
|
api_wrapper=elastic_api_wrapper,
|
|
57
|
-
name=
|
|
58
|
-
description=
|
|
59
|
-
args_schema=tool["args_schema"]
|
|
59
|
+
name=tool["name"],
|
|
60
|
+
description=description,
|
|
61
|
+
args_schema=tool["args_schema"],
|
|
62
|
+
metadata={TOOLKIT_NAME_META: toolkit_name, TOOLKIT_TYPE_META: name, TOOL_NAME_META: tool["name"]} if toolkit_name else {TOOL_NAME_META: tool["name"]}
|
|
60
63
|
))
|
|
61
64
|
return cls(tools=tools)
|
|
62
65
|
|
alita_sdk/tools/elitea_base.py
CHANGED
|
@@ -11,7 +11,6 @@ from pydantic import BaseModel, create_model, Field, SecretStr
|
|
|
11
11
|
|
|
12
12
|
# from alita_sdk.runtime.langchain.interfaces.llm_processor import get_embeddings
|
|
13
13
|
from .chunkers import markdown_chunker
|
|
14
|
-
from .utils import TOOLKIT_SPLITTER
|
|
15
14
|
from .vector_adapters.VectorStoreAdapter import VectorStoreAdapterFactory
|
|
16
15
|
from ..runtime.utils.utils import IndexerKeywords
|
|
17
16
|
|
|
@@ -126,14 +125,91 @@ BaseIndexDataParams = create_model(
|
|
|
126
125
|
chunking_config=(Optional[dict], Field(description="Chunking tool configuration", default_factory=dict)),
|
|
127
126
|
)
|
|
128
127
|
|
|
128
|
+
# File Operations Schema Models
|
|
129
|
+
ReadFileInput = create_model(
|
|
130
|
+
"ReadFileInput",
|
|
131
|
+
file_path=(str, Field(description="Path to the file to read")),
|
|
132
|
+
branch=(Optional[str], Field(description="Branch name. If None, uses active branch.", default=None)),
|
|
133
|
+
offset=(Optional[int], Field(description="Starting line number (1-indexed, inclusive). Read from this line onwards.", default=None, ge=1)),
|
|
134
|
+
limit=(Optional[int], Field(description="Number of lines to read from offset. If None, reads to end.", default=None, ge=1)),
|
|
135
|
+
head=(Optional[int], Field(description="Read only the first N lines. Alternative to offset/limit.", default=None, ge=1)),
|
|
136
|
+
tail=(Optional[int], Field(description="Read only the last N lines. Alternative to offset/limit.", default=None, ge=1)),
|
|
137
|
+
)
|
|
129
138
|
|
|
130
|
-
|
|
139
|
+
ReadFileChunkInput = create_model(
|
|
140
|
+
"ReadFileChunkInput",
|
|
141
|
+
file_path=(str, Field(description="Path to the file to read")),
|
|
142
|
+
branch=(Optional[str], Field(description="Branch name. If None, uses active branch.", default=None)),
|
|
143
|
+
start_line=(int, Field(description="Starting line number (1-indexed, inclusive)", ge=1)),
|
|
144
|
+
end_line=(Optional[int], Field(description="Ending line number (1-indexed, inclusive). If None, reads to end.", default=None, ge=1)),
|
|
145
|
+
)
|
|
146
|
+
|
|
147
|
+
ReadMultipleFilesInput = create_model(
|
|
148
|
+
"ReadMultipleFilesInput",
|
|
149
|
+
file_paths=(List[str], Field(description="List of file paths to read", min_length=1)),
|
|
150
|
+
branch=(Optional[str], Field(description="Branch name. If None, uses active branch.", default=None)),
|
|
151
|
+
offset=(Optional[int], Field(description="Starting line number for all files (1-indexed)", default=None, ge=1)),
|
|
152
|
+
limit=(Optional[int], Field(description="Number of lines to read from offset for all files", default=None, ge=1)),
|
|
153
|
+
)
|
|
154
|
+
|
|
155
|
+
EditFileInput = create_model(
|
|
156
|
+
"EditFileInput",
|
|
157
|
+
file_path=(str, Field(description="Path to the file to edit. Must be a text file (markdown, txt, csv, json, xml, html, yaml, etc.)")),
|
|
158
|
+
file_query=(str, Field(description="""Edit instructions with OLD/NEW markers. Format:
|
|
159
|
+
OLD <<<<
|
|
160
|
+
old content to replace
|
|
161
|
+
>>>> OLD
|
|
162
|
+
NEW <<<<
|
|
163
|
+
new content
|
|
164
|
+
>>>> NEW
|
|
165
|
+
|
|
166
|
+
Multiple OLD/NEW pairs can be provided for multiple edits.""")),
|
|
167
|
+
branch=(Optional[str], Field(description="Branch name. If None, uses active branch.", default=None)),
|
|
168
|
+
commit_message=(Optional[str], Field(description="Commit message for the change (VCS toolkits only)", default=None)),
|
|
169
|
+
)
|
|
131
170
|
|
|
171
|
+
SearchFileInput = create_model(
|
|
172
|
+
"SearchFileInput",
|
|
173
|
+
file_path=(str, Field(description="Path to the file to search")),
|
|
174
|
+
pattern=(str, Field(description="Search pattern. Treated as regex by default unless is_regex=False.")),
|
|
175
|
+
branch=(Optional[str], Field(description="Branch name. If None, uses active branch.", default=None)),
|
|
176
|
+
is_regex=(bool, Field(description="Whether pattern is a regex. Default is True for flexible matching.", default=True)),
|
|
177
|
+
context_lines=(int, Field(description="Number of lines before/after match to include for context", default=2, ge=0)),
|
|
178
|
+
)
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
class BaseToolApiWrapper(BaseModel):
|
|
182
|
+
|
|
183
|
+
# Optional RunnableConfig for CLI/standalone usage (allows dispatch_custom_event to work)
|
|
184
|
+
_runnable_config: Optional[Dict[str, Any]] = None
|
|
185
|
+
# toolkit id propagated from backend
|
|
186
|
+
toolkit_id: int = 0
|
|
132
187
|
def get_available_tools(self):
|
|
133
188
|
raise NotImplementedError("Subclasses should implement this method")
|
|
134
189
|
|
|
135
|
-
def
|
|
136
|
-
"""
|
|
190
|
+
def set_runnable_config(self, config: Optional[Dict[str, Any]]) -> None:
|
|
191
|
+
"""
|
|
192
|
+
Set the RunnableConfig for dispatching custom events.
|
|
193
|
+
|
|
194
|
+
This is required when running outside of a LangChain agent context
|
|
195
|
+
(e.g., from CLI). Without a config containing a run_id,
|
|
196
|
+
dispatch_custom_event will fail with "Unable to dispatch an adhoc event
|
|
197
|
+
without a parent run id".
|
|
198
|
+
|
|
199
|
+
Args:
|
|
200
|
+
config: A RunnableConfig dict with at least {'run_id': uuid}
|
|
201
|
+
"""
|
|
202
|
+
self._runnable_config = config
|
|
203
|
+
|
|
204
|
+
def _log_tool_event(self, message: str, tool_name: str = None, config: Optional[Dict[str, Any]] = None):
|
|
205
|
+
"""Log data and dispatch custom event for the tool.
|
|
206
|
+
|
|
207
|
+
Args:
|
|
208
|
+
message: The message to log
|
|
209
|
+
tool_name: Name of the tool (defaults to 'tool_progress')
|
|
210
|
+
config: Optional RunnableConfig. If not provided, uses self._runnable_config.
|
|
211
|
+
Required when running outside a LangChain agent context.
|
|
212
|
+
"""
|
|
137
213
|
|
|
138
214
|
try:
|
|
139
215
|
from langchain_core.callbacks import dispatch_custom_event
|
|
@@ -142,6 +218,10 @@ class BaseToolApiWrapper(BaseModel):
|
|
|
142
218
|
tool_name = 'tool_progress'
|
|
143
219
|
|
|
144
220
|
logger.info(message)
|
|
221
|
+
|
|
222
|
+
# Use provided config, fall back to instance config
|
|
223
|
+
effective_config = config or self._runnable_config
|
|
224
|
+
|
|
145
225
|
dispatch_custom_event(
|
|
146
226
|
name="thinking_step",
|
|
147
227
|
data={
|
|
@@ -149,14 +229,14 @@ class BaseToolApiWrapper(BaseModel):
|
|
|
149
229
|
"tool_name": tool_name,
|
|
150
230
|
"toolkit": self.__class__.__name__,
|
|
151
231
|
},
|
|
232
|
+
config=effective_config,
|
|
152
233
|
)
|
|
153
234
|
except Exception as e:
|
|
154
235
|
logger.warning(f"Failed to dispatch progress event: {str(e)}")
|
|
155
236
|
|
|
156
237
|
|
|
157
238
|
def run(self, mode: str, *args: Any, **kwargs: Any):
|
|
158
|
-
|
|
159
|
-
mode = mode.rsplit(TOOLKIT_SPLITTER, maxsplit=1)[1]
|
|
239
|
+
# Mode is now the clean tool name (no prefix to remove)
|
|
160
240
|
for tool in self.get_available_tools():
|
|
161
241
|
if tool["name"] == mode:
|
|
162
242
|
try:
|
|
@@ -165,6 +245,11 @@ class BaseToolApiWrapper(BaseModel):
|
|
|
165
245
|
# execution = str(execution)
|
|
166
246
|
return execution
|
|
167
247
|
except Exception as e:
|
|
248
|
+
# Re-raise McpAuthorizationRequired directly without wrapping
|
|
249
|
+
from alita_sdk.runtime.utils.mcp_oauth import McpAuthorizationRequired
|
|
250
|
+
if isinstance(e, McpAuthorizationRequired):
|
|
251
|
+
raise
|
|
252
|
+
|
|
168
253
|
# Catch all tool execution exceptions and provide user-friendly error messages
|
|
169
254
|
error_type = type(e).__name__
|
|
170
255
|
error_message = str(e)
|
|
@@ -554,11 +639,284 @@ class BaseCodeToolApiWrapper(BaseVectorStoreToolApiWrapper):
|
|
|
554
639
|
def _get_files(self):
|
|
555
640
|
raise NotImplementedError("Subclasses should implement this method")
|
|
556
641
|
|
|
557
|
-
def _read_file(
|
|
642
|
+
def _read_file(
|
|
643
|
+
self,
|
|
644
|
+
file_path: str,
|
|
645
|
+
branch: str = None,
|
|
646
|
+
offset: Optional[int] = None,
|
|
647
|
+
limit: Optional[int] = None,
|
|
648
|
+
head: Optional[int] = None,
|
|
649
|
+
tail: Optional[int] = None,
|
|
650
|
+
**kwargs # Allow subclasses to have additional parameters
|
|
651
|
+
) -> str:
|
|
652
|
+
"""
|
|
653
|
+
Read file content with optional partial read support.
|
|
654
|
+
|
|
655
|
+
Subclasses should implement this method. If they don't support partial reads,
|
|
656
|
+
they can accept **kwargs and ignore offset/limit/head/tail parameters - the base
|
|
657
|
+
class high-level methods will apply slicing client-side.
|
|
658
|
+
|
|
659
|
+
Args:
|
|
660
|
+
file_path: Path to the file
|
|
661
|
+
branch: Branch name (None for active branch)
|
|
662
|
+
offset: Starting line number (1-indexed)
|
|
663
|
+
limit: Number of lines to read from offset
|
|
664
|
+
head: Read only first N lines
|
|
665
|
+
tail: Read only last N lines
|
|
666
|
+
**kwargs: Additional toolkit-specific parameters (e.g., repo_name for GitHub)
|
|
667
|
+
|
|
668
|
+
Returns:
|
|
669
|
+
File content as string
|
|
670
|
+
"""
|
|
558
671
|
raise NotImplementedError("Subclasses should implement this method")
|
|
672
|
+
|
|
673
|
+
def _write_file(
|
|
674
|
+
self,
|
|
675
|
+
file_path: str,
|
|
676
|
+
content: str,
|
|
677
|
+
branch: str = None,
|
|
678
|
+
commit_message: str = None
|
|
679
|
+
) -> str:
|
|
680
|
+
"""
|
|
681
|
+
Write content to a file.
|
|
682
|
+
|
|
683
|
+
Subclasses should implement this method to enable edit_file functionality.
|
|
684
|
+
For VCS toolkits, this may involve creating or updating files with commits.
|
|
685
|
+
|
|
686
|
+
Args:
|
|
687
|
+
file_path: Path to the file
|
|
688
|
+
content: New file content
|
|
689
|
+
branch: Branch name (None for active branch)
|
|
690
|
+
commit_message: Commit message (VCS toolkits only)
|
|
691
|
+
|
|
692
|
+
Returns:
|
|
693
|
+
Success message
|
|
694
|
+
"""
|
|
695
|
+
raise NotImplementedError("Subclasses should implement _write_file to enable editing")
|
|
559
696
|
|
|
560
697
|
def _file_commit_hash(self, file_path: str, branch: str):
|
|
561
698
|
pass
|
|
699
|
+
|
|
700
|
+
def read_file_chunk(
|
|
701
|
+
self,
|
|
702
|
+
file_path: str,
|
|
703
|
+
start_line: int,
|
|
704
|
+
end_line: Optional[int] = None,
|
|
705
|
+
branch: str = None
|
|
706
|
+
) -> str:
|
|
707
|
+
"""
|
|
708
|
+
Read a specific range of lines from a file.
|
|
709
|
+
|
|
710
|
+
Args:
|
|
711
|
+
file_path: Path to the file
|
|
712
|
+
start_line: Starting line number (1-indexed, inclusive)
|
|
713
|
+
end_line: Ending line number (1-indexed, inclusive). If None, reads to end.
|
|
714
|
+
branch: Branch name (None for active branch)
|
|
715
|
+
|
|
716
|
+
Returns:
|
|
717
|
+
File content for the specified line range
|
|
718
|
+
"""
|
|
719
|
+
from .utils.text_operations import apply_line_slice
|
|
720
|
+
|
|
721
|
+
# Calculate offset and limit from start_line and end_line
|
|
722
|
+
offset = start_line
|
|
723
|
+
limit = (end_line - start_line + 1) if end_line is not None else None
|
|
724
|
+
|
|
725
|
+
# Read the file with offset/limit
|
|
726
|
+
content = self._read_file(file_path, branch, offset=offset, limit=limit)
|
|
727
|
+
|
|
728
|
+
# Apply client-side slicing if toolkit doesn't support partial reads
|
|
729
|
+
# (toolkit's _read_file will return full content if it ignores offset/limit)
|
|
730
|
+
return apply_line_slice(content, offset=offset, limit=limit)
|
|
731
|
+
|
|
732
|
+
def read_multiple_files(
|
|
733
|
+
self,
|
|
734
|
+
file_paths: List[str],
|
|
735
|
+
branch: str = None,
|
|
736
|
+
offset: Optional[int] = None,
|
|
737
|
+
limit: Optional[int] = None
|
|
738
|
+
) -> Dict[str, str]:
|
|
739
|
+
"""
|
|
740
|
+
Read multiple files in batch.
|
|
741
|
+
|
|
742
|
+
Args:
|
|
743
|
+
file_paths: List of file paths to read
|
|
744
|
+
branch: Branch name (None for active branch)
|
|
745
|
+
offset: Starting line number for all files (1-indexed)
|
|
746
|
+
limit: Number of lines to read from offset for all files
|
|
747
|
+
|
|
748
|
+
Returns:
|
|
749
|
+
Dictionary mapping file paths to their content (or error messages)
|
|
750
|
+
"""
|
|
751
|
+
results = {}
|
|
752
|
+
|
|
753
|
+
for file_path in file_paths:
|
|
754
|
+
try:
|
|
755
|
+
content = self._read_file(
|
|
756
|
+
file_path,
|
|
757
|
+
branch,
|
|
758
|
+
offset=offset,
|
|
759
|
+
limit=limit
|
|
760
|
+
)
|
|
761
|
+
results[file_path] = content
|
|
762
|
+
except Exception as e:
|
|
763
|
+
results[file_path] = f"Error reading file: {str(e)}"
|
|
764
|
+
logger.error(f"Failed to read {file_path}: {e}")
|
|
765
|
+
|
|
766
|
+
return results
|
|
767
|
+
|
|
768
|
+
def search_file(
|
|
769
|
+
self,
|
|
770
|
+
file_path: str,
|
|
771
|
+
pattern: str,
|
|
772
|
+
branch: str = None,
|
|
773
|
+
is_regex: bool = True,
|
|
774
|
+
context_lines: int = 2
|
|
775
|
+
) -> str:
|
|
776
|
+
"""
|
|
777
|
+
Search for pattern in file content with context.
|
|
778
|
+
|
|
779
|
+
Args:
|
|
780
|
+
file_path: Path to the file
|
|
781
|
+
pattern: Search pattern (regex if is_regex=True, else literal)
|
|
782
|
+
branch: Branch name (None for active branch)
|
|
783
|
+
is_regex: Whether pattern is regex (default True)
|
|
784
|
+
context_lines: Lines of context before/after matches (default 2)
|
|
785
|
+
|
|
786
|
+
Returns:
|
|
787
|
+
Formatted string with search results and context
|
|
788
|
+
"""
|
|
789
|
+
from .utils.text_operations import search_in_content
|
|
790
|
+
|
|
791
|
+
# Read full file content
|
|
792
|
+
content = self._read_file(file_path, branch)
|
|
793
|
+
|
|
794
|
+
# Search for pattern
|
|
795
|
+
matches = search_in_content(content, pattern, is_regex, context_lines)
|
|
796
|
+
|
|
797
|
+
if not matches:
|
|
798
|
+
return f"No matches found for pattern '{pattern}' in {file_path}"
|
|
799
|
+
|
|
800
|
+
# Format results
|
|
801
|
+
result_lines = [f"Found {len(matches)} match(es) for pattern '{pattern}' in {file_path}:\n"]
|
|
802
|
+
|
|
803
|
+
for i, match in enumerate(matches, 1):
|
|
804
|
+
result_lines.append(f"\n--- Match {i} at line {match['line_number']} ---")
|
|
805
|
+
|
|
806
|
+
# Context before
|
|
807
|
+
if match['context_before']:
|
|
808
|
+
for line in match['context_before']:
|
|
809
|
+
result_lines.append(f" {line}")
|
|
810
|
+
|
|
811
|
+
# Matching line (highlighted)
|
|
812
|
+
result_lines.append(f"> {match['line_content']}")
|
|
813
|
+
|
|
814
|
+
# Context after
|
|
815
|
+
if match['context_after']:
|
|
816
|
+
for line in match['context_after']:
|
|
817
|
+
result_lines.append(f" {line}")
|
|
818
|
+
|
|
819
|
+
return "\n".join(result_lines)
|
|
820
|
+
|
|
821
|
+
def edit_file(
    self,
    file_path: str,
    file_query: str,
    branch: Optional[str] = None,
    commit_message: Optional[str] = None
) -> str:
    """
    Edit a file by applying OLD/NEW marker replacements.

    Only works with text files (markdown, txt, csv, json, xml, html, yaml,
    code files); binary/document formats are rejected up front.

    Args:
        file_path: Path to the file to edit.
        file_query: Edit instructions containing one or more marker pairs in
            the form: OLD <<<< old text >>>> OLD NEW <<<< new text >>>> NEW
        branch: Branch name (None for the active branch).
        commit_message: Commit message (VCS toolkits only).

    Returns:
        The result of the underlying write operation on success, or a
        human-readable message when an edit could not be applied or the
        edits left the content unchanged.

    Raises:
        ToolException: If the file is not text-editable, no marker pairs are
            found, the file cannot be read, or the write fails.
    """
    from .utils.text_operations import parse_old_new_markers, is_text_editable, try_apply_edit
    from langchain_core.callbacks import dispatch_custom_event

    # Reject binary/document formats early: marker-based text replacement is
    # only meaningful for plain-text content.
    if not is_text_editable(file_path):
        raise ToolException(
            f"Cannot edit binary/document file '{file_path}'. "
            f"Supported text formats: markdown, txt, csv, json, xml, html, yaml, code files."
        )

    # Parse OLD/NEW marker pairs from the query.
    edits = parse_old_new_markers(file_query)
    if not edits:
        raise ToolException(
            "No OLD/NEW marker pairs found in file_query. "
            "Format: OLD <<<< old text >>>> OLD NEW <<<< new text >>>> NEW"
        )

    # Read current file content.
    try:
        current_content = self._read_file(file_path, branch)
        if not isinstance(current_content, str):
            # _read_file may hand back a ToolException (or other non-str);
            # surface it instead of treating it as file content.
            raise current_content if isinstance(current_content, Exception) else ToolException(str(current_content))
    except ToolException:
        # Already descriptive — do not double-wrap in "Failed to read file".
        raise
    except Exception as e:
        raise ToolException(f"Failed to read file {file_path}: {e}")

    # Apply edits sequentially; stop and report on the first failed match.
    updated_content = current_content
    edits_applied = 0
    for old_text, new_text in edits:
        new_updated, error_message = try_apply_edit(
            content=updated_content,
            old_text=old_text,
            new_text=new_text,
            file_path=file_path,
        )

        if error_message:
            # Return (not raise) so the caller sees which edit failed and why.
            return error_message

        # A replacement was applied
        edits_applied += 1
        updated_content = new_updated

    # All edits applied, yet the final content equals the original: the
    # sequence of OLD/NEW pairs is redundant or self-cancelling.
    if current_content == updated_content:
        return (f"Edits for {file_path} were applied but the final content is identical to the original. "
                "The sequence of OLD/NEW pairs appears to be redundant or self-cancelling. "
                "Please simplify or review the file_query.")

    # Persist the updated content.
    try:
        result = self._write_file(file_path, updated_content, branch, commit_message)
    except NotImplementedError:
        raise ToolException(
            f"Editing not supported for this toolkit. "
            f"The _write_file method is not implemented."
        )
    except Exception as e:
        raise ToolException(f"Failed to write file {file_path}: {e}")

    # Dispatch file modification event; failure to notify is non-fatal.
    try:
        dispatch_custom_event("file_modified", {
            "message": f"File '{file_path}' edited successfully",
            "filename": file_path,
            "tool_name": "edit_file",
            "toolkit": self.__class__.__name__,
            "operation_type": "modify",
            "edits_applied": edits_applied,
        })
    except Exception as e:
        logger.warning(f"Failed to dispatch file_modified event: {e}")

    return result
|
|
562
920
|
|
|
563
921
|
def __handle_get_files(self, path: str, branch: str):
|
|
564
922
|
"""
|
|
@@ -589,27 +947,37 @@ class BaseCodeToolApiWrapper(BaseVectorStoreToolApiWrapper):
|
|
|
589
947
|
def loader(self,
|
|
590
948
|
branch: Optional[str] = None,
|
|
591
949
|
whitelist: Optional[List[str]] = None,
|
|
592
|
-
blacklist: Optional[List[str]] = None
|
|
950
|
+
blacklist: Optional[List[str]] = None,
|
|
951
|
+
chunked: bool = True) -> Generator[Document, None, None]:
|
|
593
952
|
"""
|
|
594
|
-
Generates
|
|
953
|
+
Generates Documents from files in a branch, respecting whitelist and blacklist patterns.
|
|
595
954
|
|
|
596
955
|
Parameters:
|
|
597
956
|
- branch (Optional[str]): Branch for listing files. Defaults to the current branch if None.
|
|
598
957
|
- whitelist (Optional[List[str]]): File extensions or paths to include. Defaults to all files if None.
|
|
599
958
|
- blacklist (Optional[List[str]]): File extensions or paths to exclude. Defaults to no exclusions if None.
|
|
959
|
+
- chunked (bool): If True (default), applies universal chunker based on file type.
|
|
960
|
+
If False, returns raw Documents without chunking.
|
|
600
961
|
|
|
601
962
|
Returns:
|
|
602
|
-
- generator: Yields
|
|
963
|
+
- generator: Yields Documents from files matching the whitelist but not the blacklist.
|
|
603
964
|
|
|
604
965
|
Example:
|
|
605
966
|
# Use 'feature-branch', include '.py' files, exclude 'test_' files
|
|
606
|
-
|
|
967
|
+
for doc in loader(branch='feature-branch', whitelist=['*.py'], blacklist=['*test_*']):
|
|
968
|
+
print(doc.page_content)
|
|
607
969
|
|
|
608
970
|
Notes:
|
|
609
971
|
- Whitelist and blacklist use Unix shell-style wildcards.
|
|
610
972
|
- Files must match the whitelist and not the blacklist to be included.
|
|
973
|
+
- When chunked=True:
|
|
974
|
+
- .md files → markdown chunker (header-based splitting)
|
|
975
|
+
- .py/.js/.ts/etc → code parser (TreeSitter-based)
|
|
976
|
+
- .json files → JSON chunker
|
|
977
|
+
- other files → default text chunker
|
|
611
978
|
"""
|
|
612
|
-
from .
|
|
979
|
+
from langchain_core.documents import Document
|
|
980
|
+
import hashlib
|
|
613
981
|
|
|
614
982
|
_files = self.__handle_get_files("", self.__get_branch(branch))
|
|
615
983
|
self._log_tool_event(message="Listing files in branch", tool_name="loader")
|
|
@@ -627,32 +995,52 @@ class BaseCodeToolApiWrapper(BaseVectorStoreToolApiWrapper):
|
|
|
627
995
|
or any(file_path.endswith(f'.{pattern}') for pattern in blacklist))
|
|
628
996
|
return False
|
|
629
997
|
|
|
630
|
-
def
|
|
998
|
+
def raw_document_generator() -> Generator[Document, None, None]:
|
|
999
|
+
"""Yields raw Documents without chunking."""
|
|
631
1000
|
self._log_tool_event(message="Reading the files", tool_name="loader")
|
|
632
|
-
# log the progress of file reading
|
|
633
1001
|
total_files = len(_files)
|
|
1002
|
+
processed = 0
|
|
1003
|
+
|
|
634
1004
|
for idx, file in enumerate(_files, 1):
|
|
635
1005
|
if is_whitelisted(file) and not is_blacklisted(file):
|
|
636
|
-
# read file ONLY if it matches whitelist and does not match blacklist
|
|
637
1006
|
try:
|
|
638
1007
|
file_content = self._read_file(file, self.__get_branch(branch))
|
|
639
1008
|
except Exception as e:
|
|
640
1009
|
logger.error(f"Failed to read file {file}: {e}")
|
|
641
|
-
|
|
1010
|
+
continue
|
|
1011
|
+
|
|
642
1012
|
if not file_content:
|
|
643
|
-
# empty file, skip
|
|
644
1013
|
continue
|
|
645
|
-
|
|
646
|
-
|
|
1014
|
+
|
|
1015
|
+
# Hash the file content for uniqueness tracking
|
|
647
1016
|
file_hash = hashlib.sha256(file_content.encode("utf-8")).hexdigest()
|
|
648
|
-
|
|
649
|
-
|
|
650
|
-
|
|
1017
|
+
processed += 1
|
|
1018
|
+
|
|
1019
|
+
yield Document(
|
|
1020
|
+
page_content=file_content,
|
|
1021
|
+
metadata={
|
|
1022
|
+
'file_path': file,
|
|
1023
|
+
'file_name': file,
|
|
1024
|
+
'source': file,
|
|
1025
|
+
'commit_hash': file_hash,
|
|
1026
|
+
}
|
|
1027
|
+
)
|
|
1028
|
+
|
|
651
1029
|
if idx % 10 == 0 or idx == total_files:
|
|
652
|
-
self._log_tool_event(
|
|
653
|
-
|
|
1030
|
+
self._log_tool_event(
|
|
1031
|
+
message=f"{idx} out of {total_files} files checked, {processed} matched",
|
|
1032
|
+
tool_name="loader"
|
|
1033
|
+
)
|
|
1034
|
+
|
|
1035
|
+
self._log_tool_event(message=f"{processed} files loaded", tool_name="loader")
|
|
654
1036
|
|
|
655
|
-
|
|
1037
|
+
if not chunked:
|
|
1038
|
+
# Return raw documents without chunking
|
|
1039
|
+
return raw_document_generator()
|
|
1040
|
+
|
|
1041
|
+
# Apply universal chunker based on file type
|
|
1042
|
+
from .chunkers.universal_chunker import universal_chunker
|
|
1043
|
+
return universal_chunker(raw_document_generator())
|
|
656
1044
|
|
|
657
1045
|
def index_data(self,
|
|
658
1046
|
index_name: str,
|
|
@@ -708,20 +1096,95 @@ def extend_with_vector_tools(method):
|
|
|
708
1096
|
return wrapper
|
|
709
1097
|
|
|
710
1098
|
|
|
1099
|
+
def extend_with_file_operations(method):
    """
    Decorator that appends file-operation tools to toolkits implementing
    both _read_file and _write_file.

    The added tools are:
    - read_file_chunk: Read specific line ranges
    - read_multiple_files: Batch read files
    - search_file: Search for patterns in files
    - edit_file: Edit files using OLD/NEW markers

    Custom Schema Support:
        A toolkit may override the default argument schemas by implementing
        _get_file_operation_schemas(), returning a dict that maps tool names
        to Pydantic models (e.g. ArtifactWrapper substituting bucket_name
        for branch).

    Example:
        def _get_file_operation_schemas(self):
            return {
                "read_file_chunk": MyCustomReadFileChunkInput,
                "read_multiple_files": MyCustomReadMultipleFilesInput,
            }
    """
    def wrapper(self, *args, **kwargs):
        tools = method(self, *args, **kwargs)

        # Guard clause: only extend when the toolkit provides both the read
        # and write primitives the file tools are built on.
        if not (callable(getattr(self, '_read_file', None))
                and callable(getattr(self, '_write_file', None))):
            return tools

        # Default schemas live in elitea_base.
        from . import elitea_base

        # Toolkit-specific schema overrides, if the hook is implemented.
        schema_overrides = {}
        schema_hook = getattr(self, '_get_file_operation_schemas', None)
        if callable(schema_hook):
            schema_overrides = schema_hook() or {}

        # (bound method, default schema) per tool; dict order fixes the
        # order in which the tools are appended.
        tool_specs = {
            "read_file_chunk": (self.read_file_chunk, elitea_base.ReadFileChunkInput),
            "read_multiple_files": (self.read_multiple_files, elitea_base.ReadMultipleFilesInput),
            "search_file": (self.search_file, elitea_base.SearchFileInput),
            "edit_file": (self.edit_file, elitea_base.EditFileInput),
        }

        for tool_name, (bound_ref, default_schema) in tool_specs.items():
            tools.append({
                "name": tool_name,
                "mode": tool_name,
                "ref": bound_ref,
                "description": bound_ref.__doc__,
                "args_schema": schema_overrides.get(tool_name, default_schema),
            })

        return tools

    return wrapper
|
|
1175
|
+
|
|
1176
|
+
|
|
711
1177
|
def filter_missconfigured_index_tools(method):
|
|
712
1178
|
def wrapper(self, *args, **kwargs):
|
|
713
1179
|
toolkit = method(self, *args, **kwargs)
|
|
714
1180
|
|
|
715
1181
|
# Validate index tools misconfiguration and exclude them if necessary
|
|
716
|
-
is_index_toolkit = any(tool.name.
|
|
717
|
-
if TOOLKIT_SPLITTER in tool.name else tool.name
|
|
718
|
-
in INDEX_TOOL_NAMES for tool in toolkit.tools)
|
|
1182
|
+
is_index_toolkit = any(tool.name in INDEX_TOOL_NAMES for tool in toolkit.tools)
|
|
719
1183
|
is_index_configuration_missing = not (kwargs.get('embedding_model')
|
|
720
1184
|
and kwargs.get('pgvector_configuration'))
|
|
721
1185
|
|
|
722
1186
|
if is_index_toolkit and is_index_configuration_missing:
|
|
723
|
-
toolkit.tools = [tool for tool in toolkit.tools if
|
|
724
|
-
1] if TOOLKIT_SPLITTER in tool.name else tool.name) not in INDEX_TOOL_NAMES]
|
|
1187
|
+
toolkit.tools = [tool for tool in toolkit.tools if tool.name not in INDEX_TOOL_NAMES]
|
|
725
1188
|
|
|
726
1189
|
return toolkit
|
|
727
1190
|
|