alita-sdk 0.3.351__py3-none-any.whl → 0.3.499__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- alita_sdk/cli/__init__.py +10 -0
- alita_sdk/cli/__main__.py +17 -0
- alita_sdk/cli/agent/__init__.py +5 -0
- alita_sdk/cli/agent/default.py +258 -0
- alita_sdk/cli/agent_executor.py +155 -0
- alita_sdk/cli/agent_loader.py +215 -0
- alita_sdk/cli/agent_ui.py +228 -0
- alita_sdk/cli/agents.py +3601 -0
- alita_sdk/cli/callbacks.py +647 -0
- alita_sdk/cli/cli.py +168 -0
- alita_sdk/cli/config.py +306 -0
- alita_sdk/cli/context/__init__.py +30 -0
- alita_sdk/cli/context/cleanup.py +198 -0
- alita_sdk/cli/context/manager.py +731 -0
- alita_sdk/cli/context/message.py +285 -0
- alita_sdk/cli/context/strategies.py +289 -0
- alita_sdk/cli/context/token_estimation.py +127 -0
- alita_sdk/cli/formatting.py +182 -0
- alita_sdk/cli/input_handler.py +419 -0
- alita_sdk/cli/inventory.py +1256 -0
- alita_sdk/cli/mcp_loader.py +315 -0
- alita_sdk/cli/toolkit.py +327 -0
- alita_sdk/cli/toolkit_loader.py +85 -0
- alita_sdk/cli/tools/__init__.py +43 -0
- alita_sdk/cli/tools/approval.py +224 -0
- alita_sdk/cli/tools/filesystem.py +1751 -0
- alita_sdk/cli/tools/planning.py +389 -0
- alita_sdk/cli/tools/terminal.py +414 -0
- alita_sdk/community/__init__.py +64 -8
- alita_sdk/community/inventory/__init__.py +224 -0
- alita_sdk/community/inventory/config.py +257 -0
- alita_sdk/community/inventory/enrichment.py +2137 -0
- alita_sdk/community/inventory/extractors.py +1469 -0
- alita_sdk/community/inventory/ingestion.py +3172 -0
- alita_sdk/community/inventory/knowledge_graph.py +1457 -0
- alita_sdk/community/inventory/parsers/__init__.py +218 -0
- alita_sdk/community/inventory/parsers/base.py +295 -0
- alita_sdk/community/inventory/parsers/csharp_parser.py +907 -0
- alita_sdk/community/inventory/parsers/go_parser.py +851 -0
- alita_sdk/community/inventory/parsers/html_parser.py +389 -0
- alita_sdk/community/inventory/parsers/java_parser.py +593 -0
- alita_sdk/community/inventory/parsers/javascript_parser.py +629 -0
- alita_sdk/community/inventory/parsers/kotlin_parser.py +768 -0
- alita_sdk/community/inventory/parsers/markdown_parser.py +362 -0
- alita_sdk/community/inventory/parsers/python_parser.py +604 -0
- alita_sdk/community/inventory/parsers/rust_parser.py +858 -0
- alita_sdk/community/inventory/parsers/swift_parser.py +832 -0
- alita_sdk/community/inventory/parsers/text_parser.py +322 -0
- alita_sdk/community/inventory/parsers/yaml_parser.py +370 -0
- alita_sdk/community/inventory/patterns/__init__.py +61 -0
- alita_sdk/community/inventory/patterns/ast_adapter.py +380 -0
- alita_sdk/community/inventory/patterns/loader.py +348 -0
- alita_sdk/community/inventory/patterns/registry.py +198 -0
- alita_sdk/community/inventory/presets.py +535 -0
- alita_sdk/community/inventory/retrieval.py +1403 -0
- alita_sdk/community/inventory/toolkit.py +173 -0
- alita_sdk/community/inventory/visualize.py +1370 -0
- alita_sdk/configurations/bitbucket.py +94 -2
- alita_sdk/configurations/confluence.py +96 -1
- alita_sdk/configurations/gitlab.py +79 -0
- alita_sdk/configurations/jira.py +103 -0
- alita_sdk/configurations/testrail.py +88 -0
- alita_sdk/configurations/xray.py +93 -0
- alita_sdk/configurations/zephyr_enterprise.py +93 -0
- alita_sdk/configurations/zephyr_essential.py +75 -0
- alita_sdk/runtime/clients/artifact.py +1 -1
- alita_sdk/runtime/clients/client.py +214 -42
- alita_sdk/runtime/clients/mcp_discovery.py +342 -0
- alita_sdk/runtime/clients/mcp_manager.py +262 -0
- alita_sdk/runtime/clients/sandbox_client.py +373 -0
- alita_sdk/runtime/langchain/assistant.py +118 -30
- alita_sdk/runtime/langchain/constants.py +8 -1
- alita_sdk/runtime/langchain/document_loaders/AlitaDocxMammothLoader.py +315 -3
- alita_sdk/runtime/langchain/document_loaders/AlitaExcelLoader.py +103 -60
- alita_sdk/runtime/langchain/document_loaders/AlitaJSONLoader.py +4 -1
- alita_sdk/runtime/langchain/document_loaders/AlitaPowerPointLoader.py +41 -12
- alita_sdk/runtime/langchain/document_loaders/AlitaTableLoader.py +1 -1
- alita_sdk/runtime/langchain/document_loaders/constants.py +116 -99
- alita_sdk/runtime/langchain/interfaces/llm_processor.py +2 -2
- alita_sdk/runtime/langchain/langraph_agent.py +307 -71
- alita_sdk/runtime/langchain/utils.py +48 -8
- alita_sdk/runtime/llms/preloaded.py +2 -6
- alita_sdk/runtime/models/mcp_models.py +61 -0
- alita_sdk/runtime/toolkits/__init__.py +26 -0
- alita_sdk/runtime/toolkits/application.py +9 -2
- alita_sdk/runtime/toolkits/artifact.py +18 -6
- alita_sdk/runtime/toolkits/datasource.py +13 -6
- alita_sdk/runtime/toolkits/mcp.py +780 -0
- alita_sdk/runtime/toolkits/planning.py +178 -0
- alita_sdk/runtime/toolkits/tools.py +205 -55
- alita_sdk/runtime/toolkits/vectorstore.py +9 -4
- alita_sdk/runtime/tools/__init__.py +11 -3
- alita_sdk/runtime/tools/application.py +7 -0
- alita_sdk/runtime/tools/artifact.py +225 -12
- alita_sdk/runtime/tools/function.py +95 -5
- alita_sdk/runtime/tools/graph.py +10 -4
- alita_sdk/runtime/tools/image_generation.py +212 -0
- alita_sdk/runtime/tools/llm.py +494 -102
- alita_sdk/runtime/tools/mcp_inspect_tool.py +284 -0
- alita_sdk/runtime/tools/mcp_remote_tool.py +181 -0
- alita_sdk/runtime/tools/mcp_server_tool.py +4 -4
- alita_sdk/runtime/tools/planning/__init__.py +36 -0
- alita_sdk/runtime/tools/planning/models.py +246 -0
- alita_sdk/runtime/tools/planning/wrapper.py +607 -0
- alita_sdk/runtime/tools/router.py +2 -1
- alita_sdk/runtime/tools/sandbox.py +180 -79
- alita_sdk/runtime/tools/vectorstore.py +22 -21
- alita_sdk/runtime/tools/vectorstore_base.py +125 -52
- alita_sdk/runtime/utils/AlitaCallback.py +106 -20
- alita_sdk/runtime/utils/mcp_client.py +465 -0
- alita_sdk/runtime/utils/mcp_oauth.py +244 -0
- alita_sdk/runtime/utils/mcp_sse_client.py +405 -0
- alita_sdk/runtime/utils/mcp_tools_discovery.py +124 -0
- alita_sdk/runtime/utils/streamlit.py +40 -13
- alita_sdk/runtime/utils/toolkit_utils.py +28 -9
- alita_sdk/runtime/utils/utils.py +12 -0
- alita_sdk/tools/__init__.py +77 -33
- alita_sdk/tools/ado/repos/__init__.py +7 -6
- alita_sdk/tools/ado/repos/repos_wrapper.py +11 -11
- alita_sdk/tools/ado/test_plan/__init__.py +7 -7
- alita_sdk/tools/ado/wiki/__init__.py +7 -11
- alita_sdk/tools/ado/wiki/ado_wrapper.py +89 -15
- alita_sdk/tools/ado/work_item/__init__.py +7 -11
- alita_sdk/tools/ado/work_item/ado_wrapper.py +17 -8
- alita_sdk/tools/advanced_jira_mining/__init__.py +8 -7
- alita_sdk/tools/aws/delta_lake/__init__.py +11 -9
- alita_sdk/tools/azure_ai/search/__init__.py +7 -6
- alita_sdk/tools/base_indexer_toolkit.py +345 -70
- alita_sdk/tools/bitbucket/__init__.py +9 -8
- alita_sdk/tools/bitbucket/api_wrapper.py +50 -6
- alita_sdk/tools/browser/__init__.py +4 -4
- alita_sdk/tools/carrier/__init__.py +4 -6
- alita_sdk/tools/chunkers/__init__.py +3 -1
- alita_sdk/tools/chunkers/sematic/json_chunker.py +1 -0
- alita_sdk/tools/chunkers/sematic/markdown_chunker.py +97 -6
- alita_sdk/tools/chunkers/sematic/proposal_chunker.py +1 -1
- alita_sdk/tools/chunkers/universal_chunker.py +270 -0
- alita_sdk/tools/cloud/aws/__init__.py +7 -6
- alita_sdk/tools/cloud/azure/__init__.py +7 -6
- alita_sdk/tools/cloud/gcp/__init__.py +7 -6
- alita_sdk/tools/cloud/k8s/__init__.py +7 -6
- alita_sdk/tools/code/linter/__init__.py +7 -7
- alita_sdk/tools/code/loaders/codesearcher.py +3 -2
- alita_sdk/tools/code/sonar/__init__.py +8 -7
- alita_sdk/tools/code_indexer_toolkit.py +199 -0
- alita_sdk/tools/confluence/__init__.py +9 -8
- alita_sdk/tools/confluence/api_wrapper.py +171 -75
- alita_sdk/tools/confluence/loader.py +10 -0
- alita_sdk/tools/custom_open_api/__init__.py +9 -4
- alita_sdk/tools/elastic/__init__.py +8 -7
- alita_sdk/tools/elitea_base.py +492 -52
- alita_sdk/tools/figma/__init__.py +7 -7
- alita_sdk/tools/figma/api_wrapper.py +2 -1
- alita_sdk/tools/github/__init__.py +9 -9
- alita_sdk/tools/github/api_wrapper.py +9 -26
- alita_sdk/tools/github/github_client.py +62 -2
- alita_sdk/tools/gitlab/__init__.py +8 -8
- alita_sdk/tools/gitlab/api_wrapper.py +135 -33
- alita_sdk/tools/gitlab_org/__init__.py +7 -8
- alita_sdk/tools/google/bigquery/__init__.py +11 -12
- alita_sdk/tools/google_places/__init__.py +8 -7
- alita_sdk/tools/jira/__init__.py +9 -7
- alita_sdk/tools/jira/api_wrapper.py +100 -52
- alita_sdk/tools/keycloak/__init__.py +8 -7
- alita_sdk/tools/localgit/local_git.py +56 -54
- alita_sdk/tools/memory/__init__.py +1 -1
- alita_sdk/tools/non_code_indexer_toolkit.py +3 -2
- alita_sdk/tools/ocr/__init__.py +8 -7
- alita_sdk/tools/openapi/__init__.py +10 -1
- alita_sdk/tools/pandas/__init__.py +8 -7
- alita_sdk/tools/postman/__init__.py +7 -8
- alita_sdk/tools/postman/api_wrapper.py +19 -8
- alita_sdk/tools/postman/postman_analysis.py +8 -1
- alita_sdk/tools/pptx/__init__.py +8 -9
- alita_sdk/tools/qtest/__init__.py +16 -11
- alita_sdk/tools/qtest/api_wrapper.py +1784 -88
- alita_sdk/tools/rally/__init__.py +7 -8
- alita_sdk/tools/report_portal/__init__.py +9 -7
- alita_sdk/tools/salesforce/__init__.py +7 -7
- alita_sdk/tools/servicenow/__init__.py +10 -10
- alita_sdk/tools/sharepoint/__init__.py +7 -6
- alita_sdk/tools/sharepoint/api_wrapper.py +127 -36
- alita_sdk/tools/sharepoint/authorization_helper.py +191 -1
- alita_sdk/tools/sharepoint/utils.py +8 -2
- alita_sdk/tools/slack/__init__.py +7 -6
- alita_sdk/tools/sql/__init__.py +8 -7
- alita_sdk/tools/sql/api_wrapper.py +71 -23
- alita_sdk/tools/testio/__init__.py +7 -6
- alita_sdk/tools/testrail/__init__.py +8 -9
- alita_sdk/tools/utils/__init__.py +26 -4
- alita_sdk/tools/utils/content_parser.py +88 -60
- alita_sdk/tools/utils/text_operations.py +254 -0
- alita_sdk/tools/vector_adapters/VectorStoreAdapter.py +76 -26
- alita_sdk/tools/xray/__init__.py +9 -7
- alita_sdk/tools/zephyr/__init__.py +7 -6
- alita_sdk/tools/zephyr_enterprise/__init__.py +8 -6
- alita_sdk/tools/zephyr_essential/__init__.py +7 -6
- alita_sdk/tools/zephyr_essential/api_wrapper.py +12 -13
- alita_sdk/tools/zephyr_scale/__init__.py +7 -6
- alita_sdk/tools/zephyr_squad/__init__.py +7 -6
- {alita_sdk-0.3.351.dist-info → alita_sdk-0.3.499.dist-info}/METADATA +147 -2
- {alita_sdk-0.3.351.dist-info → alita_sdk-0.3.499.dist-info}/RECORD +206 -130
- alita_sdk-0.3.499.dist-info/entry_points.txt +2 -0
- {alita_sdk-0.3.351.dist-info → alita_sdk-0.3.499.dist-info}/WHEEL +0 -0
- {alita_sdk-0.3.351.dist-info → alita_sdk-0.3.499.dist-info}/licenses/LICENSE +0 -0
- {alita_sdk-0.3.351.dist-info → alita_sdk-0.3.499.dist-info}/top_level.txt +0 -0
alita_sdk/tools/elitea_base.py
CHANGED
@@ -11,7 +11,6 @@ from pydantic import BaseModel, create_model, Field, SecretStr
 
 # from alita_sdk.runtime.langchain.interfaces.llm_processor import get_embeddings
 from .chunkers import markdown_chunker
-from .utils import TOOLKIT_SPLITTER
 from .vector_adapters.VectorStoreAdapter import VectorStoreAdapterFactory
 from ..runtime.utils.utils import IndexerKeywords
 
@@ -33,12 +32,12 @@ LoaderSchema = create_model(
 # Base Vector Store Schema Models
 BaseIndexParams = create_model(
     "BaseIndexParams",
-    …
+    index_name=(str, Field(description="Index name (max 7 characters)", min_length=1, max_length=7)),
 )
 
 BaseCodeIndexParams = create_model(
     "BaseCodeIndexParams",
-    …
+    index_name=(str, Field(description="Index name (max 7 characters)", min_length=1, max_length=7)),
     clean_index=(Optional[bool], Field(default=False, description="Optional flag to enforce clean existing index before indexing new data")),
     progress_step=(Optional[int], Field(default=5, ge=0, le=100,
                                         description="Optional step size for progress reporting during indexing")),
@@ -50,14 +49,14 @@ BaseCodeIndexParams = create_model(
 
 RemoveIndexParams = create_model(
     "RemoveIndexParams",
-    …
+    index_name=(Optional[str], Field(description="Optional index name (max 7 characters)", default="", max_length=7)),
 )
 
 BaseSearchParams = create_model(
     "BaseSearchParams",
     query=(str, Field(description="Query text to search in the index")),
-    …
-        description="Optional…
+    index_name=(Optional[str], Field(
+        description="Optional index name (max 7 characters). Leave empty to search across all datasets",
         default="", max_length=7)),
     filter=(Optional[dict], Field(
         description="Filter to apply to the search results. Can be a dictionary or a JSON string.",
@@ -87,7 +86,7 @@ BaseSearchParams = create_model(
 BaseStepbackSearchParams = create_model(
     "BaseStepbackSearchParams",
     query=(str, Field(description="Query text to search in the index")),
-    …
+    index_name=(Optional[str], Field(description="Optional index name (max 7 characters)", default="", max_length=7)),
     messages=(Optional[List], Field(description="Chat messages for stepback search context", default=[])),
     filter=(Optional[dict], Field(
         description="Filter to apply to the search results. Can be a dictionary or a JSON string.",
@@ -126,14 +125,91 @@ BaseIndexDataParams = create_model(
     chunking_config=(Optional[dict], Field(description="Chunking tool configuration", default_factory=dict)),
 )
 
+# File Operations Schema Models
+ReadFileInput = create_model(
+    "ReadFileInput",
+    file_path=(str, Field(description="Path to the file to read")),
+    branch=(Optional[str], Field(description="Branch name. If None, uses active branch.", default=None)),
+    offset=(Optional[int], Field(description="Starting line number (1-indexed, inclusive). Read from this line onwards.", default=None, ge=1)),
+    limit=(Optional[int], Field(description="Number of lines to read from offset. If None, reads to end.", default=None, ge=1)),
+    head=(Optional[int], Field(description="Read only the first N lines. Alternative to offset/limit.", default=None, ge=1)),
+    tail=(Optional[int], Field(description="Read only the last N lines. Alternative to offset/limit.", default=None, ge=1)),
+)
 
-…
+ReadFileChunkInput = create_model(
+    "ReadFileChunkInput",
+    file_path=(str, Field(description="Path to the file to read")),
+    branch=(Optional[str], Field(description="Branch name. If None, uses active branch.", default=None)),
+    start_line=(int, Field(description="Starting line number (1-indexed, inclusive)", ge=1)),
+    end_line=(Optional[int], Field(description="Ending line number (1-indexed, inclusive). If None, reads to end.", default=None, ge=1)),
+)
+
+ReadMultipleFilesInput = create_model(
+    "ReadMultipleFilesInput",
+    file_paths=(List[str], Field(description="List of file paths to read", min_length=1)),
+    branch=(Optional[str], Field(description="Branch name. If None, uses active branch.", default=None)),
+    offset=(Optional[int], Field(description="Starting line number for all files (1-indexed)", default=None, ge=1)),
+    limit=(Optional[int], Field(description="Number of lines to read from offset for all files", default=None, ge=1)),
+)
+
+EditFileInput = create_model(
+    "EditFileInput",
+    file_path=(str, Field(description="Path to the file to edit. Must be a text file (markdown, txt, csv, json, xml, html, yaml, etc.)")),
+    file_query=(str, Field(description="""Edit instructions with OLD/NEW markers. Format:
+OLD <<<<
+old content to replace
+>>>> OLD
+NEW <<<<
+new content
+>>>> NEW
+
+Multiple OLD/NEW pairs can be provided for multiple edits.""")),
+    branch=(Optional[str], Field(description="Branch name. If None, uses active branch.", default=None)),
+    commit_message=(Optional[str], Field(description="Commit message for the change (VCS toolkits only)", default=None)),
+)
 
+SearchFileInput = create_model(
+    "SearchFileInput",
+    file_path=(str, Field(description="Path to the file to search")),
+    pattern=(str, Field(description="Search pattern. Treated as regex by default unless is_regex=False.")),
+    branch=(Optional[str], Field(description="Branch name. If None, uses active branch.", default=None)),
+    is_regex=(bool, Field(description="Whether pattern is a regex. Default is True for flexible matching.", default=True)),
+    context_lines=(int, Field(description="Number of lines before/after match to include for context", default=2, ge=0)),
+)
+
+
+class BaseToolApiWrapper(BaseModel):
+
+    # Optional RunnableConfig for CLI/standalone usage (allows dispatch_custom_event to work)
+    _runnable_config: Optional[Dict[str, Any]] = None
+    # toolkit id propagated from backend
+    toolkit_id: int = 0
     def get_available_tools(self):
         raise NotImplementedError("Subclasses should implement this method")
 
-    def …
-    """…
+    def set_runnable_config(self, config: Optional[Dict[str, Any]]) -> None:
+        """
+        Set the RunnableConfig for dispatching custom events.
+
+        This is required when running outside of a LangChain agent context
+        (e.g., from CLI). Without a config containing a run_id,
+        dispatch_custom_event will fail with "Unable to dispatch an adhoc event
+        without a parent run id".
+
+        Args:
+            config: A RunnableConfig dict with at least {'run_id': uuid}
+        """
+        self._runnable_config = config
+
+    def _log_tool_event(self, message: str, tool_name: str = None, config: Optional[Dict[str, Any]] = None):
+        """Log data and dispatch custom event for the tool.
+
+        Args:
+            message: The message to log
+            tool_name: Name of the tool (defaults to 'tool_progress')
+            config: Optional RunnableConfig. If not provided, uses self._runnable_config.
+                Required when running outside a LangChain agent context.
+        """
 
         try:
             from langchain_core.callbacks import dispatch_custom_event
@@ -142,6 +218,10 @@ class BaseToolApiWrapper(BaseModel):
                 tool_name = 'tool_progress'
 
             logger.info(message)
+
+            # Use provided config, fall back to instance config
+            effective_config = config or self._runnable_config
+
             dispatch_custom_event(
                 name="thinking_step",
                 data={
@@ -149,14 +229,14 @@ class BaseToolApiWrapper(BaseModel):
                     "tool_name": tool_name,
                     "toolkit": self.__class__.__name__,
                 },
+                config=effective_config,
             )
         except Exception as e:
             logger.warning(f"Failed to dispatch progress event: {str(e)}")
 
 
     def run(self, mode: str, *args: Any, **kwargs: Any):
-…
-        mode = mode.rsplit(TOOLKIT_SPLITTER, maxsplit=1)[1]
+        # Mode is now the clean tool name (no prefix to remove)
         for tool in self.get_available_tools():
             if tool["name"] == mode:
                 try:
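The `set_runnable_config` and `effective_config` changes above exist so that progress events still work when a wrapper runs outside a LangChain agent, e.g. from the new CLI. A minimal sketch of the intended call pattern, based on the docstring above; `MyToolWrapper` here is a hypothetical `BaseToolApiWrapper` subclass:

```python
import uuid

wrapper = MyToolWrapper()  # hypothetical subclass of BaseToolApiWrapper

# Per the docstring, the config needs at least a run_id; without one,
# dispatch_custom_event raises "Unable to dispatch an adhoc event
# without a parent run id".
wrapper.set_runnable_config({"run_id": uuid.uuid4()})

# Subsequent tool calls can now emit "thinking_step" progress events.
wrapper._log_tool_event("indexing started", tool_name="index_data")
```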
@@ -165,6 +245,11 @@ class BaseToolApiWrapper(BaseModel):
                     # execution = str(execution)
                     return execution
                 except Exception as e:
+                    # Re-raise McpAuthorizationRequired directly without wrapping
+                    from alita_sdk.runtime.utils.mcp_oauth import McpAuthorizationRequired
+                    if isinstance(e, McpAuthorizationRequired):
+                        raise
+
                     # Catch all tool execution exceptions and provide user-friendly error messages
                     error_type = type(e).__name__
                     error_message = str(e)
@@ -324,12 +409,12 @@ class BaseVectorStoreToolApiWrapper(BaseToolApiWrapper):
         #
         docs = base_chunker(file_content_generator=docs, config=base_chunking_config)
         #
-…
+        index_name = kwargs.get("index_name")
         progress_step = kwargs.get("progress_step")
         clean_index = kwargs.get("clean_index")
         vs = self._init_vector_store()
         #
-        return vs.index_documents(docs,…
+        return vs.index_documents(docs, index_name=index_name, progress_step=progress_step, clean_index=clean_index)
 
     def _process_documents(self, documents: List[Document]) -> Generator[Document, None, None]:
         """
@@ -399,10 +484,10 @@ class BaseVectorStoreToolApiWrapper(BaseToolApiWrapper):
         )
         return self._vector_store
 
-    def remove_index(self,…
+    def remove_index(self, index_name: str = ""):
         """Cleans the indexed data in the collection."""
-        self._init_vector_store()._clean_collection(…
-        return (f"Collection '{…
+        self._init_vector_store()._clean_collection(index_name=index_name)
+        return (f"Collection '{index_name}' has been removed from the vector store.\n"
                 f"Available collections: {self.list_collections()}")
 
     def list_collections(self):
@@ -410,19 +495,19 @@ class BaseVectorStoreToolApiWrapper(BaseToolApiWrapper):
         vectorstore_wrapper = self._init_vector_store()
         return vectorstore_wrapper.list_collections()
 
-    def _build_collection_filter(self, filter: dict | str,…
+    def _build_collection_filter(self, filter: dict | str, index_name: str = "") -> dict:
         """Builds a filter for the collection based on the provided suffix."""
 
         filter = filter if isinstance(filter, dict) else json.loads(filter)
-        if…
+        if index_name:
             filter.update({"collection": {
-                "$eq":…
+                "$eq": index_name.strip()
             }})
         return filter
 
     def search_index(self,
                      query: str,
-…
+                     index_name: str = "",
                      filter: dict | str = {}, cut_off: float = 0.5,
                      search_top: int = 10, reranker: dict = {},
                      full_text_search: Optional[Dict[str, Any]] = None,
@@ -431,7 +516,7 @@ class BaseVectorStoreToolApiWrapper(BaseToolApiWrapper):
                      **kwargs):
         """ Searches indexed documents in the vector store."""
         vectorstore = self._init_vector_store()
-        filter = self._build_collection_filter(filter,…
+        filter = self._build_collection_filter(filter, index_name)
         found_docs = vectorstore.search_documents(
             query,
             doctype=self.doctype,
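All of the search entry points now scope queries the same way: `_build_collection_filter` merges a `collection` equality clause into the caller's filter whenever `index_name` is non-empty. A worked sketch with illustrative values; `wrapper` stands for any configured `BaseVectorStoreToolApiWrapper` subclass:

```python
# A user-supplied filter plus an index name (max 7 characters per the schema)...
user_filter = {"author": {"$eq": "alice"}}
docs = wrapper.search_index(
    "where is token refresh handled?",
    index_name="docs",
    filter=user_filter,
    search_top=5,
)
# ...is executed with the merged filter:
#   {"author": {"$eq": "alice"}, "collection": {"$eq": "docs"}}
# Leaving index_name empty searches across all collections.
```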
@@ -448,7 +533,7 @@ class BaseVectorStoreToolApiWrapper(BaseToolApiWrapper):
     def stepback_search_index(self,
                               query: str,
                               messages: List[Dict[str, Any]] = [],
-…
+                              index_name: str = "",
                               filter: dict | str = {}, cut_off: float = 0.5,
                               search_top: int = 10, reranker: dict = {},
                               full_text_search: Optional[Dict[str, Any]] = None,
@@ -457,7 +542,7 @@ class BaseVectorStoreToolApiWrapper(BaseToolApiWrapper):
                               **kwargs):
         """ Searches indexed documents in the vector store."""
 
-        filter = self._build_collection_filter(filter,…
+        filter = self._build_collection_filter(filter, index_name)
         vectorstore = self._init_vector_store()
         found_docs = vectorstore.stepback_search(
             query,
@@ -475,7 +560,7 @@ class BaseVectorStoreToolApiWrapper(BaseToolApiWrapper):
     def stepback_summary_index(self,
                                query: str,
                                messages: List[Dict[str, Any]] = [],
-…
+                               index_name: str = "",
                                filter: dict | str = {}, cut_off: float = 0.5,
                                search_top: int = 10, reranker: dict = {},
                                full_text_search: Optional[Dict[str, Any]] = None,
@@ -484,7 +569,7 @@ class BaseVectorStoreToolApiWrapper(BaseToolApiWrapper):
                                **kwargs):
         """ Generates a summary of indexed documents using stepback technique."""
         vectorstore = self._init_vector_store()
-        filter = self._build_collection_filter(filter,…
+        filter = self._build_collection_filter(filter, index_name)
 
         found_docs = vectorstore.stepback_summary(
             query,
@@ -554,11 +639,281 @@ class BaseCodeToolApiWrapper(BaseVectorStoreToolApiWrapper):
     def _get_files(self):
         raise NotImplementedError("Subclasses should implement this method")
 
-    def _read_file(…
+    def _read_file(
+        self,
+        file_path: str,
+        branch: str = None,
+        offset: Optional[int] = None,
+        limit: Optional[int] = None,
+        head: Optional[int] = None,
+        tail: Optional[int] = None,
+        **kwargs  # Allow subclasses to have additional parameters
+    ) -> str:
+        """
+        Read file content with optional partial read support.
+
+        Subclasses should implement this method. If they don't support partial reads,
+        they can accept **kwargs and ignore offset/limit/head/tail parameters - the base
+        class high-level methods will apply slicing client-side.
+
+        Args:
+            file_path: Path to the file
+            branch: Branch name (None for active branch)
+            offset: Starting line number (1-indexed)
+            limit: Number of lines to read from offset
+            head: Read only first N lines
+            tail: Read only last N lines
+            **kwargs: Additional toolkit-specific parameters (e.g., repo_name for GitHub)
+
+        Returns:
+            File content as string
+        """
         raise NotImplementedError("Subclasses should implement this method")
+
+    def _write_file(
+        self,
+        file_path: str,
+        content: str,
+        branch: str = None,
+        commit_message: str = None
+    ) -> str:
+        """
+        Write content to a file.
+
+        Subclasses should implement this method to enable edit_file functionality.
+        For VCS toolkits, this may involve creating or updating files with commits.
+
+        Args:
+            file_path: Path to the file
+            content: New file content
+            branch: Branch name (None for active branch)
+            commit_message: Commit message (VCS toolkits only)
+
+        Returns:
+            Success message
+        """
+        raise NotImplementedError("Subclasses should implement _write_file to enable editing")
 
     def _file_commit_hash(self, file_path: str, branch: str):
         pass
+
+    def read_file_chunk(
+        self,
+        file_path: str,
+        start_line: int,
+        end_line: Optional[int] = None,
+        branch: str = None
+    ) -> str:
+        """
+        Read a specific range of lines from a file.
+
+        Args:
+            file_path: Path to the file
+            start_line: Starting line number (1-indexed, inclusive)
+            end_line: Ending line number (1-indexed, inclusive). If None, reads to end.
+            branch: Branch name (None for active branch)
+
+        Returns:
+            File content for the specified line range
+        """
+        from .utils.text_operations import apply_line_slice
+
+        # Calculate offset and limit from start_line and end_line
+        offset = start_line
+        limit = (end_line - start_line + 1) if end_line is not None else None
+
+        # Read the file with offset/limit
+        content = self._read_file(file_path, branch, offset=offset, limit=limit)
+
+        # Apply client-side slicing if toolkit doesn't support partial reads
+        # (toolkit's _read_file will return full content if it ignores offset/limit)
+        return apply_line_slice(content, offset=offset, limit=limit)
+
+    def read_multiple_files(
+        self,
+        file_paths: List[str],
+        branch: str = None,
+        offset: Optional[int] = None,
+        limit: Optional[int] = None
+    ) -> Dict[str, str]:
+        """
+        Read multiple files in batch.
+
+        Args:
+            file_paths: List of file paths to read
+            branch: Branch name (None for active branch)
+            offset: Starting line number for all files (1-indexed)
+            limit: Number of lines to read from offset for all files
+
+        Returns:
+            Dictionary mapping file paths to their content (or error messages)
+        """
+        results = {}
+
+        for file_path in file_paths:
+            try:
+                content = self._read_file(
+                    file_path,
+                    branch,
+                    offset=offset,
+                    limit=limit
+                )
+                results[file_path] = content
+            except Exception as e:
+                results[file_path] = f"Error reading file: {str(e)}"
+                logger.error(f"Failed to read {file_path}: {e}")
+
+        return results
+
+    def search_file(
+        self,
+        file_path: str,
+        pattern: str,
+        branch: str = None,
+        is_regex: bool = True,
+        context_lines: int = 2
+    ) -> str:
+        """
+        Search for pattern in file content with context.
+
+        Args:
+            file_path: Path to the file
+            pattern: Search pattern (regex if is_regex=True, else literal)
+            branch: Branch name (None for active branch)
+            is_regex: Whether pattern is regex (default True)
+            context_lines: Lines of context before/after matches (default 2)
+
+        Returns:
+            Formatted string with search results and context
+        """
+        from .utils.text_operations import search_in_content
+
+        # Read full file content
+        content = self._read_file(file_path, branch)
+
+        # Search for pattern
+        matches = search_in_content(content, pattern, is_regex, context_lines)
+
+        if not matches:
+            return f"No matches found for pattern '{pattern}' in {file_path}"
+
+        # Format results
+        result_lines = [f"Found {len(matches)} match(es) for pattern '{pattern}' in {file_path}:\n"]
+
+        for i, match in enumerate(matches, 1):
+            result_lines.append(f"\n--- Match {i} at line {match['line_number']} ---")
+
+            # Context before
+            if match['context_before']:
+                for line in match['context_before']:
+                    result_lines.append(f"  {line}")
+
+            # Matching line (highlighted)
+            result_lines.append(f"> {match['line_content']}")
+
+            # Context after
+            if match['context_after']:
+                for line in match['context_after']:
+                    result_lines.append(f"  {line}")
+
+        return "\n".join(result_lines)
+
+    def edit_file(
+        self,
+        file_path: str,
+        file_query: str,
+        branch: str = None,
+        commit_message: str = None
+    ) -> str:
+        """
+        Edit file using OLD/NEW markers for precise replacements.
+
+        Only works with text files (markdown, txt, csv, json, xml, html, yaml, code files).
+
+        Args:
+            file_path: Path to the file to edit
+            file_query: Edit instructions with OLD/NEW markers
+            branch: Branch name (None for active branch)
+            commit_message: Commit message (VCS toolkits only)
+
+        Returns:
+            Success message or error
+
+        Raises:
+            ToolException: If file is not text-editable or edit fails
+        """
+        from .utils.text_operations import parse_old_new_markers, is_text_editable
+        from langchain_core.callbacks import dispatch_custom_event
+
+        # Validate file is text-editable
+        if not is_text_editable(file_path):
+            raise ToolException(
+                f"Cannot edit binary/document file '{file_path}'. "
+                f"Supported text formats: markdown, txt, csv, json, xml, html, yaml, code files."
+            )
+
+        # Parse OLD/NEW markers
+        edits = parse_old_new_markers(file_query)
+        if not edits:
+            raise ToolException(
+                "No OLD/NEW marker pairs found in file_query. "
+                "Format: OLD <<<< old text >>>> OLD NEW <<<< new text >>>> NEW"
+            )
+
+        # Read current file content
+        try:
+            current_content = self._read_file(file_path, branch)
+        except Exception as e:
+            raise ToolException(f"Failed to read file {file_path}: {e}")
+
+        # Apply all edits
+        updated_content = current_content
+        for old_text, new_text in edits:
+            if not old_text.strip():
+                continue
+
+            if old_text not in updated_content:
+                logger.warning(
+                    f"Old content not found in {file_path}. "
+                    f"Looking for: {old_text[:100]}..."
+                )
+                continue
+
+            updated_content = updated_content.replace(old_text, new_text)
+
+        # Check if any changes were made
+        if current_content == updated_content:
+            return (
+                f"No changes made to {file_path}. "
+                "Old content was not found or is empty. "
+                "Use read_file or search_file to verify current content."
+            )
+
+        # Write updated content
+        try:
+            result = self._write_file(file_path, updated_content, branch, commit_message)
+        except NotImplementedError:
+            raise ToolException(
+                f"Editing not supported for this toolkit. "
+                f"The _write_file method is not implemented."
+            )
+        except Exception as e:
+            raise ToolException(f"Failed to write file {file_path}: {e}")
+
+        # Dispatch file modification event
+        try:
+            dispatch_custom_event("file_modified", {
+                "message": f"File '{file_path}' edited successfully",
+                "filename": file_path,
+                "tool_name": "edit_file",
+                "toolkit": self.__class__.__name__,
+                "operation_type": "modify",
+                "edits_applied": len(edits)
+            })
+        except Exception as e:
+            logger.warning(f"Failed to dispatch file_modified event: {e}")
+
+        return result
 
     def __handle_get_files(self, path: str, branch: str):
         """
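`edit_file` applies literal string replacements parsed out of the OLD/NEW marker blocks, then writes the result through `_write_file`. A sketch of a call using the marker format from `EditFileInput`; the file path and contents are illustrative:

```python
file_query = """OLD <<<<
timeout = 30
>>>> OLD
NEW <<<<
timeout = 60
>>>> NEW"""

# One OLD/NEW pair per replacement; multiple pairs may be supplied.
result = wrapper.edit_file(
    "config/settings.py",
    file_query,
    branch="main",
    commit_message="Increase default timeout",  # used by VCS toolkits only
)
```

Per the implementation above, unmatched OLD blocks are skipped with a warning rather than failing the whole edit, and a `file_modified` custom event is dispatched on success.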
@@ -589,27 +944,37 @@ class BaseCodeToolApiWrapper(BaseVectorStoreToolApiWrapper):
     def loader(self,
                branch: Optional[str] = None,
                whitelist: Optional[List[str]] = None,
-               blacklist: Optional[List[str]] = None…
+               blacklist: Optional[List[str]] = None,
+               chunked: bool = True) -> Generator[Document, None, None]:
         """
-        Generates…
+        Generates Documents from files in a branch, respecting whitelist and blacklist patterns.
 
         Parameters:
         - branch (Optional[str]): Branch for listing files. Defaults to the current branch if None.
         - whitelist (Optional[List[str]]): File extensions or paths to include. Defaults to all files if None.
         - blacklist (Optional[List[str]]): File extensions or paths to exclude. Defaults to no exclusions if None.
+        - chunked (bool): If True (default), applies universal chunker based on file type.
+          If False, returns raw Documents without chunking.
 
         Returns:
-        - generator: Yields…
+        - generator: Yields Documents from files matching the whitelist but not the blacklist.
 
         Example:
         # Use 'feature-branch', include '.py' files, exclude 'test_' files
-…
+        for doc in loader(branch='feature-branch', whitelist=['*.py'], blacklist=['*test_*']):
+            print(doc.page_content)
 
         Notes:
         - Whitelist and blacklist use Unix shell-style wildcards.
         - Files must match the whitelist and not the blacklist to be included.
+        - When chunked=True:
+          - .md files → markdown chunker (header-based splitting)
+          - .py/.js/.ts/etc → code parser (TreeSitter-based)
+          - .json files → JSON chunker
+          - other files → default text chunker
         """
-        from .…
+        from langchain_core.documents import Document
+        import hashlib
 
         _files = self.__handle_get_files("", self.__get_branch(branch))
         self._log_tool_event(message="Listing files in branch", tool_name="loader")
@@ -627,35 +992,55 @@ class BaseCodeToolApiWrapper(BaseVectorStoreToolApiWrapper):
                     or any(file_path.endswith(f'.{pattern}') for pattern in blacklist))
             return False
 
-        def…
+        def raw_document_generator() -> Generator[Document, None, None]:
+            """Yields raw Documents without chunking."""
             self._log_tool_event(message="Reading the files", tool_name="loader")
-            # log the progress of file reading
             total_files = len(_files)
+            processed = 0
+
             for idx, file in enumerate(_files, 1):
                 if is_whitelisted(file) and not is_blacklisted(file):
-                    # read file ONLY if it matches whitelist and does not match blacklist
                     try:
                         file_content = self._read_file(file, self.__get_branch(branch))
                     except Exception as e:
                         logger.error(f"Failed to read file {file}: {e}")
-…
+                        continue
+
                     if not file_content:
-                        # empty file, skip
                         continue
-…
-…
+
+                    # Hash the file content for uniqueness tracking
                     file_hash = hashlib.sha256(file_content.encode("utf-8")).hexdigest()
-…
-…
-…
+                    processed += 1
+
+                    yield Document(
+                        page_content=file_content,
+                        metadata={
+                            'file_path': file,
+                            'file_name': file,
+                            'source': file,
+                            'commit_hash': file_hash,
+                        }
+                    )
+
                 if idx % 10 == 0 or idx == total_files:
-                    self._log_tool_event(…
-…
+                    self._log_tool_event(
+                        message=f"{idx} out of {total_files} files checked, {processed} matched",
+                        tool_name="loader"
+                    )
+
+            self._log_tool_event(message=f"{processed} files loaded", tool_name="loader")
 
-…
+        if not chunked:
+            # Return raw documents without chunking
+            return raw_document_generator()
+
+        # Apply universal chunker based on file type
+        from .chunkers.universal_chunker import universal_chunker
+        return universal_chunker(raw_document_generator())
 
     def index_data(self,
-…
+                   index_name: str,
                    branch: Optional[str] = None,
                    whitelist: Optional[List[str]] = None,
                    blacklist: Optional[List[str]] = None,
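`loader` now yields LangChain `Document` objects and, by default, routes them through the file-type-aware universal chunker; `index_data` gained `index_name` as its first parameter. A sketch reusing the docstring's own example values (the wrapper instance and index name are illustrative):

```python
# Raw, unchunked Documents:
for doc in wrapper.loader(branch="feature-branch",
                          whitelist=["*.py"], blacklist=["*test_*"],
                          chunked=False):
    print(doc.metadata["file_path"], len(doc.page_content))

# Index the same selection under a (max 7 characters) index name;
# clean_index is read from **kwargs, per the hunk below.
wrapper.index_data("pycode", branch="feature-branch",
                   whitelist=["*.py"], blacklist=["*test_*"],
                   clean_index=True)
```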
@@ -669,7 +1054,7 @@ class BaseCodeToolApiWrapper(BaseVectorStoreToolApiWrapper):
         )
         vectorstore = self._init_vector_store()
         clean_index = kwargs.get('clean_index', False)
-        return vectorstore.index_documents(documents,…
+        return vectorstore.index_documents(documents, index_name=index_name,
                                            clean_index=clean_index, is_code=True,
                                            progress_step=kwargs.get('progress_step', 5))
 
@@ -708,20 +1093,75 @@ def extend_with_vector_tools(method):
     return wrapper
 
 
+def extend_with_file_operations(method):
+    """
+    Decorator to automatically add file operation tools to toolkits that implement
+    _read_file and _write_file methods.
+
+    Adds:
+    - read_file_chunk: Read specific line ranges
+    - read_multiple_files: Batch read files
+    - search_file: Search for patterns in files
+    - edit_file: Edit files using OLD/NEW markers
+    """
+    def wrapper(self, *args, **kwargs):
+        tools = method(self, *args, **kwargs)
+
+        # Only add file operations if toolkit inherits from BaseCodeToolApiWrapper
+        # and has implemented the required methods
+        if isinstance(self, BaseCodeToolApiWrapper):
+            # Import schemas from elitea_base
+            from . import elitea_base
+
+            file_operation_tools = [
+                {
+                    "name": "read_file_chunk",
+                    "mode": "read_file_chunk",
+                    "ref": self.read_file_chunk,
+                    "description": self.read_file_chunk.__doc__,
+                    "args_schema": elitea_base.ReadFileChunkInput
+                },
+                {
+                    "name": "read_multiple_files",
+                    "mode": "read_multiple_files",
+                    "ref": self.read_multiple_files,
+                    "description": self.read_multiple_files.__doc__,
+                    "args_schema": elitea_base.ReadMultipleFilesInput
+                },
+                {
+                    "name": "search_file",
+                    "mode": "search_file",
+                    "ref": self.search_file,
+                    "description": self.search_file.__doc__,
+                    "args_schema": elitea_base.SearchFileInput
+                },
+                {
+                    "name": "edit_file",
+                    "mode": "edit_file",
+                    "ref": self.edit_file,
+                    "description": self.edit_file.__doc__,
+                    "args_schema": elitea_base.EditFileInput
+                },
+            ]
+
+            tools.extend(file_operation_tools)
+
+        return tools
+
+    return wrapper
+
+
 def filter_missconfigured_index_tools(method):
     def wrapper(self, *args, **kwargs):
         toolkit = method(self, *args, **kwargs)
 
         # Validate index tools misconfiguration and exclude them if necessary
-        is_index_toolkit = any(tool.name.…
-                               if TOOLKIT_SPLITTER in tool.name else tool.name
-                               in INDEX_TOOL_NAMES for tool in toolkit.tools)
+        is_index_toolkit = any(tool.name in INDEX_TOOL_NAMES for tool in toolkit.tools)
         is_index_configuration_missing = not (kwargs.get('embedding_model')
                                               and kwargs.get('pgvector_configuration'))
 
         if is_index_toolkit and is_index_configuration_missing:
-            toolkit.tools = [tool for tool in toolkit.tools if…
-                             …1] if TOOLKIT_SPLITTER in tool.name else tool.name) not in INDEX_TOOL_NAMES]
+            toolkit.tools = [tool for tool in toolkit.tools if tool.name not in INDEX_TOOL_NAMES]
 
         return toolkit
 
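`extend_with_file_operations` mirrors the existing `extend_with_vector_tools` decorator: it wraps a toolkit's `get_available_tools` and appends the four file tools whenever the wrapper is a `BaseCodeToolApiWrapper`. A sketch of how a toolkit would opt in; `MyRepoWrapper` and its internals are hypothetical:

```python
class MyRepoWrapper(BaseCodeToolApiWrapper):
    def _read_file(self, file_path, branch=None, **kwargs) -> str:
        ...  # return the file's text from the backing repository

    def _write_file(self, file_path, content, branch=None, commit_message=None) -> str:
        ...  # persist the new content (e.g., as a commit)

    @extend_with_file_operations
    def get_available_tools(self):
        # Toolkit-specific tools go here; read_file_chunk,
        # read_multiple_files, search_file and edit_file are
        # appended automatically by the decorator.
        return []
```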