alita-sdk 0.3.462__py3-none-any.whl → 0.3.627__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- alita_sdk/cli/agent/__init__.py +5 -0
- alita_sdk/cli/agent/default.py +258 -0
- alita_sdk/cli/agent_executor.py +15 -3
- alita_sdk/cli/agent_loader.py +56 -8
- alita_sdk/cli/agent_ui.py +93 -31
- alita_sdk/cli/agents.py +2274 -230
- alita_sdk/cli/callbacks.py +96 -25
- alita_sdk/cli/cli.py +10 -1
- alita_sdk/cli/config.py +162 -9
- alita_sdk/cli/context/__init__.py +30 -0
- alita_sdk/cli/context/cleanup.py +198 -0
- alita_sdk/cli/context/manager.py +731 -0
- alita_sdk/cli/context/message.py +285 -0
- alita_sdk/cli/context/strategies.py +289 -0
- alita_sdk/cli/context/token_estimation.py +127 -0
- alita_sdk/cli/input_handler.py +419 -0
- alita_sdk/cli/inventory.py +1073 -0
- alita_sdk/cli/testcases/__init__.py +94 -0
- alita_sdk/cli/testcases/data_generation.py +119 -0
- alita_sdk/cli/testcases/discovery.py +96 -0
- alita_sdk/cli/testcases/executor.py +84 -0
- alita_sdk/cli/testcases/logger.py +85 -0
- alita_sdk/cli/testcases/parser.py +172 -0
- alita_sdk/cli/testcases/prompts.py +91 -0
- alita_sdk/cli/testcases/reporting.py +125 -0
- alita_sdk/cli/testcases/setup.py +108 -0
- alita_sdk/cli/testcases/test_runner.py +282 -0
- alita_sdk/cli/testcases/utils.py +39 -0
- alita_sdk/cli/testcases/validation.py +90 -0
- alita_sdk/cli/testcases/workflow.py +196 -0
- alita_sdk/cli/toolkit.py +14 -17
- alita_sdk/cli/toolkit_loader.py +35 -5
- alita_sdk/cli/tools/__init__.py +36 -2
- alita_sdk/cli/tools/approval.py +224 -0
- alita_sdk/cli/tools/filesystem.py +910 -64
- alita_sdk/cli/tools/planning.py +389 -0
- alita_sdk/cli/tools/terminal.py +414 -0
- alita_sdk/community/__init__.py +72 -12
- alita_sdk/community/inventory/__init__.py +236 -0
- alita_sdk/community/inventory/config.py +257 -0
- alita_sdk/community/inventory/enrichment.py +2137 -0
- alita_sdk/community/inventory/extractors.py +1469 -0
- alita_sdk/community/inventory/ingestion.py +3172 -0
- alita_sdk/community/inventory/knowledge_graph.py +1457 -0
- alita_sdk/community/inventory/parsers/__init__.py +218 -0
- alita_sdk/community/inventory/parsers/base.py +295 -0
- alita_sdk/community/inventory/parsers/csharp_parser.py +907 -0
- alita_sdk/community/inventory/parsers/go_parser.py +851 -0
- alita_sdk/community/inventory/parsers/html_parser.py +389 -0
- alita_sdk/community/inventory/parsers/java_parser.py +593 -0
- alita_sdk/community/inventory/parsers/javascript_parser.py +629 -0
- alita_sdk/community/inventory/parsers/kotlin_parser.py +768 -0
- alita_sdk/community/inventory/parsers/markdown_parser.py +362 -0
- alita_sdk/community/inventory/parsers/python_parser.py +604 -0
- alita_sdk/community/inventory/parsers/rust_parser.py +858 -0
- alita_sdk/community/inventory/parsers/swift_parser.py +832 -0
- alita_sdk/community/inventory/parsers/text_parser.py +322 -0
- alita_sdk/community/inventory/parsers/yaml_parser.py +370 -0
- alita_sdk/community/inventory/patterns/__init__.py +61 -0
- alita_sdk/community/inventory/patterns/ast_adapter.py +380 -0
- alita_sdk/community/inventory/patterns/loader.py +348 -0
- alita_sdk/community/inventory/patterns/registry.py +198 -0
- alita_sdk/community/inventory/presets.py +535 -0
- alita_sdk/community/inventory/retrieval.py +1403 -0
- alita_sdk/community/inventory/toolkit.py +173 -0
- alita_sdk/community/inventory/toolkit_utils.py +176 -0
- alita_sdk/community/inventory/visualize.py +1370 -0
- alita_sdk/configurations/__init__.py +1 -1
- alita_sdk/configurations/ado.py +141 -20
- alita_sdk/configurations/bitbucket.py +0 -3
- alita_sdk/configurations/confluence.py +76 -42
- alita_sdk/configurations/figma.py +76 -0
- alita_sdk/configurations/gitlab.py +17 -5
- alita_sdk/configurations/openapi.py +329 -0
- alita_sdk/configurations/qtest.py +72 -1
- alita_sdk/configurations/report_portal.py +96 -0
- alita_sdk/configurations/sharepoint.py +148 -0
- alita_sdk/configurations/testio.py +83 -0
- alita_sdk/runtime/clients/artifact.py +3 -3
- alita_sdk/runtime/clients/client.py +353 -48
- alita_sdk/runtime/clients/sandbox_client.py +0 -21
- alita_sdk/runtime/langchain/_constants_bkup.py +1318 -0
- alita_sdk/runtime/langchain/assistant.py +123 -26
- alita_sdk/runtime/langchain/constants.py +642 -1
- alita_sdk/runtime/langchain/document_loaders/AlitaExcelLoader.py +103 -60
- alita_sdk/runtime/langchain/document_loaders/AlitaJSONLinesLoader.py +77 -0
- alita_sdk/runtime/langchain/document_loaders/AlitaJSONLoader.py +6 -3
- alita_sdk/runtime/langchain/document_loaders/AlitaPowerPointLoader.py +226 -7
- alita_sdk/runtime/langchain/document_loaders/AlitaTextLoader.py +5 -2
- alita_sdk/runtime/langchain/document_loaders/constants.py +12 -7
- alita_sdk/runtime/langchain/langraph_agent.py +279 -73
- alita_sdk/runtime/langchain/utils.py +82 -15
- alita_sdk/runtime/llms/preloaded.py +2 -6
- alita_sdk/runtime/skills/__init__.py +91 -0
- alita_sdk/runtime/skills/callbacks.py +498 -0
- alita_sdk/runtime/skills/discovery.py +540 -0
- alita_sdk/runtime/skills/executor.py +610 -0
- alita_sdk/runtime/skills/input_builder.py +371 -0
- alita_sdk/runtime/skills/models.py +330 -0
- alita_sdk/runtime/skills/registry.py +355 -0
- alita_sdk/runtime/skills/skill_runner.py +330 -0
- alita_sdk/runtime/toolkits/__init__.py +7 -0
- alita_sdk/runtime/toolkits/application.py +21 -9
- alita_sdk/runtime/toolkits/artifact.py +15 -5
- alita_sdk/runtime/toolkits/datasource.py +13 -6
- alita_sdk/runtime/toolkits/mcp.py +139 -251
- alita_sdk/runtime/toolkits/mcp_config.py +1048 -0
- alita_sdk/runtime/toolkits/planning.py +178 -0
- alita_sdk/runtime/toolkits/skill_router.py +238 -0
- alita_sdk/runtime/toolkits/subgraph.py +251 -6
- alita_sdk/runtime/toolkits/tools.py +238 -32
- alita_sdk/runtime/toolkits/vectorstore.py +11 -5
- alita_sdk/runtime/tools/__init__.py +3 -1
- alita_sdk/runtime/tools/application.py +20 -6
- alita_sdk/runtime/tools/artifact.py +511 -28
- alita_sdk/runtime/tools/data_analysis.py +183 -0
- alita_sdk/runtime/tools/function.py +43 -15
- alita_sdk/runtime/tools/image_generation.py +50 -44
- alita_sdk/runtime/tools/llm.py +852 -67
- alita_sdk/runtime/tools/loop.py +3 -1
- alita_sdk/runtime/tools/loop_output.py +3 -1
- alita_sdk/runtime/tools/mcp_remote_tool.py +25 -10
- alita_sdk/runtime/tools/mcp_server_tool.py +7 -6
- alita_sdk/runtime/tools/planning/__init__.py +36 -0
- alita_sdk/runtime/tools/planning/models.py +246 -0
- alita_sdk/runtime/tools/planning/wrapper.py +607 -0
- alita_sdk/runtime/tools/router.py +2 -4
- alita_sdk/runtime/tools/sandbox.py +9 -6
- alita_sdk/runtime/tools/skill_router.py +776 -0
- alita_sdk/runtime/tools/tool.py +3 -1
- alita_sdk/runtime/tools/vectorstore.py +7 -2
- alita_sdk/runtime/tools/vectorstore_base.py +51 -11
- alita_sdk/runtime/utils/AlitaCallback.py +137 -21
- alita_sdk/runtime/utils/constants.py +5 -1
- alita_sdk/runtime/utils/mcp_client.py +492 -0
- alita_sdk/runtime/utils/mcp_oauth.py +202 -5
- alita_sdk/runtime/utils/mcp_sse_client.py +36 -7
- alita_sdk/runtime/utils/mcp_tools_discovery.py +124 -0
- alita_sdk/runtime/utils/serialization.py +155 -0
- alita_sdk/runtime/utils/streamlit.py +6 -10
- alita_sdk/runtime/utils/toolkit_utils.py +16 -5
- alita_sdk/runtime/utils/utils.py +36 -0
- alita_sdk/tools/__init__.py +113 -29
- alita_sdk/tools/ado/repos/__init__.py +51 -33
- alita_sdk/tools/ado/repos/repos_wrapper.py +148 -89
- alita_sdk/tools/ado/test_plan/__init__.py +25 -9
- alita_sdk/tools/ado/test_plan/test_plan_wrapper.py +23 -1
- alita_sdk/tools/ado/utils.py +1 -18
- alita_sdk/tools/ado/wiki/__init__.py +25 -8
- alita_sdk/tools/ado/wiki/ado_wrapper.py +291 -22
- alita_sdk/tools/ado/work_item/__init__.py +26 -9
- alita_sdk/tools/ado/work_item/ado_wrapper.py +56 -3
- alita_sdk/tools/advanced_jira_mining/__init__.py +11 -8
- alita_sdk/tools/aws/delta_lake/__init__.py +13 -9
- alita_sdk/tools/aws/delta_lake/tool.py +5 -1
- alita_sdk/tools/azure_ai/search/__init__.py +11 -8
- alita_sdk/tools/azure_ai/search/api_wrapper.py +1 -1
- alita_sdk/tools/base/tool.py +5 -1
- alita_sdk/tools/base_indexer_toolkit.py +170 -45
- alita_sdk/tools/bitbucket/__init__.py +17 -12
- alita_sdk/tools/bitbucket/api_wrapper.py +59 -11
- alita_sdk/tools/bitbucket/cloud_api_wrapper.py +49 -35
- alita_sdk/tools/browser/__init__.py +5 -4
- alita_sdk/tools/carrier/__init__.py +5 -6
- alita_sdk/tools/carrier/backend_reports_tool.py +6 -6
- alita_sdk/tools/carrier/run_ui_test_tool.py +6 -6
- alita_sdk/tools/carrier/ui_reports_tool.py +5 -5
- alita_sdk/tools/chunkers/__init__.py +3 -1
- alita_sdk/tools/chunkers/code/treesitter/treesitter.py +37 -13
- alita_sdk/tools/chunkers/sematic/json_chunker.py +1 -0
- alita_sdk/tools/chunkers/sematic/markdown_chunker.py +97 -6
- alita_sdk/tools/chunkers/universal_chunker.py +270 -0
- alita_sdk/tools/cloud/aws/__init__.py +10 -7
- alita_sdk/tools/cloud/azure/__init__.py +10 -7
- alita_sdk/tools/cloud/gcp/__init__.py +10 -7
- alita_sdk/tools/cloud/k8s/__init__.py +10 -7
- alita_sdk/tools/code/linter/__init__.py +10 -8
- alita_sdk/tools/code/loaders/codesearcher.py +3 -2
- alita_sdk/tools/code/sonar/__init__.py +10 -7
- alita_sdk/tools/code_indexer_toolkit.py +73 -23
- alita_sdk/tools/confluence/__init__.py +21 -15
- alita_sdk/tools/confluence/api_wrapper.py +78 -23
- alita_sdk/tools/confluence/loader.py +4 -2
- alita_sdk/tools/custom_open_api/__init__.py +12 -5
- alita_sdk/tools/elastic/__init__.py +11 -8
- alita_sdk/tools/elitea_base.py +493 -30
- alita_sdk/tools/figma/__init__.py +58 -11
- alita_sdk/tools/figma/api_wrapper.py +1235 -143
- alita_sdk/tools/figma/figma_client.py +73 -0
- alita_sdk/tools/figma/toon_tools.py +2748 -0
- alita_sdk/tools/github/__init__.py +13 -14
- alita_sdk/tools/github/github_client.py +224 -100
- alita_sdk/tools/github/graphql_client_wrapper.py +119 -33
- alita_sdk/tools/github/schemas.py +14 -5
- alita_sdk/tools/github/tool.py +5 -1
- alita_sdk/tools/github/tool_prompts.py +9 -22
- alita_sdk/tools/gitlab/__init__.py +15 -11
- alita_sdk/tools/gitlab/api_wrapper.py +207 -41
- alita_sdk/tools/gitlab_org/__init__.py +10 -8
- alita_sdk/tools/gitlab_org/api_wrapper.py +63 -64
- alita_sdk/tools/google/bigquery/__init__.py +13 -12
- alita_sdk/tools/google/bigquery/tool.py +5 -1
- alita_sdk/tools/google_places/__init__.py +10 -8
- alita_sdk/tools/google_places/api_wrapper.py +1 -1
- alita_sdk/tools/jira/__init__.py +17 -11
- alita_sdk/tools/jira/api_wrapper.py +91 -40
- alita_sdk/tools/keycloak/__init__.py +11 -8
- alita_sdk/tools/localgit/__init__.py +9 -3
- alita_sdk/tools/localgit/local_git.py +62 -54
- alita_sdk/tools/localgit/tool.py +5 -1
- alita_sdk/tools/memory/__init__.py +11 -3
- alita_sdk/tools/non_code_indexer_toolkit.py +1 -0
- alita_sdk/tools/ocr/__init__.py +11 -8
- alita_sdk/tools/openapi/__init__.py +490 -114
- alita_sdk/tools/openapi/api_wrapper.py +1368 -0
- alita_sdk/tools/openapi/tool.py +20 -0
- alita_sdk/tools/pandas/__init__.py +20 -12
- alita_sdk/tools/pandas/api_wrapper.py +38 -25
- alita_sdk/tools/pandas/dataframe/generator/base.py +3 -1
- alita_sdk/tools/postman/__init__.py +11 -11
- alita_sdk/tools/pptx/__init__.py +10 -9
- alita_sdk/tools/pptx/pptx_wrapper.py +1 -1
- alita_sdk/tools/qtest/__init__.py +30 -10
- alita_sdk/tools/qtest/api_wrapper.py +430 -13
- alita_sdk/tools/rally/__init__.py +10 -8
- alita_sdk/tools/rally/api_wrapper.py +1 -1
- alita_sdk/tools/report_portal/__init__.py +12 -9
- alita_sdk/tools/salesforce/__init__.py +10 -9
- alita_sdk/tools/servicenow/__init__.py +17 -14
- alita_sdk/tools/servicenow/api_wrapper.py +1 -1
- alita_sdk/tools/sharepoint/__init__.py +10 -8
- alita_sdk/tools/sharepoint/api_wrapper.py +4 -4
- alita_sdk/tools/slack/__init__.py +10 -8
- alita_sdk/tools/slack/api_wrapper.py +2 -2
- alita_sdk/tools/sql/__init__.py +11 -9
- alita_sdk/tools/testio/__init__.py +10 -8
- alita_sdk/tools/testrail/__init__.py +11 -8
- alita_sdk/tools/testrail/api_wrapper.py +1 -1
- alita_sdk/tools/utils/__init__.py +9 -4
- alita_sdk/tools/utils/content_parser.py +77 -3
- alita_sdk/tools/utils/text_operations.py +410 -0
- alita_sdk/tools/utils/tool_prompts.py +79 -0
- alita_sdk/tools/vector_adapters/VectorStoreAdapter.py +17 -13
- alita_sdk/tools/xray/__init__.py +12 -9
- alita_sdk/tools/yagmail/__init__.py +9 -3
- alita_sdk/tools/zephyr/__init__.py +9 -7
- alita_sdk/tools/zephyr_enterprise/__init__.py +11 -8
- alita_sdk/tools/zephyr_essential/__init__.py +10 -8
- alita_sdk/tools/zephyr_essential/api_wrapper.py +30 -13
- alita_sdk/tools/zephyr_essential/client.py +2 -2
- alita_sdk/tools/zephyr_scale/__init__.py +11 -9
- alita_sdk/tools/zephyr_scale/api_wrapper.py +2 -2
- alita_sdk/tools/zephyr_squad/__init__.py +10 -8
- {alita_sdk-0.3.462.dist-info → alita_sdk-0.3.627.dist-info}/METADATA +147 -7
- alita_sdk-0.3.627.dist-info/RECORD +468 -0
- alita_sdk-0.3.627.dist-info/entry_points.txt +2 -0
- alita_sdk-0.3.462.dist-info/RECORD +0 -384
- alita_sdk-0.3.462.dist-info/entry_points.txt +0 -2
- {alita_sdk-0.3.462.dist-info → alita_sdk-0.3.627.dist-info}/WHEEL +0 -0
- {alita_sdk-0.3.462.dist-info → alita_sdk-0.3.627.dist-info}/licenses/LICENSE +0 -0
- {alita_sdk-0.3.462.dist-info → alita_sdk-0.3.627.dist-info}/top_level.txt +0 -0
alita_sdk/cli/tools/filesystem.py (+910 -64):

```diff
@@ -3,14 +3,56 @@ Filesystem tools for CLI agents.
 
 Provides comprehensive file system operations restricted to specific directories.
 Inspired by MCP filesystem server implementation.
+
+Also provides a FilesystemApiWrapper for integration with the inventory ingestion
+pipeline, enabling local document loading and chunking.
 """
 
+import base64
+import fnmatch
+import hashlib
+import logging
 import os
 from pathlib import Path
-from typing import Optional, List, Dict, Any
+from typing import Optional, List, Dict, Any, Generator, ClassVar
 from datetime import datetime
-from langchain_core.tools import BaseTool
-from
+from langchain_core.tools import BaseTool, ToolException
+from langchain_core.documents import Document
+from pydantic import BaseModel, Field, model_validator
+
+logger = logging.getLogger(__name__)
+
+
+# Maximum recommended content size for single write operations (in characters)
+MAX_RECOMMENDED_CONTENT_SIZE = 5000  # ~5KB, roughly 1,200-1,500 tokens
+
+# Helpful error message for truncated content
+CONTENT_TRUNCATED_ERROR = """
+⚠️ CONTENT FIELD MISSING - OUTPUT TRUNCATED
+
+Your tool call was cut off because the content was too large for the context window.
+The JSON was truncated, leaving the 'content' field incomplete or missing.
+
+🔧 HOW TO FIX THIS:
+
+1. **Use incremental writes** - Don't write large files in one call:
+   - First: filesystem_write_file(path, "# Header\\nimport x\\n\\n")
+   - Then: filesystem_append_file(path, "def func1():\\n ...\\n\\n")
+   - Then: filesystem_append_file(path, "def func2():\\n ...\\n\\n")
+
+2. **Keep each chunk small** - Under 2000 characters per call
+
+3. **Structure first, details later**:
+   - Write skeleton/structure first
+   - Add implementations section by section
+
+4. **For documentation/reports**:
+   - Write one section at a time
+   - Use append_file for each new section
+
+❌ DON'T: Try to write the entire file content again
+✅ DO: Break it into 3-5 smaller append_file calls
+"""
 
 
 class ReadFileInput(BaseModel):
```
```diff
@@ -47,7 +89,38 @@ class ReadMultipleFilesInput(BaseModel):
 class WriteFileInput(BaseModel):
     """Input for writing to a file."""
     path: str = Field(description="Relative path to the file to write")
-    content: str = Field(
+    content: Optional[str] = Field(
+        default=None,
+        description="Content to write to the file. REQUIRED - this field cannot be empty or omitted."
+    )
+
+    @model_validator(mode='after')
+    def validate_content_required(self):
+        """Provide helpful error message when content is missing or truncated."""
+        if self.content is None:
+            raise ToolException(CONTENT_TRUNCATED_ERROR)
+        if len(self.content) > MAX_RECOMMENDED_CONTENT_SIZE:
+            logger.warning(
+                f"Content is very large ({len(self.content)} chars). Consider using append_file "
+                "for incremental writes to avoid truncation issues."
+            )
+        return self
+
+
+class AppendFileInput(BaseModel):
+    """Input for appending to a file."""
+    path: str = Field(description="Relative path to the file to append to")
+    content: Optional[str] = Field(
+        default=None,
+        description="Content to append to the end of the file. REQUIRED - this field cannot be empty or omitted."
+    )
+
+    @model_validator(mode='after')
+    def validate_content_required(self):
+        """Provide helpful error message when content is missing or truncated."""
+        if self.content is None:
+            raise ToolException(CONTENT_TRUNCATED_ERROR)
+        return self
 
 
 class EditFileInput(BaseModel):
```
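The validator above converts a missing `content` field into actionable recovery guidance instead of a bare pydantic error. A minimal sketch of the behavior, assuming the module imports at the path shown in the file list (the schema names come straight from the hunk):

```python
# Sketch only: demonstrates the new validate_content_required behavior.
from alita_sdk.cli.tools.filesystem import WriteFileInput

try:
    WriteFileInput(path="report.md")  # 'content' omitted, as in a truncated tool call
except Exception as exc:
    # Should surface a ToolException carrying CONTENT_TRUNCATED_ERROR,
    # steering the model toward incremental write/append calls.
    print(type(exc).__name__, str(exc)[:80])
```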
```diff
@@ -62,18 +135,21 @@ class ListDirectoryInput(BaseModel):
     path: str = Field(default=".", description="Relative path to the directory to list")
     include_sizes: bool = Field(default=False, description="Include file sizes in the output")
     sort_by: str = Field(default="name", description="Sort by 'name' or 'size'")
+    max_results: Optional[int] = Field(default=200, description="Maximum number of entries to return. Default is 200 to prevent context overflow.")
 
 
 class DirectoryTreeInput(BaseModel):
     """Input for getting a directory tree."""
     path: str = Field(default=".", description="Relative path to the directory")
-    max_depth: Optional[int] = Field(
+    max_depth: Optional[int] = Field(default=3, description="Maximum depth to traverse. Default is 3 to prevent excessive output. Use None for unlimited (caution: may exceed context limits).")
+    max_items: Optional[int] = Field(default=200, description="Maximum number of files/directories to include. Default is 200 to prevent context window overflow. Use None for unlimited (caution: large directories may exceed context limits).")
 
 
 class SearchFilesInput(BaseModel):
     """Input for searching files."""
     path: str = Field(default=".", description="Relative path to search from")
     pattern: str = Field(description="Glob pattern to match (e.g., '*.py', '**/*.txt')")
+    max_results: Optional[int] = Field(default=100, description="Maximum number of results to return. Default is 100 to prevent context overflow. Use None for unlimited.")
 
 
 class DeleteFileInput(BaseModel):
```
```diff
@@ -104,29 +180,110 @@ class EmptyInput(BaseModel):
 
 class FileSystemTool(BaseTool):
     """Base class for filesystem tools with directory restriction."""
-    base_directory: str
+    base_directory: str  # Primary directory (for backward compatibility)
+    allowed_directories: List[str] = []  # Additional allowed directories
+    _basename_collision_detected: bool = False  # Cache for collision detection
+    _basename_collision_checked: bool = False  # Whether we've checked for collisions
+
+    def _get_all_allowed_directories(self) -> List[Path]:
+        """Get all allowed directories as resolved Paths."""
+        dirs = [Path(self.base_directory).resolve()]
+        for d in self.allowed_directories:
+            resolved = Path(d).resolve()
+            if resolved not in dirs:
+                dirs.append(resolved)
+        return dirs
+
+    def _check_basename_collision(self) -> bool:
+        """Check if multiple allowed directories have the same basename."""
+        if self._basename_collision_checked:
+            return self._basename_collision_detected
+
+        allowed_dirs = self._get_all_allowed_directories()
+        basenames = [d.name for d in allowed_dirs]
+        self._basename_collision_detected = len(basenames) != len(set(basenames))
+        self._basename_collision_checked = True
+        return self._basename_collision_detected
+
+    def _get_relative_path_from_allowed_dirs(self, absolute_path: Path) -> tuple:
+        """Get relative path and directory name for a file in allowed directories.
+
+        Args:
+            absolute_path: Absolute path to the file
+
+        Returns:
+            Tuple of (relative_path, directory_name)
+
+        Raises:
+            ValueError: If path is not within any allowed directory
+        """
+        allowed_dirs = self._get_all_allowed_directories()
+
+        # Find which allowed directory contains this path
+        for base in allowed_dirs:
+            try:
+                rel_path = absolute_path.relative_to(base)
+
+                # Determine directory name for prefix
+                if self._check_basename_collision():
+                    # Use parent/basename format to disambiguate
+                    dir_name = f"{base.parent.name}/{base.name}"
+                else:
+                    # Use just basename
+                    dir_name = base.name
+
+                return (str(rel_path), dir_name)
+            except ValueError:
+                continue
+
+        # Path not in any allowed directory
+        allowed_paths = [str(d) for d in allowed_dirs]
+        raise ValueError(
+            f"Path '{absolute_path}' is not within any allowed directory.\n"
+            f"Allowed directories: {allowed_paths}\n"
+            f"Attempted path: {absolute_path}"
+        )
 
     def _resolve_path(self, relative_path: str) -> Path:
         """
-        Resolve and validate a path within the
+        Resolve and validate a path within any of the allowed directories.
 
-        Security: Ensures resolved path is within allowed
+        Security: Ensures resolved path is within one of the allowed directories.
         """
-
+        allowed_dirs = self._get_all_allowed_directories()
 
-        # Handle
+        # Handle absolute paths - check if within any allowed directory
         if Path(relative_path).is_absolute():
             target = Path(relative_path).resolve()
-
-
+            for base in allowed_dirs:
+                try:
+                    target.relative_to(base)
+                    return target
+                except ValueError:
+                    continue
+            raise ValueError(f"Access denied: path '{relative_path}' is outside allowed directories")
 
-        #
-
-
-
-
+        # For relative paths, try to resolve against each allowed directory
+        # First check primary base_directory
+        primary_base = allowed_dirs[0]
+        target = (primary_base / relative_path).resolve()
+
+        # Check if target is within any allowed directory
+        for base in allowed_dirs:
+            try:
+                target.relative_to(base)
+                return target
+            except ValueError:
+                continue
 
-
+        # If relative path doesn't work from primary, try finding the file in other directories
+        for base in allowed_dirs[1:]:
+            candidate = (base / relative_path).resolve()
+            if candidate.exists():
+                return candidate
+
+        # Default to primary base directory resolution
+        raise ValueError(f"Access denied: path '{relative_path}' is outside allowed directories")
 
     def _format_size(self, size: int) -> str:
         """Format file size in human-readable format."""
```
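The net effect of the `allowed_directories` additions is a layered resolution order. A short illustration, with hypothetical directories (any concrete `FileSystemTool` subclass behaves the same way):

```python
# Illustrative only: directory layout is hypothetical.
tool = ListDirectoryTool(
    base_directory="/work/project",
    allowed_directories=["/work/shared"],
)

# Absolute paths are accepted only when inside one of the allowed roots.
tool._resolve_path("/work/shared/README.md")  # -> Path('/work/shared/README.md')

# Relative paths resolve against the primary base first,
# then fall back to probing the other roots for an existing file.
tool._resolve_path("src/main.py")             # -> Path('/work/project/src/main.py')

# Anything outside every allowed root raises ValueError("Access denied: ...").
```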
```diff
@@ -147,6 +304,11 @@ class ReadFileTool(FileSystemTool):
         "Only works within allowed directories."
     )
     args_schema: type[BaseModel] = ReadFileInput
+    truncation_suggestions: ClassVar[List[str]] = [
+        "Use head=100 to read only the first 100 lines",
+        "Use tail=100 to read only the last 100 lines",
+        "Use filesystem_read_file_chunk with start_line and end_line for specific sections",
+    ]
 
     def _run(self, path: str, head: Optional[int] = None, tail: Optional[int] = None) -> str:
         """Read a file with optional head/tail."""
@@ -196,6 +358,10 @@ class ReadFileChunkTool(FileSystemTool):
         "Only works within allowed directories."
     )
     args_schema: type[BaseModel] = ReadFileChunkInput
+    truncation_suggestions: ClassVar[List[str]] = [
+        "Reduce the line range (end_line - start_line) to read fewer lines at once",
+        "Read smaller chunks sequentially if you need to process the entire file",
+    ]
 
     def _run(self, path: str, start_line: int = 1, end_line: Optional[int] = None) -> str:
         """Read a chunk of a file by line range."""
@@ -246,6 +412,10 @@ class ReadMultipleFilesTool(FileSystemTool):
         "Only works within allowed directories."
     )
     args_schema: type[BaseModel] = ReadMultipleFilesInput
+    truncation_suggestions: ClassVar[List[str]] = [
+        "Read fewer files at once - split into multiple smaller batches",
+        "Use filesystem_read_file with head parameter on individual large files instead",
+    ]
 
     def _run(self, paths: List[str]) -> str:
         """Read multiple files."""
```
```diff
@@ -291,6 +461,43 @@ class WriteFileTool(FileSystemTool):
             return f"Error writing to file '{path}': {str(e)}"
 
 
+class AppendFileTool(FileSystemTool):
+    """Append content to the end of a file."""
+    name: str = "filesystem_append_file"
+    description: str = (
+        "Append content to the end of an existing file. Creates the file if it doesn't exist. "
+        "Use this for incremental file creation - write initial structure with write_file, "
+        "then add sections progressively with append_file. This is safer than rewriting "
+        "entire files and prevents context overflow. Only works within allowed directories."
+    )
+    args_schema: type[BaseModel] = AppendFileInput
+
+    def _run(self, path: str, content: str) -> str:
+        """Append to a file."""
+        try:
+            target = self._resolve_path(path)
+
+            # Create parent directories if they don't exist
+            target.parent.mkdir(parents=True, exist_ok=True)
+
+            # Check current file size if it exists
+            existed = target.exists()
+            original_size = target.stat().st_size if existed else 0
+
+            with open(target, 'a', encoding='utf-8') as f:
+                f.write(content)
+
+            appended_size = len(content.encode('utf-8'))
+            new_size = original_size + appended_size
+
+            if existed:
+                return f"Successfully appended {self._format_size(appended_size)} to '{path}' (total: {self._format_size(new_size)})"
+            else:
+                return f"Created '{path}' and wrote {self._format_size(appended_size)}"
+        except Exception as e:
+            return f"Error appending to file '{path}': {str(e)}"
+
+
 class EditFileTool(FileSystemTool):
     """Edit file with precise text replacement."""
     name: str = "filesystem_edit_file"
```
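This is the tool half of the incremental-write pattern that `CONTENT_TRUNCATED_ERROR` recommends: create the file with the first chunk, then grow it in small pieces. A sketch using only the `_run` signature shown above (paths and content are illustrative):

```python
# Illustrative incremental writes; each chunk stays well under
# MAX_RECOMMENDED_CONTENT_SIZE, so no single call risks truncation.
append_tool = AppendFileTool(base_directory="/work/project")

append_tool._run("report.md", "# Weekly report\n\n")          # creates the file
append_tool._run("report.md", "## Findings\n- item one\n\n")  # adds a section
append_tool._run("report.md", "## Next steps\n- item two\n")  # adds another
```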
```diff
@@ -443,8 +650,12 @@ class ListDirectoryTool(FileSystemTool):
         "Only works within allowed directories."
     )
     args_schema: type[BaseModel] = ListDirectoryInput
+    truncation_suggestions: ClassVar[List[str]] = [
+        "List a specific subdirectory instead of the root directory",
+        "Consider using filesystem_directory_tree with max_depth=1 for hierarchical overview",
+    ]
 
-    def _run(self, path: str = ".", include_sizes: bool = False, sort_by: str = "name") -> str:
+    def _run(self, path: str = ".", include_sizes: bool = False, sort_by: str = "name", max_results: Optional[int] = 200) -> str:
         """List directory contents."""
         try:
             target = self._resolve_path(path)
@@ -460,7 +671,8 @@ class ListDirectoryTool(FileSystemTool):
                 entry_info = {
                     'name': entry.name,
                     'is_dir': entry.is_dir(),
-                    'size': entry.stat().st_size if entry.is_file() else 0
+                    'size': entry.stat().st_size if entry.is_file() else 0,
+                    'path': entry
                 }
                 entries.append(entry_info)
 
@@ -470,6 +682,18 @@ class ListDirectoryTool(FileSystemTool):
             else:
                 entries.sort(key=lambda x: x['name'].lower())
 
+            # Apply limit
+            total_count = len(entries)
+            truncated = False
+            if max_results is not None and total_count > max_results:
+                entries = entries[:max_results]
+                truncated = True
+
+            # Get directory name for multi-directory configs
+            allowed_dirs = self._get_all_allowed_directories()
+            has_multiple_dirs = len(allowed_dirs) > 1
+            _, dir_name = self._get_relative_path_from_allowed_dirs(target) if has_multiple_dirs else ("", "")
+
             # Format output
             lines = []
             total_files = 0
@@ -478,7 +702,12 @@ class ListDirectoryTool(FileSystemTool):
 
             for entry in entries:
                 prefix = "[DIR] " if entry['is_dir'] else "[FILE]"
-
+
+                # Add directory prefix for multi-directory configs
+                if has_multiple_dirs:
+                    name = f"{dir_name}/{entry['name']}"
+                else:
+                    name = entry['name']
 
                 if include_sizes and not entry['is_dir']:
                     size_str = self._format_size(entry['size'])
@@ -494,13 +723,27 @@ class ListDirectoryTool(FileSystemTool):
 
             result = "\n".join(lines)
 
+            # Add header showing the listing context
+            if path in (".", "", "./"):
+                header = "Contents of working directory (./):\n\n"
+            else:
+                header = f"Contents of {path}/:\n\n"
+            result = header + result
+
             if include_sizes:
                 summary = f"\n\nTotal: {total_files} files, {total_dirs} directories"
                 if total_files > 0:
                     summary += f"\nCombined size: {self._format_size(total_size)}"
                 result += summary
 
-
+            if truncated:
+                result += f"\n\n⚠️ OUTPUT TRUNCATED: Showing {len(entries)} of {total_count} entries from '{dir_name if has_multiple_dirs else path}' (max_results={max_results})"
+                result += "\n To see more: increase max_results or list a specific subdirectory"
+
+            # Add note about how to access files
+            result += "\n\nNote: Access files using paths shown above (e.g., 'agents/file.md' for items in agents/ directory)"
+
             return result if lines else "Directory is empty"
         except Exception as e:
             return f"Error listing directory '{path}': {str(e)}"
 
```
```diff
@@ -511,25 +754,51 @@ class DirectoryTreeTool(FileSystemTool):
     description: str = (
         "Get a recursive tree view of files and directories. "
         "Shows the complete structure in an easy-to-read tree format. "
-        "
+        "IMPORTANT: For large directories, use max_depth (default: 3) and max_items (default: 200) "
+        "to prevent context window overflow. Increase these only if needed for smaller directories. "
         "Only works within allowed directories."
     )
     args_schema: type[BaseModel] = DirectoryTreeInput
+    truncation_suggestions: ClassVar[List[str]] = [
+        "Use max_depth=2 to limit directory traversal depth",
+        "Use max_items=50 to limit total items returned",
+        "Target a specific subdirectory instead of the root",
+    ]
+
+    # Track item count during tree building
+    _item_count: int = 0
+    _max_items: Optional[int] = None
+    _truncated: bool = False
 
     def _build_tree(self, directory: Path, prefix: str = "", depth: int = 0, max_depth: Optional[int] = None) -> List[str]:
-        """Recursively build directory tree."""
+        """Recursively build directory tree with item limit."""
+        # Check depth limit
         if max_depth is not None and depth >= max_depth:
             return []
 
+        # Check item limit
+        if self._max_items is not None and self._item_count >= self._max_items:
+            if not self._truncated:
+                self._truncated = True
+            return []
+
         lines = []
         try:
             entries = sorted(directory.iterdir(), key=lambda x: (not x.is_dir(), x.name.lower()))
 
             for i, entry in enumerate(entries):
+                # Check item limit before adding each entry
+                if self._max_items is not None and self._item_count >= self._max_items:
+                    if not self._truncated:
+                        self._truncated = True
+                    break
+
                 is_last = i == len(entries) - 1
                 current_prefix = "└── " if is_last else "├── "
                 next_prefix = "    " if is_last else "│   "
 
+                self._item_count += 1
+
                 if entry.is_dir():
                     lines.append(f"{prefix}{current_prefix}📁 {entry.name}/")
                     lines.extend(self._build_tree(entry, prefix + next_prefix, depth + 1, max_depth))
@@ -541,8 +810,8 @@ class DirectoryTreeTool(FileSystemTool):
 
         return lines
 
-    def _run(self, path: str = ".", max_depth: Optional[int] =
-        """Get directory tree."""
+    def _run(self, path: str = ".", max_depth: Optional[int] = 3, max_items: Optional[int] = 200) -> str:
+        """Get directory tree with size limits to prevent context overflow."""
         try:
             target = self._resolve_path(path)
 
@@ -552,9 +821,31 @@ class DirectoryTreeTool(FileSystemTool):
             if not target.is_dir():
                 return f"Error: '{path}' is not a directory"
 
-
+            # Reset counters for this run
+            self._item_count = 0
+            self._max_items = max_items
+            self._truncated = False
+
+            # Show relative path from base directory, use '.' for root
+            # This prevents confusion - files should be accessed relative to working directory
+            if path in (".", "", "./"):
+                display_root = "."  # Root of working directory
+            else:
+                display_root = path.rstrip('/')
+
+            lines = [f"📁 {display_root}/"]
             lines.extend(self._build_tree(target, "", 0, max_depth))
 
+            # Add truncation warning if limit was reached
+            if self._truncated:
+                lines.append("")
+                lines.append(f"⚠️ OUTPUT TRUNCATED: Showing {self._item_count} of more items (max_items={max_items}, max_depth={max_depth})")
+                lines.append(f" To see more: increase max_items or max_depth, or use filesystem_list_directory on specific subdirectories")
+
+            # Add note about file paths
+            lines.append("")
+            lines.append("Note: Use paths relative to working directory (e.g., 'agents/file.md', not including the root directory name)")
+
             return "\n".join(lines)
         except Exception as e:
             return f"Error building directory tree for '{path}': {str(e)}"
```
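Both limits default on, so even a bare call is now bounded. An illustrative invocation (directory hypothetical):

```python
# Defaults are max_depth=3, max_items=200; tighter limits for a quick overview.
tree_tool = DirectoryTreeTool(base_directory="/work/project")
print(tree_tool._run(path="src", max_depth=2, max_items=50))
# When a limit is hit, the listing ends with the
# "⚠️ OUTPUT TRUNCATED: ..." warning shown in the hunk above.
```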
```diff
@@ -566,13 +857,18 @@ class SearchFilesTool(FileSystemTool):
     description: str = (
         "Recursively search for files and directories matching a glob pattern. "
         "Use patterns like '*.py' for Python files in current dir, or '**/*.py' for all Python files recursively. "
-        "Returns
+        "Returns paths to matching items (default limit: 100 results to prevent context overflow). "
         "Only searches within allowed directories."
     )
     args_schema: type[BaseModel] = SearchFilesInput
+    truncation_suggestions: ClassVar[List[str]] = [
+        "Use max_results=50 to limit number of results",
+        "Use a more specific glob pattern (e.g., 'src/**/*.py' instead of '**/*.py')",
+        "Search in a specific subdirectory instead of the root",
+    ]
 
-    def _run(self, path: str = ".", pattern: str = "*") -> str:
-        """Search for files."""
+    def _run(self, path: str = ".", pattern: str = "*", max_results: Optional[int] = 100) -> str:
+        """Search for files with result limit."""
         try:
             target = self._resolve_path(path)
 
@@ -583,28 +879,51 @@ class SearchFilesTool(FileSystemTool):
                 return f"Error: '{path}' is not a directory"
 
             # Use glob to find matching files
-
-
-            else:
-                matches = list(target.glob(pattern))
+            all_matches = list(target.glob(pattern))
+            total_count = len(all_matches)
 
-            if not
+            if not all_matches:
                 return f"No files matching '{pattern}' found in '{path}'"
 
-            #
-
-
+            # Apply limit
+            truncated = False
+            if max_results is not None and total_count > max_results:
+                matches = sorted(all_matches)[:max_results]
+                truncated = True
+            else:
+                matches = sorted(all_matches)
 
-            for
-
+            # Format results with directory prefixes for multi-directory configs
+            allowed_dirs = self._get_all_allowed_directories()
+            has_multiple_dirs = len(allowed_dirs) > 1
+            results = []
+            search_dir_name = None
+
+            for match in matches:
+                if has_multiple_dirs:
+                    rel_path_str, dir_name = self._get_relative_path_from_allowed_dirs(match)
+                    display_path = f"{dir_name}/{rel_path_str}"
+                    if search_dir_name is None:
+                        search_dir_name = dir_name
+                else:
+                    rel_path_str = str(match.relative_to(Path(self.base_directory).resolve()))
+                    display_path = rel_path_str
+
                 if match.is_dir():
-                    results.append(f"📁 {
+                    results.append(f"📁 {display_path}/")
                 else:
                     size = self._format_size(match.stat().st_size)
-                    results.append(f"📄 {
+                    results.append(f"📄 {display_path} ({size})")
+
+            header = f"Found {total_count} matches for '{pattern}':\n\n"
+            output = header + "\n".join(results)
 
-
-
+            if truncated:
+                location_str = f"from '{search_dir_name}' " if search_dir_name else ""
+                output += f"\n\n⚠️ OUTPUT TRUNCATED: Showing {max_results} of {total_count} results {location_str}(max_results={max_results})"
+                output += "\n To see more: increase max_results or use a more specific pattern"
+
+            return output
         except Exception as e:
             return f"Error searching files in '{path}': {str(e)}"
 
```
```diff
@@ -753,7 +1072,524 @@ class ListAllowedDirectoriesTool(FileSystemTool):
 
     def _run(self) -> str:
         """List allowed directories."""
-
+        dirs = self._get_all_allowed_directories()
+        if len(dirs) == 1:
+            return f"Allowed directory:\n{dirs[0]}\n\nAll subdirectories within this path are accessible."
+        else:
+            dir_list = "\n".join(f" - {d}" for d in dirs)
+            return f"Allowed directories:\n{dir_list}\n\nAll subdirectories within these paths are accessible."
+
+
+# ========== Filesystem API Wrapper for Inventory Ingestion ==========
+
+class FilesystemApiWrapper:
+    """
+    API Wrapper for filesystem operations compatible with inventory ingestion pipeline.
+
+    Supports both text and non-text files:
+    - Text files: .py, .md, .txt, .json, .yaml, etc.
+    - Documents: .pdf, .docx, .pptx, .xlsx, .xls (converted to markdown)
+    - Images: .png, .jpg, .gif, .webp (base64 encoded or described via LLM)
+
+    Usage:
+        # Create wrapper for a directory
+        wrapper = FilesystemApiWrapper(base_directory="/path/to/docs")
+
+        # Load documents (uses inherited loader())
+        for doc in wrapper.loader(whitelist=["*.md", "*.pdf"]):
+            print(doc.page_content[:100])
+
+        # For image description, provide an LLM
+        wrapper = FilesystemApiWrapper(base_directory="/path/to/docs", llm=my_llm)
+        for doc in wrapper.loader(whitelist=["*.png"]):
+            print(doc.page_content)  # LLM-generated description
+
+        # Use with inventory ingestion
+        pipeline = IngestionPipeline(llm=llm, graph_path="./graph.json")
+        pipeline.register_toolkit("local_docs", wrapper)
+        result = pipeline.run(source="local_docs", whitelist=["*.md", "*.pdf"])
+    """
+
+    # Filesystem-specific settings
+    base_directory: str = ""
+    recursive: bool = True
+    follow_symlinks: bool = False
+    llm: Any = None  # Optional LLM for image processing
+
+    # File type categories
+    BINARY_EXTENSIONS = {'.pdf', '.docx', '.doc', '.pptx', '.ppt', '.xlsx', '.xls'}
+    IMAGE_EXTENSIONS = {'.png', '.jpg', '.jpeg', '.gif', '.webp', '.bmp', '.svg'}
+
+    def __init__(
+        self,
+        base_directory: str,
+        recursive: bool = True,
+        follow_symlinks: bool = False,
+        llm: Any = None,
+        **kwargs
+    ):
+        """
+        Initialize filesystem wrapper.
+
+        Args:
+            base_directory: Root directory for file operations
+            recursive: If True, search subdirectories recursively
+            follow_symlinks: If True, follow symbolic links
+            llm: Optional LLM for image description (if not provided, images are base64 encoded)
+            **kwargs: Additional arguments (ignored, for compatibility)
+        """
+        self.base_directory = str(Path(base_directory).resolve())
+        self.recursive = recursive
+        self.follow_symlinks = follow_symlinks
+        self.llm = llm
+
+        # For compatibility with BaseCodeToolApiWrapper.loader()
+        self.active_branch = None
+
+        # Validate directory
+        if not Path(self.base_directory).exists():
+            raise ValueError(f"Directory does not exist: {self.base_directory}")
+        if not Path(self.base_directory).is_dir():
+            raise ValueError(f"Path is not a directory: {self.base_directory}")
+
+        # Optional RunnableConfig for CLI/standalone usage
+        self._runnable_config = None
+
+    def set_runnable_config(self, config: Optional[Dict[str, Any]]) -> None:
+        """
+        Set the RunnableConfig for dispatching custom events.
+
+        This is required when running outside of a LangChain agent context
+        (e.g., from CLI). Without a config containing a run_id,
+        dispatch_custom_event will fail with "Unable to dispatch an adhoc event
+        without a parent run id".
+
+        Args:
+            config: A RunnableConfig dict with at least {'run_id': uuid}
+        """
+        self._runnable_config = config
+
+    def _log_tool_event(self, message: str, tool_name: str = None, config: Optional[Dict[str, Any]] = None):
+        """Log progress events (mirrors BaseToolApiWrapper).
+
+        Args:
+            message: The message to log
+            tool_name: Name of the tool (defaults to 'filesystem')
+            config: Optional RunnableConfig. If not provided, uses self._runnable_config.
+                Required when running outside a LangChain agent context.
+        """
+        logger.info(f"[{tool_name or 'filesystem'}] {message}")
+        try:
+            from langchain_core.callbacks import dispatch_custom_event
+
+            # Use provided config, fall back to instance config
+            effective_config = config or getattr(self, '_runnable_config', None)
+
+            dispatch_custom_event(
+                name="thinking_step",
+                data={
+                    "message": message,
+                    "tool_name": tool_name or "filesystem",
+                    "toolkit": "FilesystemApiWrapper",
+                },
+                config=effective_config,
+            )
+        except Exception:
+            pass
```
|
+
|
|
1200
|
+
def _get_files(self, path: str = "", branch: str = None) -> List[str]:
|
|
1201
|
+
"""
|
|
1202
|
+
Get list of files in the directory.
|
|
1203
|
+
|
|
1204
|
+
Implements BaseCodeToolApiWrapper._get_files() for filesystem.
|
|
1205
|
+
|
|
1206
|
+
Args:
|
|
1207
|
+
path: Subdirectory path (relative to base_directory)
|
|
1208
|
+
branch: Ignored for filesystem (compatibility with git-based toolkits)
|
|
1209
|
+
|
|
1210
|
+
Returns:
|
|
1211
|
+
List of file paths relative to base_directory
|
|
1212
|
+
"""
|
|
1213
|
+
base = Path(self.base_directory)
|
|
1214
|
+
search_path = base / path if path else base
|
|
1215
|
+
|
|
1216
|
+
if not search_path.exists():
|
|
1217
|
+
return []
|
|
1218
|
+
|
|
1219
|
+
files = []
|
|
1220
|
+
|
|
1221
|
+
if self.recursive:
|
|
1222
|
+
for root, dirs, filenames in os.walk(search_path, followlinks=self.follow_symlinks):
|
|
1223
|
+
# Skip hidden directories
|
|
1224
|
+
dirs[:] = [d for d in dirs if not d.startswith('.')]
|
|
1225
|
+
|
|
1226
|
+
for filename in filenames:
|
|
1227
|
+
if filename.startswith('.'):
|
|
1228
|
+
continue
|
|
1229
|
+
|
|
1230
|
+
full_path = Path(root) / filename
|
|
1231
|
+
try:
|
|
1232
|
+
rel_path = str(full_path.relative_to(base))
|
|
1233
|
+
files.append(rel_path)
|
|
1234
|
+
except ValueError:
|
|
1235
|
+
continue
|
|
1236
|
+
else:
|
|
1237
|
+
for entry in search_path.iterdir():
|
|
1238
|
+
if entry.is_file() and not entry.name.startswith('.'):
|
|
1239
|
+
try:
|
|
1240
|
+
rel_path = str(entry.relative_to(base))
|
|
1241
|
+
files.append(rel_path)
|
|
1242
|
+
except ValueError:
|
|
1243
|
+
continue
|
|
1244
|
+
|
|
1245
|
+
return sorted(files)
|
|
1246
|
+
|
|
1247
|
+
def _is_binary_file(self, file_path: str) -> bool:
|
|
1248
|
+
"""Check if file is a binary document (PDF, DOCX, etc.)."""
|
|
1249
|
+
ext = Path(file_path).suffix.lower()
|
|
1250
|
+
return ext in self.BINARY_EXTENSIONS
|
|
1251
|
+
|
|
1252
|
+
def _is_image_file(self, file_path: str) -> bool:
|
|
1253
|
+
"""Check if file is an image."""
|
|
1254
|
+
ext = Path(file_path).suffix.lower()
|
|
1255
|
+
return ext in self.IMAGE_EXTENSIONS
|
|
1256
|
+
|
|
1257
|
+
def _read_binary_file(self, file_path: str) -> Optional[str]:
|
|
1258
|
+
"""
|
|
1259
|
+
Read binary file (PDF, DOCX, PPTX, Excel) and convert to text/markdown.
|
|
1260
|
+
|
|
1261
|
+
Uses the SDK's content_parser for document conversion.
|
|
1262
|
+
|
|
1263
|
+
Args:
|
|
1264
|
+
file_path: Path relative to base_directory
|
|
1265
|
+
|
|
1266
|
+
Returns:
|
|
1267
|
+
Converted text content, or None if conversion fails
|
|
1268
|
+
"""
|
|
1269
|
+
full_path = Path(self.base_directory) / file_path
|
|
1270
|
+
|
|
1271
|
+
try:
|
|
1272
|
+
from alita_sdk.tools.utils.content_parser import parse_file_content
|
|
1273
|
+
|
|
1274
|
+
result = parse_file_content(
|
|
1275
|
+
file_path=str(full_path),
|
|
1276
|
+
is_capture_image=bool(self.llm), # Capture images if LLM available
|
|
1277
|
+
llm=self.llm
|
|
1278
|
+
)
|
|
1279
|
+
|
|
1280
|
+
if isinstance(result, Exception):
|
|
1281
|
+
logger.warning(f"Failed to parse {file_path}: {result}")
|
|
1282
|
+
return None
|
|
1283
|
+
|
|
1284
|
+
return result
|
|
1285
|
+
|
|
1286
|
+
except ImportError:
|
|
1287
|
+
logger.warning("content_parser not available, skipping binary file")
|
|
1288
|
+
return None
|
|
1289
|
+
except Exception as e:
|
|
1290
|
+
logger.warning(f"Error parsing {file_path}: {e}")
|
|
1291
|
+
return None
|
|
1292
|
+
|
|
1293
|
+
def _read_image_file(self, file_path: str) -> Optional[str]:
|
|
1294
|
+
"""
|
|
1295
|
+
Read image file and convert to text representation.
|
|
1296
|
+
|
|
1297
|
+
If LLM is available, uses it to describe the image.
|
|
1298
|
+
Otherwise, returns base64-encoded data URI.
|
|
1299
|
+
|
|
1300
|
+
Args:
|
|
1301
|
+
file_path: Path relative to base_directory
|
|
1302
|
+
|
|
1303
|
+
Returns:
|
|
1304
|
+
Image description or base64 data URI
|
|
1305
|
+
"""
|
|
1306
|
+
full_path = Path(self.base_directory) / file_path
|
|
1307
|
+
|
|
1308
|
+
if not full_path.exists():
|
|
1309
|
+
return None
|
|
1310
|
+
|
|
1311
|
+
ext = full_path.suffix.lower()
|
|
1312
|
+
|
|
1313
|
+
try:
|
|
1314
|
+
# Read image bytes
|
|
1315
|
+
image_bytes = full_path.read_bytes()
|
|
1316
|
+
|
|
1317
|
+
if self.llm:
|
|
1318
|
+
# Use content_parser with LLM for image description
|
|
1319
|
+
try:
|
|
1320
|
+
from alita_sdk.tools.utils.content_parser import parse_file_content
|
|
1321
|
+
|
|
1322
|
+
result = parse_file_content(
|
|
1323
|
+
file_path=str(full_path),
|
|
1324
|
+
is_capture_image=True,
|
|
1325
|
+
llm=self.llm
|
|
1326
|
+
)
|
|
1327
|
+
|
|
1328
|
+
if isinstance(result, Exception):
|
|
1329
|
+
logger.warning(f"Failed to describe image {file_path}: {result}")
|
|
1330
|
+
else:
|
|
1331
|
+
return f"[Image: {Path(file_path).name}]\n\n{result}"
|
|
1332
|
+
|
|
1333
|
+
except ImportError:
|
|
1334
|
+
pass
|
|
1335
|
+
|
|
1336
|
+
# Fallback: return base64 data URI
|
|
1337
|
+
mime_types = {
|
|
1338
|
+
'.png': 'image/png',
|
|
1339
|
+
'.jpg': 'image/jpeg',
|
|
1340
|
+
'.jpeg': 'image/jpeg',
|
|
1341
|
+
'.gif': 'image/gif',
|
|
1342
|
+
'.webp': 'image/webp',
|
|
1343
|
+
'.bmp': 'image/bmp',
|
|
1344
|
+
'.svg': 'image/svg+xml',
|
|
1345
|
+
}
|
|
1346
|
+
mime_type = mime_types.get(ext, 'application/octet-stream')
|
|
1347
|
+
b64_data = base64.b64encode(image_bytes).decode('utf-8')
|
|
1348
|
+
|
|
1349
|
+
return f"[Image: {Path(file_path).name}]\ndata:{mime_type};base64,{b64_data}"
|
|
1350
|
+
|
|
1351
|
+
except Exception as e:
|
|
1352
|
+
logger.warning(f"Error reading image {file_path}: {e}")
|
|
1353
|
+
return None
|
|
1354
|
+
|
|
1355
|
+
def _read_file(
|
|
1356
|
+
self,
|
|
1357
|
+
file_path: str,
|
|
1358
|
+
branch: str = None,
|
|
1359
|
+
offset: Optional[int] = None,
|
|
1360
|
+
limit: Optional[int] = None,
|
|
1361
|
+
head: Optional[int] = None,
|
|
1362
|
+
tail: Optional[int] = None,
|
|
1363
|
+
) -> Optional[str]:
|
|
1364
|
+
"""
|
|
1365
|
+
Read file content, handling text, binary documents, and images.
|
|
1366
|
+
|
|
1367
|
+
Supports:
|
|
1368
|
+
- Text files: Read directly with encoding detection
|
|
1369
|
+
- Binary documents (PDF, DOCX, PPTX, Excel): Convert to markdown
|
|
1370
|
+
- Images: Return LLM description or base64 data URI
|
|
1371
|
+
|
|
1372
|
+
Args:
|
|
1373
|
+
file_path: Path relative to base_directory
|
|
1374
|
+
branch: Ignored for filesystem (compatibility with git-based toolkits)
|
|
1375
|
+
offset: Start line number (1-indexed). If None, start from beginning.
|
|
1376
|
+
limit: Maximum number of lines to read. If None, read to end.
|
|
1377
|
+
head: Read only first N lines (alternative to offset/limit)
|
|
1378
|
+
tail: Read only last N lines (alternative to offset/limit)
|
|
1379
|
+
|
|
1380
|
+
Returns:
|
|
1381
|
+
File content as string, or None if unreadable
|
|
1382
|
+
"""
|
|
1383
|
+
full_path = Path(self.base_directory) / file_path
|
|
1384
|
+
|
|
1385
|
+
# Security check - prevent path traversal
|
|
1386
|
+
try:
|
|
1387
|
+
full_path.resolve().relative_to(Path(self.base_directory).resolve())
|
|
1388
|
+
except ValueError:
|
|
1389
|
+
logger.warning(f"Access denied: {file_path} is outside base directory")
|
|
1390
|
+
return None
|
|
1391
|
+
|
|
1392
|
+
if not full_path.exists() or not full_path.is_file():
|
|
1393
|
+
return None
|
|
1394
|
+
|
|
1395
|
+
# Route to appropriate reader based on file type
|
|
1396
|
+
# Note: offset/limit only apply to text files
|
|
1397
|
+
if self._is_binary_file(file_path):
|
|
1398
|
+
return self._read_binary_file(file_path)
|
|
1399
|
+
|
|
1400
|
+
if self._is_image_file(file_path):
|
|
1401
|
+
return self._read_image_file(file_path)
|
|
1402
|
+
|
|
1403
|
+
# Default: read as text with encoding detection
|
|
1404
|
+
encodings = ['utf-8', 'utf-8-sig', 'latin-1', 'cp1252']
|
|
1405
|
+
|
|
1406
|
+
for encoding in encodings:
|
|
1407
|
+
try:
|
|
1408
|
+
content = full_path.read_text(encoding=encoding)
|
|
1409
|
+
|
|
1410
|
+
# Apply line filtering if specified
|
|
1411
|
+
if offset is not None or limit is not None or head is not None or tail is not None:
|
|
1412
|
+
lines = content.splitlines(keepends=True)
|
|
1413
|
+
|
|
1414
|
+
if head is not None:
|
|
1415
|
+
# Read first N lines
|
|
1416
|
+
lines = lines[:head]
|
|
1417
|
+
elif tail is not None:
|
|
1418
|
+
# Read last N lines
|
|
1419
|
+
lines = lines[-tail:] if tail > 0 else []
|
|
1420
|
+
else:
|
|
1421
|
+
# Use offset/limit
|
|
1422
|
+
start_idx = (offset - 1) if offset and offset > 0 else 0
|
|
1423
|
+
if limit is not None:
|
|
1424
|
+
end_idx = start_idx + limit
|
|
1425
|
+
lines = lines[start_idx:end_idx]
|
|
1426
|
+
else:
|
|
1427
|
+
lines = lines[start_idx:]
|
|
1428
|
+
|
|
1429
|
+
content = ''.join(lines)
|
|
1430
|
+
|
|
1431
|
+
return content
|
|
1432
|
+
|
|
1433
|
+
except UnicodeDecodeError:
|
|
1434
|
+
continue
|
|
1435
|
+
except Exception as e:
|
|
1436
|
+
logger.warning(f"Failed to read {file_path}: {e}")
|
|
1437
|
+
return None
|
|
1438
|
+
|
|
1439
|
+
logger.warning(f"Could not decode {file_path} with any known encoding")
|
|
1440
|
+
return None
|
|
1441
|
+
|
|
1442
|
+
+    def read_file(
+        self,
+        file_path: str,
+        offset: Optional[int] = None,
+        limit: Optional[int] = None,
+        head: Optional[int] = None,
+        tail: Optional[int] = None,
+    ) -> Optional[str]:
+        """
+        Public method to read file content with optional line range.
+
+        Args:
+            file_path: Path relative to base_directory
+            offset: Start line number (1-indexed)
+            limit: Maximum number of lines to read
+            head: Read only first N lines
+            tail: Read only last N lines
+
+        Returns:
+            File content as string
+        """
+        return self._read_file(file_path, offset=offset, limit=limit, head=head, tail=tail)
+
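The public `read_file` is a thin pass-through, so the interesting behavior lives in `_read_file`, including the sandbox guard at the top. That guard can be exercised in isolation; `is_within` below is a hypothetical helper mirroring the `resolve()`/`relative_to()` pattern, not an SDK function:

```python
from pathlib import Path

def is_within(base: str, candidate: str) -> bool:
    # relative_to() raises ValueError when the resolved candidate
    # escapes the base directory, e.g. via ".." components.
    try:
        (Path(base) / candidate).resolve().relative_to(Path(base).resolve())
        return True
    except ValueError:
        return False

assert is_within("/tmp/sandbox", "notes/todo.txt") is True
assert is_within("/tmp/sandbox", "../etc/passwd") is False
```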
+    def loader(
+        self,
+        branch: Optional[str] = None,
+        whitelist: Optional[List[str]] = None,
+        blacklist: Optional[List[str]] = None,
+        chunked: bool = True,
+    ) -> Generator[Document, None, None]:
+        """
+        Load documents from the filesystem.
+
+        Mirrors BaseCodeToolApiWrapper.loader() interface for compatibility.
+
+        Args:
+            branch: Ignored (kept for API compatibility with git-based loaders)
+            whitelist: File patterns to include (e.g., ['*.py', 'src/**/*.js'])
+            blacklist: File patterns to exclude (e.g., ['*test*', 'node_modules/**'])
+            chunked: If True, applies universal chunker based on file type
+
+        Yields:
+            Document objects with page_content and metadata
+        """
+        import glob as glob_module
+
+        base = Path(self.base_directory)
+
+        def is_blacklisted(file_path: str) -> bool:
+            if not blacklist:
+                return False
+            return (
+                any(fnmatch.fnmatch(file_path, p) for p in blacklist) or
+                any(fnmatch.fnmatch(Path(file_path).name, p) for p in blacklist)
+            )
+
+        # Optimization: Use glob directly when whitelist has path patterns
+        # This avoids scanning 100K+ files in node_modules etc.
+        def get_files_via_glob() -> Generator[str, None, None]:
+            """Use glob patterns directly - much faster than scanning all files."""
+            seen = set()
+            for pattern in whitelist:
+                # Handle glob patterns
+                full_pattern = str(base / pattern)
+                for match in glob_module.glob(full_pattern, recursive=True):
+                    match_path = Path(match)
+                    if match_path.is_file():
+                        try:
+                            rel_path = str(match_path.relative_to(base))
+                            if rel_path not in seen and not is_blacklisted(rel_path):
+                                seen.add(rel_path)
+                                yield rel_path
+                        except ValueError:
+                            continue
+
+        def get_files_via_scan() -> Generator[str, None, None]:
+            """Fall back to scanning all files when no whitelist or simple extension patterns."""
+            _files = self._get_files()
+            self._log_tool_event(f"Found {len(_files)} files in {self.base_directory}", "loader")
+
+            def is_whitelisted(file_path: str) -> bool:
+                if not whitelist:
+                    return True
+                return (
+                    any(fnmatch.fnmatch(file_path, p) for p in whitelist) or
+                    any(fnmatch.fnmatch(Path(file_path).name, p) for p in whitelist) or
+                    any(file_path.endswith(f'.{p.lstrip("*.")}') for p in whitelist if p.startswith('*.'))
+                )
+
+            for file_path in _files:
+                if is_whitelisted(file_path) and not is_blacklisted(file_path):
+                    yield file_path
+
+        # Decide strategy: use glob if whitelist has path patterns (contains / or **)
+        use_glob = whitelist and any('/' in p or '**' in p for p in whitelist)
+
+        if use_glob:
+            self._log_tool_event(f"Using glob patterns: {whitelist}", "loader")
+            file_iterator = get_files_via_glob()
+        else:
+            file_iterator = get_files_via_scan()
+
+        def raw_document_generator() -> Generator[Document, None, None]:
+            self._log_tool_event("Reading files...", "loader")
+            processed = 0
+
+            for file_path in file_iterator:
+                content = self._read_file(file_path)
+                if not content:
+                    continue
+
+                content_hash = hashlib.sha256(content.encode('utf-8')).hexdigest()
+                processed += 1
+
+                yield Document(
+                    page_content=content,
+                    metadata={
+                        'file_path': file_path,
+                        'file_name': Path(file_path).name,
+                        'source': file_path,
+                        'commit_hash': content_hash,
+                    }
+                )
+
+                # Log progress every 100 files
+                if processed % 100 == 0:
+                    logger.debug(f"[loader] Read {processed} files...")
+
+            self._log_tool_event(f"Loaded {processed} files", "loader")
+
+        if not chunked:
+            return raw_document_generator()
+
+        try:
+            from alita_sdk.tools.chunkers.universal_chunker import universal_chunker
+            return universal_chunker(raw_document_generator())
+        except ImportError:
+            logger.warning("Universal chunker not available, returning raw documents")
+            return raw_document_generator()
+
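The glob-versus-scan decision above hinges on whether any whitelist pattern looks path-shaped. A small self-contained check (pattern lists are illustrative; `prefers_glob` is a hypothetical rename of the `use_glob` expression):

```python
from fnmatch import fnmatch
from pathlib import Path

def prefers_glob(whitelist):
    # Path-shaped patterns trigger targeted globbing instead of a full scan.
    return bool(whitelist) and any('/' in p or '**' in p for p in whitelist)

assert prefers_glob(['src/**/*.js']) is True   # path pattern
assert prefers_glob(['*.py']) is False         # bare extension pattern

# The scan path accepts a file if either the relative path or the bare
# file name matches a whitelist pattern.
rel = 'src/utils/helpers.py'
assert fnmatch(rel, 'src/**/*.py') or fnmatch(Path(rel).name, '*.py')
```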
+    def chunker(self, documents: Generator[Document, None, None]) -> Generator[Document, None, None]:
+        """Apply universal chunker to documents."""
+        try:
+            from alita_sdk.tools.chunkers.universal_chunker import universal_chunker
+            return universal_chunker(documents)
+        except ImportError:
+            return documents
+
+    def get_files_content(self, file_path: str) -> Optional[str]:
+        """Get file content (compatibility alias for retrieval toolkit)."""
+        return self._read_file(file_path)
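Note that the loader stamps each Document's `commit_hash` metadata field with a SHA-256 of the file content (the name is kept for parity with git-based loaders), so downstream indexers can skip unchanged files. The digest is easy to reproduce; values below are illustrative:

```python
import hashlib

content = "print('hello')\n"
digest = hashlib.sha256(content.encode('utf-8')).hexdigest()
metadata = {
    'file_path': 'src/hello.py',
    'file_name': 'hello.py',
    'source': 'src/hello.py',
    'commit_hash': digest,  # a content hash, not an actual git commit
}
assert len(digest) == 64  # hex-encoded SHA-256
```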
 
 
 # Predefined tool presets for common use cases
@@ -761,6 +1597,7 @@ FILESYSTEM_TOOL_PRESETS = {
     'read_only': {
         'exclude_tools': [
             'filesystem_write_file',
+            'filesystem_append_file',
             'filesystem_edit_file',
             'filesystem_apply_patch',
             'filesystem_delete_file',
@@ -775,6 +1612,7 @@ FILESYSTEM_TOOL_PRESETS = {
         'include_tools': [
             'filesystem_read_file',
             'filesystem_write_file',
+            'filesystem_append_file',
             'filesystem_list_directory',
             'filesystem_create_directory',
         ]
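The presets are plain include/exclude lists, so the new `filesystem_append_file` entry is both excluded by `read_only` and included by `basic`. Resolving a preset against a set of tool names is mechanical; a hedged sketch with abbreviated preset contents and a hypothetical `resolve_preset` helper (the SDK's actual filter is not shown in this hunk):

```python
PRESETS = {
    'read_only': {'exclude_tools': ['filesystem_write_file',
                                    'filesystem_append_file',
                                    'filesystem_edit_file']},
    'basic': {'include_tools': ['filesystem_read_file',
                                'filesystem_write_file',
                                'filesystem_append_file',
                                'filesystem_list_directory',
                                'filesystem_create_directory']},
}

def resolve_preset(all_names, preset):
    spec = PRESETS[preset]
    names = [n for n in all_names if n in spec.get('include_tools', all_names)]
    return [n for n in names if n not in spec.get('exclude_tools', [])]

names = ['filesystem_read_file', 'filesystem_write_file', 'filesystem_append_file']
assert resolve_preset(names, 'read_only') == ['filesystem_read_file']
```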
@@ -792,20 +1630,21 @@ def get_filesystem_tools(
     base_directory: str,
     include_tools: Optional[List[str]] = None,
     exclude_tools: Optional[List[str]] = None,
-    preset: Optional[str] = None
+    preset: Optional[str] = None,
+    allowed_directories: Optional[List[str]] = None
 ) -> List[BaseTool]:
     """
-    Get filesystem tools for the specified directory.
+    Get filesystem tools for the specified directories.
 
     Args:
-        base_directory: Absolute or relative path to the directory to restrict access to
+        base_directory: Absolute or relative path to the primary directory to restrict access to
         include_tools: Optional list of tool names to include. If provided, only these tools are returned.
             If None, all tools are included (unless excluded).
         exclude_tools: Optional list of tool names to exclude. Applied after include_tools.
         preset: Optional preset name to use predefined tool sets. Presets:
             - 'read_only': Excludes all write/modify operations
             - 'no_delete': All tools except delete
-            - 'basic': Read, write, list, create directory
+            - 'basic': Read, write, append, list, create directory
             - 'minimal': Only read and list
             Note: If preset is used with include_tools or exclude_tools,
             preset is applied first, then custom filters.
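With the widened signature, callers can combine a preset with extra sandboxed roots. A usage sketch; the paths are illustrative, and the import path assumes the module this diff modifies (`alita_sdk/cli/tools/filesystem.py`):

```python
from alita_sdk.cli.tools.filesystem import get_filesystem_tools

# Read-only tools over a primary root plus two additional allowed roots.
tools = get_filesystem_tools(
    '/workspace/app',
    preset='read_only',
    allowed_directories=['/workspace/shared-docs', '/tmp/scratch'],
)
print(sorted(t.name for t in tools))
```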
@@ -818,6 +1657,7 @@ def get_filesystem_tools(
         - filesystem_read_file_chunk
         - filesystem_read_multiple_files
         - filesystem_write_file
+        - filesystem_append_file (for incremental file creation)
         - filesystem_edit_file
         - filesystem_apply_patch
         - filesystem_list_directory
@@ -847,6 +1687,10 @@ def get_filesystem_tools(
         # Use preset and add custom exclusions
         get_filesystem_tools('/path/to/dir', preset='read_only',
                              exclude_tools=['filesystem_search_files'])
+
+        # Multiple allowed directories
+        get_filesystem_tools('/path/to/primary',
+                             allowed_directories=['/path/to/other1', '/path/to/other2'])
     """
     # Apply preset if specified
     preset_include = None
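Since the docstring specifies that a preset is applied before custom filters, the merge reduces to unioning the preset's exclusions with the caller's and de-duplicating. A minimal sketch of that ordering, mirroring the shape of the `final_exclude` merge visible in the next hunk (`merge_excludes` is a hypothetical helper):

```python
def merge_excludes(preset_exclude, user_exclude):
    # Preset exclusions first, then the caller's, de-duplicated via set().
    final_exclude = list(preset_exclude or [])
    if user_exclude:
        final_exclude.extend(user_exclude)
    return list(set(final_exclude)) if final_exclude else None

merged = merge_excludes(['filesystem_write_file'],
                        ['filesystem_search_files', 'filesystem_write_file'])
assert sorted(merged) == ['filesystem_search_files', 'filesystem_write_file']
```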
@@ -870,25 +1714,27 @@ def get_filesystem_tools(
         final_exclude.extend(exclude_tools)
     final_exclude = list(set(final_exclude)) if final_exclude else None
 
-    # Resolve to absolute
+    # Resolve to absolute paths
     base_dir = str(Path(base_directory).resolve())
+    extra_dirs = [str(Path(d).resolve()) for d in (allowed_directories or [])]
 
     # Define all available tools with their names
     all_tools = {
-        'filesystem_read_file': ReadFileTool(base_directory=base_dir),
-        'filesystem_read_file_chunk': ReadFileChunkTool(base_directory=base_dir),
-        'filesystem_read_multiple_files': ReadMultipleFilesTool(base_directory=base_dir),
-        'filesystem_write_file': WriteFileTool(base_directory=base_dir),
-        'filesystem_edit_file': EditFileTool(base_directory=base_dir),
-        'filesystem_apply_patch': ApplyPatchTool(base_directory=base_dir),
-        'filesystem_list_directory': ListDirectoryTool(base_directory=base_dir),
-        'filesystem_directory_tree': DirectoryTreeTool(base_directory=base_dir),
-        'filesystem_search_files': SearchFilesTool(base_directory=base_dir),
-        'filesystem_delete_file': DeleteFileTool(base_directory=base_dir),
-        'filesystem_move_file': MoveFileTool(base_directory=base_dir),
-        'filesystem_create_directory': CreateDirectoryTool(base_directory=base_dir),
-        'filesystem_get_file_info': GetFileInfoTool(base_directory=base_dir),
-        'filesystem_list_allowed_directories': ListAllowedDirectoriesTool(base_directory=base_dir),
+        'filesystem_read_file': ReadFileTool(base_directory=base_dir, allowed_directories=extra_dirs),
+        'filesystem_read_file_chunk': ReadFileChunkTool(base_directory=base_dir, allowed_directories=extra_dirs),
+        'filesystem_read_multiple_files': ReadMultipleFilesTool(base_directory=base_dir, allowed_directories=extra_dirs),
+        'filesystem_write_file': WriteFileTool(base_directory=base_dir, allowed_directories=extra_dirs),
+        'filesystem_append_file': AppendFileTool(base_directory=base_dir, allowed_directories=extra_dirs),
+        'filesystem_edit_file': EditFileTool(base_directory=base_dir, allowed_directories=extra_dirs),
+        'filesystem_apply_patch': ApplyPatchTool(base_directory=base_dir, allowed_directories=extra_dirs),
+        'filesystem_list_directory': ListDirectoryTool(base_directory=base_dir, allowed_directories=extra_dirs),
+        'filesystem_directory_tree': DirectoryTreeTool(base_directory=base_dir, allowed_directories=extra_dirs),
+        'filesystem_search_files': SearchFilesTool(base_directory=base_dir, allowed_directories=extra_dirs),
+        'filesystem_delete_file': DeleteFileTool(base_directory=base_dir, allowed_directories=extra_dirs),
+        'filesystem_move_file': MoveFileTool(base_directory=base_dir, allowed_directories=extra_dirs),
+        'filesystem_create_directory': CreateDirectoryTool(base_directory=base_dir, allowed_directories=extra_dirs),
+        'filesystem_get_file_info': GetFileInfoTool(base_directory=base_dir, allowed_directories=extra_dirs),
+        'filesystem_list_allowed_directories': ListAllowedDirectoriesTool(base_directory=base_dir, allowed_directories=extra_dirs),
     }
 
     # Start with all tools or only included ones