alita-sdk 0.3.462__py3-none-any.whl → 0.3.627__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (261)
  1. alita_sdk/cli/agent/__init__.py +5 -0
  2. alita_sdk/cli/agent/default.py +258 -0
  3. alita_sdk/cli/agent_executor.py +15 -3
  4. alita_sdk/cli/agent_loader.py +56 -8
  5. alita_sdk/cli/agent_ui.py +93 -31
  6. alita_sdk/cli/agents.py +2274 -230
  7. alita_sdk/cli/callbacks.py +96 -25
  8. alita_sdk/cli/cli.py +10 -1
  9. alita_sdk/cli/config.py +162 -9
  10. alita_sdk/cli/context/__init__.py +30 -0
  11. alita_sdk/cli/context/cleanup.py +198 -0
  12. alita_sdk/cli/context/manager.py +731 -0
  13. alita_sdk/cli/context/message.py +285 -0
  14. alita_sdk/cli/context/strategies.py +289 -0
  15. alita_sdk/cli/context/token_estimation.py +127 -0
  16. alita_sdk/cli/input_handler.py +419 -0
  17. alita_sdk/cli/inventory.py +1073 -0
  18. alita_sdk/cli/testcases/__init__.py +94 -0
  19. alita_sdk/cli/testcases/data_generation.py +119 -0
  20. alita_sdk/cli/testcases/discovery.py +96 -0
  21. alita_sdk/cli/testcases/executor.py +84 -0
  22. alita_sdk/cli/testcases/logger.py +85 -0
  23. alita_sdk/cli/testcases/parser.py +172 -0
  24. alita_sdk/cli/testcases/prompts.py +91 -0
  25. alita_sdk/cli/testcases/reporting.py +125 -0
  26. alita_sdk/cli/testcases/setup.py +108 -0
  27. alita_sdk/cli/testcases/test_runner.py +282 -0
  28. alita_sdk/cli/testcases/utils.py +39 -0
  29. alita_sdk/cli/testcases/validation.py +90 -0
  30. alita_sdk/cli/testcases/workflow.py +196 -0
  31. alita_sdk/cli/toolkit.py +14 -17
  32. alita_sdk/cli/toolkit_loader.py +35 -5
  33. alita_sdk/cli/tools/__init__.py +36 -2
  34. alita_sdk/cli/tools/approval.py +224 -0
  35. alita_sdk/cli/tools/filesystem.py +910 -64
  36. alita_sdk/cli/tools/planning.py +389 -0
  37. alita_sdk/cli/tools/terminal.py +414 -0
  38. alita_sdk/community/__init__.py +72 -12
  39. alita_sdk/community/inventory/__init__.py +236 -0
  40. alita_sdk/community/inventory/config.py +257 -0
  41. alita_sdk/community/inventory/enrichment.py +2137 -0
  42. alita_sdk/community/inventory/extractors.py +1469 -0
  43. alita_sdk/community/inventory/ingestion.py +3172 -0
  44. alita_sdk/community/inventory/knowledge_graph.py +1457 -0
  45. alita_sdk/community/inventory/parsers/__init__.py +218 -0
  46. alita_sdk/community/inventory/parsers/base.py +295 -0
  47. alita_sdk/community/inventory/parsers/csharp_parser.py +907 -0
  48. alita_sdk/community/inventory/parsers/go_parser.py +851 -0
  49. alita_sdk/community/inventory/parsers/html_parser.py +389 -0
  50. alita_sdk/community/inventory/parsers/java_parser.py +593 -0
  51. alita_sdk/community/inventory/parsers/javascript_parser.py +629 -0
  52. alita_sdk/community/inventory/parsers/kotlin_parser.py +768 -0
  53. alita_sdk/community/inventory/parsers/markdown_parser.py +362 -0
  54. alita_sdk/community/inventory/parsers/python_parser.py +604 -0
  55. alita_sdk/community/inventory/parsers/rust_parser.py +858 -0
  56. alita_sdk/community/inventory/parsers/swift_parser.py +832 -0
  57. alita_sdk/community/inventory/parsers/text_parser.py +322 -0
  58. alita_sdk/community/inventory/parsers/yaml_parser.py +370 -0
  59. alita_sdk/community/inventory/patterns/__init__.py +61 -0
  60. alita_sdk/community/inventory/patterns/ast_adapter.py +380 -0
  61. alita_sdk/community/inventory/patterns/loader.py +348 -0
  62. alita_sdk/community/inventory/patterns/registry.py +198 -0
  63. alita_sdk/community/inventory/presets.py +535 -0
  64. alita_sdk/community/inventory/retrieval.py +1403 -0
  65. alita_sdk/community/inventory/toolkit.py +173 -0
  66. alita_sdk/community/inventory/toolkit_utils.py +176 -0
  67. alita_sdk/community/inventory/visualize.py +1370 -0
  68. alita_sdk/configurations/__init__.py +1 -1
  69. alita_sdk/configurations/ado.py +141 -20
  70. alita_sdk/configurations/bitbucket.py +0 -3
  71. alita_sdk/configurations/confluence.py +76 -42
  72. alita_sdk/configurations/figma.py +76 -0
  73. alita_sdk/configurations/gitlab.py +17 -5
  74. alita_sdk/configurations/openapi.py +329 -0
  75. alita_sdk/configurations/qtest.py +72 -1
  76. alita_sdk/configurations/report_portal.py +96 -0
  77. alita_sdk/configurations/sharepoint.py +148 -0
  78. alita_sdk/configurations/testio.py +83 -0
  79. alita_sdk/runtime/clients/artifact.py +3 -3
  80. alita_sdk/runtime/clients/client.py +353 -48
  81. alita_sdk/runtime/clients/sandbox_client.py +0 -21
  82. alita_sdk/runtime/langchain/_constants_bkup.py +1318 -0
  83. alita_sdk/runtime/langchain/assistant.py +123 -26
  84. alita_sdk/runtime/langchain/constants.py +642 -1
  85. alita_sdk/runtime/langchain/document_loaders/AlitaExcelLoader.py +103 -60
  86. alita_sdk/runtime/langchain/document_loaders/AlitaJSONLinesLoader.py +77 -0
  87. alita_sdk/runtime/langchain/document_loaders/AlitaJSONLoader.py +6 -3
  88. alita_sdk/runtime/langchain/document_loaders/AlitaPowerPointLoader.py +226 -7
  89. alita_sdk/runtime/langchain/document_loaders/AlitaTextLoader.py +5 -2
  90. alita_sdk/runtime/langchain/document_loaders/constants.py +12 -7
  91. alita_sdk/runtime/langchain/langraph_agent.py +279 -73
  92. alita_sdk/runtime/langchain/utils.py +82 -15
  93. alita_sdk/runtime/llms/preloaded.py +2 -6
  94. alita_sdk/runtime/skills/__init__.py +91 -0
  95. alita_sdk/runtime/skills/callbacks.py +498 -0
  96. alita_sdk/runtime/skills/discovery.py +540 -0
  97. alita_sdk/runtime/skills/executor.py +610 -0
  98. alita_sdk/runtime/skills/input_builder.py +371 -0
  99. alita_sdk/runtime/skills/models.py +330 -0
  100. alita_sdk/runtime/skills/registry.py +355 -0
  101. alita_sdk/runtime/skills/skill_runner.py +330 -0
  102. alita_sdk/runtime/toolkits/__init__.py +7 -0
  103. alita_sdk/runtime/toolkits/application.py +21 -9
  104. alita_sdk/runtime/toolkits/artifact.py +15 -5
  105. alita_sdk/runtime/toolkits/datasource.py +13 -6
  106. alita_sdk/runtime/toolkits/mcp.py +139 -251
  107. alita_sdk/runtime/toolkits/mcp_config.py +1048 -0
  108. alita_sdk/runtime/toolkits/planning.py +178 -0
  109. alita_sdk/runtime/toolkits/skill_router.py +238 -0
  110. alita_sdk/runtime/toolkits/subgraph.py +251 -6
  111. alita_sdk/runtime/toolkits/tools.py +238 -32
  112. alita_sdk/runtime/toolkits/vectorstore.py +11 -5
  113. alita_sdk/runtime/tools/__init__.py +3 -1
  114. alita_sdk/runtime/tools/application.py +20 -6
  115. alita_sdk/runtime/tools/artifact.py +511 -28
  116. alita_sdk/runtime/tools/data_analysis.py +183 -0
  117. alita_sdk/runtime/tools/function.py +43 -15
  118. alita_sdk/runtime/tools/image_generation.py +50 -44
  119. alita_sdk/runtime/tools/llm.py +852 -67
  120. alita_sdk/runtime/tools/loop.py +3 -1
  121. alita_sdk/runtime/tools/loop_output.py +3 -1
  122. alita_sdk/runtime/tools/mcp_remote_tool.py +25 -10
  123. alita_sdk/runtime/tools/mcp_server_tool.py +7 -6
  124. alita_sdk/runtime/tools/planning/__init__.py +36 -0
  125. alita_sdk/runtime/tools/planning/models.py +246 -0
  126. alita_sdk/runtime/tools/planning/wrapper.py +607 -0
  127. alita_sdk/runtime/tools/router.py +2 -4
  128. alita_sdk/runtime/tools/sandbox.py +9 -6
  129. alita_sdk/runtime/tools/skill_router.py +776 -0
  130. alita_sdk/runtime/tools/tool.py +3 -1
  131. alita_sdk/runtime/tools/vectorstore.py +7 -2
  132. alita_sdk/runtime/tools/vectorstore_base.py +51 -11
  133. alita_sdk/runtime/utils/AlitaCallback.py +137 -21
  134. alita_sdk/runtime/utils/constants.py +5 -1
  135. alita_sdk/runtime/utils/mcp_client.py +492 -0
  136. alita_sdk/runtime/utils/mcp_oauth.py +202 -5
  137. alita_sdk/runtime/utils/mcp_sse_client.py +36 -7
  138. alita_sdk/runtime/utils/mcp_tools_discovery.py +124 -0
  139. alita_sdk/runtime/utils/serialization.py +155 -0
  140. alita_sdk/runtime/utils/streamlit.py +6 -10
  141. alita_sdk/runtime/utils/toolkit_utils.py +16 -5
  142. alita_sdk/runtime/utils/utils.py +36 -0
  143. alita_sdk/tools/__init__.py +113 -29
  144. alita_sdk/tools/ado/repos/__init__.py +51 -33
  145. alita_sdk/tools/ado/repos/repos_wrapper.py +148 -89
  146. alita_sdk/tools/ado/test_plan/__init__.py +25 -9
  147. alita_sdk/tools/ado/test_plan/test_plan_wrapper.py +23 -1
  148. alita_sdk/tools/ado/utils.py +1 -18
  149. alita_sdk/tools/ado/wiki/__init__.py +25 -8
  150. alita_sdk/tools/ado/wiki/ado_wrapper.py +291 -22
  151. alita_sdk/tools/ado/work_item/__init__.py +26 -9
  152. alita_sdk/tools/ado/work_item/ado_wrapper.py +56 -3
  153. alita_sdk/tools/advanced_jira_mining/__init__.py +11 -8
  154. alita_sdk/tools/aws/delta_lake/__init__.py +13 -9
  155. alita_sdk/tools/aws/delta_lake/tool.py +5 -1
  156. alita_sdk/tools/azure_ai/search/__init__.py +11 -8
  157. alita_sdk/tools/azure_ai/search/api_wrapper.py +1 -1
  158. alita_sdk/tools/base/tool.py +5 -1
  159. alita_sdk/tools/base_indexer_toolkit.py +170 -45
  160. alita_sdk/tools/bitbucket/__init__.py +17 -12
  161. alita_sdk/tools/bitbucket/api_wrapper.py +59 -11
  162. alita_sdk/tools/bitbucket/cloud_api_wrapper.py +49 -35
  163. alita_sdk/tools/browser/__init__.py +5 -4
  164. alita_sdk/tools/carrier/__init__.py +5 -6
  165. alita_sdk/tools/carrier/backend_reports_tool.py +6 -6
  166. alita_sdk/tools/carrier/run_ui_test_tool.py +6 -6
  167. alita_sdk/tools/carrier/ui_reports_tool.py +5 -5
  168. alita_sdk/tools/chunkers/__init__.py +3 -1
  169. alita_sdk/tools/chunkers/code/treesitter/treesitter.py +37 -13
  170. alita_sdk/tools/chunkers/sematic/json_chunker.py +1 -0
  171. alita_sdk/tools/chunkers/sematic/markdown_chunker.py +97 -6
  172. alita_sdk/tools/chunkers/universal_chunker.py +270 -0
  173. alita_sdk/tools/cloud/aws/__init__.py +10 -7
  174. alita_sdk/tools/cloud/azure/__init__.py +10 -7
  175. alita_sdk/tools/cloud/gcp/__init__.py +10 -7
  176. alita_sdk/tools/cloud/k8s/__init__.py +10 -7
  177. alita_sdk/tools/code/linter/__init__.py +10 -8
  178. alita_sdk/tools/code/loaders/codesearcher.py +3 -2
  179. alita_sdk/tools/code/sonar/__init__.py +10 -7
  180. alita_sdk/tools/code_indexer_toolkit.py +73 -23
  181. alita_sdk/tools/confluence/__init__.py +21 -15
  182. alita_sdk/tools/confluence/api_wrapper.py +78 -23
  183. alita_sdk/tools/confluence/loader.py +4 -2
  184. alita_sdk/tools/custom_open_api/__init__.py +12 -5
  185. alita_sdk/tools/elastic/__init__.py +11 -8
  186. alita_sdk/tools/elitea_base.py +493 -30
  187. alita_sdk/tools/figma/__init__.py +58 -11
  188. alita_sdk/tools/figma/api_wrapper.py +1235 -143
  189. alita_sdk/tools/figma/figma_client.py +73 -0
  190. alita_sdk/tools/figma/toon_tools.py +2748 -0
  191. alita_sdk/tools/github/__init__.py +13 -14
  192. alita_sdk/tools/github/github_client.py +224 -100
  193. alita_sdk/tools/github/graphql_client_wrapper.py +119 -33
  194. alita_sdk/tools/github/schemas.py +14 -5
  195. alita_sdk/tools/github/tool.py +5 -1
  196. alita_sdk/tools/github/tool_prompts.py +9 -22
  197. alita_sdk/tools/gitlab/__init__.py +15 -11
  198. alita_sdk/tools/gitlab/api_wrapper.py +207 -41
  199. alita_sdk/tools/gitlab_org/__init__.py +10 -8
  200. alita_sdk/tools/gitlab_org/api_wrapper.py +63 -64
  201. alita_sdk/tools/google/bigquery/__init__.py +13 -12
  202. alita_sdk/tools/google/bigquery/tool.py +5 -1
  203. alita_sdk/tools/google_places/__init__.py +10 -8
  204. alita_sdk/tools/google_places/api_wrapper.py +1 -1
  205. alita_sdk/tools/jira/__init__.py +17 -11
  206. alita_sdk/tools/jira/api_wrapper.py +91 -40
  207. alita_sdk/tools/keycloak/__init__.py +11 -8
  208. alita_sdk/tools/localgit/__init__.py +9 -3
  209. alita_sdk/tools/localgit/local_git.py +62 -54
  210. alita_sdk/tools/localgit/tool.py +5 -1
  211. alita_sdk/tools/memory/__init__.py +11 -3
  212. alita_sdk/tools/non_code_indexer_toolkit.py +1 -0
  213. alita_sdk/tools/ocr/__init__.py +11 -8
  214. alita_sdk/tools/openapi/__init__.py +490 -114
  215. alita_sdk/tools/openapi/api_wrapper.py +1368 -0
  216. alita_sdk/tools/openapi/tool.py +20 -0
  217. alita_sdk/tools/pandas/__init__.py +20 -12
  218. alita_sdk/tools/pandas/api_wrapper.py +38 -25
  219. alita_sdk/tools/pandas/dataframe/generator/base.py +3 -1
  220. alita_sdk/tools/postman/__init__.py +11 -11
  221. alita_sdk/tools/pptx/__init__.py +10 -9
  222. alita_sdk/tools/pptx/pptx_wrapper.py +1 -1
  223. alita_sdk/tools/qtest/__init__.py +30 -10
  224. alita_sdk/tools/qtest/api_wrapper.py +430 -13
  225. alita_sdk/tools/rally/__init__.py +10 -8
  226. alita_sdk/tools/rally/api_wrapper.py +1 -1
  227. alita_sdk/tools/report_portal/__init__.py +12 -9
  228. alita_sdk/tools/salesforce/__init__.py +10 -9
  229. alita_sdk/tools/servicenow/__init__.py +17 -14
  230. alita_sdk/tools/servicenow/api_wrapper.py +1 -1
  231. alita_sdk/tools/sharepoint/__init__.py +10 -8
  232. alita_sdk/tools/sharepoint/api_wrapper.py +4 -4
  233. alita_sdk/tools/slack/__init__.py +10 -8
  234. alita_sdk/tools/slack/api_wrapper.py +2 -2
  235. alita_sdk/tools/sql/__init__.py +11 -9
  236. alita_sdk/tools/testio/__init__.py +10 -8
  237. alita_sdk/tools/testrail/__init__.py +11 -8
  238. alita_sdk/tools/testrail/api_wrapper.py +1 -1
  239. alita_sdk/tools/utils/__init__.py +9 -4
  240. alita_sdk/tools/utils/content_parser.py +77 -3
  241. alita_sdk/tools/utils/text_operations.py +410 -0
  242. alita_sdk/tools/utils/tool_prompts.py +79 -0
  243. alita_sdk/tools/vector_adapters/VectorStoreAdapter.py +17 -13
  244. alita_sdk/tools/xray/__init__.py +12 -9
  245. alita_sdk/tools/yagmail/__init__.py +9 -3
  246. alita_sdk/tools/zephyr/__init__.py +9 -7
  247. alita_sdk/tools/zephyr_enterprise/__init__.py +11 -8
  248. alita_sdk/tools/zephyr_essential/__init__.py +10 -8
  249. alita_sdk/tools/zephyr_essential/api_wrapper.py +30 -13
  250. alita_sdk/tools/zephyr_essential/client.py +2 -2
  251. alita_sdk/tools/zephyr_scale/__init__.py +11 -9
  252. alita_sdk/tools/zephyr_scale/api_wrapper.py +2 -2
  253. alita_sdk/tools/zephyr_squad/__init__.py +10 -8
  254. {alita_sdk-0.3.462.dist-info → alita_sdk-0.3.627.dist-info}/METADATA +147 -7
  255. alita_sdk-0.3.627.dist-info/RECORD +468 -0
  256. alita_sdk-0.3.627.dist-info/entry_points.txt +2 -0
  257. alita_sdk-0.3.462.dist-info/RECORD +0 -384
  258. alita_sdk-0.3.462.dist-info/entry_points.txt +0 -2
  259. {alita_sdk-0.3.462.dist-info → alita_sdk-0.3.627.dist-info}/WHEEL +0 -0
  260. {alita_sdk-0.3.462.dist-info → alita_sdk-0.3.627.dist-info}/licenses/LICENSE +0 -0
  261. {alita_sdk-0.3.462.dist-info → alita_sdk-0.3.627.dist-info}/top_level.txt +0 -0
@@ -3,14 +3,56 @@ Filesystem tools for CLI agents.
3
3
 
4
4
  Provides comprehensive file system operations restricted to specific directories.
5
5
  Inspired by MCP filesystem server implementation.
6
+
7
+ Also provides a FilesystemApiWrapper for integration with the inventory ingestion
8
+ pipeline, enabling local document loading and chunking.
6
9
  """
7
10
 
11
+ import base64
12
+ import fnmatch
13
+ import hashlib
14
+ import logging
8
15
  import os
9
16
  from pathlib import Path
10
- from typing import Optional, List, Dict, Any
17
+ from typing import Optional, List, Dict, Any, Generator, ClassVar
11
18
  from datetime import datetime
12
- from langchain_core.tools import BaseTool
13
- from pydantic import BaseModel, Field
19
+ from langchain_core.tools import BaseTool, ToolException
20
+ from langchain_core.documents import Document
21
+ from pydantic import BaseModel, Field, model_validator
22
+
23
+ logger = logging.getLogger(__name__)
24
+
25
+
26
+ # Maximum recommended content size for single write operations (in characters)
27
+ MAX_RECOMMENDED_CONTENT_SIZE = 5000 # ~5KB, roughly 1,200-1,500 tokens
28
+
29
+ # Helpful error message for truncated content
30
+ CONTENT_TRUNCATED_ERROR = """
31
+ ⚠️ CONTENT FIELD MISSING - OUTPUT TRUNCATED
32
+
33
+ Your tool call was cut off because the content was too large for the context window.
34
+ The JSON was truncated, leaving the 'content' field incomplete or missing.
35
+
36
+ 🔧 HOW TO FIX THIS:
37
+
38
+ 1. **Use incremental writes** - Don't write large files in one call:
39
+ - First: filesystem_write_file(path, "# Header\\nimport x\\n\\n")
40
+ - Then: filesystem_append_file(path, "def func1():\\n ...\\n\\n")
41
+ - Then: filesystem_append_file(path, "def func2():\\n ...\\n\\n")
42
+
43
+ 2. **Keep each chunk small** - Under 2000 characters per call
44
+
45
+ 3. **Structure first, details later**:
46
+ - Write skeleton/structure first
47
+ - Add implementations section by section
48
+
49
+ 4. **For documentation/reports**:
50
+ - Write one section at a time
51
+ - Use append_file for each new section
52
+
53
+ ❌ DON'T: Try to write the entire file content again
54
+ ✅ DO: Break it into 3-5 smaller append_file calls
55
+ """
14
56
 
15
57
 
16
58
  class ReadFileInput(BaseModel):
@@ -47,7 +89,38 @@ class ReadMultipleFilesInput(BaseModel):
47
89
  class WriteFileInput(BaseModel):
48
90
  """Input for writing to a file."""
49
91
  path: str = Field(description="Relative path to the file to write")
50
- content: str = Field(description="Content to write to the file")
92
+ content: Optional[str] = Field(
93
+ default=None,
94
+ description="Content to write to the file. REQUIRED - this field cannot be empty or omitted."
95
+ )
96
+
97
+ @model_validator(mode='after')
98
+ def validate_content_required(self):
99
+ """Provide helpful error message when content is missing or truncated."""
100
+ if self.content is None:
101
+ raise ToolException(CONTENT_TRUNCATED_ERROR)
102
+ if len(self.content) > MAX_RECOMMENDED_CONTENT_SIZE:
103
+ logger.warning(
104
+ f"Content is very large ({len(self.content)} chars). Consider using append_file "
105
+ "for incremental writes to avoid truncation issues."
106
+ )
107
+ return self
108
+
109
+
110
+ class AppendFileInput(BaseModel):
111
+ """Input for appending to a file."""
112
+ path: str = Field(description="Relative path to the file to append to")
113
+ content: Optional[str] = Field(
114
+ default=None,
115
+ description="Content to append to the end of the file. REQUIRED - this field cannot be empty or omitted."
116
+ )
117
+
118
+ @model_validator(mode='after')
119
+ def validate_content_required(self):
120
+ """Provide helpful error message when content is missing or truncated."""
121
+ if self.content is None:
122
+ raise ToolException(CONTENT_TRUNCATED_ERROR)
123
+ return self
51
124
 
52
125
 
53
126
  class EditFileInput(BaseModel):
@@ -62,18 +135,21 @@ class ListDirectoryInput(BaseModel):
62
135
  path: str = Field(default=".", description="Relative path to the directory to list")
63
136
  include_sizes: bool = Field(default=False, description="Include file sizes in the output")
64
137
  sort_by: str = Field(default="name", description="Sort by 'name' or 'size'")
138
+ max_results: Optional[int] = Field(default=200, description="Maximum number of entries to return. Default is 200 to prevent context overflow.")
65
139
 
66
140
 
67
141
  class DirectoryTreeInput(BaseModel):
68
142
  """Input for getting a directory tree."""
69
143
  path: str = Field(default=".", description="Relative path to the directory")
70
- max_depth: Optional[int] = Field(None, description="Maximum depth to traverse (None for unlimited)")
144
+ max_depth: Optional[int] = Field(default=3, description="Maximum depth to traverse. Default is 3 to prevent excessive output. Use None for unlimited (caution: may exceed context limits).")
145
+ max_items: Optional[int] = Field(default=200, description="Maximum number of files/directories to include. Default is 200 to prevent context window overflow. Use None for unlimited (caution: large directories may exceed context limits).")
71
146
 
72
147
 
73
148
  class SearchFilesInput(BaseModel):
74
149
  """Input for searching files."""
75
150
  path: str = Field(default=".", description="Relative path to search from")
76
151
  pattern: str = Field(description="Glob pattern to match (e.g., '*.py', '**/*.txt')")
152
+ max_results: Optional[int] = Field(default=100, description="Maximum number of results to return. Default is 100 to prevent context overflow. Use None for unlimited.")
77
153
 
78
154
 
79
155
  class DeleteFileInput(BaseModel):
@@ -104,29 +180,110 @@ class EmptyInput(BaseModel):
104
180
 
105
181
  class FileSystemTool(BaseTool):
106
182
  """Base class for filesystem tools with directory restriction."""
107
- base_directory: str
183
+ base_directory: str # Primary directory (for backward compatibility)
184
+ allowed_directories: List[str] = [] # Additional allowed directories
185
+ _basename_collision_detected: bool = False # Cache for collision detection
186
+ _basename_collision_checked: bool = False # Whether we've checked for collisions
187
+
188
+ def _get_all_allowed_directories(self) -> List[Path]:
189
+ """Get all allowed directories as resolved Paths."""
190
+ dirs = [Path(self.base_directory).resolve()]
191
+ for d in self.allowed_directories:
192
+ resolved = Path(d).resolve()
193
+ if resolved not in dirs:
194
+ dirs.append(resolved)
195
+ return dirs
196
+
197
+ def _check_basename_collision(self) -> bool:
198
+ """Check if multiple allowed directories have the same basename."""
199
+ if self._basename_collision_checked:
200
+ return self._basename_collision_detected
201
+
202
+ allowed_dirs = self._get_all_allowed_directories()
203
+ basenames = [d.name for d in allowed_dirs]
204
+ self._basename_collision_detected = len(basenames) != len(set(basenames))
205
+ self._basename_collision_checked = True
206
+ return self._basename_collision_detected
207
+
208
+ def _get_relative_path_from_allowed_dirs(self, absolute_path: Path) -> tuple:
209
+ """Get relative path and directory name for a file in allowed directories.
210
+
211
+ Args:
212
+ absolute_path: Absolute path to the file
213
+
214
+ Returns:
215
+ Tuple of (relative_path, directory_name)
216
+
217
+ Raises:
218
+ ValueError: If path is not within any allowed directory
219
+ """
220
+ allowed_dirs = self._get_all_allowed_directories()
221
+
222
+ # Find which allowed directory contains this path
223
+ for base in allowed_dirs:
224
+ try:
225
+ rel_path = absolute_path.relative_to(base)
226
+
227
+ # Determine directory name for prefix
228
+ if self._check_basename_collision():
229
+ # Use parent/basename format to disambiguate
230
+ dir_name = f"{base.parent.name}/{base.name}"
231
+ else:
232
+ # Use just basename
233
+ dir_name = base.name
234
+
235
+ return (str(rel_path), dir_name)
236
+ except ValueError:
237
+ continue
238
+
239
+ # Path not in any allowed directory
240
+ allowed_paths = [str(d) for d in allowed_dirs]
241
+ raise ValueError(
242
+ f"Path '{absolute_path}' is not within any allowed directory.\n"
243
+ f"Allowed directories: {allowed_paths}\n"
244
+ f"Attempted path: {absolute_path}"
245
+ )
108
246
 
109
247
  def _resolve_path(self, relative_path: str) -> Path:
110
248
  """
111
- Resolve and validate a path within the base directory.
249
+ Resolve and validate a path within any of the allowed directories.
112
250
 
113
- Security: Ensures resolved path is within allowed directory.
251
+ Security: Ensures resolved path is within one of the allowed directories.
114
252
  """
115
- base = Path(self.base_directory).resolve()
253
+ allowed_dirs = self._get_all_allowed_directories()
116
254
 
117
- # Handle both relative and absolute paths
255
+ # Handle absolute paths - check if within any allowed directory
118
256
  if Path(relative_path).is_absolute():
119
257
  target = Path(relative_path).resolve()
120
- else:
121
- target = (base / relative_path).resolve()
258
+ for base in allowed_dirs:
259
+ try:
260
+ target.relative_to(base)
261
+ return target
262
+ except ValueError:
263
+ continue
264
+ raise ValueError(f"Access denied: path '{relative_path}' is outside allowed directories")
122
265
 
123
- # Security check: ensure the resolved path is within base directory
124
- try:
125
- target.relative_to(base)
126
- except ValueError:
127
- raise ValueError(f"Access denied: path '{relative_path}' is outside allowed directory")
266
+ # For relative paths, try to resolve against each allowed directory
267
+ # First check primary base_directory
268
+ primary_base = allowed_dirs[0]
269
+ target = (primary_base / relative_path).resolve()
270
+
271
+ # Check if target is within any allowed directory
272
+ for base in allowed_dirs:
273
+ try:
274
+ target.relative_to(base)
275
+ return target
276
+ except ValueError:
277
+ continue
128
278
 
129
- return target
279
+ # If relative path doesn't work from primary, try finding the file in other directories
280
+ for base in allowed_dirs[1:]:
281
+ candidate = (base / relative_path).resolve()
282
+ if candidate.exists():
283
+ return candidate
284
+
285
+ # Default to primary base directory resolution
286
+ raise ValueError(f"Access denied: path '{relative_path}' is outside allowed directories")
130
287
 
131
288
  def _format_size(self, size: int) -> str:
132
289
  """Format file size in human-readable format."""
@@ -147,6 +304,11 @@ class ReadFileTool(FileSystemTool):
147
304
  "Only works within allowed directories."
148
305
  )
149
306
  args_schema: type[BaseModel] = ReadFileInput
307
+ truncation_suggestions: ClassVar[List[str]] = [
308
+ "Use head=100 to read only the first 100 lines",
309
+ "Use tail=100 to read only the last 100 lines",
310
+ "Use filesystem_read_file_chunk with start_line and end_line for specific sections",
311
+ ]
150
312
 
151
313
  def _run(self, path: str, head: Optional[int] = None, tail: Optional[int] = None) -> str:
152
314
  """Read a file with optional head/tail."""
@@ -196,6 +358,10 @@ class ReadFileChunkTool(FileSystemTool):
196
358
  "Only works within allowed directories."
197
359
  )
198
360
  args_schema: type[BaseModel] = ReadFileChunkInput
361
+ truncation_suggestions: ClassVar[List[str]] = [
362
+ "Reduce the line range (end_line - start_line) to read fewer lines at once",
363
+ "Read smaller chunks sequentially if you need to process the entire file",
364
+ ]
199
365
 
200
366
  def _run(self, path: str, start_line: int = 1, end_line: Optional[int] = None) -> str:
201
367
  """Read a chunk of a file by line range."""
@@ -246,6 +412,10 @@ class ReadMultipleFilesTool(FileSystemTool):
246
412
  "Only works within allowed directories."
247
413
  )
248
414
  args_schema: type[BaseModel] = ReadMultipleFilesInput
415
+ truncation_suggestions: ClassVar[List[str]] = [
416
+ "Read fewer files at once - split into multiple smaller batches",
417
+ "Use filesystem_read_file with head parameter on individual large files instead",
418
+ ]
249
419
 
250
420
  def _run(self, paths: List[str]) -> str:
251
421
  """Read multiple files."""
@@ -291,6 +461,43 @@ class WriteFileTool(FileSystemTool):
291
461
  return f"Error writing to file '{path}': {str(e)}"
292
462
 
293
463
 
464
+ class AppendFileTool(FileSystemTool):
465
+ """Append content to the end of a file."""
466
+ name: str = "filesystem_append_file"
467
+ description: str = (
468
+ "Append content to the end of an existing file. Creates the file if it doesn't exist. "
469
+ "Use this for incremental file creation - write initial structure with write_file, "
470
+ "then add sections progressively with append_file. This is safer than rewriting "
471
+ "entire files and prevents context overflow. Only works within allowed directories."
472
+ )
473
+ args_schema: type[BaseModel] = AppendFileInput
474
+
475
+ def _run(self, path: str, content: str) -> str:
476
+ """Append to a file."""
477
+ try:
478
+ target = self._resolve_path(path)
479
+
480
+ # Create parent directories if they don't exist
481
+ target.parent.mkdir(parents=True, exist_ok=True)
482
+
483
+ # Check current file size if it exists
484
+ existed = target.exists()
485
+ original_size = target.stat().st_size if existed else 0
486
+
487
+ with open(target, 'a', encoding='utf-8') as f:
488
+ f.write(content)
489
+
490
+ appended_size = len(content.encode('utf-8'))
491
+ new_size = original_size + appended_size
492
+
493
+ if existed:
494
+ return f"Successfully appended {self._format_size(appended_size)} to '{path}' (total: {self._format_size(new_size)})"
495
+ else:
496
+ return f"Created '{path}' and wrote {self._format_size(appended_size)}"
497
+ except Exception as e:
498
+ return f"Error appending to file '{path}': {str(e)}"
499
+
500
+
294
501
  class EditFileTool(FileSystemTool):
295
502
  """Edit file with precise text replacement."""
296
503
  name: str = "filesystem_edit_file"
@@ -443,8 +650,12 @@ class ListDirectoryTool(FileSystemTool):
443
650
  "Only works within allowed directories."
444
651
  )
445
652
  args_schema: type[BaseModel] = ListDirectoryInput
653
+ truncation_suggestions: ClassVar[List[str]] = [
654
+ "List a specific subdirectory instead of the root directory",
655
+ "Consider using filesystem_directory_tree with max_depth=1 for hierarchical overview",
656
+ ]
446
657
 
447
- def _run(self, path: str = ".", include_sizes: bool = False, sort_by: str = "name") -> str:
658
+ def _run(self, path: str = ".", include_sizes: bool = False, sort_by: str = "name", max_results: Optional[int] = 200) -> str:
448
659
  """List directory contents."""
449
660
  try:
450
661
  target = self._resolve_path(path)
@@ -460,7 +671,8 @@ class ListDirectoryTool(FileSystemTool):
460
671
  entry_info = {
461
672
  'name': entry.name,
462
673
  'is_dir': entry.is_dir(),
463
- 'size': entry.stat().st_size if entry.is_file() else 0
674
+ 'size': entry.stat().st_size if entry.is_file() else 0,
675
+ 'path': entry
464
676
  }
465
677
  entries.append(entry_info)
466
678
 
@@ -470,6 +682,18 @@ class ListDirectoryTool(FileSystemTool):
470
682
  else:
471
683
  entries.sort(key=lambda x: x['name'].lower())
472
684
 
685
+ # Apply limit
686
+ total_count = len(entries)
687
+ truncated = False
688
+ if max_results is not None and total_count > max_results:
689
+ entries = entries[:max_results]
690
+ truncated = True
691
+
692
+ # Get directory name for multi-directory configs
693
+ allowed_dirs = self._get_all_allowed_directories()
694
+ has_multiple_dirs = len(allowed_dirs) > 1
695
+ _, dir_name = self._get_relative_path_from_allowed_dirs(target) if has_multiple_dirs else ("", "")
696
+
473
697
  # Format output
474
698
  lines = []
475
699
  total_files = 0
@@ -478,7 +702,12 @@ class ListDirectoryTool(FileSystemTool):
478
702
 
479
703
  for entry in entries:
480
704
  prefix = "[DIR] " if entry['is_dir'] else "[FILE]"
481
- name = entry['name']
705
+
706
+ # Add directory prefix for multi-directory configs
707
+ if has_multiple_dirs:
708
+ name = f"{dir_name}/{entry['name']}"
709
+ else:
710
+ name = entry['name']
482
711
 
483
712
  if include_sizes and not entry['is_dir']:
484
713
  size_str = self._format_size(entry['size'])
@@ -494,13 +723,27 @@ class ListDirectoryTool(FileSystemTool):
494
723
 
495
724
  result = "\n".join(lines)
496
725
 
726
+ # Add header showing the listing context
727
+ if path in (".", "", "./"):
728
+ header = "Contents of working directory (./):\n\n"
729
+ else:
730
+ header = f"Contents of {path}/:\n\n"
731
+ result = header + result
732
+
497
733
  if include_sizes:
498
734
  summary = f"\n\nTotal: {total_files} files, {total_dirs} directories"
499
735
  if total_files > 0:
500
736
  summary += f"\nCombined size: {self._format_size(total_size)}"
501
737
  result += summary
502
738
 
503
- return result if result else "Directory is empty"
739
+ if truncated:
740
+ result += f"\n\n⚠️ OUTPUT TRUNCATED: Showing {len(entries)} of {total_count} entries from '{dir_name if has_multiple_dirs else path}' (max_results={max_results})"
741
+ result += "\n To see more: increase max_results or list a specific subdirectory"
742
+
743
+ # Add note about how to access files
744
+ result += "\n\nNote: Access files using paths shown above (e.g., 'agents/file.md' for items in agents/ directory)"
745
+
746
+ return result if lines else "Directory is empty"
504
747
  except Exception as e:
505
748
  return f"Error listing directory '{path}': {str(e)}"
506
749
 
@@ -511,25 +754,51 @@ class DirectoryTreeTool(FileSystemTool):
511
754
  description: str = (
512
755
  "Get a recursive tree view of files and directories. "
513
756
  "Shows the complete structure in an easy-to-read tree format. "
514
- "Use max_depth to limit recursion depth. "
757
+ "IMPORTANT: For large directories, use max_depth (default: 3) and max_items (default: 200) "
758
+ "to prevent context window overflow. Increase these only if needed for smaller directories. "
515
759
  "Only works within allowed directories."
516
760
  )
517
761
  args_schema: type[BaseModel] = DirectoryTreeInput
762
+ truncation_suggestions: ClassVar[List[str]] = [
763
+ "Use max_depth=2 to limit directory traversal depth",
764
+ "Use max_items=50 to limit total items returned",
765
+ "Target a specific subdirectory instead of the root",
766
+ ]
767
+
768
+ # Track item count during tree building
769
+ _item_count: int = 0
770
+ _max_items: Optional[int] = None
771
+ _truncated: bool = False
518
772
 
519
773
  def _build_tree(self, directory: Path, prefix: str = "", depth: int = 0, max_depth: Optional[int] = None) -> List[str]:
520
- """Recursively build directory tree."""
774
+ """Recursively build directory tree with item limit."""
775
+ # Check depth limit
521
776
  if max_depth is not None and depth >= max_depth:
522
777
  return []
523
778
 
779
+ # Check item limit
780
+ if self._max_items is not None and self._item_count >= self._max_items:
781
+ if not self._truncated:
782
+ self._truncated = True
783
+ return []
784
+
524
785
  lines = []
525
786
  try:
526
787
  entries = sorted(directory.iterdir(), key=lambda x: (not x.is_dir(), x.name.lower()))
527
788
 
528
789
  for i, entry in enumerate(entries):
790
+ # Check item limit before adding each entry
791
+ if self._max_items is not None and self._item_count >= self._max_items:
792
+ if not self._truncated:
793
+ self._truncated = True
794
+ break
795
+
529
796
  is_last = i == len(entries) - 1
530
797
  current_prefix = "└── " if is_last else "├── "
531
798
  next_prefix = " " if is_last else "│ "
532
799
 
800
+ self._item_count += 1
801
+
533
802
  if entry.is_dir():
534
803
  lines.append(f"{prefix}{current_prefix}📁 {entry.name}/")
535
804
  lines.extend(self._build_tree(entry, prefix + next_prefix, depth + 1, max_depth))
@@ -541,8 +810,8 @@ class DirectoryTreeTool(FileSystemTool):
541
810
 
542
811
  return lines
543
812
 
544
- def _run(self, path: str = ".", max_depth: Optional[int] = None) -> str:
545
- """Get directory tree."""
813
+ def _run(self, path: str = ".", max_depth: Optional[int] = 3, max_items: Optional[int] = 200) -> str:
814
+ """Get directory tree with size limits to prevent context overflow."""
546
815
  try:
547
816
  target = self._resolve_path(path)
548
817
 
@@ -552,9 +821,31 @@ class DirectoryTreeTool(FileSystemTool):
552
821
  if not target.is_dir():
553
822
  return f"Error: '{path}' is not a directory"
554
823
 
555
- lines = [f"📁 {target.name or path}/"]
824
+ # Reset counters for this run
825
+ self._item_count = 0
826
+ self._max_items = max_items
827
+ self._truncated = False
828
+
829
+ # Show relative path from base directory, use '.' for root
830
+ # This prevents confusion - files should be accessed relative to working directory
831
+ if path in (".", "", "./"):
832
+ display_root = "." # Root of working directory
833
+ else:
834
+ display_root = path.rstrip('/')
835
+
836
+ lines = [f"📁 {display_root}/"]
556
837
  lines.extend(self._build_tree(target, "", 0, max_depth))
557
838
 
839
+ # Add truncation warning if limit was reached
840
+ if self._truncated:
841
+ lines.append("")
842
+ lines.append(f"⚠️ OUTPUT TRUNCATED: Showing {self._item_count} of more items (max_items={max_items}, max_depth={max_depth})")
843
+ lines.append(f" To see more: increase max_items or max_depth, or use filesystem_list_directory on specific subdirectories")
844
+
845
+ # Add note about file paths
846
+ lines.append("")
847
+ lines.append("Note: Use paths relative to working directory (e.g., 'agents/file.md', not including the root directory name)")
848
+
558
849
  return "\n".join(lines)
559
850
  except Exception as e:
560
851
  return f"Error building directory tree for '{path}': {str(e)}"
@@ -566,13 +857,18 @@ class SearchFilesTool(FileSystemTool):
566
857
  description: str = (
567
858
  "Recursively search for files and directories matching a glob pattern. "
568
859
  "Use patterns like '*.py' for Python files in current dir, or '**/*.py' for all Python files recursively. "
569
- "Returns full paths to all matching items. "
860
+ "Returns paths to matching items (default limit: 100 results to prevent context overflow). "
570
861
  "Only searches within allowed directories."
571
862
  )
572
863
  args_schema: type[BaseModel] = SearchFilesInput
864
+ truncation_suggestions: ClassVar[List[str]] = [
865
+ "Use max_results=50 to limit number of results",
866
+ "Use a more specific glob pattern (e.g., 'src/**/*.py' instead of '**/*.py')",
867
+ "Search in a specific subdirectory instead of the root",
868
+ ]
573
869
 
574
- def _run(self, path: str = ".", pattern: str = "*") -> str:
575
- """Search for files."""
870
+ def _run(self, path: str = ".", pattern: str = "*", max_results: Optional[int] = 100) -> str:
871
+ """Search for files with result limit."""
576
872
  try:
577
873
  target = self._resolve_path(path)
578
874
 
@@ -583,28 +879,51 @@ class SearchFilesTool(FileSystemTool):
583
879
  return f"Error: '{path}' is not a directory"
584
880
 
585
881
  # Use glob to find matching files
586
- if '**' in pattern:
587
- matches = list(target.glob(pattern))
588
- else:
589
- matches = list(target.glob(pattern))
882
+ all_matches = list(target.glob(pattern))
883
+ total_count = len(all_matches)
590
884
 
591
- if not matches:
885
+ if not all_matches:
592
886
  return f"No files matching '{pattern}' found in '{path}'"
593
887
 
594
- # Format results
595
- base = Path(self.base_directory).resolve()
596
- results = []
888
+ # Apply limit
889
+ truncated = False
890
+ if max_results is not None and total_count > max_results:
891
+ matches = sorted(all_matches)[:max_results]
892
+ truncated = True
893
+ else:
894
+ matches = sorted(all_matches)
597
895
 
598
- for match in sorted(matches):
599
- rel_path = match.relative_to(base)
896
+ # Format results with directory prefixes for multi-directory configs
897
+ allowed_dirs = self._get_all_allowed_directories()
898
+ has_multiple_dirs = len(allowed_dirs) > 1
899
+ results = []
900
+ search_dir_name = None
901
+
902
+ for match in matches:
903
+ if has_multiple_dirs:
904
+ rel_path_str, dir_name = self._get_relative_path_from_allowed_dirs(match)
905
+ display_path = f"{dir_name}/{rel_path_str}"
906
+ if search_dir_name is None:
907
+ search_dir_name = dir_name
908
+ else:
909
+ rel_path_str = str(match.relative_to(Path(self.base_directory).resolve()))
910
+ display_path = rel_path_str
911
+
600
912
  if match.is_dir():
601
- results.append(f"📁 {rel_path}/")
913
+ results.append(f"📁 {display_path}/")
602
914
  else:
603
915
  size = self._format_size(match.stat().st_size)
604
- results.append(f"📄 {rel_path} ({size})")
916
+ results.append(f"📄 {display_path} ({size})")
917
+
918
+ header = f"Found {total_count} matches for '{pattern}':\n\n"
919
+ output = header + "\n".join(results)
605
920
 
606
- header = f"Found {len(matches)} matches for '{pattern}':\n\n"
607
- return header + "\n".join(results)
921
+ if truncated:
922
+ location_str = f"from '{search_dir_name}' " if search_dir_name else ""
923
+ output += f"\n\n⚠️ OUTPUT TRUNCATED: Showing {max_results} of {total_count} results {location_str}(max_results={max_results})"
924
+ output += "\n To see more: increase max_results or use a more specific pattern"
925
+
926
+ return output
608
927
  except Exception as e:
609
928
  return f"Error searching files in '{path}': {str(e)}"
610
929
 
@@ -753,7 +1072,524 @@ class ListAllowedDirectoriesTool(FileSystemTool):
753
1072
 
754
1073
  def _run(self) -> str:
755
1074
  """List allowed directories."""
756
- return f"Allowed directory:\n{self.base_directory}\n\nAll subdirectories within this path are accessible."
1075
+ dirs = self._get_all_allowed_directories()
1076
+ if len(dirs) == 1:
1077
+ return f"Allowed directory:\n{dirs[0]}\n\nAll subdirectories within this path are accessible."
1078
+ else:
1079
+ dir_list = "\n".join(f" - {d}" for d in dirs)
1080
+ return f"Allowed directories:\n{dir_list}\n\nAll subdirectories within these paths are accessible."
1081
+
1082
+
1083
+ # ========== Filesystem API Wrapper for Inventory Ingestion ==========
1084
+
1085
+ class FilesystemApiWrapper:
1086
+ """
1087
+ API Wrapper for filesystem operations compatible with inventory ingestion pipeline.
1088
+
1089
+ Supports both text and non-text files:
1090
+ - Text files: .py, .md, .txt, .json, .yaml, etc.
1091
+ - Documents: .pdf, .docx, .pptx, .xlsx, .xls (converted to markdown)
1092
+ - Images: .png, .jpg, .gif, .webp (base64 encoded or described via LLM)
1093
+
1094
+ Usage:
1095
+ # Create wrapper for a directory
1096
+ wrapper = FilesystemApiWrapper(base_directory="/path/to/docs")
1097
+
1098
+ # Load documents (uses inherited loader())
1099
+ for doc in wrapper.loader(whitelist=["*.md", "*.pdf"]):
1100
+ print(doc.page_content[:100])
1101
+
1102
+ # For image description, provide an LLM
1103
+ wrapper = FilesystemApiWrapper(base_directory="/path/to/docs", llm=my_llm)
1104
+ for doc in wrapper.loader(whitelist=["*.png"]):
1105
+ print(doc.page_content) # LLM-generated description
1106
+
1107
+ # Use with inventory ingestion
1108
+ pipeline = IngestionPipeline(llm=llm, graph_path="./graph.json")
1109
+ pipeline.register_toolkit("local_docs", wrapper)
1110
+ result = pipeline.run(source="local_docs", whitelist=["*.md", "*.pdf"])
1111
+ """
1112
+
1113
+ # Filesystem-specific settings
1114
+ base_directory: str = ""
1115
+ recursive: bool = True
1116
+ follow_symlinks: bool = False
1117
+ llm: Any = None # Optional LLM for image processing
1118
+
1119
+ # File type categories
1120
+ BINARY_EXTENSIONS = {'.pdf', '.docx', '.doc', '.pptx', '.ppt', '.xlsx', '.xls'}
1121
+ IMAGE_EXTENSIONS = {'.png', '.jpg', '.jpeg', '.gif', '.webp', '.bmp', '.svg'}
1122
+
1123
+ def __init__(
1124
+ self,
1125
+ base_directory: str,
1126
+ recursive: bool = True,
1127
+ follow_symlinks: bool = False,
1128
+ llm: Any = None,
1129
+ **kwargs
1130
+ ):
1131
+ """
1132
+ Initialize filesystem wrapper.
1133
+
1134
+ Args:
1135
+ base_directory: Root directory for file operations
1136
+ recursive: If True, search subdirectories recursively
1137
+ follow_symlinks: If True, follow symbolic links
1138
+ llm: Optional LLM for image description (if not provided, images are base64 encoded)
1139
+ **kwargs: Additional arguments (ignored, for compatibility)
1140
+ """
1141
+ self.base_directory = str(Path(base_directory).resolve())
1142
+ self.recursive = recursive
1143
+ self.follow_symlinks = follow_symlinks
1144
+ self.llm = llm
1145
+
1146
+ # For compatibility with BaseCodeToolApiWrapper.loader()
1147
+ self.active_branch = None
1148
+
1149
+ # Validate directory
1150
+ if not Path(self.base_directory).exists():
1151
+ raise ValueError(f"Directory does not exist: {self.base_directory}")
1152
+ if not Path(self.base_directory).is_dir():
1153
+ raise ValueError(f"Path is not a directory: {self.base_directory}")
1154
+
1155
+ # Optional RunnableConfig for CLI/standalone usage
1156
+ self._runnable_config = None
1157
+
1158
+ def set_runnable_config(self, config: Optional[Dict[str, Any]]) -> None:
1159
+ """
1160
+ Set the RunnableConfig for dispatching custom events.
1161
+
1162
+ This is required when running outside of a LangChain agent context
1163
+ (e.g., from CLI). Without a config containing a run_id,
1164
+ dispatch_custom_event will fail with "Unable to dispatch an adhoc event
1165
+ without a parent run id".
1166
+
1167
+ Args:
1168
+ config: A RunnableConfig dict with at least {'run_id': uuid}
1169
+ """
1170
+ self._runnable_config = config
1171
+
1172
+ def _log_tool_event(self, message: str, tool_name: str = None, config: Optional[Dict[str, Any]] = None):
1173
+ """Log progress events (mirrors BaseToolApiWrapper).
1174
+
1175
+ Args:
1176
+ message: The message to log
1177
+ tool_name: Name of the tool (defaults to 'filesystem')
1178
+ config: Optional RunnableConfig. If not provided, uses self._runnable_config.
1179
+ Required when running outside a LangChain agent context.
1180
+ """
1181
+ logger.info(f"[{tool_name or 'filesystem'}] {message}")
1182
+ try:
1183
+ from langchain_core.callbacks import dispatch_custom_event
1184
+
1185
+ # Use provided config, fall back to instance config
1186
+ effective_config = config or getattr(self, '_runnable_config', None)
1187
+
1188
+ dispatch_custom_event(
1189
+ name="thinking_step",
1190
+ data={
1191
+ "message": message,
1192
+ "tool_name": tool_name or "filesystem",
1193
+ "toolkit": "FilesystemApiWrapper",
1194
+ },
1195
+ config=effective_config,
1196
+ )
1197
+ except Exception:
1198
+ pass
1199
+
1200
+ def _get_files(self, path: str = "", branch: str = None) -> List[str]:
1201
+ """
1202
+ Get list of files in the directory.
1203
+
1204
+ Implements BaseCodeToolApiWrapper._get_files() for filesystem.
1205
+
1206
+ Args:
1207
+ path: Subdirectory path (relative to base_directory)
1208
+ branch: Ignored for filesystem (compatibility with git-based toolkits)
1209
+
1210
+ Returns:
1211
+ List of file paths relative to base_directory
1212
+ """
1213
+ base = Path(self.base_directory)
1214
+ search_path = base / path if path else base
1215
+
1216
+ if not search_path.exists():
1217
+ return []
1218
+
1219
+ files = []
1220
+
1221
+ if self.recursive:
1222
+ for root, dirs, filenames in os.walk(search_path, followlinks=self.follow_symlinks):
1223
+ # Skip hidden directories
1224
+ dirs[:] = [d for d in dirs if not d.startswith('.')]
1225
+
1226
+ for filename in filenames:
1227
+ if filename.startswith('.'):
1228
+ continue
1229
+
1230
+ full_path = Path(root) / filename
1231
+ try:
1232
+ rel_path = str(full_path.relative_to(base))
1233
+ files.append(rel_path)
1234
+ except ValueError:
1235
+ continue
1236
+ else:
1237
+ for entry in search_path.iterdir():
1238
+ if entry.is_file() and not entry.name.startswith('.'):
1239
+ try:
1240
+ rel_path = str(entry.relative_to(base))
1241
+ files.append(rel_path)
1242
+ except ValueError:
1243
+ continue
1244
+
1245
+ return sorted(files)
1246
+
1247
+ def _is_binary_file(self, file_path: str) -> bool:
1248
+ """Check if file is a binary document (PDF, DOCX, etc.)."""
1249
+ ext = Path(file_path).suffix.lower()
1250
+ return ext in self.BINARY_EXTENSIONS
1251
+
1252
+ def _is_image_file(self, file_path: str) -> bool:
1253
+ """Check if file is an image."""
1254
+ ext = Path(file_path).suffix.lower()
1255
+ return ext in self.IMAGE_EXTENSIONS
1256
+
1257
+ def _read_binary_file(self, file_path: str) -> Optional[str]:
1258
+ """
1259
+ Read binary file (PDF, DOCX, PPTX, Excel) and convert to text/markdown.
1260
+
1261
+ Uses the SDK's content_parser for document conversion.
1262
+
1263
+ Args:
1264
+ file_path: Path relative to base_directory
1265
+
1266
+ Returns:
1267
+ Converted text content, or None if conversion fails
1268
+ """
1269
+ full_path = Path(self.base_directory) / file_path
1270
+
1271
+ try:
1272
+ from alita_sdk.tools.utils.content_parser import parse_file_content
1273
+
1274
+ result = parse_file_content(
1275
+ file_path=str(full_path),
1276
+ is_capture_image=bool(self.llm), # Capture images if LLM available
1277
+ llm=self.llm
1278
+ )
1279
+
1280
+ if isinstance(result, Exception):
1281
+ logger.warning(f"Failed to parse {file_path}: {result}")
1282
+ return None
1283
+
1284
+ return result
1285
+
1286
+ except ImportError:
1287
+ logger.warning("content_parser not available, skipping binary file")
1288
+ return None
1289
+ except Exception as e:
1290
+ logger.warning(f"Error parsing {file_path}: {e}")
1291
+ return None
1292
+
1293
+ def _read_image_file(self, file_path: str) -> Optional[str]:
1294
+ """
1295
+ Read image file and convert to text representation.
1296
+
1297
+ If LLM is available, uses it to describe the image.
1298
+ Otherwise, returns base64-encoded data URI.
1299
+
1300
+ Args:
1301
+ file_path: Path relative to base_directory
1302
+
1303
+ Returns:
1304
+ Image description or base64 data URI
1305
+ """
1306
+ full_path = Path(self.base_directory) / file_path
1307
+
1308
+ if not full_path.exists():
1309
+ return None
1310
+
1311
+ ext = full_path.suffix.lower()
1312
+
1313
+ try:
1314
+ # Read image bytes
1315
+ image_bytes = full_path.read_bytes()
1316
+
1317
+ if self.llm:
1318
+ # Use content_parser with LLM for image description
1319
+ try:
1320
+ from alita_sdk.tools.utils.content_parser import parse_file_content
1321
+
1322
+ result = parse_file_content(
1323
+ file_path=str(full_path),
1324
+ is_capture_image=True,
1325
+ llm=self.llm
1326
+ )
1327
+
1328
+ if isinstance(result, Exception):
1329
+ logger.warning(f"Failed to describe image {file_path}: {result}")
1330
+ else:
1331
+ return f"[Image: {Path(file_path).name}]\n\n{result}"
1332
+
1333
+ except ImportError:
1334
+ pass
1335
+
1336
+ # Fallback: return base64 data URI
1337
+ mime_types = {
1338
+ '.png': 'image/png',
1339
+ '.jpg': 'image/jpeg',
1340
+ '.jpeg': 'image/jpeg',
1341
+ '.gif': 'image/gif',
1342
+ '.webp': 'image/webp',
1343
+ '.bmp': 'image/bmp',
1344
+ '.svg': 'image/svg+xml',
1345
+ }
1346
+ mime_type = mime_types.get(ext, 'application/octet-stream')
1347
+ b64_data = base64.b64encode(image_bytes).decode('utf-8')
1348
+
1349
+ return f"[Image: {Path(file_path).name}]\ndata:{mime_type};base64,{b64_data}"
1350
+
1351
+ except Exception as e:
1352
+ logger.warning(f"Error reading image {file_path}: {e}")
1353
+ return None
1354
+
1355
+ def _read_file(
1356
+ self,
1357
+ file_path: str,
1358
+ branch: str = None,
1359
+ offset: Optional[int] = None,
1360
+ limit: Optional[int] = None,
1361
+ head: Optional[int] = None,
1362
+ tail: Optional[int] = None,
1363
+ ) -> Optional[str]:
1364
+ """
1365
+ Read file content, handling text, binary documents, and images.
1366
+
1367
+ Supports:
1368
+ - Text files: Read directly with encoding detection
1369
+ - Binary documents (PDF, DOCX, PPTX, Excel): Convert to markdown
1370
+ - Images: Return LLM description or base64 data URI
1371
+
1372
+ Args:
1373
+ file_path: Path relative to base_directory
1374
+ branch: Ignored for filesystem (compatibility with git-based toolkits)
1375
+ offset: Start line number (1-indexed). If None, start from beginning.
1376
+ limit: Maximum number of lines to read. If None, read to end.
1377
+ head: Read only first N lines (alternative to offset/limit)
1378
+ tail: Read only last N lines (alternative to offset/limit)
1379
+
1380
+ Returns:
1381
+ File content as string, or None if unreadable
1382
+ """
1383
+ full_path = Path(self.base_directory) / file_path
1384
+
1385
+ # Security check - prevent path traversal
1386
+ try:
1387
+ full_path.resolve().relative_to(Path(self.base_directory).resolve())
1388
+ except ValueError:
1389
+ logger.warning(f"Access denied: {file_path} is outside base directory")
1390
+ return None
1391
+
1392
+ if not full_path.exists() or not full_path.is_file():
1393
+ return None
1394
+
1395
+ # Route to appropriate reader based on file type
1396
+ # Note: offset/limit only apply to text files
1397
+ if self._is_binary_file(file_path):
1398
+ return self._read_binary_file(file_path)
1399
+
1400
+ if self._is_image_file(file_path):
1401
+ return self._read_image_file(file_path)
1402
+
1403
+ # Default: read as text with encoding detection
1404
+ encodings = ['utf-8', 'utf-8-sig', 'latin-1', 'cp1252']
1405
+
1406
+ for encoding in encodings:
1407
+ try:
1408
+ content = full_path.read_text(encoding=encoding)
1409
+
1410
+ # Apply line filtering if specified
1411
+ if offset is not None or limit is not None or head is not None or tail is not None:
1412
+ lines = content.splitlines(keepends=True)
1413
+
1414
+ if head is not None:
1415
+ # Read first N lines
1416
+ lines = lines[:head]
1417
+ elif tail is not None:
1418
+ # Read last N lines
1419
+ lines = lines[-tail:] if tail > 0 else []
1420
+ else:
1421
+ # Use offset/limit
1422
+ start_idx = (offset - 1) if offset and offset > 0 else 0
1423
+ if limit is not None:
1424
+ end_idx = start_idx + limit
1425
+ lines = lines[start_idx:end_idx]
1426
+ else:
1427
+ lines = lines[start_idx:]
1428
+
1429
+ content = ''.join(lines)
1430
+
1431
+ return content
1432
+
1433
+ except UnicodeDecodeError:
1434
+ continue
1435
+ except Exception as e:
1436
+ logger.warning(f"Failed to read {file_path}: {e}")
1437
+ return None
1438
+
1439
+ logger.warning(f"Could not decode {file_path} with any known encoding")
1440
+ return None
1441
+
1442
+ def read_file(
1443
+ self,
1444
+ file_path: str,
1445
+ offset: Optional[int] = None,
1446
+ limit: Optional[int] = None,
1447
+ head: Optional[int] = None,
1448
+ tail: Optional[int] = None,
1449
+ ) -> Optional[str]:
1450
+ """
1451
+ Public method to read file content with optional line range.
1452
+
1453
+ Args:
1454
+ file_path: Path relative to base_directory
1455
+ offset: Start line number (1-indexed)
1456
+ limit: Maximum number of lines to read
1457
+ head: Read only first N lines
1458
+ tail: Read only last N lines
1459
+
1460
+ Returns:
1461
+ File content as string
1462
+ """
1463
+ return self._read_file(file_path, offset=offset, limit=limit, head=head, tail=tail)
1464
+
1465
+ def loader(
1466
+ self,
1467
+ branch: Optional[str] = None,
1468
+ whitelist: Optional[List[str]] = None,
1469
+ blacklist: Optional[List[str]] = None,
1470
+ chunked: bool = True,
1471
+ ) -> Generator[Document, None, None]:
1472
+ """
1473
+ Load documents from the filesystem.
1474
+
1475
+ Mirrors BaseCodeToolApiWrapper.loader() interface for compatibility.
1476
+
1477
+ Args:
1478
+ branch: Ignored (kept for API compatibility with git-based loaders)
1479
+ whitelist: File patterns to include (e.g., ['*.py', 'src/**/*.js'])
1480
+ blacklist: File patterns to exclude (e.g., ['*test*', 'node_modules/**'])
1481
+ chunked: If True, applies universal chunker based on file type
1482
+
1483
+ Yields:
1484
+ Document objects with page_content and metadata
1485
+ """
1486
+ import glob as glob_module
1487
+
1488
+ base = Path(self.base_directory)
1489
+
1490
+ def is_blacklisted(file_path: str) -> bool:
1491
+ if not blacklist:
1492
+ return False
1493
+ return (
1494
+ any(fnmatch.fnmatch(file_path, p) for p in blacklist) or
1495
+ any(fnmatch.fnmatch(Path(file_path).name, p) for p in blacklist)
1496
+ )
1497
+
1498
+ # Optimization: Use glob directly when whitelist has path patterns
1499
+ # This avoids scanning 100K+ files in node_modules etc.
1500
+ def get_files_via_glob() -> Generator[str, None, None]:
1501
+ """Use glob patterns directly - much faster than scanning all files."""
1502
+ seen = set()
1503
+ for pattern in whitelist:
1504
+ # Handle glob patterns
1505
+ full_pattern = str(base / pattern)
1506
+ for match in glob_module.glob(full_pattern, recursive=True):
1507
+ match_path = Path(match)
1508
+ if match_path.is_file():
1509
+ try:
1510
+ rel_path = str(match_path.relative_to(base))
1511
+ if rel_path not in seen and not is_blacklisted(rel_path):
1512
+ seen.add(rel_path)
1513
+ yield rel_path
1514
+ except ValueError:
1515
+ continue
1516
+
1517
+ def get_files_via_scan() -> Generator[str, None, None]:
1518
+ """Fall back to scanning all files when no whitelist or simple extension patterns."""
1519
+ _files = self._get_files()
1520
+ self._log_tool_event(f"Found {len(_files)} files in {self.base_directory}", "loader")
1521
+
1522
+ def is_whitelisted(file_path: str) -> bool:
1523
+ if not whitelist:
1524
+ return True
1525
+ return (
1526
+ any(fnmatch.fnmatch(file_path, p) for p in whitelist) or
1527
+ any(fnmatch.fnmatch(Path(file_path).name, p) for p in whitelist) or
1528
+ any(file_path.endswith(f'.{p.lstrip("*.")}') for p in whitelist if p.startswith('*.'))
1529
+ )
1530
+
1531
+ for file_path in _files:
1532
+ if is_whitelisted(file_path) and not is_blacklisted(file_path):
1533
+ yield file_path
1534
+
1535
+ # Decide strategy: use glob if whitelist has path patterns (contains / or **)
1536
+ use_glob = whitelist and any('/' in p or '**' in p for p in whitelist)
1537
+
1538
+ if use_glob:
1539
+ self._log_tool_event(f"Using glob patterns: {whitelist}", "loader")
1540
+ file_iterator = get_files_via_glob()
1541
+ else:
1542
+ file_iterator = get_files_via_scan()
1543
+
1544
+ def raw_document_generator() -> Generator[Document, None, None]:
1545
+ self._log_tool_event("Reading files...", "loader")
1546
+ processed = 0
1547
+
1548
+ for file_path in file_iterator:
1549
+ content = self._read_file(file_path)
1550
+ if not content:
1551
+ continue
1552
+
1553
+ content_hash = hashlib.sha256(content.encode('utf-8')).hexdigest()
1554
+ processed += 1
1555
+
1556
+ yield Document(
1557
+ page_content=content,
1558
+ metadata={
1559
+ 'file_path': file_path,
1560
+ 'file_name': Path(file_path).name,
1561
+ 'source': file_path,
1562
+ 'commit_hash': content_hash,
1563
+ }
1564
+ )
1565
+
1566
+ # Log progress every 100 files
1567
+ if processed % 100 == 0:
1568
+ logger.debug(f"[loader] Read {processed} files...")
1569
+
1570
+ self._log_tool_event(f"Loaded {processed} files", "loader")
1571
+
1572
+ if not chunked:
1573
+ return raw_document_generator()
1574
+
1575
+ try:
1576
+ from alita_sdk.tools.chunkers.universal_chunker import universal_chunker
1577
+ return universal_chunker(raw_document_generator())
1578
+ except ImportError:
1579
+ logger.warning("Universal chunker not available, returning raw documents")
1580
+ return raw_document_generator()
1581
+
1582
+ def chunker(self, documents: Generator[Document, None, None]) -> Generator[Document, None, None]:
1583
+ """Apply universal chunker to documents."""
1584
+ try:
1585
+ from alita_sdk.tools.chunkers.universal_chunker import universal_chunker
1586
+ return universal_chunker(documents)
1587
+ except ImportError:
1588
+ return documents
1589
+
1590
+ def get_files_content(self, file_path: str) -> Optional[str]:
1591
+ """Get file content (compatibility alias for retrieval toolkit)."""
1592
+ return self._read_file(file_path)
757
1593
 
758
1594
 
759
1595
  # Predefined tool presets for common use cases
@@ -761,6 +1597,7 @@ FILESYSTEM_TOOL_PRESETS = {
761
1597
  'read_only': {
762
1598
  'exclude_tools': [
763
1599
  'filesystem_write_file',
1600
+ 'filesystem_append_file',
764
1601
  'filesystem_edit_file',
765
1602
  'filesystem_apply_patch',
766
1603
  'filesystem_delete_file',
@@ -775,6 +1612,7 @@ FILESYSTEM_TOOL_PRESETS = {
775
1612
  'include_tools': [
776
1613
  'filesystem_read_file',
777
1614
  'filesystem_write_file',
1615
+ 'filesystem_append_file',
778
1616
  'filesystem_list_directory',
779
1617
  'filesystem_create_directory',
780
1618
  ]
@@ -792,20 +1630,21 @@ def get_filesystem_tools(
792
1630
  base_directory: str,
793
1631
  include_tools: Optional[List[str]] = None,
794
1632
  exclude_tools: Optional[List[str]] = None,
795
- preset: Optional[str] = None
1633
+ preset: Optional[str] = None,
1634
+ allowed_directories: Optional[List[str]] = None
796
1635
  ) -> List[BaseTool]:
797
1636
  """
798
- Get filesystem tools for the specified base directory.
1637
+ Get filesystem tools for the specified directories.
799
1638
 
800
1639
  Args:
801
- base_directory: Absolute or relative path to the directory to restrict access to
1640
+ base_directory: Absolute or relative path to the primary directory to restrict access to
802
1641
  include_tools: Optional list of tool names to include. If provided, only these tools are returned.
803
1642
  If None, all tools are included (unless excluded).
804
1643
  exclude_tools: Optional list of tool names to exclude. Applied after include_tools.
805
1644
  preset: Optional preset name to use predefined tool sets. Presets:
806
1645
  - 'read_only': Excludes all write/modify operations
807
1646
  - 'no_delete': All tools except delete
808
- - 'basic': Read, write, list, create directory
1647
+ - 'basic': Read, write, append, list, create directory
809
1648
  - 'minimal': Only read and list
810
1649
  Note: If preset is used with include_tools or exclude_tools,
811
1650
  preset is applied first, then custom filters.
@@ -818,6 +1657,7 @@ def get_filesystem_tools(
818
1657
  - filesystem_read_file_chunk
819
1658
  - filesystem_read_multiple_files
820
1659
  - filesystem_write_file
1660
+ - filesystem_append_file (for incremental file creation)
821
1661
  - filesystem_edit_file
822
1662
  - filesystem_apply_patch
823
1663
  - filesystem_list_directory
@@ -847,6 +1687,10 @@ def get_filesystem_tools(
847
1687
  # Use preset and add custom exclusions
848
1688
  get_filesystem_tools('/path/to/dir', preset='read_only',
849
1689
  exclude_tools=['filesystem_search_files'])
1690
+
1691
+ # Multiple allowed directories
1692
+ get_filesystem_tools('/path/to/primary',
1693
+ allowed_directories=['/path/to/other1', '/path/to/other2'])
850
1694
  """
851
1695
  # Apply preset if specified
852
1696
  preset_include = None
@@ -870,25 +1714,27 @@ def get_filesystem_tools(
870
1714
  final_exclude.extend(exclude_tools)
871
1715
  final_exclude = list(set(final_exclude)) if final_exclude else None
872
1716
 
873
- # Resolve to absolute path
1717
+ # Resolve to absolute paths
874
1718
  base_dir = str(Path(base_directory).resolve())
1719
+ extra_dirs = [str(Path(d).resolve()) for d in (allowed_directories or [])]
875
1720
 
876
1721
  # Define all available tools with their names
877
1722
  all_tools = {
878
- 'filesystem_read_file': ReadFileTool(base_directory=base_dir),
879
- 'filesystem_read_file_chunk': ReadFileChunkTool(base_directory=base_dir),
880
- 'filesystem_read_multiple_files': ReadMultipleFilesTool(base_directory=base_dir),
881
- 'filesystem_write_file': WriteFileTool(base_directory=base_dir),
882
- 'filesystem_edit_file': EditFileTool(base_directory=base_dir),
883
- 'filesystem_apply_patch': ApplyPatchTool(base_directory=base_dir),
884
- 'filesystem_list_directory': ListDirectoryTool(base_directory=base_dir),
885
- 'filesystem_directory_tree': DirectoryTreeTool(base_directory=base_dir),
886
- 'filesystem_search_files': SearchFilesTool(base_directory=base_dir),
887
- 'filesystem_delete_file': DeleteFileTool(base_directory=base_dir),
888
- 'filesystem_move_file': MoveFileTool(base_directory=base_dir),
889
- 'filesystem_create_directory': CreateDirectoryTool(base_directory=base_dir),
890
- 'filesystem_get_file_info': GetFileInfoTool(base_directory=base_dir),
891
- 'filesystem_list_allowed_directories': ListAllowedDirectoriesTool(base_directory=base_dir),
1723
+ 'filesystem_read_file': ReadFileTool(base_directory=base_dir, allowed_directories=extra_dirs),
1724
+ 'filesystem_read_file_chunk': ReadFileChunkTool(base_directory=base_dir, allowed_directories=extra_dirs),
1725
+ 'filesystem_read_multiple_files': ReadMultipleFilesTool(base_directory=base_dir, allowed_directories=extra_dirs),
1726
+ 'filesystem_write_file': WriteFileTool(base_directory=base_dir, allowed_directories=extra_dirs),
1727
+ 'filesystem_append_file': AppendFileTool(base_directory=base_dir, allowed_directories=extra_dirs),
1728
+ 'filesystem_edit_file': EditFileTool(base_directory=base_dir, allowed_directories=extra_dirs),
1729
+ 'filesystem_apply_patch': ApplyPatchTool(base_directory=base_dir, allowed_directories=extra_dirs),
1730
+ 'filesystem_list_directory': ListDirectoryTool(base_directory=base_dir, allowed_directories=extra_dirs),
1731
+ 'filesystem_directory_tree': DirectoryTreeTool(base_directory=base_dir, allowed_directories=extra_dirs),
1732
+ 'filesystem_search_files': SearchFilesTool(base_directory=base_dir, allowed_directories=extra_dirs),
1733
+ 'filesystem_delete_file': DeleteFileTool(base_directory=base_dir, allowed_directories=extra_dirs),
1734
+ 'filesystem_move_file': MoveFileTool(base_directory=base_dir, allowed_directories=extra_dirs),
1735
+ 'filesystem_create_directory': CreateDirectoryTool(base_directory=base_dir, allowed_directories=extra_dirs),
1736
+ 'filesystem_get_file_info': GetFileInfoTool(base_directory=base_dir, allowed_directories=extra_dirs),
1737
+ 'filesystem_list_allowed_directories': ListAllowedDirectoriesTool(base_directory=base_dir, allowed_directories=extra_dirs),
892
1738
  }
893
1739
 
894
1740
  # Start with all tools or only included ones