@burtson-labs/agent-core 1.6.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (195) hide show
  1. package/LICENSE +201 -0
  2. package/README.md +88 -0
  3. package/dist/index.d.ts +16 -0
  4. package/dist/index.d.ts.map +1 -0
  5. package/dist/index.js +52 -0
  6. package/dist/index.js.map +1 -0
  7. package/dist/mcp/activation.d.ts +60 -0
  8. package/dist/mcp/activation.d.ts.map +1 -0
  9. package/dist/mcp/activation.js +139 -0
  10. package/dist/mcp/activation.js.map +1 -0
  11. package/dist/mcp/clientPool.d.ts +202 -0
  12. package/dist/mcp/clientPool.d.ts.map +1 -0
  13. package/dist/mcp/clientPool.js +469 -0
  14. package/dist/mcp/clientPool.js.map +1 -0
  15. package/dist/mcp/index.d.ts +18 -0
  16. package/dist/mcp/index.d.ts.map +1 -0
  17. package/dist/mcp/index.js +28 -0
  18. package/dist/mcp/index.js.map +1 -0
  19. package/dist/mcp/server.d.ts +43 -0
  20. package/dist/mcp/server.d.ts.map +1 -0
  21. package/dist/mcp/server.js +130 -0
  22. package/dist/mcp/server.js.map +1 -0
  23. package/dist/mcp/toolAdapter.d.ts +57 -0
  24. package/dist/mcp/toolAdapter.d.ts.map +1 -0
  25. package/dist/mcp/toolAdapter.js +223 -0
  26. package/dist/mcp/toolAdapter.js.map +1 -0
  27. package/dist/mcp/types.d.ts +122 -0
  28. package/dist/mcp/types.d.ts.map +1 -0
  29. package/dist/mcp/types.js +15 -0
  30. package/dist/mcp/types.js.map +1 -0
  31. package/dist/providers/deterministic-provider.d.ts +21 -0
  32. package/dist/providers/deterministic-provider.d.ts.map +1 -0
  33. package/dist/providers/deterministic-provider.js +80 -0
  34. package/dist/providers/deterministic-provider.js.map +1 -0
  35. package/dist/providers/provider-client.d.ts +12 -0
  36. package/dist/providers/provider-client.d.ts.map +1 -0
  37. package/dist/providers/provider-client.js +11 -0
  38. package/dist/providers/provider-client.js.map +1 -0
  39. package/dist/runtime/AgentRuntime.d.ts +67 -0
  40. package/dist/runtime/AgentRuntime.d.ts.map +1 -0
  41. package/dist/runtime/AgentRuntime.js +382 -0
  42. package/dist/runtime/AgentRuntime.js.map +1 -0
  43. package/dist/security/secretPatterns.d.ts +76 -0
  44. package/dist/security/secretPatterns.d.ts.map +1 -0
  45. package/dist/security/secretPatterns.js +290 -0
  46. package/dist/security/secretPatterns.js.map +1 -0
  47. package/dist/tools/ask-user-tool.d.ts +19 -0
  48. package/dist/tools/ask-user-tool.d.ts.map +1 -0
  49. package/dist/tools/ask-user-tool.js +148 -0
  50. package/dist/tools/ask-user-tool.js.map +1 -0
  51. package/dist/tools/compactMessages.d.ts +52 -0
  52. package/dist/tools/compactMessages.d.ts.map +1 -0
  53. package/dist/tools/compactMessages.js +158 -0
  54. package/dist/tools/compactMessages.js.map +1 -0
  55. package/dist/tools/core-tools.d.ts +29 -0
  56. package/dist/tools/core-tools.d.ts.map +1 -0
  57. package/dist/tools/core-tools.js +2214 -0
  58. package/dist/tools/core-tools.js.map +1 -0
  59. package/dist/tools/git-tools.d.ts +32 -0
  60. package/dist/tools/git-tools.d.ts.map +1 -0
  61. package/dist/tools/git-tools.js +330 -0
  62. package/dist/tools/git-tools.js.map +1 -0
  63. package/dist/tools/index.d.ts +15 -0
  64. package/dist/tools/index.d.ts.map +1 -0
  65. package/dist/tools/index.js +31 -0
  66. package/dist/tools/index.js.map +1 -0
  67. package/dist/tools/language-adapters.d.ts +48 -0
  68. package/dist/tools/language-adapters.d.ts.map +1 -0
  69. package/dist/tools/language-adapters.js +299 -0
  70. package/dist/tools/language-adapters.js.map +1 -0
  71. package/dist/tools/loop/compactionTrigger.d.ts +47 -0
  72. package/dist/tools/loop/compactionTrigger.d.ts.map +1 -0
  73. package/dist/tools/loop/compactionTrigger.js +32 -0
  74. package/dist/tools/loop/compactionTrigger.js.map +1 -0
  75. package/dist/tools/loop/finalAnswerNudges.d.ts +68 -0
  76. package/dist/tools/loop/finalAnswerNudges.d.ts.map +1 -0
  77. package/dist/tools/loop/finalAnswerNudges.js +87 -0
  78. package/dist/tools/loop/finalAnswerNudges.js.map +1 -0
  79. package/dist/tools/loop/goalAnchor.d.ts +72 -0
  80. package/dist/tools/loop/goalAnchor.d.ts.map +1 -0
  81. package/dist/tools/loop/goalAnchor.js +76 -0
  82. package/dist/tools/loop/goalAnchor.js.map +1 -0
  83. package/dist/tools/loop/llmStream.d.ts +70 -0
  84. package/dist/tools/loop/llmStream.d.ts.map +1 -0
  85. package/dist/tools/loop/llmStream.js +181 -0
  86. package/dist/tools/loop/llmStream.js.map +1 -0
  87. package/dist/tools/loop/parallelExecute.d.ts +57 -0
  88. package/dist/tools/loop/parallelExecute.d.ts.map +1 -0
  89. package/dist/tools/loop/parallelExecute.js +54 -0
  90. package/dist/tools/loop/parallelExecute.js.map +1 -0
  91. package/dist/tools/loop/singleToolExecute.d.ts +71 -0
  92. package/dist/tools/loop/singleToolExecute.d.ts.map +1 -0
  93. package/dist/tools/loop/singleToolExecute.js +139 -0
  94. package/dist/tools/loop/singleToolExecute.js.map +1 -0
  95. package/dist/tools/loop/toolCallNormalize.d.ts +57 -0
  96. package/dist/tools/loop/toolCallNormalize.d.ts.map +1 -0
  97. package/dist/tools/loop/toolCallNormalize.js +99 -0
  98. package/dist/tools/loop/toolCallNormalize.js.map +1 -0
  99. package/dist/tools/loop/turnSetup.d.ts +43 -0
  100. package/dist/tools/loop/turnSetup.d.ts.map +1 -0
  101. package/dist/tools/loop/turnSetup.js +48 -0
  102. package/dist/tools/loop/turnSetup.js.map +1 -0
  103. package/dist/tools/ocr.d.ts +52 -0
  104. package/dist/tools/ocr.d.ts.map +1 -0
  105. package/dist/tools/ocr.js +238 -0
  106. package/dist/tools/ocr.js.map +1 -0
  107. package/dist/tools/post-edit-checks.d.ts +46 -0
  108. package/dist/tools/post-edit-checks.d.ts.map +1 -0
  109. package/dist/tools/post-edit-checks.js +236 -0
  110. package/dist/tools/post-edit-checks.js.map +1 -0
  111. package/dist/tools/skill-loader.d.ts +94 -0
  112. package/dist/tools/skill-loader.d.ts.map +1 -0
  113. package/dist/tools/skill-loader.js +422 -0
  114. package/dist/tools/skill-loader.js.map +1 -0
  115. package/dist/tools/skill-registry.d.ts +44 -0
  116. package/dist/tools/skill-registry.d.ts.map +1 -0
  117. package/dist/tools/skill-registry.js +118 -0
  118. package/dist/tools/skill-registry.js.map +1 -0
  119. package/dist/tools/skill-types.d.ts +38 -0
  120. package/dist/tools/skill-types.d.ts.map +1 -0
  121. package/dist/tools/skill-types.js +10 -0
  122. package/dist/tools/skill-types.js.map +1 -0
  123. package/dist/tools/skills/code-review-skill.d.ts +9 -0
  124. package/dist/tools/skills/code-review-skill.d.ts.map +1 -0
  125. package/dist/tools/skills/code-review-skill.js +66 -0
  126. package/dist/tools/skills/code-review-skill.js.map +1 -0
  127. package/dist/tools/skills/core-skill.d.ts +13 -0
  128. package/dist/tools/skills/core-skill.d.ts.map +1 -0
  129. package/dist/tools/skills/core-skill.js +23 -0
  130. package/dist/tools/skills/core-skill.js.map +1 -0
  131. package/dist/tools/skills/git-skill.d.ts +10 -0
  132. package/dist/tools/skills/git-skill.d.ts.map +1 -0
  133. package/dist/tools/skills/git-skill.js +30 -0
  134. package/dist/tools/skills/git-skill.js.map +1 -0
  135. package/dist/tools/skills/index.d.ts +17 -0
  136. package/dist/tools/skills/index.d.ts.map +1 -0
  137. package/dist/tools/skills/index.js +49 -0
  138. package/dist/tools/skills/index.js.map +1 -0
  139. package/dist/tools/skills/interaction-skill.d.ts +14 -0
  140. package/dist/tools/skills/interaction-skill.d.ts.map +1 -0
  141. package/dist/tools/skills/interaction-skill.js +24 -0
  142. package/dist/tools/skills/interaction-skill.js.map +1 -0
  143. package/dist/tools/skills/mail-search-skill.d.ts +25 -0
  144. package/dist/tools/skills/mail-search-skill.d.ts.map +1 -0
  145. package/dist/tools/skills/mail-search-skill.js +343 -0
  146. package/dist/tools/skills/mail-search-skill.js.map +1 -0
  147. package/dist/tools/skills/plan-skill.d.ts +10 -0
  148. package/dist/tools/skills/plan-skill.d.ts.map +1 -0
  149. package/dist/tools/skills/plan-skill.js +126 -0
  150. package/dist/tools/skills/plan-skill.js.map +1 -0
  151. package/dist/tools/skills/semantic-search-skill.d.ts +22 -0
  152. package/dist/tools/skills/semantic-search-skill.d.ts.map +1 -0
  153. package/dist/tools/skills/semantic-search-skill.js +244 -0
  154. package/dist/tools/skills/semantic-search-skill.js.map +1 -0
  155. package/dist/tools/skills/test-gen-skill.d.ts +9 -0
  156. package/dist/tools/skills/test-gen-skill.d.ts.map +1 -0
  157. package/dist/tools/skills/test-gen-skill.js +123 -0
  158. package/dist/tools/skills/test-gen-skill.js.map +1 -0
  159. package/dist/tools/tool-registry.d.ts +60 -0
  160. package/dist/tools/tool-registry.d.ts.map +1 -0
  161. package/dist/tools/tool-registry.js +200 -0
  162. package/dist/tools/tool-registry.js.map +1 -0
  163. package/dist/tools/tool-types.d.ts +281 -0
  164. package/dist/tools/tool-types.d.ts.map +1 -0
  165. package/dist/tools/tool-types.js +10 -0
  166. package/dist/tools/tool-types.js.map +1 -0
  167. package/dist/tools/tool-use-loop.d.ts +231 -0
  168. package/dist/tools/tool-use-loop.d.ts.map +1 -0
  169. package/dist/tools/tool-use-loop.js +2057 -0
  170. package/dist/tools/tool-use-loop.js.map +1 -0
  171. package/dist/tools/tool-use-parser.d.ts +78 -0
  172. package/dist/tools/tool-use-parser.d.ts.map +1 -0
  173. package/dist/tools/tool-use-parser.js +427 -0
  174. package/dist/tools/tool-use-parser.js.map +1 -0
  175. package/dist/tools/toolAvailabilityDetector.d.ts +48 -0
  176. package/dist/tools/toolAvailabilityDetector.d.ts.map +1 -0
  177. package/dist/tools/toolAvailabilityDetector.js +156 -0
  178. package/dist/tools/toolAvailabilityDetector.js.map +1 -0
  179. package/dist/tools/unified-patch.d.ts +87 -0
  180. package/dist/tools/unified-patch.d.ts.map +1 -0
  181. package/dist/tools/unified-patch.js +217 -0
  182. package/dist/tools/unified-patch.js.map +1 -0
  183. package/dist/types/agent.d.ts +69 -0
  184. package/dist/types/agent.d.ts.map +1 -0
  185. package/dist/types/agent.js +54 -0
  186. package/dist/types/agent.js.map +1 -0
  187. package/dist/types/tasks.d.ts +22 -0
  188. package/dist/types/tasks.d.ts.map +1 -0
  189. package/dist/types/tasks.js +3 -0
  190. package/dist/types/tasks.js.map +1 -0
  191. package/dist/utils/event-emitter.d.ts +13 -0
  192. package/dist/utils/event-emitter.d.ts.map +1 -0
  193. package/dist/utils/event-emitter.js +54 -0
  194. package/dist/utils/event-emitter.js.map +1 -0
  195. package/package.json +33 -0
@@ -0,0 +1,2214 @@
1
+ "use strict";
2
+ /**
3
+ * Core agent tools: read_file, write_file, apply_edit, replace_range, list_files, search_code, run_command.
4
+ *
5
+ * All tools delegate to the injected ToolExecutionContext — no direct
6
+ * dependency on Node.js APIs, VS Code, or any specific host.
7
+ */
8
+ Object.defineProperty(exports, "__esModule", { value: true });
9
+ exports.watchCommandTool = exports.runCommandTool = exports.searchCodeTool = exports.findDirectoryTool = exports.lsTool = exports.listFilesTool = exports.applyPatchTool = exports.replaceRangeTool = exports.applyEditTool = exports.deleteFileTool = exports.writeFileTool = exports.readFileTool = void 0;
10
+ exports.createCoreToolRegistry = createCoreToolRegistry;
11
+ const post_edit_checks_1 = require("./post-edit-checks");
12
+ const tool_registry_1 = require("./tool-registry");
13
+ const unified_patch_1 = require("./unified-patch");
14
// Output budgets, in characters, applied to tool results before they are
// handed back to the model.
const MAX_FILE_CHARS = 80_000; // read_file output cap (~20k tokens)
const MAX_SEARCH_CHARS = 16_000; // search result cap (~4k tokens)
const MAX_COMMAND_CHARS = 8_000; // command output cap
17
+ /**
18
+ * Cross-platform "is this an absolute path?" check. POSIX-only callers
19
+ * used `startsWith('/')` which silently misclassifies Windows absolute
20
+ * paths (`C:\foo`, `\\server\share`) as relative — they then get
21
+ * concatenated onto `workspaceRoot` and the resulting path looks like
22
+ * `C:\Users\…\workspace/C:\Users\…\target`. Centralizing here so every
23
+ * core tool resolves paths the same way on every platform.
24
+ */
25
/**
 * Post-write syntactic validation. Runs AFTER write_file / apply_edit /
 * replace_range has already saved the file — the goal is to inject feedback
 * into the agent's next turn ("you wrote invalid JSON, fix it") rather than
 * blocking the write itself.
 *
 * Dispatches on the file extension (query strings / fragments stripped,
 * case-insensitive). Currently only `.json` / `.jsonc` are checked; every
 * other extension is accepted without inspection.
 *
 * @param {string} absolutePath Path of the file that was just written.
 * @param {string} content The content that was written.
 * @returns {string|null} `null` when the content is valid or the format is
 *   not validated; otherwise a short diagnostic to append to the tool result.
 */
function validatePostWrite(absolutePath, content) {
    // Strip query strings / fragments that can ride along on paths and
    // lower-case the extension before dispatch.
    const cleanPath = absolutePath.split(/[?#]/, 1)[0];
    const ext = (cleanPath.match(/\.([A-Za-z0-9]+)$/)?.[1] ?? '').toLowerCase();
    switch (ext) {
        case 'json':
        case 'jsonc': {
            // Some hosts prepend a UTF-8 BOM to written files. JSON.parse
            // rejects a leading U+FEFF, so strip it (and leading whitespace)
            // and parse the cleaned text — not the raw content.
            // NOTE(review): JSON.parse also rejects comments, so a .jsonc
            // file that actually contains comments will be reported here.
            const trimmed = content.replace(/^\uFEFF/, '').trimStart();
            if (trimmed.length === 0)
                return null; // empty file is treated as valid
            try {
                JSON.parse(trimmed);
                return null;
            }
            catch (err) {
                const msg = err instanceof Error ? err.message : String(err);
                return `⚠️ Post-edit JSON validation failed: ${msg}. The file was saved as-is — fix the JSON shape on your next turn.`;
            }
        }
        default:
            return null;
    }
}
70
/**
 * Cross-platform check for paths that must NOT be joined onto the workspace
 * root: POSIX absolute paths, home-relative `~` paths, Windows drive paths
 * and UNC shares.
 *
 * @param {string} p Path as supplied by the caller.
 * @returns {boolean} True when `p` should be used as-is.
 */
function isAbsolutePath(p) {
    const isPosixOrHome = p.startsWith('/') || p.startsWith('~');
    const isDrivePath = /^[A-Za-z]:[\\/]/.test(p); // C:\foo or C:/foo
    const isUncShare = p.startsWith('\\\\'); // \\server\share
    return isPosixOrHome || isDrivePath || isUncShare;
}
79
/**
 * Cap `text` at `max` characters, appending a marker that names the tool
 * (`label`) and how many characters were dropped.
 *
 * @param {string} text Text to cap.
 * @param {number} max Maximum number of characters to keep.
 * @param {string} label Tag used in the truncation marker.
 * @returns {string} `text` unchanged when it fits, otherwise the capped text
 *   followed by the marker.
 */
function truncate(text, max, label) {
    const omitted = text.length - max;
    if (omitted <= 0) {
        return text;
    }
    const kept = text.slice(0, max);
    return `${kept}\n\n[${label}: truncated — ${omitted} chars omitted]`;
}
84
/**
 * 32-bit FNV-1a hash over the UTF-16 code units of `text`, rendered as
 * eight lower-case hex digits.
 *
 * @param {string} text Text to hash.
 * @returns {string} Zero-padded 8-character hex digest.
 */
function stableContentHash(text) {
    const OFFSET_BASIS = 0x811c9dc5;
    const PRIME = 0x01000193;
    let acc = OFFSET_BASIS;
    let pos = 0;
    while (pos < text.length) {
        // XOR in the code unit, then multiply with 32-bit wraparound.
        acc = Math.imul(acc ^ text.charCodeAt(pos), PRIME);
        pos += 1;
    }
    const unsigned = acc >>> 0;
    return unsigned.toString(16).padStart(8, '0');
}
92
/**
 * Split text into lines using the line ending it appears to use: CRLF when
 * the text contains at least one `\r\n`, LF otherwise.
 *
 * @param {string} text Text to split.
 * @returns {{lines: string[], eol: string}} The lines and the separator used.
 */
function splitTextLines(text) {
    let eol = '\n';
    if (text.includes('\r\n')) {
        eol = '\r\n';
    }
    const lines = text.split(eol);
    return { lines, eol };
}
96
+ // ── read_file ──────────────────────────────────────────────────────────────────
97
+ // File extensions we KNOW are binary/archive. Refuse early with a helpful
98
+ // pointer instead of dumping 200 KB of garbled UTF-8 at the model (which
99
+ // burns context and leads to hallucination).
100
// Lower-cased extensions (leading dot included) that read_file refuses up
// front, grouped by family for readability.
const BINARY_EXTENSIONS = new Set([
    // documents
    ...['.pdf', '.pages', '.docx', '.xlsx', '.pptx', '.key', '.numbers'],
    // archives
    ...['.zip', '.tar', '.gz', '.bz2', '.7z', '.rar'],
    // images
    ...['.png', '.jpg', '.jpeg', '.gif', '.webp', '.bmp', '.ico', '.heic'],
    // audio / video
    ...['.mp3', '.mp4', '.mov', '.wav', '.flac', '.ogg'],
    // executables and opaque data
    ...['.exe', '.dll', '.so', '.dylib', '.bin', '.dat', '.db', '.sqlite']
]);
107
/**
 * Build the two-line refusal shown when read_file is pointed at a binary
 * format: a generic explanation followed by a format-specific pointer (or a
 * generic fallback when the extension has no dedicated hint).
 *
 * @param {string} ext Lower-cased extension including the leading dot.
 * @param {string} relPath Path as the caller supplied it (used for display).
 * @returns {string} The refusal message.
 */
function binaryRefusalMessage(ext, relPath) {
    const formatHints = {
        '.pdf': 'Use the `read_pdf` tool to extract the text content (host-provided, uses pdf-parse).',
        '.pages': 'Apple Pages documents are zipped XML bundles. Ask the user to export to PDF or DOCX first.',
        '.docx': 'Microsoft Word documents are zipped XML. Not yet supported for direct text extraction.',
        '.xlsx': 'Microsoft Excel files are zipped XML. Not yet supported for direct text extraction.',
        '.pptx': 'Microsoft PowerPoint files are zipped XML. Not yet supported for direct text extraction.'
    };
    const explanation = `"${relPath}" is a ${ext} file — its bytes are not plain text and cannot be read as UTF-8.`;
    const pointer = formatHints[ext] ?? 'Not a text format — ask the user what they want extracted or converted.';
    return [explanation, pointer].join('\n');
}
119
/**
 * read_file tool: returns a file's text with line-number prefixes and a
 * `shown_hash` for the displayed range, optionally paginated via
 * `offset` / `limit`. Known binary extensions and content that looks binary
 * are refused with an explanatory error instead of being dumped.
 */
const readFileTool = {
    name: 'read_file',
    description: 'Read the text content of a file with line numbers and a shown_hash for the displayed range. For files larger than ~600 lines, paginate with `offset` (1-based start line) and `limit` (number of lines). Common pattern: read_file(path) first, then if the result is truncated or oversized, follow up with read_file(path, offset=N, limit=120) for the next chunk. When replacing a large displayed block, pass the shown_hash to replace_range.expected_hash. For PDFs use `read_pdf` instead — this tool cannot decode binary formats.',
    parameters: [
        { name: 'path', description: 'File path. Relative paths resolve against the workspace root (e.g. "src/index.ts"). Absolute paths are also accepted (e.g. "/Users/name/Desktop/notes.md", "/etc/hosts").', required: true },
        { name: 'offset', description: 'Optional 1-based start line. When set, only lines from this position onward are returned. Use for paginating large files (e.g. offset=200 to start at line 200).' },
        { name: 'limit', description: 'Optional max number of lines to return starting at `offset` (or line 1 when offset is omitted). Default is "all remaining lines, capped by the global byte budget".' }
    ],
    async execute(params, ctx) {
        const relPath = params.path?.trim();
        if (!relPath) {
            return { output: 'Error: path parameter is required', isError: true };
        }
        // Check the extension before reading anything so a known binary
        // format is refused without decoding it.
        const dotAt = relPath.lastIndexOf('.');
        const ext = dotAt < 0 ? '' : relPath.slice(dotAt).toLowerCase();
        if (BINARY_EXTENSIONS.has(ext)) {
            return { output: binaryRefusalMessage(ext, relPath), isError: true };
        }
        // Absolute and "~" paths go to the host untouched (the host expands
        // "~"); everything else is resolved against the workspace root.
        let absPath = relPath;
        if (!isAbsolutePath(relPath)) {
            absPath = `${ctx.workspaceRoot}/${relPath}`;
        }
        try {
            const content = await ctx.readFile(absPath);
            // Content sniff: a high share of control characters in the first
            // 4096 characters means the file is effectively binary even
            // though its extension is not on the refusal list.
            const sample = content.slice(0, 4096);
            // eslint-disable-next-line no-control-regex
            const controlHits = sample.match(/[\u0000-\u0008\u000E-\u001F]/g);
            const nonPrintable = controlHits === null ? 0 : controlHits.length;
            if (sample.length > 0 && nonPrintable / sample.length > 0.1) {
                const percent = Math.round((nonPrintable / sample.length) * 100);
                return {
                    output: `"${relPath}" appears to be binary (${percent}% non-printable bytes in the first 4 KB). Skipping the raw byte dump. If this is a known format, there may be a dedicated extraction tool.`,
                    isError: true
                };
            }
            const { lines: allLines, eol } = splitTextLines(content);
            const totalLines = allLines.length;
            // Pagination inputs are optional; anything that does not parse
            // to a positive integer falls back to "from line 1" / "all lines".
            const requestedOffset = parseInt(params.offset ?? '', 10);
            const requestedLimit = parseInt(params.limit ?? '', 10);
            const startLine = Number.isFinite(requestedOffset) && requestedOffset > 0 ? requestedOffset : 1;
            const limit = Number.isFinite(requestedLimit) && requestedLimit > 0 ? requestedLimit : totalLines;
            const startIdx = Math.min(totalLines, startLine - 1);
            const endIdx = Math.min(totalLines, startIdx + limit);
            const slice = allLines.slice(startIdx, endIdx);
            const isPaginated = startLine > 1 || endIdx < totalLines;
            const remaining = totalLines - endIdx;
            // The hash covers the raw slice (joined with the file's own line
            // ending), not the numbered rendering.
            const shownHash = stableContentHash(slice.join(eol));
            // The header spells out that the `<num> │ ` prefix is display-only
            // so models do not paste it into apply_edit `find` strings.
            const headerSuffix = ` · shown_hash=${shownHash} · \`<num> │ \` prefix is display-only, not part of the file`;
            let header;
            if (isPaginated) {
                header = `File: ${relPath} (${totalLines} lines total — showing ${startIdx + 1}-${endIdx}${headerSuffix})`;
            }
            else {
                header = `File: ${relPath} (${totalLines} lines${headerSuffix})`;
            }
            // Real (1-based) line numbers are kept so follow-up calls can use
            // accurate offsets.
            const numbered = slice
                .map((line, i) => `${String(startIdx + i + 1).padStart(4, ' ')} │ ${line}`)
                .join('\n');
            // When lines remain, spell out the exact follow-up call.
            let moreHint = '';
            if (remaining > 0) {
                moreHint = `\n\n[read_file: ${remaining} more lines remain. Next chunk: read_file(path="${relPath}", offset=${endIdx + 1}, limit=${Math.min(120, remaining)})]`;
            }
            const output = `${header}\n\n${numbered}${moreHint}`;
            // Record the read (when the host tracks reads) so later edits to
            // this file pass the read-before-edit guard. Partial reads count.
            ctx.markFileRead?.(absPath);
            return { output: truncate(output, MAX_FILE_CHARS, 'read_file') };
        }
        catch (err) {
            const msg = err instanceof Error ? err.message : String(err);
            // A directory passed to read_file: turn the raw error into an
            // explicit pointer at `ls` so the model can recover on its own.
            if (/EISDIR|is a directory|illegal operation on a directory/i.test(msg)) {
                const manifestPath = relPath === '.' ? 'package.json' : `${relPath}/package.json`;
                return {
                    output: `"${relPath}" is a directory, not a file. Use \`ls(path="${relPath}")\` to list its contents, or read a specific file inside it (for project discovery, try \`read_file(path="${manifestPath}")\` for JS projects, \`Cargo.toml\` for Rust, \`pyproject.toml\` for Python, \`go.mod\` for Go).`,
                    isError: true
                };
            }
            return { output: `Error reading file "${relPath}": ${msg}`, isError: true };
        }
    }
};
223
+ exports.readFileTool = readFileTool;
224
+ // ── write_file ─────────────────────────────────────────────────────────────────
225
/**
 * write_file tool: creates or overwrites a file with the supplied content.
 * Overwrites of files the model has not read are rejected (when the host
 * tracks reads), language-adapter validation runs before the write, and
 * post-write checks are appended to the confirmation as warnings.
 */
const writeFileTool = {
    name: 'write_file',
    description: 'Write content to a file, creating it if it does not exist or overwriting it if it does. Returns a confirmation with a line count.',
    parameters: [
        { name: 'path', description: 'File path relative to the workspace root', required: true },
        { name: 'content', description: 'The complete new content for the file', required: true }
    ],
    async execute(params, ctx) {
        const relPath = params.path?.trim();
        const content = params.content;
        if (!relPath) {
            return { output: 'Error: path parameter is required', isError: true };
        }
        if (content === undefined || content === null) {
            return { output: 'Error: content parameter is required', isError: true };
        }
        // Absolute and "~" paths are passed to the host as-is; only plain
        // relative paths are joined onto the workspace root.
        let absPath = relPath;
        if (!isAbsolutePath(relPath)) {
            absPath = `${ctx.workspaceRoot}/${relPath}`;
        }
        // Read-before-edit guard: only applies when the host tracks reads and
        // this path has not been read. A successful probe read means the file
        // exists, i.e. this would be a blind overwrite. The probe deliberately
        // does NOT mark the file as read — the model still has not seen it.
        if (ctx.hasFileBeenRead && !ctx.hasFileBeenRead(absPath)) {
            let alreadyOnDisk = true;
            try {
                await ctx.readFile(absPath);
            }
            catch {
                alreadyOnDisk = false;
            }
            if (alreadyOnDisk) {
                return {
                    output: `write_file rejected for "${relPath}": this file already exists but you have not read it in this conversation. Overwriting blind would clobber whatever is there. Call read_file("${relPath}") first to inspect the current contents, then retry the write. (For targeted edits to an existing file, prefer apply_edit for small changes or replace_range for larger line-numbered blocks.)`,
                    isError: true
                };
            }
        }
        try {
            // Pre-write validation through the configured language adapters.
            // A failing result blocks the write unless the file was already
            // failing and this write introduced no new errors.
            const adapters = ctx.languageAdapters;
            if (adapters) {
                const validation = await adapters.validate(absPath, content, ctx);
                if (!validation.ok) {
                    // Stays undefined when the file cannot be read (it is
                    // being created) or when the old content validated clean.
                    let beforeError;
                    try {
                        const existing = await ctx.readFile(absPath);
                        const beforeValidation = await adapters.validate(absPath, existing, ctx);
                        beforeError = beforeValidation.error;
                    }
                    catch {
                        beforeError = undefined;
                    }
                    const mustBlock = beforeError === undefined || introducedNewErrors(beforeError, validation.error);
                    if (mustBlock) {
                        return {
                            output: `Validation failed for "${relPath}":\n${validation.error}\n\nThe file was NOT written. Fix the errors and retry.`,
                            isError: true
                        };
                    }
                }
            }
            await ctx.writeFile(absPath, content);
            const lineCount = content.split('\n').length;
            // The confirmation tells the model not to echo the file back.
            const baseMessage = `Wrote ${lineCount} lines to ${relPath}. File saved. Do not restate the file contents — the user can see the diff. Move on to the next pending task or reply with a brief summary if the work is complete.`;
            const validationWarning = validatePostWrite(absPath, content);
            // The type check is best-effort: a rejected check is treated as
            // "no warning" rather than failing the write that already landed.
            const postEditCheck = await (0, post_edit_checks_1.runPostEditTypeCheck)(absPath, ctx).catch(() => ({ newErrorCount: 0, warning: undefined }));
            const trailers = [validationWarning, postEditCheck.warning].filter(Boolean).join('\n\n');
            if (trailers) {
                return { output: `${baseMessage}\n\n${trailers}` };
            }
            return { output: baseMessage };
        }
        catch (err) {
            const reason = err instanceof Error ? err.message : String(err);
            return { output: `Error writing file "${relPath}": ${reason}`, isError: true };
        }
    }
};
315
+ exports.writeFileTool = writeFileTool;
316
+ // ── delete_file ────────────────────────────────────────────────────────────────
317
+ //
318
+ // Standalone file deletion. Exists because `rm` isn't in the run_command
319
+ // allow-list (and shouldn't be — its arg surface is too broad to reason
320
+ // about) and the agent reaching for `rm` left it stranded on cleanup
321
+ // tasks. `apply_patch` with a `*** Delete File:` block does the same job
322
+ // but isn't discoverable by name; this tool is. Routes through the host's
323
+ // `ctx.deleteFile` (workspace-contained `fs.unlink` on Node hosts) and
324
+ // surfaces the per-call permission gate like any other mutation tool.
325
/**
 * delete_file tool: removes a single file through the host's
 * `ctx.deleteFile`, then runs the post-edit type check so breakage caused by
 * the deletion is reported in the same tool result.
 *
 * Returns an error result (never throws) when the path is missing, the host
 * does not expose `ctx.deleteFile`, or the delete itself fails.
 */
const deleteFileTool = {
    name: 'delete_file',
    description: 'Permanently delete a file from the workspace. Use this for cleanup tasks (unused components, orphaned templates, dead scripts) instead of run_command("rm ..."). Path must be inside the workspace; the host rejects deletions outside the workspace root. The per-call permission gate still prompts before the delete fires.',
    parameters: [
        { name: 'path', description: 'File path relative to the workspace root (or absolute, but must be inside the workspace).', required: true }
    ],
    async execute(params, ctx) {
        const relPath = params.path?.trim();
        if (!relPath)
            return { output: 'Error: path parameter is required', isError: true };
        const absPath = isAbsolutePath(relPath)
            ? relPath
            : `${ctx.workspaceRoot}/${relPath}`;
        if (typeof ctx.deleteFile !== 'function') {
            // Older host without deleteFile wiring. Don't fall back to
            // blanking the file — silent 0-byte ghosts are worse than a
            // clear error the model can react to.
            return {
                output: `delete_file is not supported by this host. Use run_command("rm ${relPath.replace(/"/g, '\\"')}") instead, or upgrade the host to expose ctx.deleteFile.`,
                isError: true
            };
        }
        try {
            await ctx.deleteFile(absPath);
        }
        catch (err) {
            return {
                output: `Error deleting "${relPath}": ${err instanceof Error ? err.message : String(err)}`,
                isError: true
            };
        }
        // Same anti-restate footer pattern as write_file: the model tends to
        // narrate what was deleted even though the diff already shows it.
        const baseMessage = `Deleted ${relPath}. Do not restate the deletion — the user can see it in the diff. Move on to the next pending task or reply with a brief summary if the work is complete.`;
        // Post-delete project-level type check. Deleting a file can break
        // imports elsewhere; surfacing that here lets the model react on
        // this turn. A rejected check is treated as "no warning".
        const postEditCheck = await (0, post_edit_checks_1.runPostEditTypeCheck)(absPath, ctx).catch(() => ({ newErrorCount: 0, warning: undefined }));
        // Separate the warning from the footer with a blank line, the same
        // way write_file joins its trailers — without it the warning runs
        // straight on from the final sentence of the base message.
        return {
            output: postEditCheck.warning ? `${baseMessage}\n\n${postEditCheck.warning}` : baseMessage
        };
    }
};
372
+ exports.deleteFileTool = deleteFileTool;
373
+ // ── apply_edit ─────────────────────────────────────────────────────────────────
374
+ //
375
+ // Targeted find/replace on an existing file. Prefer this over write_file for
376
+ // small edits — it prevents the "model was asked for a one-line comment, wrote
377
+ // a new file" scope blowup we saw on model-rewritten READMEs. Semantics are
378
+ // modelled on Claude Code's Edit tool so the pattern is familiar to users
379
+ // coming from there:
380
+ //
381
+ // - `find` must appear in the file — not found is an error.
382
+ // - `find` must be UNIQUE unless `replace_all=true` — ambiguous matches are
383
+ // rejected so the model can't silently replace the wrong hit.
384
+ // - Multi-line find/replace is supported (the string is matched verbatim).
385
+ // - The tool cannot be used to create a new file — direct the model to
386
+ // write_file for that case.
387
+ //
388
+ // The same language-adapter validation write_file runs is applied here too,
389
+ // so syntax errors in the edited result are caught pre-write.
390
/**
 * Narrows a whitespace-tolerant regex match so it never swallows line breaks
 * (or a neighbouring line's indentation) that `find` itself did not contain.
 *
 * The fuzzy matcher turns every whitespace run in `find` into `\s+`. When
 * `find` starts with indentation, that leading `\s+` also consumes the
 * newline that ends the previous line; when `find` ends with whitespace, the
 * trailing `\s+` consumes the next line's indentation. Splicing over the raw
 * match would then join lines together and make the indent-delta computation
 * compare against an empty indent.
 *
 * @param {string} matchedText The text the fuzzy regex matched.
 * @param {string} find The `find` string the regex was built from.
 * @param {number} start Index of `matchedText` inside the file content.
 * @returns {{start: number, end: number}} Span to splice over (end exclusive).
 */
function tightenFuzzyWhitespaceSpan(matchedText, find, start) {
    let spanStart = start;
    let spanEnd = start + matchedText.length;
    // A whitespace-only `find` has no non-blank anchor to tighten around.
    if (!/\S/.test(find)) {
        return { start: spanStart, end: spanEnd };
    }
    const findLead = find.match(/^\s*/)[0];
    const matchedLead = matchedText.match(/^\s*/)[0];
    if (!findLead.includes('\n')) {
        // `find` began mid-line or with plain indentation: begin the span on
        // the matched line itself, after the last consumed line break.
        const lastBreak = matchedLead.lastIndexOf('\n');
        if (lastBreak !== -1) {
            spanStart += lastBreak + 1;
        }
    }
    const findTail = find.match(/\s*$/)[0];
    const matchedTail = matchedText.match(/\s*$/)[0];
    if (!findTail.includes('\n')) {
        // `find` ended on its last line: stop before the first line break.
        const firstBreak = matchedTail.search(/\r?\n/);
        if (firstBreak !== -1) {
            spanEnd -= matchedTail.length - firstBreak;
        }
    }
    else if (findTail.endsWith('\n')) {
        // `find` ended exactly on a line break: consume as many breaks as
        // `find` had, but none of the following line's indentation.
        const breaksWanted = findTail.split('\n').length - 1;
        let cut = -1;
        for (let seen = 0; seen < breaksWanted; seen++) {
            const next = matchedTail.indexOf('\n', cut + 1);
            if (next === -1) {
                break;
            }
            cut = next;
        }
        if (cut !== -1) {
            spanEnd -= matchedTail.length - (cut + 1);
        }
    }
    return { start: spanStart, end: spanEnd };
}

const applyEditTool = {
    name: 'apply_edit',
    description: 'Apply a targeted find/replace edit to an existing file. PREFERRED over write_file for small changes (renames, one-line fixes, adding a comment, tweaking a value) — it does not rewrite the rest of the file. For larger line-numbered blocks, prefer replace_range after read_file. Fails if `find` is not found, or if `find` appears multiple times unless `replace_all` is "true". Multi-line find/replace is supported.',
    parameters: [
        { name: 'path', description: 'File path. Relative paths resolve against the workspace root; absolute and ~ paths are also accepted.', required: true },
        { name: 'find', description: 'Exact text to locate in the file. Matched verbatim including whitespace and newlines. Must be unique unless replace_all="true" or near_line is set. IMPORTANT: do NOT include the `<num> │ ` line-number prefix from read_file output — that prefix is display-only and is not part of the file. Pass only the raw line content.', required: true },
        { name: 'replace', description: 'Replacement text. May be empty (to delete the matched text).', required: true },
        { name: 'replace_all', description: 'If "true", replace every occurrence of `find`. Default "false" (require unique match).' },
        { name: 'near_line', description: 'Optional 1-based line number. When `find` matches multiple places, pick the occurrence whose start line is closest to this number. Use this when the multi-match error lists candidate line numbers — pick one of those. Ignored if find is unique or replace_all="true".' }
    ],
    /**
     * Applies one find/replace edit to an existing file.
     *
     * @param {object} params Tool arguments: `path`, `find`, `replace`, and the
     *   optional `replace_all` ("true") and `near_line`. The aliases
     *   old_text/new_text and old_string/new_string are accepted for
     *   find/replace.
     * @param {object} ctx Host context. Reads `workspaceRoot`, `readFile`,
     *   `writeFile`, and, when present, `hasFileBeenRead` and
     *   `languageAdapters.validate`.
     * @returns {Promise<{output: string, isError?: boolean}>} Tool result. Every
     *   failure is reported through `isError: true`; this method returns an
     *   error result rather than throwing for read/write failures.
     */
    async execute(params, ctx) {
        const relPath = params.path?.trim();
        // Accept common param-name aliases the model reaches for. Canonical:
        // find/replace (what we document). Also accepted: old_text/new_text
        // (some fine-tunes default to this) and old_string/new_string
        // (Claude Code Edit-tool convention). Surfaced by the eval when
        // bandit-core-1 emitted old_text/new_text and our tool rejected it
        // with "find parameter is required" even though the payload had all
        // the data we needed.
        const find = params.find ?? params.old_text ?? params.old_string;
        const replace = params.replace ?? params.new_text ?? params.new_string;
        if (!relPath) {
            return { output: 'Error: path parameter is required', isError: true };
        }
        if (find === undefined || find === null) {
            return { output: 'Error: find parameter is required (also accepts old_text, old_string)', isError: true };
        }
        if (replace === undefined || replace === null) {
            return { output: 'Error: replace parameter is required (also accepts new_text, new_string)', isError: true };
        }
        if (find === '') {
            return { output: 'Error: find parameter must not be empty — use write_file to create a new file', isError: true };
        }
        if (find === replace) {
            return { output: 'Error: find and replace are identical — no edit to apply', isError: true };
        }
        // Scratchpad-placeholder detector. Small models occasionally dump their
        // own internal reasoning into `replace` as a bracketed "token" where
        // code should go, e.g.
        //   [pre-existing-code-to-ensure-match-is-not-needed-...]
        //   [... existing code ...]
        //   [ORIGINAL_CODE]
        // The bracket balance still looks fine so the structure validator
        // passes, but the model has effectively written prose in place of real
        // code. Catch it here and force a retry. Observed on S3Api
        // DownloadSharedFile (Gemma/Bandit Core wrote the placeholder into a
        // method signature).
        const placeholderPatterns = [
            /\[(?:pre-?existing|existing|original|unchanged|same|keep|placeholder|todo|insert|your)[^\]]{0,200}(?:code|lines?|logic|content|here|unchanged)[^\]]{0,200}\]/i,
            /\[\.\.\.\s*(?:existing|original|unchanged|same)[^\]]{0,100}\.\.\.\]/i,
            /\[<[^>]+>\]/, // <CODE_GOES_HERE>
            /\[(?:TODO|FIXME|HERE|CODE|LINES?|CONTENT)\]/
        ];
        for (const re of placeholderPatterns) {
            const match = re.exec(replace);
            if (match) {
                return {
                    output: `apply_edit rejected: \`replace\` contains a scratchpad placeholder (${JSON.stringify(match[0])}). Placeholders like \`[... existing code ...]\` or \`[pre-existing-code-...]\` are NOT substituted — the literal bracketed text lands in the file and breaks it. Re-read the file, copy the actual lines you want preserved into the \`replace\` string verbatim, and retry.`,
                    isError: true
                };
            }
        }
        // Double-escape detector. Observed on HealthController.cs:
        // bandit-logic emitted `replace` containing `\n` as two-char escape
        // sequences (backslash+n) rather than real newlines, so the file ended
        // up with `// comment\n// comment\npublic IActionResult Get()` all
        // crammed onto one line with literal backslash-n text between tokens.
        // Narrow trigger: `find` spans multiple lines (so we KNOW multi-line
        // content is expected) AND `replace` contains `\n` escape sequences
        // AND `replace` contains NO real newlines. Legitimate single-line
        // replacements like `console.log("foo\nbar")` won't trip this because
        // `find` would be single-line.
        const findSpansLines = find.includes('\n');
        const replaceHasLiteralNewlineEscape = /\\n/.test(replace);
        const replaceHasActualNewline = replace.includes('\n');
        if (findSpansLines && replaceHasLiteralNewlineEscape && !replaceHasActualNewline) {
            return {
                output: 'apply_edit rejected: `replace` contains literal `\\n` escape sequences but no actual newlines, while `find` spans multiple lines. The replacement looks double-escaped — the two-character `\\n` would land verbatim in the file, collapsing your multi-line edit onto one line. Emit real newline characters in `replace` (a raw newline in the JSON string value), not the literal `\\n` sequence.',
                isError: true
            };
        }
        const absPath = isAbsolutePath(relPath)
            ? relPath
            : `${ctx.workspaceRoot}/${relPath}`;
        // Read-before-edit guard. apply_edit ALWAYS targets an existing
        // file, so the model MUST have read it this turn. Reject blind
        // edits with a copyable error pointing at read_file. The model
        // can't reconstruct file content from training memory; "find"
        // strings will mismatch whitespace/imports it didn't see.
        if (ctx.hasFileBeenRead && !ctx.hasFileBeenRead(absPath)) {
            return {
                output: `apply_edit rejected for "${relPath}": you have not read this file in this conversation. The \`find\` text must match the file verbatim including whitespace; reconstructing it from memory routinely fails. Call read_file("${relPath}") first, then retry apply_edit with the exact text you saw.`,
                isError: true
            };
        }
        let before;
        try {
            before = await ctx.readFile(absPath);
        }
        catch (err) {
            return { output: `Error reading "${relPath}": ${err instanceof Error ? err.message : String(err)}. apply_edit only works on existing files — use write_file to create a new one.`, isError: true };
        }
        // Count occurrences with a literal (non-regex) scan so metacharacters in
        // `find` don't blow up. split+length-1 is cheap and correct for literals.
        let occurrences = before.split(find).length - 1;
        let fuzzySpan = null;
        if (occurrences === 0) {
            // Whitespace-tolerant fallback. Smaller models routinely emit a
            // `find` whose non-whitespace content is correct but whose
            // indentation is one or two columns off — observed when
            // bandit-core fired 9 apply_edits in a row, every one failing
            // because the JSX block it was matching had 12 spaces of indent
            // in the file and 14 in the find. Build a regex from `find` that
            // flexes every whitespace run into `\s+`, run it against the
            // file, and accept the edit only when the fuzzy match is unique.
            // If 0 or 2+ fuzzy hits, report the strict error so we don't
            // paper over real ambiguity.
            const escapedFind = find
                .replace(/[.*+?^${}()|[\]\\]/g, '\\$&')
                .replace(/\s+/g, '\\s+');
            const fuzzyMatches = [...before.matchAll(new RegExp(escapedFind, 'g'))];
            if (fuzzyMatches.length > 1) {
                // Tell the model fuzzy matching saw it but it's ambiguous —
                // they have to add context. Different from "find not found"
                // and different from "exact-match multiple"; honesty about
                // why we couldn't apply.
                return {
                    output: `\`find\` text was not found verbatim in "${relPath}", but a whitespace-tolerant match found ${fuzzyMatches.length} candidates and we won't guess which you meant. Re-read the file, extend \`find\` with a unique surrounding line, and retry.`,
                    isError: true
                };
            }
            if (fuzzyMatches.length === 0 || fuzzyMatches[0].index === undefined) {
                // No exact match AND no fuzzy match. Real miss. Surface the
                // closest line in the file so the model can correct the find
                // text without burning an iteration on a re-read.
                const hint = findIndentationHint(before, find);
                const snippet = nearestMatchSnippet(before, find);
                return {
                    output: `\`find\` text was not found in "${relPath}". ${hint}Re-read the file with read_file, copy the exact text verbatim (including leading whitespace), and retry.${snippet}`,
                    isError: true
                };
            }
            // Exactly one fuzzy hit. Tighten the span so the `\s+` at either
            // edge of the regex cannot eat the previous line's newline or the
            // next line's indentation.
            const hit = fuzzyMatches[0];
            fuzzySpan = tightenFuzzyWhitespaceSpan(hit[0], find, hit.index);
            occurrences = 1;
        }
        const usedFuzzyWhitespace = fuzzySpan !== null;
        const replaceAll = params.replace_all === 'true';
        const nearLineRaw = params.near_line;
        const nearLine = nearLineRaw !== undefined && nearLineRaw !== null && nearLineRaw !== ''
            ? parseInt(String(nearLineRaw), 10)
            : NaN;
        let nearLineSpan = null;
        if (occurrences > 1 && !replaceAll) {
            // Build the list of candidate match positions once — used both
            // by the near_line picker (when set) and the multi-match error
            // message (when it isn't). Capped at 32 candidates.
            const matchPositions = [];
            let scanIdx = 0;
            while (matchPositions.length < 32) {
                const idx = before.indexOf(find, scanIdx);
                if (idx === -1) {
                    break;
                }
                const lineNum = before.slice(0, idx).split('\n').length;
                matchPositions.push({ lineNum, charIdx: idx });
                scanIdx = idx + find.length;
            }
            if (Number.isFinite(nearLine) && matchPositions.length > 0) {
                // Pick the candidate whose start line is closest to near_line.
                // Tie goes to the earlier match. Gives the model a
                // deterministic escape from the multi-match trap when GROW
                // guidance isn't enough — observed with bandit-core 12B,
                // which kept *shrinking* its find string on retries.
                let best = matchPositions[0];
                let bestDist = Math.abs(best.lineNum - nearLine);
                for (let i = 1; i < matchPositions.length; i++) {
                    const dist = Math.abs(matchPositions[i].lineNum - nearLine);
                    if (dist < bestDist) {
                        best = matchPositions[i];
                        bestDist = dist;
                    }
                }
                nearLineSpan = { start: best.charIdx, end: best.charIdx + find.length };
            }
            else {
                // Surface the line numbers of each candidate so the model can
                // either GROW its `find` or pass `near_line` on the next
                // attempt. The previous error said "extend with surrounding
                // context" and small models would routinely interpret that as
                // "try a smaller, simpler find" — going from an 8-line block
                // to a single line and making the ambiguity worse on every
                // retry. The error now lists candidate line numbers AND
                // points at the deterministic `near_line` parameter.
                const lineList = matchPositions.length > 0
                    ? ` Matches start at line${matchPositions.length === 1 ? '' : 's'} ${matchPositions.map(m => m.lineNum).join(', ')}${occurrences > matchPositions.length ? `, …` : ''}.`
                    : '';
                return {
                    output: `\`find\` text matches ${occurrences} places in "${relPath}".${lineList} Two ways to disambiguate: (1) re-call with \`near_line: <one of the line numbers above>\` to pick that specific match, or (2) GROW your \`find\` string by including 1-2 lines BEFORE or AFTER the change site so the surrounding context is unique. Do NOT shrink \`find\` to a smaller snippet — that increases ambiguity. Or pass replace_all="true" if you really do want every occurrence replaced.`,
                    isError: true
                };
            }
        }
        // Indentation-preserving rewrite of `replace`. Models routinely emit
        // multi-line `replace` strings without the matched line's leading
        // whitespace — substring replacement keeps the first line at the
        // original column (it inherits the position of the match) but every
        // subsequent line lands at column 0. Result on disk:
        //         [HttpGet]                ← original 8-space indent
        // becomes:
        //         /// <summary>            ← inherits 8-space indent
        // /// returns the health           ← lost indent (col 0)
        // /// </summary>                   ← lost indent (col 0)
        // [HttpGet]                        ← lost indent (col 0)
        //
        // Heuristic: when find is a single line that matches at a non-zero
        // column AND replace is multi-line AND the first line of replace
        // does not start with whitespace, prepend the match's leading
        // indent to every additional line. Skipped on `replace_all` (each
        // match could have a different indent), on fuzzy matches (handled by
        // the indent-delta shift below), and on edits where the model
        // already supplied absolute indent on the first line.
        let finalReplace = replace;
        const wantsIndentCarry = !replaceAll
            && !usedFuzzyWhitespace
            && !find.includes('\n')
            && replace.includes('\n')
            && !/^\s/.test(replace);
        if (wantsIndentCarry) {
            // Anchor on the occurrence that will actually be replaced: the
            // near_line pick when there is one, otherwise the first match.
            const matchIndex = nearLineSpan ? nearLineSpan.start : before.indexOf(find);
            if (matchIndex !== -1) {
                const lineStart = before.lastIndexOf('\n', matchIndex - 1) + 1;
                const indent = before.slice(lineStart, matchIndex);
                if (indent.length > 0 && /^[ \t]+$/.test(indent)) {
                    finalReplace = replace
                        .split('\n')
                        .map((line, i) => (i === 0 || line.length === 0 ? line : indent + line))
                        .join('\n');
                }
            }
        }
        // When fuzzy whitespace matched, the model's `find` had different
        // indentation than the file. Its `replace` was almost certainly
        // written at the same (wrong) indent as its `find`, so splicing
        // it verbatim into the matched span would land mis-indented code
        // in the middle of correctly-indented code. Compute the indent
        // delta between the find and the matched text and shift every
        // line of `replace` by that delta so the edit lands at the
        // right column. No-op when find and matched first lines have
        // the same indent (fuzzy fired on inner-line whitespace, not
        // outer indent).
        let spliceReplace = finalReplace;
        if (fuzzySpan) {
            const matchedText = before.slice(fuzzySpan.start, fuzzySpan.end);
            const findIndent = (find.match(/^[ \t]*/) ?? [''])[0];
            const matchedIndent = (matchedText.match(/^[ \t]*/) ?? [''])[0];
            const delta = matchedIndent.length - findIndent.length;
            if (delta !== 0) {
                spliceReplace = finalReplace
                    .split('\n')
                    .map((line) => {
                        if (line.length === 0) {
                            return line;
                        }
                        if (delta > 0) {
                            return ' '.repeat(delta) + line;
                        }
                        // Dedent, but never strip more than the line's own
                        // leading whitespace.
                        const leading = (line.match(/^[ \t]*/) ?? [''])[0].length;
                        return line.slice(Math.min(-delta, leading));
                    })
                    .join('\n');
            }
        }
        // Build the new content. Single-match paths splice by index instead of
        // calling String.prototype.replace with a string replacement, because
        // that API expands `$&`, `$$`, `$1`, "$`" and `$'` inside the
        // replacement and would silently corrupt code containing them.
        let after;
        if (fuzzySpan) {
            // A fuzzy hit is by construction the only occurrence, so it is
            // applied even when replace_all="true" (a literal split/join
            // would find nothing and produce no change).
            after = before.slice(0, fuzzySpan.start) + spliceReplace + before.slice(fuzzySpan.end);
        }
        else if (replaceAll) {
            after = before.split(find).join(finalReplace);
        }
        else {
            const firstIdx = before.indexOf(find);
            const span = nearLineSpan
                ?? (firstIdx === -1 ? null : { start: firstIdx, end: firstIdx + find.length });
            after = span
                ? before.slice(0, span.start) + finalReplace + before.slice(span.end)
                : before;
        }
        if (after === before) {
            // Defensive — should be impossible given the guards above, but stay honest.
            return { output: 'Edit produced no change to the file.', isError: true };
        }
        if (ctx.languageAdapters) {
            const afterValidation = await ctx.languageAdapters.validate(absPath, after, ctx);
            if (!afterValidation.ok) {
                // Pre-existing errors must not gate this edit. If the file was
                // ALREADY broken before our change AND the post-edit errors
                // aren't worse (no new error lines), let the write through.
                // Without this, the model gets stuck unable to edit any file
                // that has unrelated rot — observed on a real project where
                // plans.tsx had Grid-deprecation + GlossaryKey type issues
                // unrelated to a one-line CSS fix the user asked for. Every
                // apply_edit returned a 16KB TS-compiler dump and the model
                // gave up after iterating on its find/replace 8x.
                const beforeValidation = await ctx.languageAdapters.validate(absPath, before, ctx);
                if (introducedNewErrors(beforeValidation.error, afterValidation.error)) {
                    return {
                        output: `Validation failed after apply_edit on "${relPath}":\n${afterValidation.error}\n\nThe file was NOT written. Fix the \`find\`/\`replace\` values and retry.`,
                        isError: true
                    };
                }
                // Pre-existing errors only — the write proceeds.
            }
        }
        try {
            await ctx.writeFile(absPath, after);
        }
        catch (err) {
            return { output: `Error writing "${relPath}": ${err instanceof Error ? err.message : String(err)}`, isError: true };
        }
        const lineDelta = after.split('\n').length - before.split('\n').length;
        const charDelta = after.length - before.length;
        // Build a delta label that doesn't read as "no change" when the
        // edit only mutated text WITHIN existing lines. Previous version
        // reported "±0 lines" for any line-internal swap (e.g. replacing
        // a footer string with a slightly longer one), which made users
        // think the edit silently no-op'd. Now we always surface byte
        // delta when lines net to zero, so the agent + user see the real
        // change ("modified, +47 chars" instead of "±0 lines").
        let deltaLabel;
        if (lineDelta > 0) {
            deltaLabel = `+${lineDelta} line${lineDelta === 1 ? '' : 's'}`;
        }
        else if (lineDelta < 0) {
            deltaLabel = `${lineDelta} line${lineDelta === -1 ? '' : 's'}`;
        }
        else if (charDelta === 0) {
            deltaLabel = '±0 lines (unchanged length)';
        }
        else {
            deltaLabel = `±0 lines, ${charDelta > 0 ? '+' : ''}${charDelta} chars`;
        }
        const matches = replaceAll && occurrences > 1 ? `${occurrences} occurrences` : '1 occurrence';
        // Post-write syntactic validation. Surfaces invalid-JSON style
        // problems to the agent on its NEXT turn so it can self-correct
        // without the user having to flag the bad output. Append to the
        // tool result rather than fail the edit — the file is already on
        // disk, the agent needs to see the diagnostic to fix it.
        const validationWarning = validatePostWrite(absPath, after);
        // Completion footer. Qwen 2.5 Coder specifically has a tendency to
        // echo the entire updated file back in prose after apply_edit
        // succeeds ("Here is the updated content of FileController.cs: …"
        // with the full 100+ line body), observed on S3Api. The bare
        // "Replaced X" result leaves the model guessing at next steps;
        // a terse explicit directive converts ~all cases to either another
        // tool call or a one-sentence summary.
        //
        // Also discourage the common "apply_edit → read_file → apply_edit"
        // pattern: after a successful edit, re-reading the whole file just
        // to do another edit bloats context and slows every subsequent
        // turn. Observed on S3Api: a 9-iteration run with 5 apply_edits
        // each followed by a full read_file pushed the LLM call to 38s+
        // and eventually tripped a 504.
        const baseMessage = `Replaced ${matches} in ${relPath} (${deltaLabel}). File saved. Do not restate the file contents — the user can see the diff. Do not re-read this file just to make another edit — you already have the structure in context. Move on to the next pending task or reply with a brief summary if the work is complete.`;
        // The project-level type check is best-effort: a failure of the check
        // itself must not turn a successful write into an error.
        const postEditCheck = await (0, post_edit_checks_1.runPostEditTypeCheck)(absPath, ctx).catch(() => ({ newErrorCount: 0, warning: undefined }));
        const trailers = [validationWarning, postEditCheck.warning].filter(Boolean).join('\n\n');
        return {
            output: trailers ? `${baseMessage}\n\n${trailers}` : baseMessage
        };
    }
};
745
+ exports.applyEditTool = applyEditTool;
746
+ // ── replace_range ──────────────────────────────────────────────────────────────
747
+ //
748
+ // Line-number based edit for large files. This is deliberately narrower than
749
+ // write_file and less brittle than apply_edit when the model has already read a
750
+ // paginated slice and needs to replace a whole method/component block.
751
const replaceRangeTool = {
    name: 'replace_range',
    description: 'Replace an inclusive 1-based line range in an existing text file. Best for large-file refactors after read_file(path, offset, limit): use the visible line numbers instead of sending a huge exact find string. For insertion before line N, set start_line=N and end_line=N-1. The framework requires you to have read the file at least once this conversation (read-tracking guard); you do NOT need to pass expected_hash for normal edits.',
    parameters: [
        { name: 'path', description: 'File path. Relative paths resolve against the workspace root; absolute and ~ paths are also accepted.', required: true },
        { name: 'start_line', description: '1-based first line to replace. For insertion, this is the line to insert before.', required: true },
        { name: 'end_line', description: '1-based last line to replace, inclusive. Use start_line-1 to insert before start_line. Defaults to start_line for a one-line replacement.' },
        { name: 'content', description: 'Replacement text for the range. Empty string deletes the range. Use real newline characters for multi-line replacements.', required: true },
        { name: 'expected_hash', description: 'Advisory only — when passed, the framework compares it against the current range hash and records a warning in the result if they differ, but the edit still proceeds. The read-tracking guard is the real safety mechanism; you do not need to pass this for normal edits. Kept for backwards compatibility with callers that copy shown_hash from read_file.' },
        { name: 'expected_old', description: 'Optional exact old text for the range. When passed, the edit is rejected if the current content does not match — use for short, surgical replacements where the exact source line is known. Stricter than expected_hash; intentionally NOT advisory.' }
    ],
    /**
     * Replaces (or, when end_line === start_line - 1, inserts before) an
     * inclusive 1-based line range of an existing file.
     *
     * @param {object} params Tool arguments: `path`, `start_line`, optional
     *   `end_line`, `content`, optional `expected_hash` and `expected_old`.
     *   Aliases accepted: replace/new_text for content, start/from_line for
     *   start_line, expected_range_hash/range_hash for expected_hash.
     * @param {object} ctx Host context. Reads `workspaceRoot`, `readFile`,
     *   `writeFile`, and, when present, `hasFileBeenRead` and
     *   `languageAdapters.validate`.
     * @returns {Promise<{output: string, isError?: boolean}>} Tool result; all
     *   rejections and I/O failures are reported via `isError: true`.
     */
    async execute(params, ctx) {
        const relPath = params.path?.trim();
        const content = params.content ?? params.replace ?? params.new_text;
        if (!relPath) {
            return { output: 'Error: path parameter is required', isError: true };
        }
        if (content === undefined || content === null) {
            return { output: 'Error: content parameter is required', isError: true };
        }
        const parsedStart = parseInt(params.start_line ?? params.start ?? params.from_line ?? '', 10);
        const hasEndLine = params.end_line !== undefined && params.end_line !== null && params.end_line !== '';
        let parsedEnd;
        if (hasEndLine) {
            parsedEnd = parseInt(params.end_line, 10);
        }
        else {
            // Omitted end_line means a one-line replacement at start_line.
            parsedEnd = Number.isFinite(parsedStart) ? parsedStart : NaN;
        }
        if (!Number.isFinite(parsedStart) || parsedStart < 1) {
            return { output: 'replace_range rejected: start_line must be a positive 1-based line number.', isError: true };
        }
        if (!Number.isFinite(parsedEnd)) {
            return { output: 'replace_range rejected: end_line must be a 1-based line number, or omit it for a one-line replacement.', isError: true };
        }
        if (parsedEnd < parsedStart - 1) {
            return { output: 'replace_range rejected: end_line can only be less than start_line when inserting, and then it must equal start_line - 1.', isError: true };
        }
        const absPath = isAbsolutePath(relPath)
            ? relPath
            : `${ctx.workspaceRoot}/${relPath}`;
        // Read-tracking guard: line numbers are only trustworthy if they came
        // from a read_file call in this conversation.
        if (ctx.hasFileBeenRead && !ctx.hasFileBeenRead(absPath)) {
            return {
                output: `replace_range rejected for "${relPath}": you have not read this file in this conversation. Line numbers must come from read_file output, not memory. Call read_file("${relPath}", offset=<near the target>, limit=120) first, then retry replace_range.`,
                isError: true
            };
        }
        let before;
        try {
            before = await ctx.readFile(absPath);
        }
        catch (err) {
            return { output: `Error reading "${relPath}": ${err instanceof Error ? err.message : String(err)}. replace_range only works on existing files — use write_file to create a new one.`, isError: true };
        }
        const { lines, eol } = splitTextLines(before);
        const startLine = parsedStart;
        const endLine = parsedEnd;
        // start_line may be lines.length + 1 only for an append-style insert;
        // end_line may never point past the last line.
        if (startLine > lines.length + 1) {
            return { output: `replace_range rejected for "${relPath}": start_line ${startLine} is beyond the end of the file (${lines.length} lines). Re-read the file with read_file to get current line numbers.`, isError: true };
        }
        if (endLine > lines.length) {
            return { output: `replace_range rejected for "${relPath}": end_line ${endLine} is beyond the end of the file (${lines.length} lines). Re-read the file with read_file to get current line numbers.`, isError: true };
        }
        // Half-open [startIdx, endIdx) slice bounds. For an insertion
        // (end_line === start_line - 1) the slice is empty.
        const startIdx = startLine - 1;
        const endIdx = Math.max(startIdx, endLine);
        const currentRange = lines.slice(startIdx, endIdx).join(eol);
        // Mark 2026-05-26: replace_range used to REJECT on hash mismatch.
        // Combined with the per-read shown_hash mechanic, that turned into
        // a loop trap: model reads lines 40-54, copies that wider hash
        // into a replace_range(43-50) call, hashes diverge (because they
        // cover different bytes), edit rejected, model re-reads, picks
        // up a still-wrong hash from the wider read, retries — repeat
        // indefinitely. Captured 2026-05-26 Portfolio session: 3-5
        // iterations spinning on a single 8-line replacement.
        //
        // The hash was always weaker safety than the read-tracking guard
        // (hasFileBeenRead) above. In Bandit's single-process single-turn
        // model the file ONLY changes between read and write if WE wrote
        // it, and apply_edit/write_file/replace_range all go through the
        // same context. The hash mainly caught the case where the model
        // misremembers which range it's editing — which the model now
        // gets a warning about, not a rejection.
        //
        // expected_old (below) STAYS strict — it's a tighter check the
        // model opts into for surgical line-level edits, and it always
        // matched intent rather than incidental hash strings.
        const expectedHash = params.expected_hash ?? params.expected_range_hash ?? params.range_hash;
        let hashWarning;
        if (expectedHash) {
            const actualHash = stableContentHash(currentRange);
            if (actualHash !== expectedHash) {
                hashWarning =
                    `Note: expected_hash ${expectedHash} did not match the current range hash ${actualHash} — ` +
                        `you likely passed shown_hash from a wider read. The edit proceeded anyway because the read-tracking ` +
                        `guard verified you read this file. Drop expected_hash on follow-ups; use expected_old when you need a ` +
                        `tight surgical match.`;
            }
        }
        // expected_old is strict on content but tolerant of line-ending
        // style: currentRange is joined with the file's own EOL, while the
        // model normally sends plain "\n". Comparing raw strings made every
        // multi-line expected_old fail on CRLF files. A null value is
        // treated as "not passed", matching how `content` is checked.
        const expectedOld = params.expected_old;
        if (expectedOld !== undefined && expectedOld !== null) {
            const normalizeEol = (text) => String(text).replace(/\r\n?/g, '\n');
            if (normalizeEol(expectedOld) !== normalizeEol(currentRange)) {
                return {
                    output: `replace_range rejected for "${relPath}" lines ${startLine}-${endLine}: expected_old did not match current file contents. Re-read the range and retry with the current text in expected_old, or omit expected_old.`,
                    isError: true
                };
            }
        }
        // Empty content deletes the range; otherwise the replacement is
        // re-joined with the file's EOL so line endings stay uniform.
        const replacementLines = String(content) === '' ? [] : splitTextLines(String(content)).lines;
        const after = [
            ...lines.slice(0, startIdx),
            ...replacementLines,
            ...lines.slice(endIdx)
        ].join(eol);
        if (after === before) {
            return { output: 'replace_range produced no change to the file.', isError: true };
        }
        if (ctx.languageAdapters) {
            const afterValidation = await ctx.languageAdapters.validate(absPath, after, ctx);
            if (!afterValidation.ok) {
                // Only block the write when this edit made things worse;
                // errors that already existed before the edit do not gate it.
                const beforeValidation = await ctx.languageAdapters.validate(absPath, before, ctx);
                if (introducedNewErrors(beforeValidation.error, afterValidation.error)) {
                    return {
                        output: `Validation failed after replace_range on "${relPath}":\n${afterValidation.error}\n\nThe file was NOT written. Re-read the surrounding lines and retry with a smaller or corrected replacement.`,
                        isError: true
                    };
                }
            }
        }
        try {
            await ctx.writeFile(absPath, after);
        }
        catch (err) {
            return { output: `Error writing "${relPath}": ${err instanceof Error ? err.message : String(err)}`, isError: true };
        }
        const removed = Math.max(0, endLine - startLine + 1);
        const added = replacementLines.length;
        const rangeLabel = endLine < startLine
            ? `Inserted ${added} line${added === 1 ? '' : 's'} before line ${startLine}`
            : `Replaced lines ${startLine}-${endLine} (+${added} -${removed})`;
        const baseMessage = `${rangeLabel} in ${relPath}. File saved. Do not restate the file contents — the user can see the diff. Do not re-read this file just to make another nearby edit; continue with the next range or verify when done.`;
        const validationWarning = validatePostWrite(absPath, after);
        // Best-effort project-level check: a failure of the check itself must
        // not turn a successful write into an error.
        const postEditCheck = await (0, post_edit_checks_1.runPostEditTypeCheck)(absPath, ctx).catch(() => ({ newErrorCount: 0, warning: undefined }));
        const trailers = [hashWarning, validationWarning, postEditCheck.warning].filter(Boolean).join('\n\n');
        return {
            output: trailers ? `${baseMessage}\n\n${trailers}` : baseMessage
        };
    }
};
889
+ exports.replaceRangeTool = replaceRangeTool;
890
+ // ── apply_patch ────────────────────────────────────────────────────────────────
891
+ //
892
+ // Multi-file envelope. One tool call → many edits across many files.
893
+ // Cheaper than N round-trips of apply_edit when the model is doing a
894
+ // rename, refactor, or any batch change. Format follows the Codex/
895
+ // OpenCode "*** Begin Patch / *** End Patch" envelope so models trained
896
+ // on it (gpt-4/5, qwen 2.5+, claude) can emit it natively.
897
+ //
898
+ // Supported actions in v1:
899
+ // *** Update File: <path>
900
+ // @@ <unique context line that exists in the file>
901
+ // - removed line
902
+ // + added line
903
+ // unchanged context line (single space prefix)
904
+ // *** Add File: <path>
905
+ // + content line 1
906
+ // + content line 2
907
+ // *** Delete File: <path>
908
+ //
909
+ // (Move is intentionally out of scope for v1 — implement as Add+Delete.)
910
+ //
911
+ // Each Update block translates to a find/replace internally:
912
+ // find = context lines + removed lines (in their original order)
913
+ // replace = context lines + added lines (in their original order)
914
+ // The same uniqueness/indentation guards as apply_edit apply per-update.
915
/**
 * Collapse every run of spaces/tabs inside each line to a single space and
 * drop trailing whitespace, keeping newlines as line separators. Used by the
 * whitespace-tolerant hunk matcher.
 */
function collapseLineWhitespace(text) {
    return text
        .split('\n')
        .map((line) => line.replace(/[ \t]+/g, ' ').trimEnd())
        .join('\n');
}
/**
 * Parse the body of a Codex envelope (everything between `*** Begin Patch`
 * and `*** End Patch`) into an ordered list of actions:
 *   { kind: 'update', path, hunks: Array<Array<{ kind, text }>> }
 *   { kind: 'add', path, lines: string[] }
 *   { kind: 'delete', path }
 * Lines that appear before the first action header, or after a Delete
 * header, are ignored.
 */
function parseCodexPatchActions(body) {
    const actions = [];
    let current = null;
    let currentHunk = null;
    // `+`/`-` lines may be written as "+text" or "+ text"; drop the marker
    // and at most one following space.
    const stripMarker = (line) => line.slice(line[1] === ' ' ? 2 : 1);
    for (const line of body.split('\n')) {
        const updateMatch = /^\*\*\* Update File:\s+(.+?)\s*$/.exec(line);
        const addMatch = /^\*\*\* Add File:\s+(.+?)\s*$/.exec(line);
        const deleteMatch = /^\*\*\* Delete File:\s+(.+?)\s*$/.exec(line);
        if (updateMatch) {
            if (current)
                actions.push(current);
            current = { kind: 'update', path: updateMatch[1], hunks: [] };
            currentHunk = null;
            continue;
        }
        if (addMatch) {
            if (current)
                actions.push(current);
            current = { kind: 'add', path: addMatch[1], lines: [] };
            currentHunk = null;
            continue;
        }
        if (deleteMatch) {
            if (current)
                actions.push(current);
            actions.push({ kind: 'delete', path: deleteMatch[1] });
            current = null;
            currentHunk = null;
            continue;
        }
        if (!current)
            continue;
        if (current.kind === 'add') {
            // Add file: only `+` lines contribute content.
            if (line.startsWith('+')) {
                current.lines.push(stripMarker(line));
            }
            continue;
        }
        if (current.kind === 'update') {
            if (line.startsWith('@@')) {
                // Start a new hunk. The text after @@ is purely informational
                // (a hint about location); it is not used for matching.
                currentHunk = [];
                current.hunks.push(currentHunk);
                continue;
            }
            if (!currentHunk) {
                // Update without a prior @@ header — accept it as a single
                // implicit hunk so trivial changes don't need one.
                currentHunk = [];
                current.hunks.push(currentHunk);
            }
            if (line.startsWith('-')) {
                currentHunk.push({ kind: 'removed', text: stripMarker(line) });
            }
            else if (line.startsWith('+')) {
                currentHunk.push({ kind: 'added', text: stripMarker(line) });
            }
            else if (line.startsWith(' ')) {
                // Context lines are kept IN ORDER so the find/replace built
                // from the hunk lines up with the file.
                currentHunk.push({ kind: 'context', text: line.slice(1) });
            }
        }
    }
    if (current)
        actions.push(current);
    return actions;
}
/**
 * Apply one parsed hunk to `text`.
 *
 * The hunk becomes a find/replace pair: find = context + removed lines,
 * replace = context + added lines (both in original order). An exact,
 * unique match is required; when there is no exact match a
 * whitespace-tolerant, line-aligned match is attempted.
 *
 * @param {string} text Current file content.
 * @param {Array<{kind: string, text: string}>} hunk Parsed hunk lines.
 * @param {number} hunkNumber 1-based hunk index, used in failure notes.
 * @returns {{applied: boolean, text: string, note?: string}} On success the
 *   updated text; on failure the unchanged text and a diagnostic note.
 */
function applyCodexHunk(text, hunk, hunkNumber) {
    const findLines = [];
    const replaceLines = [];
    let contextCount = 0;
    for (const item of hunk) {
        if (item.kind === 'context') {
            findLines.push(item.text);
            replaceLines.push(item.text);
            contextCount++;
        }
        else if (item.kind === 'removed') {
            findLines.push(item.text);
        }
        else if (item.kind === 'added') {
            replaceLines.push(item.text);
        }
    }
    const find = findLines.join('\n');
    const replace = replaceLines.join('\n');
    if (!find) {
        return { applied: false, text, note: `hunk #${hunkNumber}: empty (no removed lines and no context)` };
    }
    const occurrences = text.split(find).length - 1;
    if (occurrences === 1) {
        // Use a replacer function: a plain string replacement would expand
        // `$&`, `$$`, `` $` `` and `$'` found in the patched code as
        // substitution patterns and corrupt the result.
        return { applied: true, text: text.replace(find, () => replace) };
    }
    if (occurrences > 1) {
        return { applied: false, text, note: `hunk #${hunkNumber}: matches ${occurrences} places — add more context to make it unique` };
    }
    // occurrences === 0: whitespace-tolerant fallback. Normalise both sides,
    // require a unique normalised match, then locate the line window in the
    // real text and splice the replacement in by line range. This tolerates
    // indentation drift / trailing whitespace without giving up the
    // multi-match safety.
    const normalizedFind = collapseLineWhitespace(find);
    const normalOccurrences = collapseLineWhitespace(text).split(normalizedFind).length - 1;
    if (normalOccurrences === 1) {
        const textLines = text.split('\n');
        const findLineCount = find.split('\n').length;
        let matchedStart = -1;
        for (let i = 0; i + findLineCount <= textLines.length; i++) {
            const window = textLines.slice(i, i + findLineCount).join('\n');
            if (collapseLineWhitespace(window) === normalizedFind) {
                matchedStart = i;
                break;
            }
        }
        if (matchedStart >= 0) {
            const spliced = [
                ...textLines.slice(0, matchedStart),
                ...replace.split('\n'),
                ...textLines.slice(matchedStart + findLineCount)
            ];
            return { applied: true, text: spliced.join('\n') };
        }
    }
    const preview = find.length > 160 ? find.slice(0, 160) + '…' : find;
    return {
        applied: false,
        text,
        note: `hunk #${hunkNumber}: text not found (${contextCount} context line${contextCount === 1 ? '' : 's'} provided). ` +
            `Tried to find:\n${preview.split('\n').map((l) => ` ${l}`).join('\n')}`
    };
}
/**
 * `apply_patch` tool: applies a multi-file patch in one call.
 *
 * Accepts either a unified diff (delegated to executeUnifiedDiffPatch) or a
 * Codex `*** Begin Patch` / `*** End Patch` envelope, which is parsed and
 * executed here. Envelope actions run sequentially; a failing action or hunk
 * is recorded as a `FAILED …` result line and the remaining actions still
 * run. The call reports `isError: true` only when every action failed.
 */
const applyPatchTool = {
    name: 'apply_patch',
    description: 'Apply a multi-file patch in a single tool call. Use this when you have to change 2+ files (rename, refactor, multi-method comment pass) — much cheaper than calling apply_edit N times. Two accepted formats: (1) **standard unified diff** — what `git diff` produces, with `--- a/path`, `+++ b/path`, `@@` hunks, ` `/`-`/`+` body lines. Most models emit this format natively. Single-file unified diffs accepted; for multi-file, concatenate diffs with their own headers. (2) **Codex envelope** — `*** Begin Patch` / `*** End Patch` wrapping `*** Update File: <path>` / `*** Add File:` / `*** Delete File:` blocks. The tool auto-detects the format from the input.',
    parameters: [
        { name: 'patch', description: 'The full patch — either a unified diff (starts with `--- ` / `+++ ` / `@@`) or a `*** Begin Patch` envelope. For unified diffs the path is read from the `+++ b/<path>` header; for the envelope each `*** Update File:` block names its own path.', required: true },
        { name: 'path', description: 'Optional explicit path. When set with a unified diff, overrides whatever the `+++ b/...` header says — useful when the model emits a diff without proper headers.', required: false }
    ],
    async execute(params, ctx) {
        const raw = (params.patch ?? params.input ?? '').trim();
        if (!raw)
            return { output: 'Error: patch parameter is required', isError: true };
        const errorText = (err) => (err instanceof Error ? err.message : String(err));
        // Auto-detect format. Unified-diff patches start with one of the
        // standard headers (`---`, `+++`, `diff `, `@@`); the Codex format
        // starts with `*** Begin Patch`. Anything else is rejected with a
        // message naming the two supported shapes rather than letting a
        // parser mis-handle a malformed payload.
        const looksUnified = raw.startsWith('--- ') ||
            raw.startsWith('+++ ') ||
            raw.startsWith('diff ') ||
            raw.startsWith('@@');
        const looksCodex = raw.startsWith('*** Begin Patch');
        if (looksUnified) {
            return executeUnifiedDiffPatch(raw, params.path, ctx);
        }
        if (!looksCodex || !raw.includes('*** End Patch')) {
            return {
                output: 'apply_patch rejected: input is neither a unified diff nor a Codex envelope. Emit either (1) a unified diff starting with `--- a/<path>` + `+++ b/<path>` + `@@` hunks, or (2) a Codex envelope wrapped in `*** Begin Patch` / `*** End Patch`.',
                isError: true
            };
        }
        // Strip the `*** Begin Patch` line and the trailing `*** End Patch`,
        // then split what is left into action blocks.
        const body = raw
            .slice(raw.indexOf('\n') + 1)
            .replace(/\n\*\*\* End Patch\s*$/, '')
            .trim();
        const actions = parseCodexPatchActions(body);
        if (actions.length === 0) {
            return { output: 'apply_patch rejected: envelope contained no action blocks. Use `*** Update File:`, `*** Add File:`, or `*** Delete File:` headers.', isError: true };
        }
        // Execute actions sequentially. A failure is recorded as a
        // `FAILED …` line and the remaining actions still run, so the model
        // can re-emit only what failed. Every action pushes exactly one
        // entry into `results`.
        const results = [];
        for (const action of actions) {
            const absPath = isAbsolutePath(action.path)
                ? action.path
                : `${ctx.workspaceRoot}/${action.path}`;
            if (action.kind === 'delete') {
                try {
                    // When the host wires `deleteFile`, use it so the file is
                    // really removed. Hosts without it fall back to blanking
                    // the file, with a note so the model knows a hard delete
                    // did not happen and can run `rm` via run_command.
                    if (typeof ctx.deleteFile === 'function') {
                        await ctx.deleteFile(absPath);
                        results.push(`Deleted: ${action.path}`);
                    }
                    else {
                        await ctx.writeFile(absPath, '');
                        results.push(`Deleted (blanked — host does not support hard delete; run \`rm ${action.path}\` via run_command to remove the empty file): ${action.path}`);
                    }
                }
                catch (err) {
                    results.push(`FAILED Delete ${action.path}: ${errorText(err)}`);
                }
                continue;
            }
            if (action.kind === 'add') {
                try {
                    await ctx.writeFile(absPath, action.lines.join('\n') + (action.lines.length > 0 ? '\n' : ''));
                    results.push(`Added: ${action.path} (${action.lines.length} lines)`);
                }
                catch (err) {
                    results.push(`FAILED Add ${action.path}: ${errorText(err)}`);
                }
                continue;
            }
            // Update: the read-before-edit guard applies when the host
            // exposes `hasFileBeenRead`.
            if (ctx.hasFileBeenRead && !ctx.hasFileBeenRead(absPath)) {
                results.push(`FAILED Update ${action.path}: read this file with read_file first — apply_patch's "find" strings must match verbatim, which fails on unread files.`);
                continue;
            }
            let before;
            try {
                before = await ctx.readFile(absPath);
            }
            catch (err) {
                results.push(`FAILED Update ${action.path}: cannot read (${errorText(err)})`);
                continue;
            }
            // Each hunk passes or fails on its own; later hunks see the
            // text produced by earlier successful ones.
            let after = before;
            let hunksApplied = 0;
            const hunkFailureNotes = [];
            for (let hi = 0; hi < action.hunks.length; hi++) {
                const outcome = applyCodexHunk(after, action.hunks[hi], hi + 1);
                if (outcome.applied) {
                    after = outcome.text;
                    hunksApplied++;
                }
                else {
                    hunkFailureNotes.push(outcome.note);
                }
            }
            const hunksFailed = hunkFailureNotes.length;
            // Write only when at least one hunk applied; otherwise leave the
            // file untouched and surface the failure.
            if (hunksApplied > 0) {
                try {
                    await ctx.writeFile(absPath, after);
                    const lineDelta = after.split('\n').length - before.split('\n').length;
                    const tail = hunksFailed > 0
                        ? ` — ${hunksApplied}/${action.hunks.length} hunks applied, ${hunksFailed} skipped:\n ${hunkFailureNotes.join('\n ')}`
                        : ` (${action.hunks.length} hunk${action.hunks.length === 1 ? '' : 's'}, ${lineDelta >= 0 ? '+' : ''}${lineDelta} lines)`;
                    results.push(`${hunksFailed > 0 ? 'Partially updated' : 'Updated'}: ${action.path}${tail}`);
                }
                catch (err) {
                    results.push(`FAILED Update ${action.path}: write failed (${errorText(err)})`);
                }
            }
            else {
                results.push(`FAILED Update ${action.path}: 0/${action.hunks.length} hunks applied — file left untouched.\n ${hunkFailureNotes.join('\n ')}`);
            }
        }
        // The whole call is an error only when EVERY action produced a
        // `FAILED …` line. Partial success is reported with isError:false so
        // the model can build on what landed.
        const totalFailures = results.filter((r) => r.startsWith('FAILED ')).length;
        const allFailed = totalFailures === results.length;
        const summary = allFailed
            ? `apply_patch could not land any changes. Inspect the per-action notes below, then either re-emit only the failing hunks with more context (verify whitespace matches read_file output exactly) or fall back to apply_edit for individual lines.`
            : totalFailures > 0
                ? `apply_patch partially applied (${actions.length - totalFailures}/${actions.length} actions changed the file). Failed actions list specific hunks; retry just those.`
                : `Patch applied successfully (${actions.length} action${actions.length === 1 ? '' : 's'}). Do not restate the changes — the user can see the diff. Move on to the next pending task or summarize briefly if done.`;
        return {
            output: `${summary}\n\n${results.join('\n')}`,
            isError: allFailed
        };
    }
};
1218
+ exports.applyPatchTool = applyPatchTool;
1219
+ /**
1220
+ * Compare a language-adapter's validation errors before and after an
1221
+ * edit and decide whether the edit *introduced* anything new. Returns
1222
+ * true when the edit added errors the file didn't already have —
1223
+ * those should still block the write. Returns false when the post-edit
1224
+ * errors are a subset of the pre-edit errors (the file was already
1225
+ * broken in the same ways).
1226
+ *
1227
+ * We compare by extracting the unique LINES from each error string
1228
+ * and asking "are all after-lines also in before-lines?" This is
1229
+ * coarse — line numbers shift, error indices change — but in practice
1230
+ * TypeScript / ESLint / etc. emit one error per line and the line
1231
+ * content (path + diagnostic + message) is stable enough that exact
1232
+ * match catches the common case. Only after-edit lines with no
1232
+ * exact-content counterpart before the edit count as newly introduced.
1234
+ */
1235
/**
 * Interpret the `args` value for run_command / watch_command as a JSON
 * array of strings when it is written that way.
 *
 * Some models inline `args` as a JSON literal such as
 * `"[\"pr\",\"create\",\"--title\",\"x\"]"`. Tokenising that text on
 * whitespace would hand the whole literal to the command as a single
 * argument, so callers try this parser first.
 *
 * @param {string} argsString Raw `args` text from the tool call.
 * @returns {string[] | null} The decoded array when the trimmed input is a
 *   JSON array whose elements are all strings; otherwise `null`, in which
 *   case the caller is expected to fall back to its own tokenisation.
 */
function maybeParseJsonArrayArgs(argsString) {
    const candidate = argsString.trim();
    const looksLikeArray = candidate.startsWith('[') && candidate.endsWith(']');
    if (!looksLikeArray) {
        return null;
    }
    let decoded;
    try {
        decoded = JSON.parse(candidate);
    }
    catch {
        // Bracketed but not valid JSON — treat as "not a JSON array".
        return null;
    }
    const isStringArray = Array.isArray(decoded) && decoded.every((entry) => typeof entry === 'string');
    return isStringArray ? decoded : null;
}
1265
/**
 * Decide whether an edit introduced validation errors the file did not
 * already have.
 *
 * Both error strings are split into lines; each line is trimmed, has its
 * position-bearing tokens replaced with placeholders, and empty lines are
 * dropped. The edit counts as introducing errors when any normalised
 * after-line is missing from the normalised before-lines. The comparison is
 * set-based, so repeated identical messages are not counted.
 *
 * @param {string | null | undefined} before Validation error text for the
 *   content before the edit (nullish/blank means "no errors").
 * @param {string | null | undefined} after Validation error text for the
 *   content after the edit (nullish/blank means "no errors").
 * @returns {boolean} `true` when the edit added at least one error line that
 *   was not present before; `false` when the after-errors are a subset of
 *   the before-errors or there are no after-errors.
 */
function introducedNewErrors(before, after) {
    const afterText = after ?? '';
    if (!afterText.trim())
        return false;
    const beforeText = before ?? '';
    if (!beforeText.trim())
        return true; // before was clean, after isn't — definitely introduced.
    // Positions shift whenever content above or before an error changes, even
    // though the underlying error is the same. Replace them with placeholders
    // so a moved error is not mistaken for a new one.
    const stripPositions = (line) => line
        .replace(/\bat position \d+/gi, 'at position N')
        // "line 5", "line 5 column 3", "line 5, column 3", "ln 5 col 3"
        .replace(/\b(?:line|ln) \d+(?:,? (?:column|col))?(?: \d+)?/gi, 'line N')
        // "file.ts(12,5): ..." — the (line,col) form used by tsc diagnostics
        .replace(/\(\d+,\s*\d+\)/g, '(N,N)')
        .replace(/:\s*\d+:\d+/g, ':N:N') // file:line:col → file:N:N
        .replace(/\bcharacter \d+/gi, 'character N')
        .replace(/\boffset \d+/gi, 'offset N');
    const toLineSet = (text) => new Set(text
        .split('\n')
        .map((line) => stripPositions(line.trim()))
        .filter((line) => line.length > 0));
    const beforeSet = toLineSet(beforeText);
    for (const line of toLineSet(afterText)) {
        if (!beforeSet.has(line))
            return true;
    }
    return false;
}
1298
/**
 * Apply a single-file unified-diff payload.
 *
 * The target path is the explicit `pathOverride` when it is a non-blank
 * string; otherwise it comes from the parsed diff's new-file path, then its
 * old-file path, with a leading `a/` or `b/` prefix stripped. Relative paths
 * are resolved against `ctx.workspaceRoot`.
 *
 * No upfront read-before-edit rejection is applied here: the file is read
 * and the hunks are matched against its current content, and a "you have not
 * read this file" hint is appended only when matching fails and the host
 * reports the file as unread. When `ctx.languageAdapters` is present the
 * patched content is validated, and the write is blocked only if the patch
 * introduced errors the original content did not already have.
 *
 * @param {string} patchText Unified diff text.
 * @param {string | undefined} pathOverride Optional explicit target path.
 * @param {object} ctx Tool context (workspaceRoot, readFile, writeFile and
 *   optionally hasFileBeenRead / languageAdapters).
 * @returns {Promise<{output: string, isError: boolean}>} Tool result.
 */
async function executeUnifiedDiffPatch(patchText, pathOverride, ctx) {
    const errorText = (err) => (err instanceof Error ? err.message : String(err));
    const parsed = (0, unified_patch_1.parseUnifiedPatch)(patchText);
    if (!parsed) {
        return {
            output: 'apply_patch rejected: input looked like a unified diff but contains no `@@` hunks. Emit at least one hunk header with the form `@@ -<old_start>,<old_count> +<new_start>,<new_count> @@`.',
            isError: true
        };
    }
    // Resolve path: explicit param wins, then the new-file header, then the
    // old-file header. A blank or whitespace-only override must NOT shadow a
    // usable header path, so fall through on empty rather than only on
    // null/undefined.
    const headerPath = parsed.newPath ?? parsed.oldPath;
    const stripped = headerPath?.replace(/^[ab]\/+/, '');
    const explicitPath = typeof pathOverride === 'string' ? pathOverride.trim() : '';
    const relPath = explicitPath || (stripped ?? '').trim();
    if (!relPath) {
        return {
            output: 'apply_patch rejected: no path. The unified diff has no `+++ b/<path>` header AND no explicit `path` param was provided. Either include the headers or pass `path` alongside the patch.',
            isError: true
        };
    }
    const absPath = isAbsolutePath(relPath) ? relPath : `${ctx.workspaceRoot}/${relPath}`;
    // Read-then-patch is allowed: hunk context self-validates against the
    // current content, so a patch built from stale memory fails in
    // applyParsedPatch below instead of being rejected upfront.
    let before;
    try {
        before = await ctx.readFile(absPath);
    }
    catch (err) {
        return { output: `Error reading "${relPath}": ${errorText(err)}`, isError: true };
    }
    const result = (0, unified_patch_1.applyParsedPatch)(before, parsed);
    if (!result.ok) {
        // Only nag about read_file when the host can tell us the file was
        // never read in this conversation.
        const haveRead = !ctx.hasFileBeenRead || ctx.hasFileBeenRead(absPath);
        const readHint = haveRead
            ? ''
            : `\n\nYou have not called read_file on "${relPath}" in this conversation. If your hunk context was reconstructed from memory, that's almost certainly why the match failed — call read_file first, then retry with the verbatim text.`;
        const ctxLine = result.contextSnippet ? `\n\nFile content near the expected position:\n${result.contextSnippet}` : '';
        return {
            output: `apply_patch failed: ${result.reason}${readHint}${ctxLine}`,
            isError: true
        };
    }
    if (result.next === before) {
        return { output: 'apply_patch produced no change. Either the patch is empty or it duplicates content already in the file.', isError: true };
    }
    if (ctx.languageAdapters) {
        const afterValidation = await ctx.languageAdapters.validate(absPath, result.next, ctx);
        if (!afterValidation.ok) {
            // Pre-existing-error guard: validate the original content too and
            // block the write only when the patch added new errors.
            const beforeValidation = await ctx.languageAdapters.validate(absPath, before, ctx);
            if (introducedNewErrors(beforeValidation.error, afterValidation.error)) {
                return {
                    output: `Validation failed after apply_patch on "${relPath}":\n${afterValidation.error}\n\nThe file was NOT written. Fix the patch and retry.`,
                    isError: true
                };
            }
        }
    }
    try {
        await ctx.writeFile(absPath, result.next);
    }
    catch (err) {
        return { output: `Error writing "${relPath}": ${errorText(err)}`, isError: true };
    }
    const lineCount = result.next.split('\n').length;
    const hunkCount = parsed.hunks.length;
    return {
        output: `Applied ${hunkCount} hunk${hunkCount === 1 ? '' : 's'} to "${relPath}" (${lineCount} lines after).`,
        isError: false
    };
}
1388
/**
 * Build a hint for the case where a `find` string failed to match because
 * its first line is present in the file with different surrounding
 * whitespace.
 *
 * Only the first line of `find` is considered. The first source line whose
 * trimmed text equals the trimmed first line is taken as the candidate; a
 * hint is produced only when that candidate starts with whitespace.
 *
 * @param {string} source Full file content.
 * @param {string} find The text that failed to match.
 * @returns {string} A one-sentence hint (with a trailing space), or `''`
 *   when the first line is shorter than 4 characters after trimming, no
 *   candidate exists, or the candidate has no leading whitespace.
 */
function findIndentationHint(source, find) {
    const [leadingLine] = find.split('\n', 1);
    const needle = leadingLine.trim();
    if (needle.length < 4) {
        return '';
    }
    let candidate;
    for (const sourceLine of source.split('\n')) {
        if (sourceLine.trim() === needle) {
            candidate = sourceLine;
            break;
        }
    }
    if (candidate === undefined || candidate === needle) {
        return '';
    }
    // Leading whitespace = whatever trimStart() would remove.
    const indentWidth = candidate.length - candidate.trimStart().length;
    if (indentWidth === 0) {
        return '';
    }
    const visibleIndent = candidate.slice(0, indentWidth).split('\t').join('\\t');
    return `Hint: the target line exists in the file but begins with "${visibleIndent}" whitespace — your \`find\` is missing that indent. `;
}
1406
/**
 * When a `find` string matches nowhere in the file, produce a numbered
 * snippet of the file around the line that most resembles it, so the caller
 * can show what the file actually contains.
 *
 * Strategy: take the first non-blank line of `find`, split it into word
 * tokens of 3+ characters, and score every source line by the share of
 * DISTINCT tokens it has in common (hits / max(findTokens, lineTokens)).
 * Line tokens are de-duplicated so a line that repeats one matching word
 * cannot outscore a line that matches several different words. Ties go to
 * the earlier line.
 *
 * @param {string} source Full file content.
 * @param {string} find The text that failed to match.
 * @returns {string} A message with up to 3 lines of context on each side of
 *   the best line (marked with `►`), or `''` when the first line is shorter
 *   than 8 characters, has fewer than 2 usable tokens, or the best score is
 *   below 0.4.
 */
function nearestMatchSnippet(source, find) {
    const findFirstLine = find.split('\n').find((l) => l.trim().length > 0)?.trim();
    if (!findFirstLine || findFirstLine.length < 8)
        return '';
    const tokenize = (text) => text.split(/[^\w]+/).filter((t) => t.length >= 3);
    const findTokens = new Set(tokenize(findFirstLine));
    if (findTokens.size < 2)
        return '';
    const lines = source.split('\n');
    let bestLine = -1;
    let bestScore = 0;
    for (let i = 0; i < lines.length; i++) {
        // Distinct tokens only: counting repeats would let "foo foo foo foo"
        // register four hits for a single shared word.
        const lineTokens = new Set(tokenize(lines[i]));
        if (lineTokens.size === 0)
            continue;
        let hits = 0;
        for (const t of lineTokens) {
            if (findTokens.has(t))
                hits++;
        }
        // Normalise by the larger token count so a long line with a couple
        // of matches doesn't beat a short line where everything matches.
        const score = hits / Math.max(findTokens.size, lineTokens.size);
        if (score > bestScore) {
            bestScore = score;
            bestLine = i;
        }
    }
    // Below 0.4 overlap the snippet is treated as noise and suppressed.
    if (bestLine < 0 || bestScore < 0.4)
        return '';
    const start = Math.max(0, bestLine - 3);
    const end = Math.min(lines.length, bestLine + 4);
    const widest = String(end).length;
    const snippet = lines
        .slice(start, end)
        .map((line, i) => {
            const lineNum = start + i + 1;
            const marker = (start + i) === bestLine ? '►' : ' ';
            return `${marker} ${String(lineNum).padStart(widest, ' ')} │ ${line}`;
        })
        .join('\n');
    return `\n\nClosest match in the file (line ${bestLine + 1}):\n\n${snippet}\n\nIf the marked line is what you meant to edit, copy its exact text into \`find\` (verbatim, including whitespace) and retry.`;
}
1464
+ // ── list_files ─────────────────────────────────────────────────────────────────
1465
/**
 * `list_files` tool: glob-based file listing.
 *
 * The glob is resolved by `ctx.listFiles(pattern, cwd)`. `cwd` defaults to
 * the workspace root; a relative `cwd` is joined onto the workspace root and
 * an absolute one is passed through unchanged. At most the first 200 matches
 * are printed, with a trailer noting the total when the list was cut.
 */
const listFilesTool = {
    name: 'list_files',
    description: 'List files matching a glob pattern. Searches the workspace root by default; pass an absolute `cwd` to list anywhere else on disk (user home, /tmp, etc). Returns a newline-separated list of file paths. NOTE: glob is matched relative to `cwd`. To find a repo or directory anywhere on the user\'s machine when you don\'t know the path, prefer `run_command` with `find ~ -type d -name "<name>" 2>/dev/null` — list_files alone won\'t walk the whole home tree.',
    parameters: [
        { name: 'pattern', description: 'Glob pattern (e.g. "*.json", "src/**/*.ts", "**/*.md"). Use "*" to match everything in the target directory. Use "**/X" to recursively find X under cwd.', required: true },
        { name: 'cwd', description: 'Directory to search in. Defaults to the workspace root. Accepts absolute paths like "/Users/name/Desktop" or "~" for the user home (optional)' }
    ],
    async execute(params, ctx) {
        const pattern = params.pattern?.trim();
        if (!pattern) {
            return { output: 'Error: pattern parameter is required', isError: true };
        }
        // Work out the search directory: default, absolute, or workspace-relative.
        let searchDir = ctx.workspaceRoot;
        if (params.cwd) {
            searchDir = isAbsolutePath(params.cwd)
                ? params.cwd
                : `${ctx.workspaceRoot}/${params.cwd}`;
        }
        let matches;
        try {
            matches = await ctx.listFiles(pattern, searchDir);
        }
        catch (err) {
            const reason = err instanceof Error ? err.message : String(err);
            return { output: `Error listing files: ${reason}`, isError: true };
        }
        const total = matches.length;
        if (total === 0) {
            return { output: `No files matched pattern "${pattern}"` };
        }
        const shown = matches.slice(0, 200).join('\n');
        let trailer = '';
        if (total > 200) {
            trailer = `\n\n[list_files: showing first 200 of ${total} files]`;
        }
        return { output: `${total} file(s) matched "${pattern}":\n\n${shown}${trailer}` };
    }
};
1492
+ exports.listFilesTool = listFilesTool;
1493
+ // ── ls ─────────────────────────────────────────────────────────────────────────
1494
+ // Dead-simple directory listing. Exists alongside list_files because small
1495
+ // models (<= 7B) reliably skip the glob+cwd combo but handle single-path
1496
+ // tools correctly. If the user asks "what's in ~/Desktop" the model can just
1497
+ // call ls(path="~/Desktop") instead of figuring out the right cwd argument.
1498
/**
 * `ls` tool: non-recursive listing of a single directory.
 *
 * Relative paths are joined onto the workspace root; absolute paths are
 * passed through. When the host provides `listDirectoryEntries` its result
 * is reported as-is. Otherwise the tool falls back to
 * `ctx.listFiles('*', dir)`, strips the directory prefix from each result,
 * sorts the names, and labels the output as files-only.
 */
const lsTool = {
    name: 'ls',
    description: 'List immediate files and folders inside a directory. Non-recursive. Use this for "what is in folder X" style questions — especially for directories outside the workspace like "~/Desktop", "~/Downloads", "/tmp". For recursive globs use list_files instead.',
    parameters: [
        { name: 'path', description: 'Directory path. Absolute ("/Users/name/Desktop"), tilde-prefixed ("~/Desktop"), or relative to the workspace root (".", "src").', required: true }
    ],
    async execute(params, ctx) {
        const raw = params.path?.trim();
        if (!raw) {
            return { output: 'Error: path parameter is required', isError: true };
        }
        // Relative paths hang off the workspace root.
        const resolved = isAbsolutePath(raw) ? raw : `${ctx.workspaceRoot}/${raw}`;
        const emptyResult = () => ({ output: `(empty or not found: ${raw})` });
        const countLabel = (count) => `${count} entr${count === 1 ? 'y' : 'ies'}`;
        try {
            // Preferred path: the host can enumerate directory entries
            // directly, so subdirectories are included in the listing.
            if (ctx.listDirectoryEntries) {
                const entries = await ctx.listDirectoryEntries(resolved);
                if (entries.length === 0) {
                    return emptyResult();
                }
                return { output: `${countLabel(entries.length)} in ${raw}:\n${entries.join('\n')}` };
            }
            // Fallback for hosts without listDirectoryEntries: glob for
            // files and report them relative to the directory.
            const found = await ctx.listFiles('*', resolved);
            if (found.length === 0) {
                return emptyResult();
            }
            const dirPrefix = resolved.endsWith('/') ? resolved : resolved + '/';
            const relativeNames = found
                .map((file) => (file.startsWith(dirPrefix) ? file.slice(dirPrefix.length) : file))
                .sort();
            return { output: `${countLabel(relativeNames.length)} in ${raw} (files only — host does not support directory listing):\n${relativeNames.join('\n')}` };
        }
        catch (err) {
            const reason = err instanceof Error ? err.message : String(err);
            return { output: `Error listing ${raw}: ${reason}`, isError: true };
        }
    }
};
1540
+ exports.lsTool = lsTool;
1541
+ // ── find_directory ─────────────────────────────────────────────────────────────
1542
+ // Cross-repo discovery. When the user asks the agent to do work in a repo
1543
+ // that lives outside the current workspace ("switch to the auth-api repo",
1544
+ // "edit the stt-api Dockerfile") the model used to fall back to asking
1545
+ // "where is that repo?" — frustrating because the user already told us the
1546
+ // name. This tool sweeps the standard clone locations (~/Documents/GitHub,
1547
+ // ~/Projects, ~/code, ~/dev, ~/repos, ~/work, ~/src) plus the parent of
1548
+ // the active workspace and returns matching folder names.
1549
/**
 * Break a folder name or query into lowercase tokens for fuzzy matching.
 *
 * Token boundaries are:
 *   - whitespace
 *   - path separators (`/`, `\`)
 *   - hyphens, underscores, dots
 *   - camelCase / PascalCase transitions (`AuthApi` → `auth`, `api`;
 *     `HTTPServer` → `http`, `server`)
 *
 * This is what lets the query "auth api" line up with `AuthApi`,
 * `auth-api`, `auth_api`, or `authApi`.
 *
 * @param s raw name or query text.
 * @returns the non-empty lowercase tokens, in order of appearance.
 */
function repoTokenize(s) {
    // Open up case transitions first; lowercasing afterwards would erase them.
    const withCaseBreaks = s
        .replace(/([a-z0-9])([A-Z])/g, '$1 $2')
        .replace(/([A-Z]+)([A-Z][a-z])/g, '$1 $2');
    return withCaseBreaks
        .toLowerCase()
        .split(/[-_./\\\s]+/)
        .filter((token) => token.length > 0);
}
1567
/**
 * Rate how well a folder name answers a query; larger is better.
 *
 * Score ladder:
 *   1000  names are equal ignoring case
 *    600  every query token occurs inside some name token AND both sides
 *         have the same number of tokens
 *    500  every query token occurs inside some name token
 *    100  the lowercased query is a plain substring of the lowercased name
 *      0  no match (also returned when the query has no tokens at all)
 *
 * @param name folder name being considered.
 * @param query what the caller is looking for.
 * @returns one of the scores listed above.
 */
function repoMatchScore(name, query) {
    const haystack = name.toLowerCase();
    const needle = query.toLowerCase();
    if (haystack === needle) {
        return 1000;
    }
    const wanted = repoTokenize(query);
    if (wanted.length === 0) {
        return 0;
    }
    const available = repoTokenize(name);
    const everyTokenFound = wanted.every((queryToken) => available.some((nameToken) => nameToken.includes(queryToken)));
    if (everyTokenFound) {
        // Equal token counts signal a tighter fit than, say, "auth"
        // landing on "auth-api-v2", so they earn a bonus.
        return available.length === wanted.length ? 600 : 500;
    }
    // Last resort: raw substring, so partial queries still surface something.
    return haystack.includes(needle) ? 100 : 0;
}
1599
/**
 * Tool that locates a folder by name in a fixed set of candidate parent
 * directories (user-configured roots, the parent of the workspace, and a
 * list of common clone locations), scanning each one level deep.
 */
const findDirectoryTool = {
    name: 'find_directory',
    description: 'Locate a repo or folder on the user\'s machine when it is NOT in the current workspace. Searches the user\'s configured `repos.roots` PLUS common clone parents (~/Documents/GitHub, ~/Projects, ~/code, ~/dev, ~/repos, ~/work, ~/src) PLUS the parent of the current workspace, one level deep. Token-based fuzzy match — "auth api" finds AuthApi, auth-api, or auth_api. Call this BEFORE asking the user where a repo lives. Returns absolute (or tilde-prefixed) paths the agent can pass to read_file, list_files, run_command, etc.',
    parameters: [
        { name: 'name', description: 'Folder/repo name to find. Spaces, hyphens, underscores, and camelCase boundaries are all treated as token separators — "auth api" matches AuthApi, "stt api" matches stt-api or sttApi, etc.', required: true }
    ],
    /**
     * Search the candidate parents for directories matching `params.name`.
     *
     * @param params tool arguments; `params.name` is the folder to find.
     * @param ctx host context: `workspaceRoot`, `listDirectoryEntries`
     *   (required for this tool), and optional `customRepoRoots`.
     * @returns `{ output }` with grouped hits or a "no match" report;
     *   `{ output, isError: true }` when the name is missing or the host
     *   cannot enumerate directories.
     */
    async execute(params, ctx) {
        const query = params.name?.trim();
        if (!query) {
            return { output: 'Error: name parameter is required', isError: true };
        }
        if (!ctx.listDirectoryEntries) {
            return { output: 'Error: this host does not support directory enumeration. Fall back to `run_command find ~ -maxdepth 4 -type d -iname "*<name>*"`.', isError: true };
        }
        // Strip the last path segment from workspaceRoot to get its parent —
        // sibling repos sit there in monorepo + multi-repo workflows. User-
        // configured roots come FIRST so the user's stated locations are
        // searched before the built-in defaults.
        const workspaceParent = ctx.workspaceRoot.replace(/[\\/][^\\/]+[\\/]?$/, '') || ctx.workspaceRoot;
        const candidates = [
            ...(ctx.customRepoRoots ?? []),
            workspaceParent,
            '~/Documents/GitHub',
            '~/GitHub',
            '~/Projects',
            '~/code',
            '~/dev',
            '~/repos',
            '~/work',
            '~/src',
            '~'
        ];
        // A user root frequently repeats a built-in one ("~/Projects" vs
        // "~/Projects/"). Keep only the first spelling of each so the host
        // is not asked to enumerate the same directory twice and the
        // "no match" report does not list it twice.
        const knownRoots = new Set();
        const parents = candidates.filter((root) => {
            const text = String(root);
            const key = text.replace(/[\\/]+$/, '') || text;
            if (knownRoots.has(key)) {
                return false;
            }
            knownRoots.add(key);
            return true;
        });
        // Join without doubling the separator when the root already ends in one.
        const childPath = (parent, child) => (/[\\/]$/.test(parent) ? `${parent}${child}` : `${parent}/${child}`);
        const seen = new Set();
        const hits = [];
        for (const parent of parents) {
            try {
                const entries = await ctx.listDirectoryEntries(parent);
                for (const entry of entries) {
                    // Only entries carrying a trailing slash are treated as directories.
                    if (!entry.endsWith('/')) {
                        continue;
                    }
                    const name = entry.slice(0, -1);
                    const lower = name.toLowerCase();
                    // Dedup by lowercased basename — tilde paths and the workspace
                    // parent often resolve to overlapping directories; reporting the
                    // same hit twice is noise.
                    if (seen.has(lower)) {
                        continue;
                    }
                    const score = repoMatchScore(name, query);
                    if (score > 0) {
                        seen.add(lower);
                        hits.push({ path: childPath(parent, name), name, score });
                    }
                }
            }
            catch {
                // Parent dir doesn't exist on this machine — normal, skip silently.
            }
        }
        if (hits.length === 0) {
            return { output: `No directories matched "${query}" in:\n${parents.map((p) => ` - ${p}`).join('\n')}\n\nIf the user keeps repos elsewhere, ask for the absolute path or have them run \`/repos add <path>\` to teach Bandit about a new clone parent.` };
        }
        // Best score first; among equal scores the shorter name wins.
        hits.sort((a, b) => (b.score - a.score) || (a.name.length - b.name.length));
        const MAX = 20;
        const top = hits.slice(0, MAX);
        const omitted = hits.length - top.length;
        // Present the hits in score classes, separated by blank lines.
        const sections = [
            ['Exact', top.filter((h) => h.score >= 1000)],
            ['Token', top.filter((h) => h.score >= 500 && h.score < 1000)],
            ['Substring', top.filter((h) => h.score < 500)]
        ];
        const lines = [];
        for (const [label, group] of sections) {
            if (!group.length) {
                continue;
            }
            if (lines.length) {
                lines.push('');
            }
            lines.push(`${label} match${group.length === 1 ? '' : 'es'} for "${query}":`);
            for (const h of group) {
                lines.push(h.path);
            }
        }
        if (omitted > 0) {
            lines.push(`\n[find_directory: showing first ${MAX} of ${hits.length} matches — narrow the query to see the rest]`);
        }
        return { output: lines.join('\n') };
    }
};
1694
+ exports.findDirectoryTool = findDirectoryTool;
1695
+ // ── search_code ────────────────────────────────────────────────────────────────
1696
/**
 * Tool that forwards a content search to the host's `searchCode` and
 * returns the (possibly truncated) textual result.
 */
const searchCodeTool = {
    name: 'search_code',
    description: 'Search for a pattern in file contents using regex. Returns matching lines with file paths and line numbers.',
    parameters: [
        { name: 'pattern', description: 'Regex or literal string to search for (e.g. "function login", "TODO:", "interface User")', required: true },
        { name: 'file_glob', description: 'Optional glob to restrict which files are searched (e.g. "*.ts", "src/**/*.tsx")' },
        { name: 'cwd', description: 'Directory to search in. Defaults to the workspace root. Accepts absolute paths for searching outside the workspace (optional)' }
    ],
    /**
     * Run the search.
     *
     * @param params tool arguments: `pattern` (required), `file_glob`, `cwd`.
     * @param ctx host context providing `workspaceRoot` and `searchCode`.
     * @returns `{ output }` with the matches or a "no matches" notice;
     *   `{ output, isError: true }` when the pattern is missing or the
     *   host call throws.
     */
    async execute(params, ctx) {
        const needle = params.pattern?.trim();
        if (!needle) {
            return { output: 'Error: pattern parameter is required', isError: true };
        }
        // No cwd → workspace root; relative cwd → anchored at the root;
        // anything isAbsolutePath accepts → used as given.
        let searchRoot = ctx.workspaceRoot;
        if (params.cwd) {
            searchRoot = isAbsolutePath(params.cwd)
                ? params.cwd
                : `${ctx.workspaceRoot}/${params.cwd}`;
        }
        try {
            const matches = await ctx.searchCode(needle, searchRoot, params.file_glob);
            if (matches.trim()) {
                return { output: truncate(matches, MAX_SEARCH_CHARS, 'search_code') };
            }
            return { output: `No matches found for "${needle}"` };
        }
        catch (err) {
            const reason = err instanceof Error ? err.message : String(err);
            return { output: `Error searching code: ${reason}`, isError: true };
        }
    }
};
1722
+ exports.searchCodeTool = searchCodeTool;
1723
+ // ── run_command ────────────────────────────────────────────────────────────────
1724
/**
 * Executable names that run_command / watch_command will spawn. The
 * lookup is an exact, case-sensitive match on the command's basename.
 *
 * The entries are grouped by ecosystem so additions are obvious. What the
 * list contains today:
 *   - Build / test / inspect tools for the common language stacks.
 *   - File, text, archive and system utilities.
 *   - HTTP clients (curl/wget), database CLIs, cloud CLIs (aws, gcloud,
 *     az), infra-as-code tools (terraform, ansible, pulumi) and ops CLIs
 *     (brew, kubectl, helm).
 *   - No shell interpreters (bash/sh/zsh/pwsh) and no `rm`.
 */
const ALLOWED_COMMANDS = new Set(Object.values({
    // Node / JS ecosystem
    node: [
        'npm', 'pnpm', 'yarn', 'npx', 'node', 'ts-node', 'tsx',
        'tsc', 'eslint', 'prettier',
        'jest', 'vitest', 'mocha', 'playwright'
    ],
    // Python
    python: ['python', 'python3', 'pip', 'pip3', 'poetry', 'uv', 'pytest', 'ruff', 'mypy', 'black'],
    // Version control. `gh` is the GitHub CLI (PR / issue / release
    // operations) — the remote-side counterpart to git, so that a
    // "make a PR for me" task does not dead-end at the local commit.
    versionControl: ['git', 'gh'],
    // Rust
    rust: ['cargo', 'rustc', 'rustup'],
    // Go
    go: ['go', 'gofmt'],
    // .NET (the dotnet CLI covers build/test/run)
    dotnet: ['dotnet', 'nuget'],
    // Java / JVM
    jvm: ['mvn', 'gradle', 'gradlew', './gradlew', 'java', 'javac', 'kotlin', 'kotlinc'],
    // Ruby
    ruby: ['ruby', 'bundle', 'bundler', 'rake', 'rspec', 'gem'],
    // PHP
    php: ['php', 'composer', 'phpunit'],
    // Swift / iOS / macOS
    swift: ['swift', 'xcodebuild', 'pod'],
    // macOS automation — osascript runs AppleScript/JXA; the OS-level
    // TCC permissions (Automation / Full Disk Access) gate what a script
    // can actually reach.
    macAutomation: ['osascript'],
    // C / C++ / generic build
    native: ['make', 'cmake', 'ninja', 'gcc', 'clang', 'g++', 'clang++'],
    // Containers — narrowing destructive flags is left to BLOCKED_PATTERNS
    // / user policy.
    containers: ['docker', 'docker-compose', 'podman'],
    // File inspection / diagnostics
    inspect: [
        'ls', 'cat', 'echo', 'pwd', 'head', 'tail', 'wc', 'file', 'stat', 'which',
        'grep', 'rg', 'find', 'tree'
    ],
    // Filesystem mutation — needed for scaffolding (create a folder,
    // move/rename, duplicate). `rm` is deliberately absent: single-file
    // deletes go through the dedicated delete_file tool.
    fsMutation: ['mkdir', 'mv', 'cp', 'touch', 'ln', 'chmod'],
    // JSON / YAML transform utilities
    structuredData: ['jq', 'yq'],
    // Text processing — stream editors and pipeline staples for small
    // transforms that don't need apply_edit.
    text: ['sed', 'awk', 'diff', 'sort', 'uniq', 'cut', 'tr', 'xargs'],
    // System / env diagnostics
    system: [
        'date', 'env', 'printenv', 'base64', 'df', 'du', 'ps', 'top',
        'id', 'whoami', 'hostname', 'uname', 'time'
    ],
    // Process management — "kill the dev server then restart on a
    // different port" workflows.
    processes: ['pkill', 'kill', 'lsof'],
    // Database CLIs — "show me the users table", "run this migration".
    databases: ['psql', 'mysql', 'sqlite3', 'redis-cli', 'mongosh', 'mongo'],
    // Cloud provider CLIs — scaffolding infra or inspecting deployed
    // resources.
    cloud: ['aws', 'gcloud', 'az'],
    // Network diagnostics — host/port/dns checks.
    network: ['ping', 'dig', 'nslookup', 'traceroute', 'host', 'nc'],
    // Modern JS runtimes — bun and deno parallel node/npx in many repos.
    jsRuntimes: ['bun', 'deno', 'bunx'],
    // GitLab CLI — keeps GitLab on par with the gh support above.
    gitlab: ['glab'],
    // Infra-as-code
    infraAsCode: ['terraform', 'ansible', 'ansible-playbook', 'pulumi'],
    // Archive utilities — used by builds and release flows.
    archives: ['tar', 'zip', 'unzip', 'gzip', 'gunzip'],
    // macOS convenience — clipboard + Finder/default-app open.
    macConvenience: ['pbcopy', 'pbpaste', 'open'],
    // Package managers / ops CLIs — brew installs are slow and
    // kubectl/helm can take prod-facing actions; listed because the
    // workflow needs them.
    ops: ['brew', 'kubectl', 'helm'],
    // HTTP clients — for skills that need custom headers (-H), POST
    // bodies (-X POST) or form data (-d) beyond what web_fetch covers.
    http: ['curl', 'wget']
}).flat());
1841
/**
 * Regexes tested against the assembled command line (`cmd` + args) after
 * the allow-list check; a match rejects the call.
 *
 * Each pattern is anchored so it only fires on the dangerous word itself.
 * Unanchored substrings rejected ordinary invocations of allow-listed
 * tools — `git log --format=%H`, `eslint --format json`, or any path that
 * merely contains "rmdir".
 *
 *   rm -rf   — `rm` as a word followed by the -rf flag
 *   rmdir    — as a whole word
 *   format   — only as a standalone argument (the disk-format command),
 *              not as part of a flag such as --format
 *   mkfs     — word start, so mkfs.ext4 and friends still match
 *   dd if=   — `dd` as a word followed by an if= operand
 */
const BLOCKED_PATTERNS = [
    /\brm\s+-rf/,
    /\brmdir\b/,
    /(?:^|\s)format(?:\s|$)/,
    /\bmkfs/,
    /\bdd\s+if=/
];
1842
/**
 * Lowercased CLI name → the command that installs it.
 *
 * rejectionMessage consults this table when run_command refuses a
 * command, so the error can point at an install step ("install X via
 * `brew install X`, then retry") instead of being a dead end. Several
 * keys are aliases for the same package (exa → eza, http → httpie,
 * aws → awscli, fly → flyctl, yt → yt-dlp), and some keys also appear in
 * ALLOWED_COMMANDS.
 */
const INSTALL_HINTS = (() => {
    const brew = (formula) => `brew install ${formula}`;
    const pipx = (pkg) => `pipx install ${pkg}`;
    const npmGlobal = (pkg) => `npm install -g ${pkg}`;
    return {
        terraform: brew('terraform'),
        ripgrep: brew('ripgrep'),
        fzf: brew('fzf'),
        bat: brew('bat'),
        eza: brew('eza'),
        exa: brew('eza'),
        fd: brew('fd'),
        tldr: brew('tldr'),
        httpie: brew('httpie'),
        http: brew('httpie'),
        ngrok: brew('ngrok'),
        rclone: brew('rclone'),
        ffmpeg: brew('ffmpeg'),
        imagemagick: brew('imagemagick'),
        yt: pipx('yt-dlp'),
        'yt-dlp': pipx('yt-dlp'),
        vercel: npmGlobal('vercel'),
        netlify: npmGlobal('netlify-cli'),
        wrangler: npmGlobal('wrangler'),
        pnpm: npmGlobal('pnpm'),
        yarn: npmGlobal('yarn'),
        bun: brew('oven-sh/bun/bun'),
        deno: brew('deno'),
        poetry: pipx('poetry'),
        pipx: brew('pipx'),
        uv: pipx('uv'),
        rye: 'curl -sSf https://rye-up.com/get | bash',
        awscli: brew('awscli'),
        aws: brew('awscli'),
        azd: brew('azd'),
        doctl: brew('doctl'),
        flyctl: brew('flyctl'),
        fly: brew('flyctl'),
        helm: brew('helm'),
        kubectx: brew('kubectx'),
        k9s: brew('k9s'),
        stern: brew('stern'),
        argocd: brew('argocd')
    };
})();
1891
/**
 * Lowercased command name → the registry tool call that should be used
 * instead of shelling out. When run_command is asked for one of these,
 * rejectionMessage steers the model to the dedicated tool rather than to
 * the `!`-prefix escape hatch, which would hand the work back to the user.
 */
const DEDICATED_TOOL_HINTS = (() => {
    // Both deletion commands map onto the same tool call.
    const deleteFileCall = 'delete_file({ path: "<file>" })';
    return {
        rm: deleteFileCall,
        unlink: deleteFileCall
    };
})();
1902
/**
 * Build the short error text returned when a command is not on the
 * run_command allow-list.
 *
 * The message is deliberately brief — what failed, the install or
 * dedicated-tool route when one is known, and the `!`-prefix escape hatch
 * as a last resort. A wall of text (such as the full allow-list) reads as
 * a fatal failure to small models and makes them abandon the turn.
 *
 * @param baseCmd basename of the rejected command, as supplied by the model.
 * @returns the message for the tool result.
 */
function rejectionMessage(baseCmd) {
    const lower = baseCmd.toLowerCase();
    // The hint tables are plain objects and `lower` is model-supplied text,
    // so look up OWN keys only. A bare `table[lower]` would resolve names
    // like "constructor" or "__proto__" to inherited members and splice a
    // function source or "[object Object]" into the message.
    const ownHint = (table) => (Object.prototype.hasOwnProperty.call(table, lower) ? table[lower] : undefined);
    // Shell interpreters are blocked by design. Steer the MODEL (not the
    // user) to call the binary directly: some models keep retrying the same
    // `bash -c "…"` wrapper after a rejection, so spell out the concrete
    // shape and the dedicated-tool alternatives.
    if (['bash', 'sh', 'zsh', 'fish', 'dash', 'ksh', 'pwsh', 'powershell'].includes(lower)) {
        return `"${baseCmd}" (a shell interpreter) is blocked. Do NOT wrap commands in \`${baseCmd} -c "…"\` — call the program directly: put the binary in cmd and the rest in args (e.g. cmd="diff", args="-rq dirA dirB"). For pipes or globs, run the steps separately or use the dedicated tools (\`search_code\` for grep, \`list_files\` for find).`;
    }
    const dedicated = ownHint(DEDICATED_TOOL_HINTS);
    if (dedicated) {
        // No `!`-prefix hint here: that would push the user, not the agent,
        // to act — wrong direction for a tool the agent should call itself.
        return `"${baseCmd}" is not in the run_command allow-list. Use the dedicated tool: ${dedicated}.`;
    }
    const hint = ownHint(INSTALL_HINTS);
    const installLine = hint
        ? `Install it first: run_command("${hint}"), then retry.`
        : `If it ships via a package manager already on the allow-list (brew, npm, pip, cargo, gem, go), install it first.`;
    return `"${baseCmd}" is not in the run_command allow-list. ${installLine} Or tell the user to type \`!${baseCmd} <args>\` in the composer — the \`!\`-prefix runs directly in their shell and bypasses the gate.`;
}
1931
/**
 * Split an argument string into argv entries the way a shell would,
 * honouring quotes and backslashes. A plain `split(/\s+/)` tears quoted
 * arguments apart — e.g. the single-quoted script body of
 * `osascript -e '...'` would be cut at every internal space.
 *
 * Rules:
 *   - Unquoted whitespace separates tokens.
 *   - Inside single quotes everything is literal; there are no escapes.
 *   - Inside double quotes a backslash escapes only `"`, `\`, `$` and
 *     `` ` ``; before any other character the backslash is kept as-is.
 *   - Outside quotes a backslash makes the next character literal. A
 *     trailing backslash with nothing after it is kept as-is.
 *   - The quote characters themselves are delimiters and are dropped.
 *   - An empty quoted string (`''` or `""`) still yields an empty token.
 *   - An unterminated quote runs to the end of the input.
 *
 * @param input raw argument string.
 * @returns the argv tokens in order.
 */
function shellTokenize(input) {
    const tokens = [];
    let buffer = '';
    // '' when outside quotes, otherwise the quote character we are inside.
    let quote = '';
    // True once the current token has begun, even if it is still empty
    // (that is how `''` produces an empty-string token).
    let started = false;
    const flush = () => {
        if (started || buffer !== '') {
            tokens.push(buffer);
        }
        buffer = '';
        started = false;
    };
    let pos = 0;
    while (pos < input.length) {
        const ch = input[pos];
        const next = input[pos + 1]; // undefined on the final character
        pos += 1;
        if (quote === '\'') {
            if (ch === '\'') {
                quote = '';
                started = true;
            }
            else {
                buffer += ch;
            }
            continue;
        }
        if (quote === '"') {
            if (ch === '"') {
                quote = '';
                started = true;
            }
            else if (ch === '\\' && next !== undefined && /["\\$`]/.test(next)) {
                buffer += next;
                pos += 1;
            }
            else {
                buffer += ch;
            }
            continue;
        }
        if (ch === '\'' || ch === '"') {
            quote = ch;
            started = true;
            continue;
        }
        if (ch === '\\' && next !== undefined) {
            buffer += next;
            pos += 1;
            started = true;
            continue;
        }
        if (/\s/.test(ch)) {
            flush();
            continue;
        }
        buffer += ch;
        started = true;
    }
    flush();
    return tokens;
}
2014
/**
 * Tool that runs one allow-listed program through the host's `runCommand`
 * and reports stdout, stderr and the exit code.
 */
const runCommandTool = {
    name: 'run_command',
    description: 'Run a shell command in the workspace and return the output. Allowed commands span common dev stacks: node/pnpm/npm/npx, python/pip/pytest, git, cargo, go, dotnet, mvn/gradle/java, ruby/bundle, php/composer, swift/xcodebuild, make/cmake, docker, package managers (brew, npm install -g, pip install, pipx, cargo install, gem install, go install), and read-only inspection tools (ls, cat, head, tail, grep, find, jq, yq). When the user asks you to install a CLI or package, run the install via the right package manager — the host\'s permission gate prompts the user before each invocation, so attempting an install is the correct behavior, not refusal. Only fall back to "ask the user to run it in their shell" when the command is genuinely outside the allow-list AND no package-manager equivalent exists. Call the binary directly via cmd/args (cmd="git", args="status") — NEVER wrap it in `bash -c` / `sh -c` / `zsh -c`; shell interpreters are blocked and the runner already spawns the program for you. For pipes or globs, use `search_code` (grep) or `list_files` (find), or run the steps separately.',
    parameters: [
        { name: 'cmd', description: 'The command to run (e.g. "npm", "tsc", "git")', required: true },
        { name: 'args', description: 'Space-separated arguments (e.g. "run build", "status", "--noEmit")' },
        { name: 'cwd', description: 'Working directory relative to workspace root (optional)' }
    ],
    /**
     * Validate and run a command.
     *
     * @param params tool arguments: `cmd` (required), `args`, `cwd`.
     * @param ctx host context providing `workspaceRoot` and `runCommand`.
     * @returns `{ output, isError }`; `isError` is true for a missing
     *   `cmd`, a command off the allow-list, a blocked pattern, a
     *   non-zero exit code, or a throwing host call.
     */
    async execute(params, ctx) {
        const commandLine = params.cmd?.trim();
        if (!commandLine) {
            return { output: 'Error: cmd parameter is required', isError: true };
        }
        // Some models cram the whole command line into `cmd`
        // ("npx create @angular/cli mqtt-app") instead of splitting it
        // across `cmd` / `args`. Peel the executable off the front before
        // the allow-list lookup — otherwise the lookup key is the entire
        // line, which can never match, and the model loops on a command
        // the user has already approved.
        let cmd = commandLine;
        let argsString = params.args ?? '';
        const preparsedArgs = maybeParseJsonArrayArgs(argsString);
        const splitAt = commandLine.search(/\s/);
        if (!preparsedArgs && splitAt > 0) {
            const inlineArgs = commandLine.slice(splitAt + 1).trim();
            cmd = commandLine.slice(0, splitAt);
            argsString = argsString ? `${inlineArgs} ${argsString}` : inlineArgs;
        }
        // The allow-list is keyed on the basename, so "/usr/bin/git"
        // is looked up as "git".
        const baseCmd = cmd.slice(cmd.lastIndexOf('/') + 1);
        if (!ALLOWED_COMMANDS.has(baseCmd)) {
            return { output: rejectionMessage(baseCmd), isError: true };
        }
        const renderedArgs = preparsedArgs ? preparsedArgs.join(' ') : argsString;
        const fullCommand = `${cmd} ${renderedArgs}`.trim();
        const tripped = BLOCKED_PATTERNS.find((pattern) => pattern.test(fullCommand));
        if (tripped) {
            return { output: `Error: command contains a blocked pattern (${tripped.source})`, isError: true };
        }
        const args = preparsedArgs ?? (argsString ? shellTokenize(argsString) : []);
        // Un-escape `\u003c` / `\u003e` in `git commit` arguments. Models
        // sometimes JSON-escape angle brackets defensively, which turns a
        // `Co-authored-by: Name <email>` trailer into
        // `Co-authored-by: Name \u003cemail\u003e`; the trailer needs the
        // literal `<...>` form to resolve the email. Limited to
        // `git commit` so the escape sequence stays searchable in other
        // commands, and applied to every argument because the message may
        // arrive as `-m <msg>` (two tokens) or `-m=<msg>` (one token).
        const targetsGit = cmd === 'git' || cmd.endsWith('/git');
        if (targetsGit && args[0] === 'commit') {
            args.forEach((arg, index) => {
                if (arg.includes('\\u003c') || arg.includes('\\u003e')) {
                    args[index] = arg.replace(/\\u003c/g, '<').replace(/\\u003e/g, '>');
                }
            });
        }
        let cwd = ctx.workspaceRoot;
        if (params.cwd) {
            cwd = isAbsolutePath(params.cwd)
                ? params.cwd
                : `${ctx.workspaceRoot}/${params.cwd}`;
        }
        try {
            const { stdout, stderr, exitCode } = await ctx.runCommand(cmd, args, cwd);
            // Empty streams are omitted; the exit code is always reported.
            const sections = [];
            const out = stdout.trim();
            const err = stderr.trim();
            if (out) {
                sections.push(`stdout:\n${out}`);
            }
            if (err) {
                sections.push(`stderr:\n${err}`);
            }
            sections.push(`exit code: ${exitCode}`);
            const output = truncate(sections.join('\n\n'), MAX_COMMAND_CHARS, 'run_command');
            return { output, isError: exitCode !== 0 };
        }
        catch (err) {
            const reason = err instanceof Error ? err.message : String(err);
            return { output: `Error running command "${cmd}": ${reason}`, isError: true };
        }
    }
};
2094
+ exports.runCommandTool = runCommandTool;
2095
+ // ── watch_command ──────────────────────────────────────────────────────────────
2096
+ //
2097
+ // Run a long-lived process for a bounded window and return what came out.
2098
+ // Useful for "start the dev server, watch for the error, decide what to fix"
2099
+ // flows that run_command can't model — run_command expects the process to
2100
+ // exit on its own and gives up at 30s. watch_command knows the process
2101
+ // might run forever, captures output for a bounded window, and SIGTERMs it
2102
+ // at the end so the agent can act on what came out.
2103
+ //
2104
+ // Same allow-list + blocked-pattern gating as run_command. Duration capped
2105
+ // at 60 seconds — anything longer is a sign the agent should refactor the
2106
+ // approach (e.g. write a separate test command that exits) rather than
2107
+ // hold the loop hostage. Output capped at MAX_COMMAND_CHARS.
2108
+ const WATCH_COMMAND_DEFAULT_SECONDS = 10;
2109
+ const WATCH_COMMAND_MAX_SECONDS = 60;
2110
/**
 * `watch_command` tool: runs a command through the host context for a bounded
 * window and reports whatever stdout/stderr it produced in that time.
 *
 * The command goes through the same allowlist (`ALLOWED_COMMANDS`) and
 * blocked-pattern (`BLOCKED_PATTERNS`) checks used elsewhere in this file
 * before anything is handed to the host.
 */
const watchCommandTool = {
    name: 'watch_command',
    description: 'Run a long-lived shell command and capture its stdout/stderr for a bounded duration. Use this for processes that don\'t exit on their own — dev servers (`npm run dev`), --watch test runners, log tailers. The command is killed at the end of the window so the agent can react to what was emitted. For one-shot commands that exit on their own, prefer run_command. Allowed commands: same set as run_command.',
    parameters: [
        { name: 'cmd', description: 'The command to run (e.g. "npm", "node", "python")', required: true },
        { name: 'args', description: 'Space-separated arguments (e.g. "run dev", "test --watch")' },
        { name: 'cwd', description: 'Working directory relative to workspace root (optional)' },
        { name: 'duration_seconds', description: `How long to watch the process before killing it. Default ${WATCH_COMMAND_DEFAULT_SECONDS}s, max ${WATCH_COMMAND_MAX_SECONDS}s.` }
    ],
    /**
     * Validate the request, then watch the command via the host.
     *
     * @param params Tool arguments: `cmd` (required), plus optional `args`,
     *   `cwd` and `duration_seconds`.
     * @param ctx Host context. Reads `workspaceRoot`, calls `watchCommand`
     *   when the host provides it and `runCommand` otherwise.
     * @returns `{ output, isError }`. Validation failures and thrown host
     *   errors are reported as an error result rather than rethrown.
     */
    async execute(params, ctx) {
        const trimmedCmd = params.cmd?.trim();
        if (!trimmedCmd) {
            return { output: 'Error: cmd parameter is required', isError: true };
        }

        // Two call shapes are accepted: cmd="npm" args="run dev", and
        // cmd="npm run dev" with empty args. The inline form is only split
        // apart when args was not already supplied as a JSON array.
        let executable = trimmedCmd;
        let argText = params.args ?? '';
        const jsonArgs = maybeParseJsonArrayArgs(argText);
        const splitAt = trimmedCmd.search(/\s/);
        if (!jsonArgs && splitAt > 0) {
            const inlinePart = trimmedCmd.slice(splitAt + 1).trim();
            executable = trimmedCmd.slice(0, splitAt);
            argText = argText ? `${inlinePart} ${argText}` : inlinePart;
        }

        // The allowlist is keyed on the last path segment of the executable.
        const programName = executable.split('/').pop() ?? executable;
        if (!ALLOWED_COMMANDS.has(programName)) {
            return { output: rejectionMessage(programName), isError: true };
        }

        // Blocked patterns are matched against the flattened command line.
        const renderedArgs = jsonArgs ? jsonArgs.join(' ') : argText;
        const commandLine = `${executable} ${renderedArgs}`.trim();
        for (const pattern of BLOCKED_PATTERNS) {
            if (pattern.test(commandLine)) {
                return { output: `Error: command contains a blocked pattern (${pattern.source})`, isError: true };
            }
        }

        let argv;
        if (jsonArgs != null) {
            argv = jsonArgs;
        }
        else if (argText) {
            argv = shellTokenize(argText);
        }
        else {
            argv = [];
        }

        let workingDir;
        if (!params.cwd) {
            workingDir = ctx.workspaceRoot;
        }
        else if (isAbsolutePath(params.cwd)) {
            workingDir = params.cwd;
        }
        else {
            workingDir = `${ctx.workspaceRoot}/${params.cwd}`;
        }

        // Missing, non-numeric or non-positive durations use the default;
        // anything larger than the maximum is clamped to it.
        const parsedSeconds = Number.parseInt(params.duration_seconds ?? '', 10);
        let windowSeconds = WATCH_COMMAND_DEFAULT_SECONDS;
        if (Number.isFinite(parsedSeconds) && parsedSeconds > 0) {
            windowSeconds = Math.min(parsedSeconds, WATCH_COMMAND_MAX_SECONDS);
        }
        const windowMs = windowSeconds * 1000;

        try {
            if (!ctx.watchCommand) {
                // Host has no watchCommand: run once via runCommand and say
                // so in the output, since nothing here enforces the window.
                const fallback = await ctx.runCommand(executable, argv, workingDir);
                const fallbackOut = fallback.stdout.trim();
                const fallbackErr = fallback.stderr.trim();
                const sections = [];
                if (fallbackOut) {
                    sections.push(`stdout:\n${fallbackOut}`);
                }
                if (fallbackErr) {
                    sections.push(`stderr:\n${fallbackErr}`);
                }
                sections.push(`exit code: ${fallback.exitCode}`);
                sections.push(`note: this host does not implement watch_command directly — fell back to run_command. Output reflects only what the process printed before it exited or the runCommand timeout fired.`);
                return {
                    output: truncate(sections.join('\n\n'), MAX_COMMAND_CHARS, 'watch_command'),
                    isError: fallback.exitCode !== 0
                };
            }

            const watched = await ctx.watchCommand(executable, argv, workingDir, windowMs);
            let outcome;
            if (watched.endedEarly) {
                outcome = `process exited on its own with code ${watched.exitCode ?? 'unknown'} before the ${windowSeconds}s window`;
            }
            else {
                outcome = `process was still running after ${windowSeconds}s — sent SIGTERM`;
            }

            const report = [`watched "${commandLine}" for ${windowSeconds}s in ${workingDir}`];
            const watchedOut = watched.stdout.trim();
            if (watchedOut) {
                report.push(`stdout:\n${watchedOut}`);
            }
            const watchedErr = watched.stderr.trim();
            if (watchedErr) {
                report.push(`stderr:\n${watchedErr}`);
            }
            report.push(outcome);

            // A process that was still running at the end of the window is
            // the expected outcome; only an early non-zero exit is an error.
            const isError = watched.endedEarly && typeof watched.exitCode === 'number' && watched.exitCode !== 0;
            return { output: truncate(report.join('\n\n'), MAX_COMMAND_CHARS, 'watch_command'), isError };
        }
        catch (err) {
            const reason = err instanceof Error ? err.message : String(err);
            return { output: `Error watching command "${executable}": ${reason}`, isError: true };
        }
    }
};
2191
+ exports.watchCommandTool = watchCommandTool;
2192
/**
 * Build a fresh ToolRegistry containing every core tool defined in this
 * module, ready to hand to ToolUseLoop or to use on its own.
 *
 * Git tools are not included: they come from createGitToolRegistry() and
 * can be merged in with registry.registerAll([...gitRegistry.getAll()]).
 *
 * @returns whatever `ToolRegistry#registerAll` returns for the core tool list.
 */
function createCoreToolRegistry() {
    const coreTools = [
        readFileTool,
        writeFileTool,
        deleteFileTool,
        applyEditTool,
        replaceRangeTool,
        applyPatchTool,
        listFilesTool,
        lsTool,
        findDirectoryTool,
        searchCodeTool,
        runCommandTool,
        watchCommandTool
    ];
    const registry = new tool_registry_1.ToolRegistry();
    return registry.registerAll(coreTools);
}
2214
+ //# sourceMappingURL=core-tools.js.map