@semalt-ai/code 1.8.5 → 1.20.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (192)
  1. package/.claude/settings.local.json +7 -1
  2. package/.github/workflows/ci.yml +69 -0
  3. package/ARCHITECTURE.md +6 -95
  4. package/CLAUDE.md +196 -316
  5. package/README.md +148 -4
  6. package/docs/ARCHITECTURE.md +1321 -0
  7. package/docs/CONFIG.md +340 -0
  8. package/docs/HISTORY.md +245 -0
  9. package/examples/embed.js +74 -0
  10. package/index.js +251 -10
  11. package/lib/agent.js +856 -120
  12. package/lib/api.js +239 -50
  13. package/lib/args.js +74 -2
  14. package/lib/audit.js +23 -1
  15. package/lib/background.js +584 -0
  16. package/lib/checkpoints.js +757 -0
  17. package/lib/commands/auth.js +94 -0
  18. package/lib/commands/chat-session.js +489 -0
  19. package/lib/commands/chat-slash.js +415 -0
  20. package/lib/commands/chat-turn.js +669 -0
  21. package/lib/commands/chat.js +407 -0
  22. package/lib/commands/custom.js +157 -0
  23. package/lib/commands/history-utils.js +66 -0
  24. package/lib/commands/index.js +268 -0
  25. package/lib/commands/mcp.js +113 -0
  26. package/lib/commands/oneshot.js +193 -0
  27. package/lib/commands/registry.js +269 -0
  28. package/lib/commands/tasks.js +89 -0
  29. package/lib/compact.js +87 -0
  30. package/lib/config.js +360 -11
  31. package/lib/constants.js +401 -3
  32. package/lib/deny.js +199 -0
  33. package/lib/doctor.js +160 -0
  34. package/lib/headless.js +202 -0
  35. package/lib/hooks.js +286 -0
  36. package/lib/images.js +270 -0
  37. package/lib/internals.js +49 -0
  38. package/lib/mcp/boundary.js +131 -0
  39. package/lib/mcp/client.js +270 -0
  40. package/lib/mcp/oauth.js +134 -0
  41. package/lib/memory.js +209 -0
  42. package/lib/metrics.js +37 -2
  43. package/lib/payload.js +54 -0
  44. package/lib/permission-rules.js +401 -0
  45. package/lib/permissions.js +123 -26
  46. package/lib/pricing.js +67 -0
  47. package/lib/proc.js +62 -0
  48. package/lib/prompts.js +99 -8
  49. package/lib/sandbox.js +568 -0
  50. package/lib/sdk.js +328 -0
  51. package/lib/secrets.js +211 -0
  52. package/lib/skills.js +223 -0
  53. package/lib/subagents.js +516 -0
  54. package/lib/tool_registry.js +2862 -0
  55. package/lib/tool_specs.js +263 -9
  56. package/lib/tools.js +352 -1039
  57. package/lib/ui/anim.js +86 -0
  58. package/lib/ui/ansi.js +17 -27
  59. package/lib/ui/chat-history.js +253 -71
  60. package/lib/ui/create-ui.js +67 -24
  61. package/lib/ui/diff.js +90 -25
  62. package/lib/ui/file-activity.js +236 -0
  63. package/lib/ui/format.js +195 -29
  64. package/lib/ui/input-field.js +21 -11
  65. package/lib/ui/md-stream.js +234 -0
  66. package/lib/ui/render-operation.js +113 -0
  67. package/lib/ui/select.js +1 -4
  68. package/lib/ui/status-bar.js +146 -36
  69. package/lib/ui/stream.js +20 -13
  70. package/lib/ui/theme.js +190 -44
  71. package/lib/ui/tool-operation.js +190 -0
  72. package/lib/ui/utils.js +9 -5
  73. package/lib/ui/web-activity.js +270 -0
  74. package/lib/ui/writer.js +159 -45
  75. package/lib/ui.js +1 -1
  76. package/lib/verify.js +229 -0
  77. package/lib/web-extract.js +213 -0
  78. package/lib/web-summarize.js +68 -0
  79. package/package.json +19 -4
  80. package/scripts/lint.js +57 -0
  81. package/test/agent-loop.test.js +389 -0
  82. package/test/anim-driver.test.js +153 -0
  83. package/test/ask-user-display.test.js +226 -0
  84. package/test/ask-user-gate.test.js +231 -0
  85. package/test/background.test.js +414 -0
  86. package/test/chat-history-nocolor.test.js +155 -0
  87. package/test/chat-relogin.test.js +207 -0
  88. package/test/chat.test.js +114 -0
  89. package/test/checkpoints-agent.test.js +181 -0
  90. package/test/checkpoints.test.js +650 -0
  91. package/test/command-registry.test.js +160 -0
  92. package/test/compact.test.js +116 -0
  93. package/test/completion-lazy.test.js +52 -0
  94. package/test/config-merge.test.js +324 -0
  95. package/test/config-quarantine.test.js +128 -0
  96. package/test/config-write-guard-allow-anywhere.test.js +56 -0
  97. package/test/config-write-guard-skip.test.js +46 -0
  98. package/test/config-write-guard.test.js +153 -0
  99. package/test/context-split.test.js +215 -0
  100. package/test/cost-doctor.test.js +142 -0
  101. package/test/custom-commands-chat.test.js +106 -0
  102. package/test/custom-commands.test.js +230 -0
  103. package/test/defer-detail-band.test.js +403 -0
  104. package/test/deny-windows.test.js +120 -0
  105. package/test/deny.test.js +83 -0
  106. package/test/detail-band-tab-flatten.test.js +242 -0
  107. package/test/download-allow-anywhere.test.js +66 -0
  108. package/test/download-confine.test.js +153 -0
  109. package/test/exec-diff.test.js +268 -0
  110. package/test/executors.test.js +599 -0
  111. package/test/extract-tool-calls.test.js +349 -0
  112. package/test/fetch-url-validation.test.js +219 -0
  113. package/test/file-activity.test.js +522 -0
  114. package/test/fixtures/tool-calls.js +57 -0
  115. package/test/fixtures/web-page.js +91 -0
  116. package/test/git-tools.test.js +384 -0
  117. package/test/grep-glob-serialize.test.js +242 -0
  118. package/test/grep-glob.test.js +268 -0
  119. package/test/grep-path-target.test.js +227 -0
  120. package/test/harness/README.md +57 -0
  121. package/test/harness/chat-harness.js +143 -0
  122. package/test/harness/memwarn-headless-child.js +65 -0
  123. package/test/harness/mock-llm.js +120 -0
  124. package/test/harness/mock-mcp-server.js +142 -0
  125. package/test/harness/sse-server.js +69 -0
  126. package/test/headless.test.js +348 -0
  127. package/test/history-utils.test.js +88 -0
  128. package/test/hooks-agent.test.js +238 -0
  129. package/test/hooks-verify-sandbox.test.js +232 -0
  130. package/test/hooks.test.js +216 -0
  131. package/test/http-get-user-agent.test.js +142 -0
  132. package/test/images-api.test.js +208 -0
  133. package/test/images.test.js +238 -0
  134. package/test/input-field-ctrl-o.test.js +37 -0
  135. package/test/live-height-physical.test.js +281 -0
  136. package/test/max-iterations.test.js +218 -0
  137. package/test/mcp-boundary.test.js +57 -0
  138. package/test/mcp-client.test.js +267 -0
  139. package/test/mcp-oauth.test.js +86 -0
  140. package/test/md-stream.test.js +183 -0
  141. package/test/memory-truncation-warning.test.js +222 -0
  142. package/test/memory.test.js +198 -0
  143. package/test/native-dispatch.test.js +409 -0
  144. package/test/native-live-narration.test.js +254 -0
  145. package/test/output-chokepoint.test.js +188 -0
  146. package/test/output-heredoc-leak.test.js +195 -0
  147. package/test/output-preview.test.js +245 -0
  148. package/test/path-guards.test.js +134 -0
  149. package/test/payload.test.js +99 -0
  150. package/test/permission-rules-agent.test.js +210 -0
  151. package/test/permission-rules.test.js +297 -0
  152. package/test/permissions.test.js +362 -0
  153. package/test/plan-mode.test.js +167 -0
  154. package/test/read-paginate.test.js +275 -0
  155. package/test/readonly-tools.test.js +177 -0
  156. package/test/render-operation.test.js +317 -0
  157. package/test/replay-descriptor-xml.test.js +216 -0
  158. package/test/replay-descriptor.test.js +189 -0
  159. package/test/replay-web-aggregate.test.js +291 -0
  160. package/test/replay-web-persist.test.js +241 -0
  161. package/test/result-cap.test.js +233 -0
  162. package/test/running-glyph-anim.test.js +111 -0
  163. package/test/sandbox-agent.test.js +147 -0
  164. package/test/sandbox-integration.test.js +216 -0
  165. package/test/sandbox.test.js +408 -0
  166. package/test/sdk.test.js +234 -0
  167. package/test/shell-output-cap.test.js +181 -0
  168. package/test/skills-chat.test.js +110 -0
  169. package/test/skills.test.js +295 -0
  170. package/test/smoke.test.js +68 -0
  171. package/test/status-bar-driver.test.js +93 -0
  172. package/test/status-bar-pause.test.js +164 -0
  173. package/test/status-bar-resync.test.js +188 -0
  174. package/test/stream-parser.test.js +171 -0
  175. package/test/subagents-agent.test.js +178 -0
  176. package/test/subagents.test.js +222 -0
  177. package/test/theme-palette.test.js +166 -0
  178. package/test/tool-registry.test.js +85 -0
  179. package/test/trim-budget.test.js +101 -0
  180. package/test/truncate-visible.test.js +78 -0
  181. package/test/verify-agent.test.js +317 -0
  182. package/test/verify.test.js +141 -0
  183. package/test/view-image.test.js +199 -0
  184. package/test/web-activity-ordering.test.js +203 -0
  185. package/test/web-activity.test.js +207 -0
  186. package/test/web-data-extraction-guidance.test.js +71 -0
  187. package/test/web-extract.test.js +185 -0
  188. package/test/web-fetch-agent.test.js +291 -0
  189. package/test/web-fetch-mode.test.js +193 -0
  190. package/test/web-search.test.js +380 -0
  191. package/lib/commands.js +0 -1438
  192. package/path +0 -1
package/lib/tools.js CHANGED
@@ -1,21 +1,34 @@
1
1
  'use strict';
2
2
 
3
- const fs = require('fs');
4
- const fsp = require('fs/promises');
5
- const http = require('http');
6
- const https = require('https');
7
3
  const os = require('os');
8
4
  const path = require('path');
5
+ // Note: fs / fs/promises / http / https are no longer required here — the file
6
+ // and network executors moved to lib/tool_registry.js in Task 1.4b. tools.js now
7
+ // keeps only the path/sandbox guards, the shell executor, and the parse glue.
9
8
  const { spawn } = require('child_process');
10
9
 
11
10
  const { logToolCall } = require('./audit');
12
11
  const { spawnWithGroup, killTreeEscalating } = require('./proc');
12
+ const { classifyShellCommand } = require('./deny');
13
+ const { resolveSandboxedSpawn } = require('./sandbox');
14
+ const { TOOL_REGISTRY, fromInvoke, entryForAction, dynamicToolEntries } = require('./tool_registry');
13
15
  const writer = require('./ui/writer');
16
+ const { CONFIG_PATH, protectedConfigDirs } = require('./constants');
14
17
 
15
18
  const MEMORY_PATH = path.join(os.homedir(), '.semalt-ai', 'memory.json');
16
19
 
17
20
  const _dryRun = process.argv.includes('--dry-run');
18
21
  const _allowAnywhere = process.argv.includes('--allow-anywhere');
22
+ // Binary network isolation (Task 4.4b): the human-typed --no-network flag forces
23
+ // kernel-level no-network for sandboxed commands. Read once at module load — a
24
+ // human-only signal the model can never reach (the model controls only the
25
+ // command string). sandbox.network in config does the same via decideSandbox.
26
+ const _noNetwork = process.argv.includes('--no-network');
27
+ // The single, explicit opt-out of ALL safety. When set, the destructive-command
28
+ // deny-list and the config-file read guard are bypassed. This is the only flag
29
+ // that does so — --allow-* tier flags grant approval but never disable the
30
+ // deny-list. See lib/deny.js and Task 0.1.
31
+ const _skipPermissions = process.argv.includes('--dangerously-skip-permissions');
19
32
  const _skippedOps = [];
20
33
  function getSkippedOps() { return _skippedOps.slice(); }
21
34
 
@@ -56,38 +69,166 @@ function _sandboxError(filePath) {
56
69
  return { error: `Path outside allowed area: ${filePath}. Use --allow-anywhere to override.` };
57
70
  }
58
71
 
59
- // Cheap ReDoS guard. Rejects pathologically long patterns, common
60
- // catastrophic-backtracking anti-patterns, and pattern×data sizes large
61
- // enough to hang the regex engine.
62
- function _checkRegexSafety(pattern, data) {
72
+ // Files that hold secrets or sensitive history and must NEVER be read back into
73
+ // the model context: the API key / auth token live in config.json, the memory
74
+ // store may contain secrets, and the audit log records past tool inputs. This
75
+ // guard is intentionally NOT gated on --allow-anywhere: --allow-anywhere widens
76
+ // where the agent may write, it does not unlock secret exfiltration. The only
77
+ // override is --dangerously-skip-permissions (opting out of all safety).
78
+ const _AUDIT_LOG_PATH = path.join(os.homedir(), '.semalt-ai', 'audit.log');
79
+ const PROTECTED_READ_PATHS = new Set([
80
+ path.resolve(CONFIG_PATH),
81
+ path.resolve(MEMORY_PATH),
82
+ path.resolve(_AUDIT_LOG_PATH),
83
+ ]);
84
+
85
+ function isProtectedSecretPath(filePath) {
86
+ if (_skipPermissions) return false;
87
+ if (typeof filePath !== 'string' || !filePath) return false;
88
+ return PROTECTED_READ_PATHS.has(path.resolve(filePath));
89
+ }
90
+
91
+ function _secretReadError(filePath) {
92
+ return { error: `Refused: ${filePath} holds secrets/credentials and cannot be read by the agent. (This guard is not overridable with --allow-anywhere.)` };
93
+ }
94
+
95
+ // Config/execution-driving paths that the AGENT must never WRITE — the write-side
96
+ // companion to the read guard above (Pre-Task 5.0b). Covers the protected-config
97
+ // set (lib/constants.js protectedConfigDirs): the whole ~/.semalt-ai dir AND
98
+ // every project .semalt dir from the CWD up to the repo root, INCLUDING files
99
+ // that do not yet exist (a missing config.json / agents/*.md / hook is inside a
100
+ // protected DIR, so it is refused regardless of existence — the CVE-2026-25725
101
+ // lesson). Directory-prefix matched on the resolved path, mirroring isPathSafe.
102
+ //
103
+ // Policy parity with isProtectedSecretPath: NOT gated on --allow-anywhere
104
+ // (--allow-anywhere widens WHERE the agent may write, it does not unlock writing
105
+ // the config surfaces that drive host-privileged execution). The only override is
106
+ // --dangerously-skip-permissions (opting out of all safety). A human editing
107
+ // their own config in an editor is unaffected — this guards the agent's tools and
108
+ // the sandboxed shell, not the human.
109
+ function isProtectedConfigPath(filePath) {
110
+ if (_skipPermissions) return false;
111
+ if (typeof filePath !== 'string' || !filePath) return false;
112
+ const resolved = path.resolve(filePath);
113
+ for (const dir of protectedConfigDirs()) {
114
+ const base = path.resolve(dir);
115
+ const prefix = base.endsWith(path.sep) ? base : base + path.sep;
116
+ if (resolved === base || resolved.startsWith(prefix)) return true;
117
+ }
118
+ return false;
119
+ }
120
+
121
+ function _protectedConfigWriteError(filePath) {
122
+ return { error: `Refused: ${filePath} is a protected config path (under ~/.semalt-ai or a project .semalt dir) that drives execution and cannot be written by the agent. (This guard is not overridable with --allow-anywhere.)` };
123
+ }
124
+
125
+ // Active (unescaped) regex metacharacters. A search pattern that contains NONE
126
+ // of these — or one the caller explicitly marks `literal` — is a plain literal:
127
+ // matching it (via split/join or indexOf) is O(dataLen) and CANNOT backtrack, so
128
+ // the regex-ReDoS bounds below DO NOT apply. This is what makes the intended
129
+ // copy-a-block-then-replace workflow work at any length (read_file defaults line
130
+ // numbers OFF specifically to keep snippets copyable, lib/agent.js): a long
131
+ // literal block is never rejected for its length.
132
+ const _REGEX_META = new Set(['.', '*', '+', '?', '^', '$', '{', '}', '(', ')', '|', '[', ']']);
133
+
134
+ function _hasActiveRegexMeta(pattern) {
135
+ if (typeof pattern !== 'string') return false;
136
+ for (let i = 0; i < pattern.length; i++) {
137
+ const ch = pattern[i];
138
+ if (ch === '\\') { i++; continue; } // the escaped next char is inert, skip it
139
+ if (_REGEX_META.has(ch)) return true;
140
+ }
141
+ return false;
142
+ }
143
+
144
+ // Decide literal vs regex. A pattern is matched literally when the caller forces
145
+ // it (`literal: true` — for copied code blocks that legitimately contain
146
+ // regex-special chars like `(` or `[`), or when auto-detection finds no active
147
+ // regex metacharacter at all (the pasted plain-text-block case).
148
+ function _isLiteralPattern(pattern, literal) {
149
+ if (literal === true) return true;
150
+ return !_hasActiveRegexMeta(pattern);
151
+ }
152
+
153
+ // ReDoS guard for the REGEX path only. Literals bypass it entirely — they cannot
154
+ // backtrack, so their length is irrelevant. Catastrophic backtracking comes from
155
+ // nested quantifiers (Check B below), NOT from pattern length: the old
156
+ // `dataLen * pattern.length` proxy (Check C) is gone because it penalized exactly
157
+ // the safe dimension — it rejected long *literals* (e.g. any block over ~250
158
+ // chars on a 40 KB file) while a short bomb like `(a+)+$` (length 6) sailed
159
+ // straight past it. For a genuine regex we keep two real protections:
160
+ // • a sanity length cap — a multi-thousand-char metacharacter-heavy pattern is
161
+ // suspicious and serves no legitimate purpose (literals use `literal:true`);
162
+ // • the nested-quantifier detector, which is the actual backtracking guard.
163
+ function _checkRegexSafety(pattern, data, literal) {
63
164
  if (typeof pattern !== 'string') return null;
165
+ if (_isLiteralPattern(pattern, literal)) return null; // literal: O(dataLen), unbounded by length
64
166
  if (pattern.length > 1000) {
65
- return { error: 'Pattern rejected: length exceeds 1000 chars' };
167
+ return { error: 'Regex rejected: length exceeds 1000 chars (use literal:true to match a long block verbatim)' };
66
168
  }
67
169
  if (/(\(.*[+*].*\).*[+*])|(\[.*\].*[+*].*[+*])/.test(pattern)) {
68
170
  return { error: 'Pattern rejected: potentially catastrophic backtracking' };
69
171
  }
70
- const dataLen = typeof data === 'string' ? data.length : 0;
71
- if (dataLen * pattern.length > 10_000_000) {
72
- return { error: 'Pattern too complex for input size' };
73
- }
74
172
  return null;
75
173
  }
76
174
 
77
- function createToolExecutor(permissionManager, ui, getConfig) {
175
+ // The single authority for splitting an ask_user question into its menu. A line
176
+ // matching `^\s*\d+[.)]\s+(.+)$` is a numbered OPTION; every other line is
177
+ // PROMPT prose. Returns { prompt, options } where `prompt` is the non-numbered
178
+ // lines joined (trimmed) and `options` is the option labels — but ONLY when
179
+ // there are ≥2 of them (a lone "1." is prose, not a menu), matching the prior
180
+ // _parseNumberedOptions contract. Display-only: the caller still hands the FULL
181
+ // original question to the model. Pure; safe on null/non-string (auto-answer
182
+ // paths pass arbitrary text).
183
+ function parseAskMenu(text) {
184
+ const options = [];
185
+ const promptLines = [];
186
+ for (const line of String(text == null ? '' : text).split('\n')) {
187
+ const m = line.match(/^\s*\d+[.)]\s+(.+)$/);
188
+ if (m) options.push(m[1].trim());
189
+ else promptLines.push(line);
190
+ }
191
+ return {
192
+ prompt: promptLines.join('\n').trim(),
193
+ options: options.length >= 2 ? options : [],
194
+ };
195
+ }
196
+
197
+ function createToolExecutor(permissionManager, ui, getConfig, options = {}) {
78
198
  const { BOLD, DIM, FG_DARK, FG_GRAY, FG_GREEN, FG_RED, FG_YELLOW, RST, renderDiff } = ui;
199
+ // Checkpoints & rewind (Task 4.3). When a store is wired, the prior state of a
200
+ // file is snapshotted in agentExecFile AFTER the permission gate approves and
201
+ // BEFORE the executor mutates. Optional — undefined in tests/headless paths
202
+ // that don't opt in. Subagents reuse this same agentExecFile, so a child's
203
+ // mutations are checkpointed into the parent session for free.
204
+ const checkpointStore = options.checkpointStore || null;
205
+ // OS sandbox fallback approver (Task 4.4). When the sandbox is unavailable in
206
+ // `auto` mode, agentExecShell asks a HUMAN before running a command
207
+ // unsandboxed via this callback (it returns true to allow, false to refuse).
208
+ // Undefined in non-TTY/headless/test paths → the command is REFUSED (never a
209
+ // silent unsandboxed run). This is injected by the executor owner (index.js),
210
+ // never reachable by the model, so the agent can't approve its own escape.
211
+ const onUnsandboxed = typeof options.onUnsandboxed === 'function' ? options.onUnsandboxed : null;
212
+ // Web-fetch secondary summarizer (Task W.1). An injected async LLM call
213
+ // `(messages, { model, signal }) => Promise<string>` (the api client's
214
+ // chatComplete) used by http_get to summarize extracted page content in a
215
+ // separate call — only the summary enters the main context. Optional: when
216
+ // absent (headless/oneshot paths without an api client), http_get returns the
217
+ // extracted Markdown instead of summarizing, never the raw page.
218
+ const webChat = typeof options.webChat === 'function' ? options.webChat : null;
219
+ // Web search backend (Task W.2b). An injected async call
220
+ // `(query, { count }) => Promise<{ results: [{title,url,snippet}], … }>` (the
221
+ // api client's dashboardSearch, which hits the backend POST /api/search).
222
+ // Optional: when absent (headless/oneshot paths without an api client) the
223
+ // web_search tool degrades to a clean tool error, never a crash.
224
+ const webSearch = typeof options.webSearch === 'function' ? options.webSearch : null;
79
225
  // Continuation lines in a system-message bubble (chat-history.js else branch)
80
226
  // are indented by 5 spaces. Let the diff renderer reserve those columns so
81
227
  // its lines don't auto-wrap inside the bubble.
82
228
  const DIFF_BUBBLE_INSET = 5;
83
229
 
84
230
  function _parseNumberedOptions(text) {
85
- const options = [];
86
- for (const line of text.split('\n')) {
87
- const m = line.match(/^\s*\d+[.)]\s+(.+)$/);
88
- if (m) options.push(m[1].trim());
89
- }
90
- return options.length >= 2 ? options : [];
231
+ return parseAskMenu(text).options;
91
232
  }
92
233
 
93
234
  // Build the permission descriptor for a [action, ...args] call tuple.
@@ -125,95 +266,60 @@ function createToolExecutor(permissionManager, ui, getConfig) {
125
266
  delete_file: 'delete_file',
126
267
  move_file: 'move_file',
127
268
  copy_file: 'copy_file',
269
+ download: 'download',
270
+ edit_file: 'edit_file',
271
+ replace_in_file: 'replace_in_file',
272
+ make_dir: 'make_dir',
273
+ remove_dir: 'remove_dir',
274
+ upload: 'upload',
128
275
  };
129
276
  const roTag = READONLY_TAG[action];
130
277
  if (roTag && permissionManager.readonlyBlock(roTag)) return null;
131
278
 
132
- switch (action) {
133
- case 'shell':
134
- case 'exec':
135
- return { actionType: 'shell', description: args[0] || '', tag: 'exec' };
136
-
137
- case 'write':
138
- case 'append': {
139
- const filePath = args[0];
140
- const content = args[1];
141
- const tag = action === 'write' ? 'write_file' : 'append_file';
142
-
143
- let existing = '';
144
- try { existing = await fsp.readFile(filePath, 'utf8'); } catch {}
145
- const finalContent = action === 'write' ? (content || '') : (existing + (content || ''));
146
- const diffOutput = _uiActive
147
- ? renderDiff(existing, finalContent, filePath, { inset: DIFF_BUBBLE_INSET })
148
- : renderDiff(existing, finalContent, filePath);
149
- if (!_uiActive) writer.scrollback(diffOutput);
150
-
151
- // Dry-run renders the diff (above) but skips the picker — the
152
- // executor's dry-run early return reports the skip.
153
- if (_dryRun) return null;
154
-
155
- let desc = `${action === 'write' ? 'Write' : 'Append to'} ${filePath}`;
156
- if (content) desc += ` (${content.length} chars)`;
157
- if (_uiActive) desc = `${desc}\n${diffOutput}`;
158
- return { actionType: 'file', description: desc, tag };
159
- }
160
-
161
- case 'delete_file': {
162
- const filePath = args[0];
163
- _log(` ${FG_YELLOW}${BOLD}⚠ Deleting: ${filePath}${RST}`);
164
- return { actionType: 'file', description: `Delete ${filePath}`, tag: 'delete_file' };
165
- }
166
-
167
- case 'make_dir':
168
- return { actionType: 'file', description: `Create directory ${args[0]}`, tag: 'make_dir' };
169
-
170
- case 'remove_dir':
171
- return { actionType: 'file', description: `Remove directory ${args[0]}`, tag: 'remove_dir' };
279
+ // Per-tool descriptor now lives on the registry entry (Task 1.4b). Read-only
280
+ // ops resolve to a permission() that returns null (no gate). The side effects
281
+ // that used to live in the switch cases (write/append diff render,
282
+ // delete/move warning lines) moved into those permission() bodies unchanged.
283
+ const entry = entryForAction(action);
284
+ if (entry && typeof entry.permission === 'function') return entry.permission(ctx, args);
285
+ return null;
286
+ }
172
287
 
173
- case 'move_file': {
174
- const src = args[0];
175
- const dst = args[1];
176
- _log(` ${FG_YELLOW}${BOLD}⚠ Moving: ${src} ${dst}${RST}`);
177
- return { actionType: 'file', description: `Move ${src} to ${dst}`, tag: 'move_file' };
288
+ async function agentExecShell(command, options = {}) {
289
+ // Destructive-command deny-list. Enforced for EVERY shell call regardless
290
+ // of approval mode (interactive, non-TTY, or any --allow-* flag). This is
291
+ // the unbypassable chokepoint: all shell execution funnels through here.
292
+ // The only escape hatch is --dangerously-skip-permissions.
293
+ //
294
+ // The `initiator` distinguishes agent-initiated calls (the model asked) from
295
+ // user-initiated ones (a human typed `!cmd` / `semalt-code shell`). Agent
296
+ // calls keep the hard block. User calls are exempt from the block, except for
297
+ // the catastrophic subset (disk wipe / fork bomb), which gets a one-time y/N
298
+ // confirmation via options.confirm as a typo guard. See lib/deny.js.
299
+ const initiator = options.initiator === 'user' ? 'user' : 'agent';
300
+ if (!_skipPermissions) {
301
+ const verdict = classifyShellCommand(command, initiator);
302
+ if (verdict.action === 'block') {
303
+ const msg = `Blocked by safety deny-list: ${verdict.label}. Refuse to run: ${command}. To override, restart with --dangerously-skip-permissions.`;
304
+ _log(` ${FG_RED}✗ ${msg}${RST}`);
305
+ logToolCall('exec', { command }, false, 'denied');
306
+ return { exit_code: -1, stdout: '', stderr: msg, blocked: true };
307
+ }
308
+ if (verdict.action === 'confirm') {
309
+ let approved = false;
310
+ if (typeof options.confirm === 'function') {
311
+ try { approved = await options.confirm(verdict.label, command); }
312
+ catch { approved = false; }
313
+ }
314
+ if (!approved) {
315
+ const msg = `Cancelled (${verdict.label}): ${command}`;
316
+ _log(` ${FG_RED}✗ ${msg}${RST}`);
317
+ logToolCall('exec', { command }, false, 'cancelled');
318
+ return { exit_code: -1, stdout: '', stderr: msg, blocked: true };
319
+ }
178
320
  }
179
-
180
- case 'copy_file':
181
- return { actionType: 'file', description: `Copy ${args[0]} to ${args[1]}`, tag: 'copy_file' };
182
-
183
- case 'edit_file':
184
- return { actionType: 'file', description: `Edit line ${args[1]} in ${args[0]}`, tag: 'edit_file' };
185
-
186
- case 'replace_in_file':
187
- return { actionType: 'file', description: `Replace in ${args[0]}`, tag: 'replace_in_file' };
188
-
189
- case 'set_env':
190
- return { actionType: 'env', description: `Set env ${args[0]}=${args[1] || ''}`, tag: 'set_env' };
191
-
192
- case 'download':
193
- return { actionType: 'net', description: `Download ${args[0]}`, tag: 'download' };
194
-
195
- case 'upload':
196
- return { actionType: 'file', description: `Upload to ${args[0]}`, tag: 'upload' };
197
-
198
- case 'http_get':
199
- return { actionType: 'net', description: `HTTP GET ${args[0]}`, tag: 'http_get' };
200
-
201
- // ask_user is a real gate — "do you want me to ask the user this
202
- // question?" — separate from the question prompt itself (which is
203
- // captureSelect or stdin further down in the executor). Lifted here
204
- // so the activity bubble doesn't pre-date grant.
205
- case 'ask_user':
206
- return { actionType: 'user', description: `Ask user: ${args[0]}`, tag: 'ask_user' };
207
-
208
- case 'store_memory':
209
- return { actionType: 'memory', description: `Store memory: ${args[0]}`, tag: 'store_memory' };
210
-
211
- default:
212
- return null;
213
321
  }
214
- }
215
322
 
216
- async function agentExecShell(command, options = {}) {
217
323
  if (_dryRun) {
218
324
  _log(` ${FG_DARK}[dry-run] $ ${command}${RST}`);
219
325
  _skippedOps.push({ category: 'cmd', symbol: '▶', desc: command });
@@ -225,18 +331,55 @@ function createToolExecutor(permissionManager, ui, getConfig) {
225
331
  const timeout = cfg.command_timeout_ms || 30000;
226
332
  const { signal } = options;
227
333
 
334
+ // ---------------------------------------------------------------------
335
+ // OS sandbox (Task 4.4; unified chokepoint Pre-Task 5.0a). EVERY shell call
336
+ // — here, self-verification, and command-type hooks — funnels through the
337
+ // SHARED resolveSandboxedSpawn shim, so the model has no path that runs a
338
+ // command outside this decision. --dangerously-skip-permissions (a
339
+ // human-only flag) opts out of all safety, sandbox included.
340
+ //
341
+ // run:true → spawn the resolved file/args (jailed when sandbox 'on';
342
+ // plain { shell:true } when 'off'/human-approved 'unavailable').
343
+ // run:false → fail-safe refusal: failIfUnavailable hard error (hard:true)
344
+ // or no/declined human approval — NEVER a silent unsandboxed run.
345
+ // ---------------------------------------------------------------------
346
+ const resolution = await resolveSandboxedSpawn({
347
+ command,
348
+ getConfig,
349
+ onUnsandboxed,
350
+ cwd: process.cwd(),
351
+ allowAnywhere: _allowAnywhere,
352
+ skipPermissions: _skipPermissions,
353
+ noNetwork: _noNetwork,
354
+ });
355
+ if (!resolution.run) {
356
+ _log(` ${FG_RED}✗ ${resolution.message}${RST}`);
357
+ logToolCall('exec', { command, sandbox: 'unavailable' }, false, resolution.hard ? 'sandbox-blocked' : 'sandbox-refused');
358
+ return { exit_code: -1, stdout: '', stderr: resolution.message, blocked: true, sandbox: 'unavailable' };
359
+ }
360
+ const spawnFile = resolution.file;
361
+ const spawnArgs = resolution.useShell ? [] : resolution.args;
362
+ const spawnOpts = resolution.useShell ? { shell: true } : {};
363
+ const sandboxStatus = resolution.sandbox;
364
+ // Binary network mode for this run (Task 4.4b): 'on' (host network) | 'off'
365
+ // (kernel-level no-network). Surfaced in the result + audit (net:on|off).
366
+ const networkStatus = resolution.network || 'on';
367
+
228
368
  return new Promise((resolve) => {
229
369
  let child;
230
370
  try {
231
371
  // spawnWithGroup gives us a process-group leader on POSIX so
232
372
  // killTreeEscalating can reach descendants via -pid. With shell:true
233
373
  // a plain child.kill targets only the sh wrapper, leaving the real
234
- // workload (find /, pipelines, etc.) running as orphans.
235
- child = spawnWithGroup(spawn, command, [], { shell: true });
374
+ // workload (find /, pipelines, etc.) running as orphans. When sandboxed,
375
+ // the group leader is the bwrap/sandbox-exec process — killing the group
376
+ // tears down the whole jailed subtree, so child-process confinement
377
+ // composes with the existing tree-kill plumbing.
378
+ child = spawnWithGroup(spawn, spawnFile, spawnArgs, spawnOpts);
236
379
  } catch (error) {
237
380
  _log(` ${FG_RED}✗ ${error.message}${RST}`);
238
- logToolCall('exec', { command }, true, 'error');
239
- return resolve({ exit_code: -1, stdout: '', stderr: error.message });
381
+ logToolCall('exec', { command, sandbox: sandboxStatus, network: networkStatus }, true, 'error');
382
+ return resolve({ exit_code: -1, stdout: '', stderr: error.message, sandbox: sandboxStatus, network: networkStatus });
240
383
  }
241
384
  const startedAt = Date.now();
242
385
  let stdout = '';
@@ -277,8 +420,8 @@ function createToolExecutor(permissionManager, ui, getConfig) {
277
420
  clearTimeout(timer);
278
421
  detachAbort();
279
422
  _log(` ${FG_RED}✗ ${error.message}${RST}`);
280
- logToolCall('exec', { command }, true, 'error');
281
- resolve({ exit_code: -1, stdout, stderr: stderr || error.message });
423
+ logToolCall('exec', { command, sandbox: sandboxStatus, network: networkStatus }, true, 'error');
424
+ resolve({ exit_code: -1, stdout, stderr: stderr || error.message, sandbox: sandboxStatus, network: networkStatus });
282
425
  });
283
426
  child.on('close', (code, sigName) => {
284
427
  clearTimeout(timer);
@@ -287,14 +430,14 @@ function createToolExecutor(permissionManager, ui, getConfig) {
287
430
  const elapsed_s = Math.max(0, Math.round((Date.now() - startedAt) / 1000));
288
431
  const note = `[user interrupted after ${elapsed_s}s]`;
289
432
  stderr += (stderr ? '\n' : '') + note;
290
- logToolCall('exec', { command }, true, 'aborted');
291
- resolve({ exit_code: -1, stdout, stderr, aborted: true, elapsed_s });
433
+ logToolCall('exec', { command, sandbox: sandboxStatus, network: networkStatus }, true, 'aborted');
434
+ resolve({ exit_code: -1, stdout, stderr, aborted: true, elapsed_s, sandbox: sandboxStatus, network: networkStatus });
292
435
  return;
293
436
  }
294
437
  if (killed) stderr += (stderr ? '\n' : '') + `[timed out after ${timeout}ms]`;
295
438
  const exit_code = killed ? -1 : (code != null ? code : (sigName ? -1 : 0));
296
- logToolCall('exec', { command }, true, exit_code === 0 ? 'ok' : 'error');
297
- resolve({ exit_code, stdout, stderr });
439
+ logToolCall('exec', { command, sandbox: sandboxStatus, network: networkStatus }, true, exit_code === 0 ? 'ok' : 'error');
440
+ resolve({ exit_code, stdout, stderr, sandbox: sandboxStatus, network: networkStatus });
298
441
  });
299
442
  });
300
443
  }
@@ -312,719 +455,69 @@ function createToolExecutor(permissionManager, ui, getConfig) {
312
455
  signal = last.signal || null;
313
456
  args = rest.slice(0, -1);
314
457
  }
315
- const [arg0 = null, arg1 = null, arg2 = null, arg3 = null] = args;
316
-
317
- if (action === 'read') {
318
- const filePath = arg0;
319
- const startedAt = Date.now();
320
- const stat = await fsp.stat(filePath).catch(() => null);
321
- if (stat) {
322
- const cfg = getConfig ? getConfig() : {};
323
- const maxBytes = (cfg.max_file_size_kb || 512) * 1024;
324
- if (stat.size > maxBytes) {
325
- const kb = (stat.size / 1024).toFixed(0);
326
- logToolCall('read_file', { path: filePath }, false, 'error');
327
- return { error: `File too large: ${kb} KB exceeds max_file_size_kb=${cfg.max_file_size_kb || 512}` };
328
- }
329
- }
330
- if (signal && signal.aborted) {
331
- logToolCall('read_file', { path: filePath }, true, 'aborted');
332
- return { aborted: true, elapsed_s: Math.max(0, Math.round((Date.now() - startedAt) / 1000)) };
333
- }
334
- try {
335
- const data = await fsp.readFile(filePath, { encoding: 'utf8', signal: signal || undefined });
336
- const lines = data.split('\n').length;
337
- if (lines > 10) {
338
- _log(` ${FG_GREEN}✓${RST} ${FG_GRAY}Read ${filePath} (${lines} lines, ${data.length} chars)${RST}`);
339
- } else {
340
- _log(` ${FG_GREEN}✓${RST} ${FG_GRAY}Read ${filePath}${RST}`);
341
- }
342
- logToolCall('read_file', { path: filePath }, true, 'ok');
343
- return { content: data, path: filePath, bytes: Buffer.byteLength(data, 'utf8') };
344
- } catch (error) {
345
- if (error && (error.name === 'AbortError' || error.code === 'ABORT_ERR')) {
346
- logToolCall('read_file', { path: filePath }, true, 'aborted');
347
- return { aborted: true, elapsed_s: Math.max(0, Math.round((Date.now() - startedAt) / 1000)) };
348
- }
349
- _log(` ${FG_RED}✗ ${error.message}${RST}`);
350
- logToolCall('read_file', { path: filePath }, true, 'error');
351
- return { error: error.message };
352
- }
353
- }
354
-
355
- if (action === 'write' || action === 'append') {
356
- const filePath = arg0;
357
- const content = arg1;
358
- const tag = action === 'write' ? 'write_file' : 'append_file';
359
-
360
- const blocked = permissionManager.readonlyBlock(tag);
361
- if (blocked) {
362
- logToolCall(tag, { path: filePath, content }, false, 'denied');
363
- return blocked;
364
- }
365
-
366
- if (!isPathSafe(filePath)) {
367
- logToolCall(tag, { path: filePath }, false, 'denied');
368
- return _sandboxError(filePath);
369
- }
370
-
371
- // Dry-run: record the skipped op and return without writing. The diff
372
- // was already rendered in describePermission ahead of this dispatch.
373
- if (_dryRun) {
374
- const verb = action === 'write' ? 'write' : 'append';
375
- _skippedOps.push({ category: 'file', symbol: '✎', desc: `${verb} ${filePath}` });
376
- logToolCall(tag, { path: filePath }, false, 'dry-run');
377
- return { status: 'dry-run', message: 'dry-run: write skipped', path: filePath };
378
- }
379
-
380
- try {
381
- const dir = path.dirname(filePath);
382
- if (dir && dir !== '.') await fsp.mkdir(dir, { recursive: true });
383
- if (action === 'write') await fsp.writeFile(filePath, content || '');
384
- else await fsp.appendFile(filePath, content || '');
385
- const verb = action === 'write' ? 'Wrote' : 'Appended to';
386
- _log(` ${FG_GREEN}✓${RST} ${FG_GRAY}${verb} ${filePath}${RST}`);
387
- logToolCall(tag, { path: filePath, content }, true, 'ok');
388
- return { status: 'ok', path: filePath, bytes: (content || '').length };
389
- } catch (error) {
390
- _log(` ${FG_RED}✗ ${error.message}${RST}`);
391
- logToolCall(tag, { path: filePath, content }, true, 'error');
392
- return { error: error.message };
393
- }
394
- }
395
-
396
- if (action === 'list_dir') {
397
- const dirPath = arg0;
398
- try {
399
- const entries = await fsp.readdir(dirPath, { withFileTypes: true });
400
- const items = entries.map((e) => {
401
- if (e.isSymbolicLink()) return `[L] ${e.name}`;
402
- if (e.isDirectory()) return `[D] ${e.name}`;
403
- return `[F] ${e.name}`;
404
- });
405
- _log(` ${FG_GREEN}✓${RST} ${FG_GRAY}Listed ${dirPath} (${items.length} items)${RST}`);
406
- logToolCall('list_dir', { path: dirPath }, true, 'ok');
407
- return { items, path: dirPath };
408
- } catch (error) {
409
- _log(` ${FG_RED}✗ ${error.message}${RST}`);
410
- logToolCall('list_dir', { path: dirPath }, true, 'error');
411
- return { error: error.message };
412
- }
413
- }
414
-
415
- if (action === 'delete_file') {
416
- const filePath = arg0;
417
-
418
- const blocked = permissionManager.readonlyBlock('delete_file');
419
- if (blocked) {
420
- logToolCall('delete_file', { path: filePath }, false, 'denied');
421
- return blocked;
422
- }
423
-
424
- if (!isPathSafe(filePath)) {
425
- logToolCall('delete_file', { path: filePath }, false, 'denied');
426
- return _sandboxError(filePath);
427
- }
428
-
429
- try {
430
- await fsp.unlink(filePath);
431
- _log(` ${FG_GREEN}✓${RST} ${FG_GRAY}Deleted ${filePath}${RST}`);
432
- logToolCall('delete_file', { path: filePath }, true, 'ok');
433
- return { status: 'ok', path: filePath };
434
- } catch (error) {
435
- _log(` ${FG_RED}✗ ${error.message}${RST}`);
436
- logToolCall('delete_file', { path: filePath }, true, 'error');
437
- return { error: error.message };
438
- }
439
- }
440
-
441
- if (action === 'make_dir') {
442
- const dirPath = arg0;
443
- if (!isPathSafe(dirPath)) {
444
- logToolCall('make_dir', { path: dirPath }, false, 'denied');
445
- return _sandboxError(dirPath);
446
- }
447
- try {
448
- await fsp.mkdir(dirPath, { recursive: true });
449
- _log(` ${FG_GREEN}✓${RST} ${FG_GRAY}Created directory ${dirPath}${RST}`);
450
- logToolCall('make_dir', { path: dirPath }, true, 'ok');
451
- return { status: 'ok', path: dirPath };
452
- } catch (error) {
453
- _log(` ${FG_RED}✗ ${error.message}${RST}`);
454
- logToolCall('make_dir', { path: dirPath }, true, 'error');
455
- return { error: error.message };
456
- }
457
- }
458
-
459
- if (action === 'remove_dir') {
460
- const dirPath = arg0;
461
- if (!isPathSafe(dirPath)) {
462
- logToolCall('remove_dir', { path: dirPath }, false, 'denied');
463
- return _sandboxError(dirPath);
464
- }
465
- try {
466
- await fsp.rm(dirPath, { recursive: true, force: true });
467
- _log(` ${FG_GREEN}✓${RST} ${FG_GRAY}Removed directory ${dirPath}${RST}`);
468
- logToolCall('remove_dir', { path: dirPath }, true, 'ok');
469
- return { status: 'ok', path: dirPath };
470
- } catch (error) {
471
- _log(` ${FG_RED}✗ ${error.message}${RST}`);
472
- logToolCall('remove_dir', { path: dirPath }, true, 'error');
473
- return { error: error.message };
474
- }
475
- }
476
-
477
- if (action === 'move_file') {
478
- const src = arg0;
479
- const dst = arg1;
480
-
481
- const blocked = permissionManager.readonlyBlock('move_file');
482
- if (blocked) {
483
- logToolCall('move_file', { src, dst }, false, 'denied');
484
- return blocked;
485
- }
486
-
487
- if (!isPathSafe(dst)) {
488
- logToolCall('move_file', { src, dst }, false, 'denied');
489
- return _sandboxError(dst);
490
- }
491
-
492
- try {
493
- const dstDir = path.dirname(dst);
494
- if (dstDir && dstDir !== '.') await fsp.mkdir(dstDir, { recursive: true });
495
- try {
496
- await fsp.rename(src, dst);
497
- } catch (renameErr) {
498
- if (renameErr.code !== 'EXDEV') throw renameErr;
499
- // Cross-device rename not supported — copy then remove
500
- await fsp.cp(src, dst, { recursive: true });
501
- await fsp.rm(src, { recursive: true, force: true });
502
- }
503
- _log(` ${FG_GREEN}✓${RST} ${FG_GRAY}Moved ${src} → ${dst}${RST}`);
504
- logToolCall('move_file', { src, dst }, true, 'ok');
505
- return { status: 'ok', src, dst };
506
- } catch (error) {
507
- _log(` ${FG_RED}✗ ${error.message}${RST}`);
508
- logToolCall('move_file', { src, dst }, true, 'error');
509
- return { error: error.message };
510
- }
511
- }
512
-
513
- if (action === 'copy_file') {
514
- const src = arg0;
515
- const dst = arg1;
516
-
517
- const blocked = permissionManager.readonlyBlock('copy_file');
518
- if (blocked) {
519
- logToolCall('copy_file', { src, dst }, false, 'denied');
520
- return blocked;
521
- }
522
-
523
- if (!isPathSafe(dst)) {
524
- logToolCall('copy_file', { src, dst }, false, 'denied');
525
- return _sandboxError(dst);
526
- }
527
-
528
- try {
529
- const dstDir = path.dirname(dst);
530
- if (dstDir && dstDir !== '.') await fsp.mkdir(dstDir, { recursive: true });
531
- await fsp.cp(src, dst, { recursive: true });
532
- _log(` ${FG_GREEN}✓${RST} ${FG_GRAY}Copied ${src} → ${dst}${RST}`);
533
- logToolCall('copy_file', { src, dst }, true, 'ok');
534
- return { status: 'ok', src, dst };
535
- } catch (error) {
536
- _log(` ${FG_RED}✗ ${error.message}${RST}`);
537
- logToolCall('copy_file', { src, dst }, true, 'error');
538
- return { error: error.message };
539
- }
540
- }
541
-
542
- if (action === 'edit_file') {
543
- const filePath = arg0;
544
- const lineNum = arg1;
545
- const newContent = arg2;
546
- try {
547
- const data = await fsp.readFile(filePath, 'utf8');
548
- const lines = data.split('\n');
549
- if (lineNum < 1 || lineNum > lines.length) {
550
- logToolCall('edit_file', { path: filePath, line: lineNum }, true, 'error');
551
- return { error: `Line ${lineNum} out of range (file has ${lines.length} lines)` };
552
- }
553
- lines[lineNum - 1] = newContent;
554
- await fsp.writeFile(filePath, lines.join('\n'));
555
- _log(` ${FG_GREEN}✓${RST} ${FG_GRAY}Edited line ${lineNum} in ${filePath}${RST}`);
556
- logToolCall('edit_file', { path: filePath, line: lineNum }, true, 'ok');
557
- return { status: 'ok', path: filePath, line: lineNum };
558
- } catch (error) {
559
- _log(` ${FG_RED}✗ ${error.message}${RST}`);
560
- logToolCall('edit_file', { path: filePath, line: lineNum }, true, 'error');
561
- return { error: error.message };
562
- }
563
- }
564
-
565
- if (action === 'search_in_file') {
566
- const filePath = arg0;
567
- const pattern = arg1;
568
- try {
569
- const data = await fsp.readFile(filePath, 'utf8');
570
- const guardErr = _checkRegexSafety(pattern, data);
571
- if (guardErr) {
572
- logToolCall('search_in_file', { path: filePath, pattern }, true, 'error');
573
- return guardErr;
574
- }
575
- const regex = new RegExp(pattern);
576
- const matches = data.split('\n')
577
- .map((content, idx) => regex.test(content) ? { line: idx + 1, content } : null)
578
- .filter(Boolean);
579
- _log(` ${FG_GREEN}✓${RST} ${FG_GRAY}Found ${matches.length} match(es) in ${filePath}${RST}`);
580
- logToolCall('search_in_file', { path: filePath, pattern }, true, 'ok');
581
- return { matches, path: filePath };
582
- } catch (error) {
583
- _log(` ${FG_RED}✗ ${error.message}${RST}`);
584
- logToolCall('search_in_file', { path: filePath, pattern }, true, 'error');
585
- return { error: error.message };
586
- }
587
- }
588
-
589
- if (action === 'replace_in_file') {
590
- const filePath = arg0;
591
- const searchStr = arg1;
592
- const replaceStr = arg2;
593
- const flags = arg3 || '';
594
- try {
595
- const data = await fsp.readFile(filePath, 'utf8');
596
- const guardErr = _checkRegexSafety(searchStr, data);
597
- if (guardErr) {
598
- logToolCall('replace_in_file', { path: filePath, search: searchStr }, true, 'error');
599
- return guardErr;
600
- }
601
- const safeFlags = flags.replace(/[^gimsuy]/g, '');
602
- const countFlags = safeFlags.includes('g') ? safeFlags : safeFlags + 'g';
603
- const count = (data.match(new RegExp(searchStr, countFlags)) || []).length;
604
- const regex = new RegExp(searchStr, safeFlags || undefined);
605
- const newData = data.replace(regex, replaceStr);
606
- await fsp.writeFile(filePath, newData);
607
- _log(` ${FG_GREEN}✓${RST} ${FG_GRAY}Replaced ${count} occurrence(s) in ${filePath}${RST}`);
608
- logToolCall('replace_in_file', { path: filePath, search: searchStr }, true, 'ok');
609
- return { status: 'ok', path: filePath, count };
610
- } catch (error) {
611
- _log(` ${FG_RED}✗ ${error.message}${RST}`);
612
- logToolCall('replace_in_file', { path: filePath, search: searchStr }, true, 'error');
613
- return { error: error.message };
614
- }
615
- }
616
458
 
617
- if (action === 'search_files') {
618
- const pattern = arg0;
619
- const searchDir = arg1 || '.';
620
- const startedAt = Date.now();
621
- try {
622
- let regStr = pattern.replace(/[.+^${}()|[\]\\]/g, '\\$&');
623
- regStr = regStr.replace(/\*\*/g, '\x00');
624
- regStr = regStr.replace(/\*/g, '[^/]*');
625
- regStr = regStr.replace(/\x00\//g, '(?:.*/)?');
626
- regStr = regStr.replace(/\x00/g, '.*');
627
- const regex = new RegExp(`^${regStr}$`);
628
- const matchName = !pattern.includes('/');
629
- const files = [];
630
- async function walk(dir, rel) {
631
- if (signal && signal.aborted) return;
632
- let entries;
633
- try { entries = await fsp.readdir(dir, { withFileTypes: true }); } catch { return; }
634
- for (const entry of entries) {
635
- if (signal && signal.aborted) return;
636
- const relPath = rel ? `${rel}/${entry.name}` : entry.name;
637
- if (regex.test(matchName ? entry.name : relPath)) files.push(relPath);
638
- if (entry.isDirectory()) await walk(path.join(dir, entry.name), relPath);
639
- }
640
- }
641
- await walk(searchDir, '');
642
- if (signal && signal.aborted) {
643
- logToolCall('search_files', { pattern, dir: searchDir }, true, 'aborted');
644
- return { aborted: true, elapsed_s: Math.max(0, Math.round((Date.now() - startedAt) / 1000)) };
645
- }
646
- files.sort();
647
- _log(` ${FG_GREEN}✓${RST} ${FG_GRAY}Found ${files.length} file(s) matching "${pattern}"${RST}`);
648
- logToolCall('search_files', { pattern, dir: searchDir }, true, 'ok');
649
- return { files, pattern, dir: searchDir };
650
- } catch (error) {
651
- _log(` ${FG_RED}✗ ${error.message}${RST}`);
652
- logToolCall('search_files', { pattern, dir: searchDir }, true, 'error');
653
- return { error: error.message };
654
- }
655
- }
656
-
657
- if (action === 'file_stat') {
658
- const filePath = arg0;
659
- try {
660
- const stat = await fsp.stat(filePath);
661
- const type = stat.isDirectory() ? 'directory' : stat.isSymbolicLink() ? 'symlink' : 'file';
662
- const size_kb = (stat.size / 1024).toFixed(2);
663
- const mode = '0o' + stat.mode.toString(8);
664
- const mtime = stat.mtime.toISOString();
665
- _log(` ${FG_GREEN}✓${RST} ${FG_GRAY}Stat ${filePath}${RST}`);
666
- logToolCall('file_stat', { path: filePath }, true, 'ok');
667
- return { path: filePath, size_kb, mtime, type, mode };
668
- } catch (error) {
669
- _log(` ${FG_RED}✗ ${error.message}${RST}`);
670
- logToolCall('file_stat', { path: filePath }, true, 'error');
671
- return { error: error.message };
672
- }
673
- }
674
-
675
- if (action === 'get_env') {
676
- const varName = arg0;
677
- const value = process.env[varName];
678
- _log(` ${FG_GREEN}✓${RST} ${FG_GRAY}Got env ${varName}${RST}`);
679
- logToolCall('get_env', { name: varName }, true, 'ok');
680
- return { name: varName, value: value !== undefined ? value : null };
681
- }
682
-
683
- if (action === 'set_env') {
684
- const varName = arg0;
685
- const value = arg1 || '';
686
- process.env[varName] = value;
687
- _log(` ${FG_GREEN}✓${RST} ${FG_GRAY}Set env ${varName}${RST}`);
688
- logToolCall('set_env', { name: varName }, true, 'ok');
689
- return { status: 'ok', name: varName };
690
- }
691
-
692
- if (action === 'download') {
693
- const url = arg0;
694
- if (_dryRun) {
695
- _skippedOps.push({ category: 'net', symbol: '↓', desc: `download ${url}` });
696
- logToolCall('download', { url }, false, 'dry-run');
697
- return { status: 'dry-run', message: 'dry-run: network call skipped' };
698
- }
699
- let fileName;
700
- try {
701
- fileName = path.basename(new URL(url).pathname) || 'download';
702
- } catch {
703
- fileName = 'download';
704
- }
705
- const outPath = path.join(process.cwd(), fileName);
706
- const startedAt = Date.now();
707
- return new Promise((resolve) => {
708
- let abortedByUser = false;
709
- let onAbort = null;
710
- let activeReq = null;
711
- let activeFile = null;
712
- const detachAbort = () => {
713
- if (onAbort && signal) {
714
- try { signal.removeEventListener('abort', onAbort); } catch {}
715
- onAbort = null;
716
- }
717
- };
718
- const finishAborted = () => {
719
- fs.unlink(outPath, () => {});
720
- logToolCall('download', { url }, true, 'aborted');
721
- resolve({ aborted: true, elapsed_s: Math.max(0, Math.round((Date.now() - startedAt) / 1000)) });
722
- };
723
- if (signal) {
724
- if (signal.aborted) {
725
- abortedByUser = true;
726
- finishAborted();
727
- return;
728
- }
729
- onAbort = () => {
730
- abortedByUser = true;
731
- try { if (activeReq) activeReq.destroy(new Error('Aborted')); } catch {}
732
- try { if (activeFile) activeFile.destroy(); } catch {}
733
- };
734
- signal.addEventListener('abort', onAbort, { once: true });
735
- }
736
-
737
- function doDownload(target, redirectsLeft) {
738
- const proto = target.startsWith('https') ? https : http;
739
- const req = proto.get(target, (res) => {
740
- if ([301, 302, 303, 307, 308].includes(res.statusCode) && redirectsLeft > 0 && res.headers.location) {
741
- res.resume();
742
- return doDownload(res.headers.location, redirectsLeft - 1);
743
- }
744
- if (res.statusCode >= 400) {
745
- res.resume();
746
- const msg = `HTTP ${res.statusCode}`;
747
- detachAbort();
748
- _log(` ${FG_RED}✗ ${msg}${RST}`);
749
- logToolCall('download', { url }, true, 'error');
750
- return resolve({ error: msg });
751
- }
752
- const file = fs.createWriteStream(outPath);
753
- activeFile = file;
754
- res.pipe(file);
755
- file.on('finish', () => {
756
- file.close();
757
- detachAbort();
758
- _log(` ${FG_GREEN}✓${RST} ${FG_GRAY}Downloaded to ${outPath}${RST}`);
759
- logToolCall('download', { url }, true, 'ok');
760
- resolve({ status: 'ok', path: outPath });
761
- });
762
- file.on('error', (err) => {
763
- if (abortedByUser) {
764
- detachAbort();
765
- finishAborted();
766
- return;
767
- }
768
- fs.unlink(outPath, () => {});
769
- detachAbort();
770
- _log(` ${FG_RED}✗ ${err.message}${RST}`);
771
- logToolCall('download', { url }, true, 'error');
772
- resolve({ error: err.message });
773
- });
774
- });
775
- activeReq = req;
776
- req.on('error', (err) => {
777
- if (abortedByUser) {
778
- detachAbort();
779
- finishAborted();
780
- return;
781
- }
782
- fs.unlink(outPath, () => {});
783
- detachAbort();
784
- _log(` ${FG_RED}✗ ${err.message}${RST}`);
785
- logToolCall('download', { url }, true, 'error');
786
- resolve({ error: err.message });
787
- });
788
- req.setTimeout(120000, () => {
789
- req.destroy();
790
- fs.unlink(outPath, () => {});
791
- detachAbort();
792
- logToolCall('download', { url }, true, 'error');
793
- resolve({ error: 'Request timeout' });
794
- });
795
- }
796
- doDownload(url, 5);
797
- });
798
- }
799
-
800
- if (action === 'upload') {
801
- const filePath = arg0;
802
- const encodedContent = arg1 || '';
803
- if (!isPathSafe(filePath)) {
804
- logToolCall('upload', { path: filePath }, false, 'denied');
805
- return _sandboxError(filePath);
806
- }
807
- try {
808
- const dir = path.dirname(filePath);
809
- if (dir && dir !== '.') await fsp.mkdir(dir, { recursive: true });
810
- const buffer = Buffer.from(encodedContent.trim(), 'base64');
811
- await fsp.writeFile(filePath, buffer);
812
- _log(` ${FG_GREEN}✓${RST} ${FG_GRAY}Uploaded ${buffer.length} bytes to ${filePath}${RST}`);
813
- logToolCall('upload', { path: filePath }, true, 'ok');
814
- return { status: 'ok', path: filePath, bytes: buffer.length };
815
- } catch (error) {
816
- _log(` ${FG_RED}✗ ${error.message}${RST}`);
817
- logToolCall('upload', { path: filePath }, true, 'error');
818
- return { error: error.message };
819
- }
820
- }
821
-
822
- if (action === 'http_get') {
823
- const url = arg0;
824
- if (_dryRun) {
825
- _skippedOps.push({ category: 'net', symbol: '↓', desc: `GET ${url}` });
826
- logToolCall('http_get', { url }, false, 'dry-run');
827
- return { status: 'dry-run', message: 'dry-run: network call skipped' };
828
- }
829
- const httpCfg = getConfig ? getConfig() : {};
830
- const reqTimeoutMs = Math.max(15000, httpCfg.request_timeout_ms || 15000);
831
- const maxBytes = Math.max(1024, httpCfg.http_fetch_max_bytes || 262144);
832
- const startedAt = Date.now();
833
- return new Promise((resolve) => {
834
- let abortedByUser = false;
835
- let onAbort = null;
836
- let activeReq = null;
837
- const detachAbort = () => {
838
- if (onAbort && signal) {
839
- try { signal.removeEventListener('abort', onAbort); } catch {}
840
- onAbort = null;
841
- }
842
- };
843
- const finishAborted = () => {
844
- logToolCall('http_get', { url }, true, 'aborted');
845
- resolve({ aborted: true, elapsed_s: Math.max(0, Math.round((Date.now() - startedAt) / 1000)) });
846
- };
847
- if (signal) {
848
- if (signal.aborted) {
849
- abortedByUser = true;
850
- finishAborted();
851
- return;
852
- }
853
- onAbort = () => {
854
- abortedByUser = true;
855
- try { if (activeReq) activeReq.destroy(new Error('Aborted')); } catch {}
856
- };
857
- signal.addEventListener('abort', onAbort, { once: true });
858
- }
859
-
860
- function doGet(target, redirectsLeft) {
861
- const proto = target.startsWith('https') ? https : http;
862
- const req = proto.get(target, (res) => {
863
- if ([301, 302, 303, 307, 308].includes(res.statusCode) && redirectsLeft > 0 && res.headers.location) {
864
- res.resume();
865
- return doGet(res.headers.location, redirectsLeft - 1);
866
- }
867
- const bufs = [];
868
- let totalBytes = 0;
869
- let capped = false;
870
- res.on('data', (chunk) => {
871
- totalBytes += chunk.length;
872
- if (!capped) {
873
- if (totalBytes <= maxBytes) {
874
- bufs.push(chunk);
875
- } else {
876
- const keep = maxBytes - (totalBytes - chunk.length);
877
- if (keep > 0) bufs.push(chunk.slice(0, keep));
878
- capped = true;
879
- // Keep the connection draining so totalBytes reflects reality,
880
- // but stop buffering further bytes.
881
- }
882
- }
883
- });
884
- res.on('end', () => {
885
- if (abortedByUser) return;
886
- detachAbort();
887
- const kept = Buffer.concat(bufs);
888
- const keptBytes = kept.length;
889
- let body = kept.toString('utf8');
890
- if (capped) {
891
- const origKb = (totalBytes / 1024).toFixed(0);
892
- const keptKb = (keptBytes / 1024).toFixed(0);
893
- const droppedKb = ((totalBytes - keptBytes) / 1024).toFixed(0);
894
- body += `\n\n[... truncated: original was ${origKb}KB, showing first ${keptKb}KB. The remaining ${droppedKb}KB was discarded. If you need the rest, narrow your request (e.g. fetch a specific subpage) rather than retrying this URL.]`;
895
- }
896
- _log(` ${FG_GREEN}✓${RST} ${FG_GRAY}HTTP GET ${target} (${res.statusCode}, ${totalBytes} bytes${capped ? `, truncated to ${keptBytes}` : ''})${RST}`);
897
- logToolCall('http_get', { url: target }, true, res.statusCode < 400 ? 'ok' : 'error');
898
- // `bytes` is the total transferred payload length (pre-cap);
899
- // consumers that want to know the wire size without parsing
900
- // the appended truncation note rely on this.
901
- resolve({ status_code: res.statusCode, body, bytes: totalBytes });
902
- });
903
- });
904
- activeReq = req;
905
- req.on('error', (err) => {
906
- if (abortedByUser) {
907
- detachAbort();
908
- finishAborted();
909
- return;
910
- }
911
- detachAbort();
912
- _log(` ${FG_RED}✗ ${err.message}${RST}`);
913
- logToolCall('http_get', { url: target }, true, 'error');
914
- resolve({ error: err.message, error_code: err.code });
915
- });
916
- req.setTimeout(reqTimeoutMs, () => {
917
- req.destroy();
918
- detachAbort();
919
- logToolCall('http_get', { url: target }, true, 'error');
920
- resolve({ error: 'Request timeout', error_code: 'ETIMEDOUT' });
921
- });
922
- }
923
- doGet(url, 5);
924
- });
925
- }
926
-
927
- if (action === 'ask_user') {
928
- const question = arg0;
929
- const options = _parseNumberedOptions(question);
930
- if (options.length >= 2) {
931
- const selected = await permissionManager.captureSelect({ options });
932
- logToolCall('ask_user', { question }, true, 'ok');
933
- return { question, answer: selected || options[0] };
934
- }
935
- if (!process.stdout.isTTY || process.stdin.isRaw) {
936
- writer.scrollback(`\n ${FG_YELLOW}?${RST} ${question}\n ${DIM}[auto-answering 'y']${RST}`);
937
- logToolCall('ask_user', { question }, true, 'ok');
938
- return { question, answer: 'y' };
939
- }
940
- // audit: allowed — inline prompt without trailing newline; unreachable when TUI writer is active
941
- // (process.stdin.isRaw is true while the TUI input field holds raw mode).
942
- process.stdout.write(`\n ${FG_YELLOW}?${RST} ${question}\n ${FG_GRAY}>${RST} `);
943
- const buf = Buffer.alloc(4096);
944
- let input = '';
945
- while (true) {
946
- const n = fs.readSync(0, buf, 0, 1);
947
- if (n === 0) break;
948
- const ch = buf[0];
949
- if (ch === 0x0a) break;
950
- if (ch === 0x0d) continue;
951
- input += String.fromCharCode(ch);
952
- }
953
- _log();
954
- logToolCall('ask_user', { question }, true, 'ok');
955
- return { question, answer: input };
956
- }
957
-
958
- if (action === 'store_memory') {
959
- const key = arg0;
960
- const value = arg1 || '';
961
- try {
962
- let mem = {};
963
- try { mem = JSON.parse(await fsp.readFile(MEMORY_PATH, 'utf8')); } catch {}
964
- mem[key] = value;
965
- await fsp.mkdir(path.dirname(MEMORY_PATH), { recursive: true });
966
- await fsp.writeFile(MEMORY_PATH, JSON.stringify(mem, null, 2));
967
- _log(` ${FG_GREEN}✓${RST} ${FG_GRAY}Stored memory: ${key}${RST}`);
968
- logToolCall('store_memory', { key }, true, 'ok');
969
- return { status: 'ok', key };
970
- } catch (error) {
971
- _log(` ${FG_RED}✗ ${error.message}${RST}`);
972
- logToolCall('store_memory', { key }, true, 'error');
973
- return { error: error.message };
974
- }
975
- }
976
-
977
- if (action === 'recall_memory') {
978
- const key = arg0;
979
- try {
980
- let mem = {};
981
- try { mem = JSON.parse(await fsp.readFile(MEMORY_PATH, 'utf8')); } catch {}
982
- const found = key in mem;
983
- const value = found ? mem[key] : null;
984
- _log(` ${FG_GREEN}✓${RST} ${FG_GRAY}Recalled memory: ${key}${RST}`);
985
- logToolCall('recall_memory', { key }, true, 'ok');
986
- return { key, value, found };
987
- } catch (error) {
988
- _log(` ${FG_RED}✗ ${error.message}${RST}`);
989
- logToolCall('recall_memory', { key }, true, 'error');
990
- return { error: error.message };
991
- }
992
- }
993
-
994
- if (action === 'list_memories') {
995
- try {
996
- let mem = {};
997
- try { mem = JSON.parse(await fsp.readFile(MEMORY_PATH, 'utf8')); } catch {}
998
- const keys = Object.keys(mem);
999
- _log(` ${FG_GREEN}✓${RST} ${FG_GRAY}Listed ${keys.length} memory key(s)${RST}`);
1000
- logToolCall('list_memories', {}, true, 'ok');
1001
- return { keys };
1002
- } catch (error) {
1003
- _log(` ${FG_RED}✗ ${error.message}${RST}`);
1004
- logToolCall('list_memories', {}, true, 'error');
1005
- return { error: error.message };
1006
- }
1007
- }
1008
-
1009
- if (action === 'system_info') {
1010
- const info = {
1011
- platform: os.platform(),
1012
- arch: os.arch(),
1013
- hostname: os.hostname(),
1014
- user: process.env.USER || process.env.USERNAME || '',
1015
- total_mem_mb: Math.round(os.totalmem() / 1024 / 1024),
1016
- free_mem_mb: Math.round(os.freemem() / 1024 / 1024),
1017
- node_version: process.version,
1018
- cwd: process.cwd(),
1019
- };
1020
- _log(` ${FG_GREEN}✓${RST} ${FG_GRAY}System info: ${info.platform}/${info.arch}${RST}`);
1021
- logToolCall('system_info', {}, true, 'ok');
1022
- return info;
1023
- }
1024
-
1025
- logToolCall(action, { action }, false, 'error');
1026
- return { error: `Unknown action: ${action}` };
1027
- }
459
+ // Dispatch via the tool registry (Task 1.4b). The per-action executor body
460
+ // lives on its TOOL_REGISTRY entry; ctx carries the collaborators the body
461
+ // closes over. Signature is unchanged, so agent.js stays untouched.
462
+ const entry = entryForAction(action);
463
+ if (!entry || typeof entry.execute !== 'function') {
464
+ logToolCall(action, { action }, false, 'error');
465
+ return { error: `Unknown action: ${action}` };
466
+ }
467
+
468
+ // Checkpoint capture (Task 4.3): snapshot prior state BEFORE the mutation
469
+ // (post-gate — a denied call never reaches here). Skip under --dry-run (no
470
+ // mutation will happen). beginCapture is fail-safe (returns null on error),
471
+ // and commit only fires on a status:'ok' result, so a call the executor
472
+ // itself refuses (--readonly, sandbox) produces no committed checkpoint.
473
+ let _cp = null;
474
+ if (checkpointStore && !_dryRun) {
475
+ try { _cp = await checkpointStore.beginCapture(action, args); } catch { _cp = null; }
476
+ }
477
+ const result = await entry.execute(ctx, args, { signal });
478
+ if (_cp) {
479
+ try { if (result && result.status === 'ok') _cp.commit(); } catch { /* fail-safe: never block a completed mutation */ }
480
+ }
481
+ return result;
482
+ }
483
+
484
+ // Dependency bag passed to every registry execute()/permission(). Built once
485
+ // here so the moved executor bodies see the same collaborators they had inside
486
+ // this factory closure. _uiActive is a getter so it always reflects the live
487
+ // module flag rather than a snapshot.
488
+ const ctx = {
489
+ _log,
490
+ logToolCall,
491
+ isPathSafe,
492
+ isProtectedSecretPath,
493
+ isProtectedConfigPath,
494
+ _sandboxError,
495
+ _secretReadError,
496
+ _protectedConfigWriteError,
497
+ _checkRegexSafety,
498
+ _isLiteralPattern,
499
+ _parseNumberedOptions,
500
+ _parseAskMenu: parseAskMenu,
501
+ _dryRun,
502
+ _skippedOps,
503
+ MEMORY_PATH,
504
+ permissionManager,
505
+ getConfig,
506
+ webChat,
507
+ webSearch,
508
+ renderDiff,
509
+ DIFF_BUBBLE_INSET,
510
+ writer,
511
+ agentExecShell,
512
+ FG_GREEN,
513
+ FG_GRAY,
514
+ FG_RED,
515
+ FG_YELLOW,
516
+ RST,
517
+ DIM,
518
+ BOLD,
519
+ get _uiActive() { return _uiActive; },
520
+ };
1028
521
 
1029
522
  return {
1030
523
  agentExecFile,
@@ -1033,97 +526,11 @@ function createToolExecutor(permissionManager, ui, getConfig) {
1033
526
  };
1034
527
  }
1035
528
 
1036
- // Map a MiniMax-style {name, params} invocation to the internal
1037
- // [action, arg1, arg2, …] call tuple consumed by the agent loop.
529
+ // Native function-calling path: maps an invocation to the internal [action, ...args] call tuple.
530
+ // Delegates to the tool registry (lib/tool_registry.js), the single source
531
+ // shared with the XML path. Kept here (and exported) for backward compatibility.
1038
532
  function mapInvokeToCall(toolName, params) {
1039
- const name = (toolName || '').toLowerCase();
1040
- const p = params || {};
1041
- switch (name) {
1042
- case 'write_file':
1043
- case 'create_file':
1044
- return p.path ? ['write', p.path, p.content != null ? p.content : ''] : null;
1045
- case 'read_file':
1046
- return p.path ? ['read', p.path] : null;
1047
- case 'append_file':
1048
- return p.path ? ['append', p.path, p.content != null ? p.content : ''] : null;
1049
- case 'delete_file':
1050
- return p.path ? ['delete_file', p.path] : null;
1051
- case 'list_dir':
1052
- return ['list_dir', p.path || '.'];
1053
- case 'make_dir':
1054
- return p.path ? ['make_dir', p.path] : null;
1055
- case 'remove_dir':
1056
- return p.path ? ['remove_dir', p.path] : null;
1057
- case 'move_file':
1058
- return p.src && p.dst ? ['move_file', p.src, p.dst] : null;
1059
- case 'copy_file':
1060
- return p.src && p.dst ? ['copy_file', p.src, p.dst] : null;
1061
- case 'file_stat':
1062
- return p.path ? ['file_stat', p.path] : null;
1063
- case 'search_files':
1064
- return ['search_files', p.pattern || '*', p.dir || '.'];
1065
- case 'search_in_file':
1066
- return p.path && p.pattern ? ['search_in_file', p.path, p.pattern] : null;
1067
- case 'replace_in_file':
1068
- return p.path && p.search !== undefined
1069
- ? ['replace_in_file', p.path, p.search, p.replace != null ? p.replace : '', p.flags || '']
1070
- : null;
1071
- case 'edit_file':
1072
- return p.path && p.line !== undefined
1073
- ? ['edit_file', p.path, parseInt(p.line, 10), p.content != null ? p.content : '']
1074
- : null;
1075
- case 'get_env':
1076
- return p.name ? ['get_env', p.name] : null;
1077
- case 'set_env':
1078
- return p.name ? ['set_env', p.name, p.value != null ? p.value : ''] : null;
1079
- case 'download':
1080
- return p.url ? ['download', p.url] : null;
1081
- case 'upload':
1082
- return p.path ? ['upload', p.path, p.content != null ? p.content : ''] : null;
1083
- case 'http_get':
1084
- return p.url ? ['http_get', p.url] : null;
1085
- case 'ask_user':
1086
- return p.question ? ['ask_user', p.question] : null;
1087
- case 'store_memory':
1088
- return p.key ? ['store_memory', p.key, p.value != null ? p.value : ''] : null;
1089
- case 'recall_memory':
1090
- return p.key ? ['recall_memory', p.key] : null;
1091
- case 'list_memories':
1092
- return ['list_memories'];
1093
- case 'system_info':
1094
- return ['system_info'];
1095
- case 'exec':
1096
- case 'shell':
1097
- return p.command ? ['shell', p.command] : null;
1098
- default:
1099
- return null;
1100
- }
1101
- }
1102
-
1103
- // Compile a regex twice — once with double quotes, once with single — from a
1104
- // template where `Q` stands for the quote char. Matches from both variants
1105
- // are returned in a single iterable.
1106
- function _matchDual(text, template) {
1107
- const results = [];
1108
- for (const q of ['"', "'"]) {
1109
- const re = new RegExp(template.replace(/Q/g, q), 'g');
1110
- for (const m of text.matchAll(re)) results.push(m);
1111
- }
1112
- return results;
1113
- }
1114
-
1115
- // Models sometimes wrap the inline body of a single-value tool tag in a nested
1116
- // pseudo-tag, e.g. `<list_dir><path>/tmp/foo</path></list_dir>` instead of the
1117
- // documented `<list_dir>/tmp/foo</list_dir>`. When the body is exactly one
1118
- // wrapper element (no siblings, no surrounding text), unwrap it once so the
1119
- // parser recovers the intended value. Safe to call on any inline-content body
1120
- // — a plain path/command/URL won't match the regex and is returned as-is.
1121
- function _unwrapInnerTag(inner) {
1122
- if (inner == null) return inner;
1123
- const trimmed = String(inner).trim();
1124
- const m = trimmed.match(/^<(\w+)(?:\s[^>]*)?>([\s\S]*)<\/\1>$/);
1125
- if (!m) return inner;
1126
- return m[2].trim();
533
+ return fromInvoke(toolName, params);
1127
534
  }
1128
535
 
1129
536
  // MiniMax-M2 tool-call XML repair. Some inference backends — notably mlx-lm
@@ -1322,137 +729,37 @@ function extractToolCalls(text, options = {}) {
1322
729
  }
1323
730
  }
1324
731
 
1325
- for (const match of text.matchAll(/```(?:shell|bash|sh)\n([\s\S]*?)```/g)) {
1326
- for (const line of match[1].trim().split('\n')) {
1327
- const cmd = line.trim();
1328
- if (cmd && !cmd.startsWith('#')) calls.push(['shell', cmd]);
732
+ // Bare-code-fence TEXT HEURISTIC: a ```bash/```sh/```shell markdown block with
733
+ // NO tool tag, each non-comment line inferred as a shell command. This is the
734
+ // ONLY mechanism that fires on untagged prose, so it is the only one gated by
735
+ // `skipTextHeuristics` (set on the native rail — see lib/agent.js). Every other
736
+ // pass in this function requires an EXPLICIT tool tag (<minimax:tool_call>,
737
+ // <function=…>, <tool_call>, the registered <tool> tags, MCP tags) and stays
738
+ // active regardless. The heuristic itself is unchanged — it is only skipped.
739
+ if (!options.skipTextHeuristics) {
740
+ for (const match of text.matchAll(/```(?:shell|bash|sh)\n([\s\S]*?)```/g)) {
741
+ for (const line of match[1].trim().split('\n')) {
742
+ const cmd = line.trim();
743
+ if (cmd && !cmd.startsWith('#')) calls.push(['shell', cmd]);
744
+ }
1329
745
  }
1330
746
  }
1331
747
 
1332
- for (const match of text.matchAll(/<(?:shell|exec|run_command|run)>([\s\S]*?)<\/(?:shell|exec|run_command|run)>/g)) {
1333
- calls.push(['shell', _unwrapInnerTag(match[1]).trim()]);
1334
- }
1335
-
1336
- for (const match of text.matchAll(/<read_file>([\s\S]*?)<\/read_file>/g)) {
1337
- calls.push(['read', _unwrapInnerTag(match[1]).trim()]);
1338
- }
1339
-
1340
- for (const match of _matchDual(text, '<read_file\\s+path=Q([^Q]+)Q\\s*\\/?>')) {
1341
- calls.push(['read', match[1]]);
1342
- }
1343
-
1344
- for (const match of _matchDual(text, '<write_file\\s+path=Q([^Q]+)Q>([\\s\\S]*?)<\\/write_file>')) {
1345
- calls.push(['write', match[1], match[2]]);
1346
- }
1347
-
1348
- for (const match of _matchDual(text, '<create_file\\s+path=Q([^Q]+)Q>([\\s\\S]*?)<\\/create_file>')) {
1349
- calls.push(['write', match[1], match[2]]);
1350
- }
1351
-
1352
- for (const match of _matchDual(text, '<append_file\\s+path=Q([^Q]+)Q>([\\s\\S]*?)<\\/append_file>')) {
1353
- calls.push(['append', match[1], match[2]]);
1354
- }
1355
-
1356
- for (const match of text.matchAll(/<list_dir>([\s\S]*?)<\/list_dir>/g)) {
1357
- calls.push(['list_dir', _unwrapInnerTag(match[1]).trim()]);
1358
- }
1359
-
1360
- for (const match of text.matchAll(/<search_files>([\s\S]*?)<\/search_files>/g)) {
1361
- calls.push(['search_files', _unwrapInnerTag(match[1]).trim(), '.']);
1362
- }
1363
-
1364
- for (const match of _matchDual(text, '<search_files\\s+pattern=Q([^Q]+)Q(?:\\s+dir=Q([^Q]*)Q)?\\s*(?:><\\/search_files>|\\/>)')) {
1365
- calls.push(['search_files', match[1], match[2] || '.']);
1366
- }
1367
-
1368
- for (const match of text.matchAll(/<delete_file>([\s\S]*?)<\/delete_file>/g)) {
1369
- calls.push(['delete_file', _unwrapInnerTag(match[1]).trim()]);
1370
- }
1371
-
1372
- for (const match of text.matchAll(/<make_dir>([\s\S]*?)<\/make_dir>/g)) {
1373
- calls.push(['make_dir', _unwrapInnerTag(match[1]).trim()]);
1374
- }
1375
-
1376
- for (const match of text.matchAll(/<remove_dir>([\s\S]*?)<\/remove_dir>/g)) {
1377
- calls.push(['remove_dir', _unwrapInnerTag(match[1]).trim()]);
1378
- }
1379
-
1380
- for (const match of text.matchAll(/<get_env>([\s\S]*?)<\/get_env>/g)) {
1381
- calls.push(['get_env', _unwrapInnerTag(match[1]).trim()]);
1382
- }
1383
-
1384
- for (const match of _matchDual(text, '<set_env\\s+name=Q([^Q]+)Q\\s+value=Q([^Q]*)Q\\s*(?:><\\/set_env>|\\/>)')) {
1385
- calls.push(['set_env', match[1], match[2]]);
1386
- }
1387
-
1388
- for (const match of _matchDual(text, '<move_file\\s+src=Q([^Q]+)Q\\s+dst=Q([^Q]+)Q\\s*(?:><\\/move_file>|\\/>)')) {
1389
- calls.push(['move_file', match[1], match[2]]);
1390
- }
1391
-
1392
- for (const match of _matchDual(text, '<copy_file\\s+src=Q([^Q]+)Q\\s+dst=Q([^Q]+)Q\\s*(?:><\\/copy_file>|\\/>)')) {
1393
- calls.push(['copy_file', match[1], match[2]]);
1394
- }
1395
-
1396
- for (const match of _matchDual(text, '<edit_file\\s+path=Q([^Q]+)Q\\s+line=Q(\\d+)Q>([\\s\\S]*?)<\\/edit_file>')) {
1397
- calls.push(['edit_file', match[1], parseInt(match[2], 10), match[3]]);
1398
- }
1399
-
1400
- for (const match of _matchDual(text, '<search_in_file\\s+path=Q([^Q]+)Q>([\\s\\S]*?)<\\/search_in_file>')) {
1401
- calls.push(['search_in_file', match[1], match[2].trim()]);
1402
- }
1403
-
1404
- for (const match of _matchDual(text, '<replace_in_file\\s+path=Q([^Q]+)Q\\s+search=Q([^Q]*)Q\\s+replace=Q([^Q]*)Q>([\\s\\S]*?)<\\/replace_in_file>')) {
1405
- calls.push(['replace_in_file', match[1], match[2], match[3], match[4].trim()]);
1406
- }
1407
-
1408
- for (const match of text.matchAll(/<download>([\s\S]*?)<\/download>/g)) {
1409
- calls.push(['download', _unwrapInnerTag(match[1]).trim()]);
1410
- }
1411
-
1412
- for (const match of _matchDual(text, '<upload\\s+path=Q([^Q]+)Q>([\\s\\S]*?)<\\/upload>')) {
1413
- calls.push(['upload', match[1], match[2]]);
1414
- }
1415
-
1416
- for (const match of text.matchAll(/<file_stat>([\s\S]*?)<\/file_stat>/g)) {
1417
- calls.push(['file_stat', _unwrapInnerTag(match[1]).trim()]);
1418
- }
1419
-
1420
- for (const match of text.matchAll(/<http_get\b([^>]*?)(?:><\/http_get>|\/>)/g)) {
1421
- const attrStr = match[1];
1422
- const urlMatch = attrStr.match(/url="([^"]+)"/) || attrStr.match(/url='([^']+)'/);
1423
- if (urlMatch) calls.push(['http_get', urlMatch[1]]);
1424
- }
1425
-
1426
- // Inline-content form: <http_get>URL</http_get>. Models mirror the style of
1427
- // <list_dir>, <download>, etc. even though the system prompt advertises the
1428
- // attribute form — accept both so the second tag in a multi-call response
1429
- // isn't silently dropped. Also tolerate `<http_get>url="URL"</http_get>` where
1430
- // the model put the attribute syntax in the body.
1431
- for (const match of text.matchAll(/<http_get>([\s\S]*?)<\/http_get>/g)) {
1432
- const inner = match[1].trim();
1433
- if (!inner) continue;
1434
- const urlAttr = inner.match(/url="([^"]+)"/) || inner.match(/url='([^']+)'/);
1435
- calls.push(['http_get', urlAttr ? urlAttr[1] : _unwrapInnerTag(inner).trim()]);
1436
- }
1437
-
1438
- for (const match of _matchDual(text, '<ask_user\\s+question=Q([^Q]+)Q\\s*(?:><\\/ask_user>|\\/>)')) {
1439
- calls.push(['ask_user', match[1]]);
1440
- }
1441
-
1442
- for (const match of _matchDual(text, '<store_memory\\s+key=Q([^Q]+)Q>([\\s\\S]*?)<\\/store_memory>')) {
1443
- calls.push(['store_memory', match[1], match[2]]);
1444
- }
1445
-
1446
- for (const match of _matchDual(text, '<recall_memory\\s+key=Q([^Q]+)Q\\s*(?:><\\/recall_memory>|\\/>)')) {
1447
- calls.push(['recall_memory', match[1]]);
1448
- }
1449
-
1450
- for (const match of text.matchAll(/<list_memories\s*(?:><\/list_memories>|\/>)/g)) {
1451
- calls.push(['list_memories']);
748
+ // XML/tag path: each tool's parseAttrs (parseXml) lives next to its spec in
749
+ // the tool registry. Entries run in array order, which — together with the
750
+ // wrapper/JSON/fence passes above — reproduces the exact emission order the
751
+ // characterization tests pin (test/extract-tool-calls.test.js). This replaces
752
+ // the ~25 standalone regex blocks that used to be inlined here.
753
+ for (const entry of TOOL_REGISTRY) {
754
+ if (!entry.parseXml) continue;
755
+ for (const call of entry.parseXml(text)) calls.push(call);
1452
756
  }
1453
757
 
1454
- for (const match of text.matchAll(/<system_info\s*(?:><\/system_info>|\/>)/g)) {
1455
- calls.push(['system_info']);
758
+ // Dynamic tools (MCP, Task 3.3) get the same XML pass so non-native models can
759
+ // invoke them via `<mcp__server__tool>{json args}</mcp__server__tool>`.
760
+ for (const entry of dynamicToolEntries()) {
761
+ if (!entry.parseXml) continue;
762
+ for (const call of entry.parseXml(text)) calls.push(call);
1456
763
  }
1457
764
 
1458
765
  return calls;
@@ -1481,8 +788,14 @@ module.exports = {
1481
788
  createToolExecutor,
1482
789
  extractToolCalls,
1483
790
  getSkippedOps,
791
+ // Exported for unit testing (Task 1.1). These pure path guards are otherwise
792
+ // private; exposing them changes no runtime behavior.
793
+ isPathSafe,
794
+ isProtectedSecretPath,
795
+ isProtectedConfigPath,
1484
796
  isUIActive,
1485
797
  mapInvokeToCall,
798
+ parseAskMenu,
1486
799
  repairMinimaxMalformedXml,
1487
800
  setUIActive,
1488
801
  };