@semalt-ai/code 1.8.5 → 1.19.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (146)
  1. package/.claude/settings.local.json +6 -1
  2. package/.github/workflows/ci.yml +69 -0
  3. package/CLAUDE.md +1584 -26
  4. package/README.md +147 -3
  5. package/examples/embed.js +74 -0
  6. package/index.js +251 -10
  7. package/lib/agent.js +711 -104
  8. package/lib/api.js +213 -49
  9. package/lib/args.js +74 -2
  10. package/lib/audit.js +23 -1
  11. package/lib/background.js +584 -0
  12. package/lib/checkpoints.js +757 -0
  13. package/lib/commands/auth.js +94 -0
  14. package/lib/commands/chat-session.js +306 -0
  15. package/lib/commands/chat-slash.js +399 -0
  16. package/lib/commands/chat-turn.js +446 -0
  17. package/lib/commands/chat.js +403 -0
  18. package/lib/commands/custom.js +157 -0
  19. package/lib/commands/history-utils.js +66 -0
  20. package/lib/commands/index.js +268 -0
  21. package/lib/commands/mcp.js +113 -0
  22. package/lib/commands/oneshot.js +193 -0
  23. package/lib/commands/registry.js +269 -0
  24. package/lib/commands/tasks.js +89 -0
  25. package/lib/compact.js +87 -0
  26. package/lib/config.js +333 -11
  27. package/lib/constants.js +372 -3
  28. package/lib/deny.js +199 -0
  29. package/lib/doctor.js +160 -0
  30. package/lib/headless.js +167 -0
  31. package/lib/hooks.js +286 -0
  32. package/lib/images.js +264 -0
  33. package/lib/internals.js +49 -0
  34. package/lib/mcp/boundary.js +131 -0
  35. package/lib/mcp/client.js +270 -0
  36. package/lib/mcp/oauth.js +134 -0
  37. package/lib/memory.js +209 -0
  38. package/lib/metrics.js +37 -2
  39. package/lib/payload.js +54 -0
  40. package/lib/permission-rules.js +401 -0
  41. package/lib/permissions.js +100 -10
  42. package/lib/pricing.js +67 -0
  43. package/lib/proc.js +62 -0
  44. package/lib/prompts.js +84 -5
  45. package/lib/sandbox.js +568 -0
  46. package/lib/sdk.js +328 -0
  47. package/lib/secrets.js +211 -0
  48. package/lib/skills.js +223 -0
  49. package/lib/subagents.js +516 -0
  50. package/lib/tool_registry.js +2558 -0
  51. package/lib/tool_specs.js +222 -2
  52. package/lib/tools.js +272 -1020
  53. package/lib/ui/format.js +22 -1
  54. package/lib/ui/input-field.js +16 -7
  55. package/lib/ui/status-bar.js +79 -11
  56. package/lib/ui/theme.js +1 -0
  57. package/lib/ui/web-activity.js +218 -0
  58. package/lib/verify.js +229 -0
  59. package/lib/web-extract.js +213 -0
  60. package/lib/web-summarize.js +68 -0
  61. package/package.json +19 -4
  62. package/scripts/lint.js +57 -0
  63. package/test/agent-loop.test.js +389 -0
  64. package/test/background.test.js +414 -0
  65. package/test/chat.test.js +114 -0
  66. package/test/checkpoints-agent.test.js +181 -0
  67. package/test/checkpoints.test.js +650 -0
  68. package/test/command-registry.test.js +160 -0
  69. package/test/compact.test.js +116 -0
  70. package/test/completion-lazy.test.js +52 -0
  71. package/test/config-merge.test.js +324 -0
  72. package/test/config-quarantine.test.js +128 -0
  73. package/test/config-write-guard-allow-anywhere.test.js +56 -0
  74. package/test/config-write-guard-skip.test.js +46 -0
  75. package/test/config-write-guard.test.js +153 -0
  76. package/test/context-split.test.js +215 -0
  77. package/test/cost-doctor.test.js +142 -0
  78. package/test/custom-commands-chat.test.js +106 -0
  79. package/test/custom-commands.test.js +230 -0
  80. package/test/deny-windows.test.js +120 -0
  81. package/test/deny.test.js +83 -0
  82. package/test/download-allow-anywhere.test.js +66 -0
  83. package/test/download-confine.test.js +153 -0
  84. package/test/executors.test.js +362 -0
  85. package/test/extract-tool-calls.test.js +315 -0
  86. package/test/fetch-url-validation.test.js +219 -0
  87. package/test/fixtures/tool-calls.js +57 -0
  88. package/test/fixtures/web-page.js +91 -0
  89. package/test/git-tools.test.js +384 -0
  90. package/test/grep-glob-serialize.test.js +242 -0
  91. package/test/grep-glob.test.js +268 -0
  92. package/test/harness/README.md +57 -0
  93. package/test/harness/chat-harness.js +142 -0
  94. package/test/harness/memwarn-headless-child.js +65 -0
  95. package/test/harness/mock-llm.js +120 -0
  96. package/test/harness/mock-mcp-server.js +142 -0
  97. package/test/harness/sse-server.js +69 -0
  98. package/test/headless.test.js +203 -0
  99. package/test/history-utils.test.js +88 -0
  100. package/test/hooks-agent.test.js +238 -0
  101. package/test/hooks-verify-sandbox.test.js +232 -0
  102. package/test/hooks.test.js +216 -0
  103. package/test/http-get-user-agent.test.js +142 -0
  104. package/test/images-api.test.js +208 -0
  105. package/test/images.test.js +238 -0
  106. package/test/max-iterations.test.js +216 -0
  107. package/test/mcp-boundary.test.js +57 -0
  108. package/test/mcp-client.test.js +267 -0
  109. package/test/mcp-oauth.test.js +86 -0
  110. package/test/memory-truncation-warning.test.js +222 -0
  111. package/test/memory.test.js +198 -0
  112. package/test/native-dispatch.test.js +356 -0
  113. package/test/output-chokepoint.test.js +188 -0
  114. package/test/path-guards.test.js +134 -0
  115. package/test/payload.test.js +99 -0
  116. package/test/permission-rules-agent.test.js +210 -0
  117. package/test/permission-rules.test.js +297 -0
  118. package/test/permissions.test.js +163 -0
  119. package/test/plan-mode.test.js +167 -0
  120. package/test/read-paginate.test.js +275 -0
  121. package/test/readonly-tools.test.js +177 -0
  122. package/test/result-cap.test.js +233 -0
  123. package/test/sandbox-agent.test.js +147 -0
  124. package/test/sandbox-integration.test.js +216 -0
  125. package/test/sandbox.test.js +408 -0
  126. package/test/sdk.test.js +234 -0
  127. package/test/shell-output-cap.test.js +181 -0
  128. package/test/skills-chat.test.js +110 -0
  129. package/test/skills.test.js +295 -0
  130. package/test/smoke.test.js +68 -0
  131. package/test/status-bar-pause.test.js +164 -0
  132. package/test/stream-parser.test.js +147 -0
  133. package/test/subagents-agent.test.js +178 -0
  134. package/test/subagents.test.js +222 -0
  135. package/test/tool-registry.test.js +85 -0
  136. package/test/trim-budget.test.js +101 -0
  137. package/test/verify-agent.test.js +317 -0
  138. package/test/verify.test.js +141 -0
  139. package/test/web-activity-ordering.test.js +194 -0
  140. package/test/web-activity.test.js +207 -0
  141. package/test/web-data-extraction-guidance.test.js +71 -0
  142. package/test/web-extract.test.js +185 -0
  143. package/test/web-fetch-agent.test.js +291 -0
  144. package/test/web-fetch-mode.test.js +193 -0
  145. package/test/web-search.test.js +380 -0
  146. package/lib/commands.js +0 -1438
package/lib/tools.js CHANGED
@@ -1,21 +1,34 @@
1
1
  'use strict';
2
2
 
3
- const fs = require('fs');
4
- const fsp = require('fs/promises');
5
- const http = require('http');
6
- const https = require('https');
7
3
  const os = require('os');
8
4
  const path = require('path');
5
+ // Note: fs / fs/promises / http / https are no longer required here — the file
6
+ // and network executors moved to lib/tool_registry.js in Task 1.4b. tools.js now
7
+ // keeps only the path/sandbox guards, the shell executor, and the parse glue.
9
8
  const { spawn } = require('child_process');
10
9
 
11
10
  const { logToolCall } = require('./audit');
12
11
  const { spawnWithGroup, killTreeEscalating } = require('./proc');
12
+ const { classifyShellCommand } = require('./deny');
13
+ const { resolveSandboxedSpawn } = require('./sandbox');
14
+ const { TOOL_REGISTRY, fromInvoke, entryForAction, dynamicToolEntries } = require('./tool_registry');
13
15
  const writer = require('./ui/writer');
16
+ const { CONFIG_PATH, protectedConfigDirs } = require('./constants');
14
17
 
15
18
  const MEMORY_PATH = path.join(os.homedir(), '.semalt-ai', 'memory.json');
16
19
 
17
20
  const _dryRun = process.argv.includes('--dry-run');
18
21
  const _allowAnywhere = process.argv.includes('--allow-anywhere');
22
+ // Binary network isolation (Task 4.4b): the human-typed --no-network flag forces
23
+ // kernel-level no-network for sandboxed commands. Read once at module load — a
24
+ // human-only signal the model can never reach (the model controls only the
25
+ // command string). sandbox.network in config does the same via decideSandbox.
26
+ const _noNetwork = process.argv.includes('--no-network');
27
+ // The single, explicit opt-out of ALL safety. When set, the destructive-command
28
+ // deny-list and the config-file read guard are bypassed. This is the only flag
29
+ // that does so — --allow-* tier flags grant approval but never disable the
30
+ // deny-list. See lib/deny.js and Task 0.1.
31
+ const _skipPermissions = process.argv.includes('--dangerously-skip-permissions');
19
32
  const _skippedOps = [];
20
33
  function getSkippedOps() { return _skippedOps.slice(); }
21
34
 
@@ -56,6 +69,59 @@ function _sandboxError(filePath) {
56
69
  return { error: `Path outside allowed area: ${filePath}. Use --allow-anywhere to override.` };
57
70
  }
58
71
 
72
+ // Files that hold secrets or sensitive history and must NEVER be read back into
73
+ // the model context — the API key / auth token live in config.json, the memory
74
+ // store may contain secrets, and the audit log records past tool inputs. This
75
+ // guard is intentionally NOT gated on --allow-anywhere: --allow-anywhere widens
76
+ // where the agent may write, it does not unlock secret exfiltration. The only
77
+ // override is --dangerously-skip-permissions (opting out of all safety).
78
+ const _AUDIT_LOG_PATH = path.join(os.homedir(), '.semalt-ai', 'audit.log');
79
+ const PROTECTED_READ_PATHS = new Set([
80
+ path.resolve(CONFIG_PATH),
81
+ path.resolve(MEMORY_PATH),
82
+ path.resolve(_AUDIT_LOG_PATH),
83
+ ]);
84
+
85
+ function isProtectedSecretPath(filePath) {
86
+ if (_skipPermissions) return false;
87
+ if (typeof filePath !== 'string' || !filePath) return false;
88
+ return PROTECTED_READ_PATHS.has(path.resolve(filePath));
89
+ }
90
+
91
+ function _secretReadError(filePath) {
92
+ return { error: `Refused: ${filePath} holds secrets/credentials and cannot be read by the agent. (This guard is not overridable with --allow-anywhere.)` };
93
+ }
94
+
95
+ // Config/execution-driving paths that the AGENT must never WRITE — the write-side
96
+ // companion to the read guard above (Pre-Task 5.0b). Covers the protected-config
97
+ // set (lib/constants.js protectedConfigDirs): the whole ~/.semalt-ai dir AND
98
+ // every project .semalt dir from the CWD up to the repo root, INCLUDING files
99
+ // that do not yet exist (a missing config.json / agents/*.md / hook is inside a
100
+ // protected DIR, so it is refused regardless of existence — the CVE-2026-25725
101
+ // lesson). Directory-prefix matched on the resolved path, mirroring isPathSafe.
102
+ //
103
+ // Policy parity with isProtectedSecretPath: NOT gated on --allow-anywhere
104
+ // (--allow-anywhere widens WHERE the agent may write, it does not unlock writing
105
+ // the config surfaces that drive host-privileged execution). The only override is
106
+ // --dangerously-skip-permissions (opting out of all safety). A human editing
107
+ // their own config in an editor is unaffected — this guards the agent's tools and
108
+ // the sandboxed shell, not the human.
109
+ function isProtectedConfigPath(filePath) {
110
+ if (_skipPermissions) return false;
111
+ if (typeof filePath !== 'string' || !filePath) return false;
112
+ const resolved = path.resolve(filePath);
113
+ for (const dir of protectedConfigDirs()) {
114
+ const base = path.resolve(dir);
115
+ const prefix = base.endsWith(path.sep) ? base : base + path.sep;
116
+ if (resolved === base || resolved.startsWith(prefix)) return true;
117
+ }
118
+ return false;
119
+ }
120
+
121
+ function _protectedConfigWriteError(filePath) {
122
+ return { error: `Refused: ${filePath} is a protected config path (under ~/.semalt-ai or a project .semalt dir) that drives execution and cannot be written by the agent. (This guard is not overridable with --allow-anywhere.)` };
123
+ }
124
+
59
125
  // Cheap ReDoS guard. Rejects pathologically long patterns, common
60
126
  // catastrophic-backtracking anti-patterns, and pattern×data sizes large
61
127
  // enough to hang the regex engine.
@@ -74,8 +140,34 @@ function _checkRegexSafety(pattern, data) {
74
140
  return null;
75
141
  }
76
142
 
77
- function createToolExecutor(permissionManager, ui, getConfig) {
143
+ function createToolExecutor(permissionManager, ui, getConfig, options = {}) {
78
144
  const { BOLD, DIM, FG_DARK, FG_GRAY, FG_GREEN, FG_RED, FG_YELLOW, RST, renderDiff } = ui;
145
+ // Checkpoints & rewind (Task 4.3). When a store is wired, the prior state of a
146
+ // file is snapshotted in agentExecFile AFTER the permission gate approves and
147
+ // BEFORE the executor mutates. Optional — undefined in tests/headless paths
148
+ // that don't opt in. Subagents reuse this same agentExecFile, so a child's
149
+ // mutations are checkpointed into the parent session for free.
150
+ const checkpointStore = options.checkpointStore || null;
151
+ // OS sandbox fallback approver (Task 4.4). When the sandbox is unavailable in
152
+ // `auto` mode, agentExecShell asks a HUMAN before running a command
153
+ // unsandboxed via this callback (it returns true to allow, false to refuse).
154
+ // Undefined in non-TTY/headless/test paths → the command is REFUSED (never a
155
+ // silent unsandboxed run). This is injected by the executor owner (index.js),
156
+ // never reachable by the model, so the agent can't approve its own escape.
157
+ const onUnsandboxed = typeof options.onUnsandboxed === 'function' ? options.onUnsandboxed : null;
158
+ // Web-fetch secondary summarizer (Task W.1). An injected async LLM call
159
+ // `(messages, { model, signal }) => Promise<string>` (the api client's
160
+ // chatComplete) used by http_get to summarize extracted page content in a
161
+ // separate call — only the summary enters the main context. Optional: when
162
+ // absent (headless/oneshot paths without an api client), http_get returns the
163
+ // extracted Markdown instead of summarizing, never the raw page.
164
+ const webChat = typeof options.webChat === 'function' ? options.webChat : null;
165
+ // Web search backend (Task W.2b). An injected async call
166
+ // `(query, { count }) => Promise<{ results: [{title,url,snippet}], … }>` (the
167
+ // api client's dashboardSearch, which hits the backend POST /api/search).
168
+ // Optional: when absent (headless/oneshot paths without an api client) the
169
+ // web_search tool degrades to a clean tool error, never a crash.
170
+ const webSearch = typeof options.webSearch === 'function' ? options.webSearch : null;
79
171
  // Continuation lines in a system-message bubble (chat-history.js else branch)
80
172
  // are indented by 5 spaces. Let the diff renderer reserve those columns so
81
173
  // its lines don't auto-wrap inside the bubble.
@@ -125,95 +217,60 @@ function createToolExecutor(permissionManager, ui, getConfig) {
125
217
  delete_file: 'delete_file',
126
218
  move_file: 'move_file',
127
219
  copy_file: 'copy_file',
220
+ download: 'download',
221
+ edit_file: 'edit_file',
222
+ replace_in_file: 'replace_in_file',
223
+ make_dir: 'make_dir',
224
+ remove_dir: 'remove_dir',
225
+ upload: 'upload',
128
226
  };
129
227
  const roTag = READONLY_TAG[action];
130
228
  if (roTag && permissionManager.readonlyBlock(roTag)) return null;
131
229
 
132
- switch (action) {
133
- case 'shell':
134
- case 'exec':
135
- return { actionType: 'shell', description: args[0] || '', tag: 'exec' };
136
-
137
- case 'write':
138
- case 'append': {
139
- const filePath = args[0];
140
- const content = args[1];
141
- const tag = action === 'write' ? 'write_file' : 'append_file';
142
-
143
- let existing = '';
144
- try { existing = await fsp.readFile(filePath, 'utf8'); } catch {}
145
- const finalContent = action === 'write' ? (content || '') : (existing + (content || ''));
146
- const diffOutput = _uiActive
147
- ? renderDiff(existing, finalContent, filePath, { inset: DIFF_BUBBLE_INSET })
148
- : renderDiff(existing, finalContent, filePath);
149
- if (!_uiActive) writer.scrollback(diffOutput);
150
-
151
- // Dry-run renders the diff (above) but skips the picker — the
152
- // executor's dry-run early return reports the skip.
153
- if (_dryRun) return null;
154
-
155
- let desc = `${action === 'write' ? 'Write' : 'Append to'} ${filePath}`;
156
- if (content) desc += ` (${content.length} chars)`;
157
- if (_uiActive) desc = `${desc}\n${diffOutput}`;
158
- return { actionType: 'file', description: desc, tag };
159
- }
160
-
161
- case 'delete_file': {
162
- const filePath = args[0];
163
- _log(` ${FG_YELLOW}${BOLD}⚠ Deleting: ${filePath}${RST}`);
164
- return { actionType: 'file', description: `Delete ${filePath}`, tag: 'delete_file' };
165
- }
166
-
167
- case 'make_dir':
168
- return { actionType: 'file', description: `Create directory ${args[0]}`, tag: 'make_dir' };
169
-
170
- case 'remove_dir':
171
- return { actionType: 'file', description: `Remove directory ${args[0]}`, tag: 'remove_dir' };
230
+ // Per-tool descriptor now lives on the registry entry (Task 1.4b). Read-only
231
+ // ops resolve to a permission() that returns null (no gate). The side effects
232
+ // that used to live in the switch cases (write/append diff render,
233
+ // delete/move warning lines) moved into those permission() bodies unchanged.
234
+ const entry = entryForAction(action);
235
+ if (entry && typeof entry.permission === 'function') return entry.permission(ctx, args);
236
+ return null;
237
+ }
172
238
 
173
- case 'move_file': {
174
- const src = args[0];
175
- const dst = args[1];
176
- _log(` ${FG_YELLOW}${BOLD}⚠ Moving: ${src} ${dst}${RST}`);
177
- return { actionType: 'file', description: `Move ${src} to ${dst}`, tag: 'move_file' };
239
+ async function agentExecShell(command, options = {}) {
240
+ // Destructive-command deny-list. Enforced for EVERY shell call regardless
241
+ // of approval mode (interactive, non-TTY, or any --allow-* flag). This is
242
+ // the unbypassable chokepoint: all shell execution funnels through here.
243
+ // The only escape hatch is --dangerously-skip-permissions.
244
+ //
245
+ // The `initiator` distinguishes agent-initiated calls (the model asked) from
246
+ // user-initiated ones (a human typed `!cmd` / `semalt-code shell`). Agent
247
+ // calls keep the hard block. User calls are exempt from the block, except for
248
+ // the catastrophic subset (disk wipe / fork bomb), which gets a one-time y/N
249
+ // confirmation via options.confirm as a typo guard. See lib/deny.js.
250
+ const initiator = options.initiator === 'user' ? 'user' : 'agent';
251
+ if (!_skipPermissions) {
252
+ const verdict = classifyShellCommand(command, initiator);
253
+ if (verdict.action === 'block') {
254
+ const msg = `Blocked by safety deny-list: ${verdict.label}. Refuse to run: ${command}. To override, restart with --dangerously-skip-permissions.`;
255
+ _log(` ${FG_RED}✗ ${msg}${RST}`);
256
+ logToolCall('exec', { command }, false, 'denied');
257
+ return { exit_code: -1, stdout: '', stderr: msg, blocked: true };
258
+ }
259
+ if (verdict.action === 'confirm') {
260
+ let approved = false;
261
+ if (typeof options.confirm === 'function') {
262
+ try { approved = await options.confirm(verdict.label, command); }
263
+ catch { approved = false; }
264
+ }
265
+ if (!approved) {
266
+ const msg = `Cancelled (${verdict.label}): ${command}`;
267
+ _log(` ${FG_RED}✗ ${msg}${RST}`);
268
+ logToolCall('exec', { command }, false, 'cancelled');
269
+ return { exit_code: -1, stdout: '', stderr: msg, blocked: true };
270
+ }
178
271
  }
179
-
180
- case 'copy_file':
181
- return { actionType: 'file', description: `Copy ${args[0]} to ${args[1]}`, tag: 'copy_file' };
182
-
183
- case 'edit_file':
184
- return { actionType: 'file', description: `Edit line ${args[1]} in ${args[0]}`, tag: 'edit_file' };
185
-
186
- case 'replace_in_file':
187
- return { actionType: 'file', description: `Replace in ${args[0]}`, tag: 'replace_in_file' };
188
-
189
- case 'set_env':
190
- return { actionType: 'env', description: `Set env ${args[0]}=${args[1] || ''}`, tag: 'set_env' };
191
-
192
- case 'download':
193
- return { actionType: 'net', description: `Download ${args[0]}`, tag: 'download' };
194
-
195
- case 'upload':
196
- return { actionType: 'file', description: `Upload to ${args[0]}`, tag: 'upload' };
197
-
198
- case 'http_get':
199
- return { actionType: 'net', description: `HTTP GET ${args[0]}`, tag: 'http_get' };
200
-
201
- // ask_user is a real gate — "do you want me to ask the user this
202
- // question?" — separate from the question prompt itself (which is
203
- // captureSelect or stdin further down in the executor). Lifted here
204
- // so the activity bubble doesn't pre-date grant.
205
- case 'ask_user':
206
- return { actionType: 'user', description: `Ask user: ${args[0]}`, tag: 'ask_user' };
207
-
208
- case 'store_memory':
209
- return { actionType: 'memory', description: `Store memory: ${args[0]}`, tag: 'store_memory' };
210
-
211
- default:
212
- return null;
213
272
  }
214
- }
215
273
 
216
- async function agentExecShell(command, options = {}) {
217
274
  if (_dryRun) {
218
275
  _log(` ${FG_DARK}[dry-run] $ ${command}${RST}`);
219
276
  _skippedOps.push({ category: 'cmd', symbol: '▶', desc: command });
@@ -225,18 +282,55 @@ function createToolExecutor(permissionManager, ui, getConfig) {
225
282
  const timeout = cfg.command_timeout_ms || 30000;
226
283
  const { signal } = options;
227
284
 
285
+ // ---------------------------------------------------------------------
286
+ // OS sandbox (Task 4.4; unified chokepoint Pre-Task 5.0a). EVERY shell call
287
+ // — here, self-verification, and command-type hooks — funnels through the
288
+ // SHARED resolveSandboxedSpawn shim, so the model has no path that runs a
289
+ // command outside this decision. --dangerously-skip-permissions (a
290
+ // human-only flag) opts out of all safety, sandbox included.
291
+ //
292
+ // run:true → spawn the resolved file/args (jailed when sandbox 'on';
293
+ // plain { shell:true } when 'off'/human-approved 'unavailable').
294
+ // run:false → fail-safe refusal: failIfUnavailable hard error (hard:true)
295
+ // or no/declined human approval — NEVER a silent unsandboxed run.
296
+ // ---------------------------------------------------------------------
297
+ const resolution = await resolveSandboxedSpawn({
298
+ command,
299
+ getConfig,
300
+ onUnsandboxed,
301
+ cwd: process.cwd(),
302
+ allowAnywhere: _allowAnywhere,
303
+ skipPermissions: _skipPermissions,
304
+ noNetwork: _noNetwork,
305
+ });
306
+ if (!resolution.run) {
307
+ _log(` ${FG_RED}✗ ${resolution.message}${RST}`);
308
+ logToolCall('exec', { command, sandbox: 'unavailable' }, false, resolution.hard ? 'sandbox-blocked' : 'sandbox-refused');
309
+ return { exit_code: -1, stdout: '', stderr: resolution.message, blocked: true, sandbox: 'unavailable' };
310
+ }
311
+ const spawnFile = resolution.file;
312
+ const spawnArgs = resolution.useShell ? [] : resolution.args;
313
+ const spawnOpts = resolution.useShell ? { shell: true } : {};
314
+ const sandboxStatus = resolution.sandbox;
315
+ // Binary network mode for this run (Task 4.4b): 'on' (host network) | 'off'
316
+ // (kernel-level no-network). Surfaced in the result + audit (net:on|off).
317
+ const networkStatus = resolution.network || 'on';
318
+
228
319
  return new Promise((resolve) => {
229
320
  let child;
230
321
  try {
231
322
  // spawnWithGroup gives us a process-group leader on POSIX so
232
323
  // killTreeEscalating can reach descendants via -pid. With shell:true
233
324
  // a plain child.kill targets only the sh wrapper, leaving the real
234
- // workload (find /, pipelines, etc.) running as orphans.
235
- child = spawnWithGroup(spawn, command, [], { shell: true });
325
+ // workload (find /, pipelines, etc.) running as orphans. When sandboxed,
326
+ // the group leader is the bwrap/sandbox-exec process — killing the group
327
+ // tears down the whole jailed subtree, so child-process confinement
328
+ // composes with the existing tree-kill plumbing.
329
+ child = spawnWithGroup(spawn, spawnFile, spawnArgs, spawnOpts);
236
330
  } catch (error) {
237
331
  _log(` ${FG_RED}✗ ${error.message}${RST}`);
238
- logToolCall('exec', { command }, true, 'error');
239
- return resolve({ exit_code: -1, stdout: '', stderr: error.message });
332
+ logToolCall('exec', { command, sandbox: sandboxStatus, network: networkStatus }, true, 'error');
333
+ return resolve({ exit_code: -1, stdout: '', stderr: error.message, sandbox: sandboxStatus, network: networkStatus });
240
334
  }
241
335
  const startedAt = Date.now();
242
336
  let stdout = '';
@@ -277,8 +371,8 @@ function createToolExecutor(permissionManager, ui, getConfig) {
277
371
  clearTimeout(timer);
278
372
  detachAbort();
279
373
  _log(` ${FG_RED}✗ ${error.message}${RST}`);
280
- logToolCall('exec', { command }, true, 'error');
281
- resolve({ exit_code: -1, stdout, stderr: stderr || error.message });
374
+ logToolCall('exec', { command, sandbox: sandboxStatus, network: networkStatus }, true, 'error');
375
+ resolve({ exit_code: -1, stdout, stderr: stderr || error.message, sandbox: sandboxStatus, network: networkStatus });
282
376
  });
283
377
  child.on('close', (code, sigName) => {
284
378
  clearTimeout(timer);
@@ -287,14 +381,14 @@ function createToolExecutor(permissionManager, ui, getConfig) {
287
381
  const elapsed_s = Math.max(0, Math.round((Date.now() - startedAt) / 1000));
288
382
  const note = `[user interrupted after ${elapsed_s}s]`;
289
383
  stderr += (stderr ? '\n' : '') + note;
290
- logToolCall('exec', { command }, true, 'aborted');
291
- resolve({ exit_code: -1, stdout, stderr, aborted: true, elapsed_s });
384
+ logToolCall('exec', { command, sandbox: sandboxStatus, network: networkStatus }, true, 'aborted');
385
+ resolve({ exit_code: -1, stdout, stderr, aborted: true, elapsed_s, sandbox: sandboxStatus, network: networkStatus });
292
386
  return;
293
387
  }
294
388
  if (killed) stderr += (stderr ? '\n' : '') + `[timed out after ${timeout}ms]`;
295
389
  const exit_code = killed ? -1 : (code != null ? code : (sigName ? -1 : 0));
296
- logToolCall('exec', { command }, true, exit_code === 0 ? 'ok' : 'error');
297
- resolve({ exit_code, stdout, stderr });
390
+ logToolCall('exec', { command, sandbox: sandboxStatus, network: networkStatus }, true, exit_code === 0 ? 'ok' : 'error');
391
+ resolve({ exit_code, stdout, stderr, sandbox: sandboxStatus, network: networkStatus });
298
392
  });
299
393
  });
300
394
  }
@@ -312,719 +406,67 @@ function createToolExecutor(permissionManager, ui, getConfig) {
312
406
  signal = last.signal || null;
313
407
  args = rest.slice(0, -1);
314
408
  }
315
- const [arg0 = null, arg1 = null, arg2 = null, arg3 = null] = args;
316
-
317
- if (action === 'read') {
318
- const filePath = arg0;
319
- const startedAt = Date.now();
320
- const stat = await fsp.stat(filePath).catch(() => null);
321
- if (stat) {
322
- const cfg = getConfig ? getConfig() : {};
323
- const maxBytes = (cfg.max_file_size_kb || 512) * 1024;
324
- if (stat.size > maxBytes) {
325
- const kb = (stat.size / 1024).toFixed(0);
326
- logToolCall('read_file', { path: filePath }, false, 'error');
327
- return { error: `File too large: ${kb} KB exceeds max_file_size_kb=${cfg.max_file_size_kb || 512}` };
328
- }
329
- }
330
- if (signal && signal.aborted) {
331
- logToolCall('read_file', { path: filePath }, true, 'aborted');
332
- return { aborted: true, elapsed_s: Math.max(0, Math.round((Date.now() - startedAt) / 1000)) };
333
- }
334
- try {
335
- const data = await fsp.readFile(filePath, { encoding: 'utf8', signal: signal || undefined });
336
- const lines = data.split('\n').length;
337
- if (lines > 10) {
338
- _log(` ${FG_GREEN}✓${RST} ${FG_GRAY}Read ${filePath} (${lines} lines, ${data.length} chars)${RST}`);
339
- } else {
340
- _log(` ${FG_GREEN}✓${RST} ${FG_GRAY}Read ${filePath}${RST}`);
341
- }
342
- logToolCall('read_file', { path: filePath }, true, 'ok');
343
- return { content: data, path: filePath, bytes: Buffer.byteLength(data, 'utf8') };
344
- } catch (error) {
345
- if (error && (error.name === 'AbortError' || error.code === 'ABORT_ERR')) {
346
- logToolCall('read_file', { path: filePath }, true, 'aborted');
347
- return { aborted: true, elapsed_s: Math.max(0, Math.round((Date.now() - startedAt) / 1000)) };
348
- }
349
- _log(` ${FG_RED}✗ ${error.message}${RST}`);
350
- logToolCall('read_file', { path: filePath }, true, 'error');
351
- return { error: error.message };
352
- }
353
- }
354
-
355
- if (action === 'write' || action === 'append') {
356
- const filePath = arg0;
357
- const content = arg1;
358
- const tag = action === 'write' ? 'write_file' : 'append_file';
359
-
360
- const blocked = permissionManager.readonlyBlock(tag);
361
- if (blocked) {
362
- logToolCall(tag, { path: filePath, content }, false, 'denied');
363
- return blocked;
364
- }
365
-
366
- if (!isPathSafe(filePath)) {
367
- logToolCall(tag, { path: filePath }, false, 'denied');
368
- return _sandboxError(filePath);
369
- }
370
-
371
- // Dry-run: record the skipped op and return without writing. The diff
372
- // was already rendered in describePermission ahead of this dispatch.
373
- if (_dryRun) {
374
- const verb = action === 'write' ? 'write' : 'append';
375
- _skippedOps.push({ category: 'file', symbol: '✎', desc: `${verb} ${filePath}` });
376
- logToolCall(tag, { path: filePath }, false, 'dry-run');
377
- return { status: 'dry-run', message: 'dry-run: write skipped', path: filePath };
378
- }
379
-
380
- try {
381
- const dir = path.dirname(filePath);
382
- if (dir && dir !== '.') await fsp.mkdir(dir, { recursive: true });
383
- if (action === 'write') await fsp.writeFile(filePath, content || '');
384
- else await fsp.appendFile(filePath, content || '');
385
- const verb = action === 'write' ? 'Wrote' : 'Appended to';
386
- _log(` ${FG_GREEN}✓${RST} ${FG_GRAY}${verb} ${filePath}${RST}`);
387
- logToolCall(tag, { path: filePath, content }, true, 'ok');
388
- return { status: 'ok', path: filePath, bytes: (content || '').length };
389
- } catch (error) {
390
- _log(` ${FG_RED}✗ ${error.message}${RST}`);
391
- logToolCall(tag, { path: filePath, content }, true, 'error');
392
- return { error: error.message };
393
- }
394
- }
395
-
396
- if (action === 'list_dir') {
397
- const dirPath = arg0;
398
- try {
399
- const entries = await fsp.readdir(dirPath, { withFileTypes: true });
400
- const items = entries.map((e) => {
401
- if (e.isSymbolicLink()) return `[L] ${e.name}`;
402
- if (e.isDirectory()) return `[D] ${e.name}`;
403
- return `[F] ${e.name}`;
404
- });
405
- _log(` ${FG_GREEN}✓${RST} ${FG_GRAY}Listed ${dirPath} (${items.length} items)${RST}`);
406
- logToolCall('list_dir', { path: dirPath }, true, 'ok');
407
- return { items, path: dirPath };
408
- } catch (error) {
409
- _log(` ${FG_RED}✗ ${error.message}${RST}`);
410
- logToolCall('list_dir', { path: dirPath }, true, 'error');
411
- return { error: error.message };
412
- }
413
- }
414
-
415
- if (action === 'delete_file') {
416
- const filePath = arg0;
417
-
418
- const blocked = permissionManager.readonlyBlock('delete_file');
419
- if (blocked) {
420
- logToolCall('delete_file', { path: filePath }, false, 'denied');
421
- return blocked;
422
- }
423
-
424
- if (!isPathSafe(filePath)) {
425
- logToolCall('delete_file', { path: filePath }, false, 'denied');
426
- return _sandboxError(filePath);
427
- }
428
-
429
- try {
430
- await fsp.unlink(filePath);
431
- _log(` ${FG_GREEN}✓${RST} ${FG_GRAY}Deleted ${filePath}${RST}`);
432
- logToolCall('delete_file', { path: filePath }, true, 'ok');
433
- return { status: 'ok', path: filePath };
434
- } catch (error) {
435
- _log(` ${FG_RED}✗ ${error.message}${RST}`);
436
- logToolCall('delete_file', { path: filePath }, true, 'error');
437
- return { error: error.message };
438
- }
439
- }
440
-
441
- if (action === 'make_dir') {
442
- const dirPath = arg0;
443
- if (!isPathSafe(dirPath)) {
444
- logToolCall('make_dir', { path: dirPath }, false, 'denied');
445
- return _sandboxError(dirPath);
446
- }
447
- try {
448
- await fsp.mkdir(dirPath, { recursive: true });
449
- _log(` ${FG_GREEN}✓${RST} ${FG_GRAY}Created directory ${dirPath}${RST}`);
450
- logToolCall('make_dir', { path: dirPath }, true, 'ok');
451
- return { status: 'ok', path: dirPath };
452
- } catch (error) {
453
- _log(` ${FG_RED}✗ ${error.message}${RST}`);
454
- logToolCall('make_dir', { path: dirPath }, true, 'error');
455
- return { error: error.message };
456
- }
457
- }
458
-
459
- if (action === 'remove_dir') {
460
- const dirPath = arg0;
461
- if (!isPathSafe(dirPath)) {
462
- logToolCall('remove_dir', { path: dirPath }, false, 'denied');
463
- return _sandboxError(dirPath);
464
- }
465
- try {
466
- await fsp.rm(dirPath, { recursive: true, force: true });
467
- _log(` ${FG_GREEN}✓${RST} ${FG_GRAY}Removed directory ${dirPath}${RST}`);
468
- logToolCall('remove_dir', { path: dirPath }, true, 'ok');
469
- return { status: 'ok', path: dirPath };
470
- } catch (error) {
471
- _log(` ${FG_RED}✗ ${error.message}${RST}`);
472
- logToolCall('remove_dir', { path: dirPath }, true, 'error');
473
- return { error: error.message };
474
- }
475
- }
476
-
477
- if (action === 'move_file') {
478
- const src = arg0;
479
- const dst = arg1;
480
-
481
- const blocked = permissionManager.readonlyBlock('move_file');
482
- if (blocked) {
483
- logToolCall('move_file', { src, dst }, false, 'denied');
484
- return blocked;
485
- }
486
-
487
- if (!isPathSafe(dst)) {
488
- logToolCall('move_file', { src, dst }, false, 'denied');
489
- return _sandboxError(dst);
490
- }
491
-
492
- try {
493
- const dstDir = path.dirname(dst);
494
- if (dstDir && dstDir !== '.') await fsp.mkdir(dstDir, { recursive: true });
495
- try {
496
- await fsp.rename(src, dst);
497
- } catch (renameErr) {
498
- if (renameErr.code !== 'EXDEV') throw renameErr;
499
- // Cross-device rename not supported — copy then remove
500
- await fsp.cp(src, dst, { recursive: true });
501
- await fsp.rm(src, { recursive: true, force: true });
502
- }
503
- _log(` ${FG_GREEN}✓${RST} ${FG_GRAY}Moved ${src} → ${dst}${RST}`);
504
- logToolCall('move_file', { src, dst }, true, 'ok');
505
- return { status: 'ok', src, dst };
506
- } catch (error) {
507
- _log(` ${FG_RED}✗ ${error.message}${RST}`);
508
- logToolCall('move_file', { src, dst }, true, 'error');
509
- return { error: error.message };
510
- }
511
- }
512
-
513
- if (action === 'copy_file') {
514
- const src = arg0;
515
- const dst = arg1;
516
409
 
517
- const blocked = permissionManager.readonlyBlock('copy_file');
518
- if (blocked) {
519
- logToolCall('copy_file', { src, dst }, false, 'denied');
520
- return blocked;
521
- }
522
-
523
- if (!isPathSafe(dst)) {
524
- logToolCall('copy_file', { src, dst }, false, 'denied');
525
- return _sandboxError(dst);
526
- }
527
-
528
- try {
529
- const dstDir = path.dirname(dst);
530
- if (dstDir && dstDir !== '.') await fsp.mkdir(dstDir, { recursive: true });
531
- await fsp.cp(src, dst, { recursive: true });
532
- _log(` ${FG_GREEN}✓${RST} ${FG_GRAY}Copied ${src} → ${dst}${RST}`);
533
- logToolCall('copy_file', { src, dst }, true, 'ok');
534
- return { status: 'ok', src, dst };
535
- } catch (error) {
536
- _log(` ${FG_RED}✗ ${error.message}${RST}`);
537
- logToolCall('copy_file', { src, dst }, true, 'error');
538
- return { error: error.message };
539
- }
540
- }
541
-
542
- if (action === 'edit_file') {
543
- const filePath = arg0;
544
- const lineNum = arg1;
545
- const newContent = arg2;
546
- try {
547
- const data = await fsp.readFile(filePath, 'utf8');
548
- const lines = data.split('\n');
549
- if (lineNum < 1 || lineNum > lines.length) {
550
- logToolCall('edit_file', { path: filePath, line: lineNum }, true, 'error');
551
- return { error: `Line ${lineNum} out of range (file has ${lines.length} lines)` };
552
- }
553
- lines[lineNum - 1] = newContent;
554
- await fsp.writeFile(filePath, lines.join('\n'));
555
- _log(` ${FG_GREEN}✓${RST} ${FG_GRAY}Edited line ${lineNum} in ${filePath}${RST}`);
556
- logToolCall('edit_file', { path: filePath, line: lineNum }, true, 'ok');
557
- return { status: 'ok', path: filePath, line: lineNum };
558
- } catch (error) {
559
- _log(` ${FG_RED}✗ ${error.message}${RST}`);
560
- logToolCall('edit_file', { path: filePath, line: lineNum }, true, 'error');
561
- return { error: error.message };
562
- }
563
- }
564
-
565
- if (action === 'search_in_file') {
566
- const filePath = arg0;
567
- const pattern = arg1;
568
- try {
569
- const data = await fsp.readFile(filePath, 'utf8');
570
- const guardErr = _checkRegexSafety(pattern, data);
571
- if (guardErr) {
572
- logToolCall('search_in_file', { path: filePath, pattern }, true, 'error');
573
- return guardErr;
574
- }
575
- const regex = new RegExp(pattern);
576
- const matches = data.split('\n')
577
- .map((content, idx) => regex.test(content) ? { line: idx + 1, content } : null)
578
- .filter(Boolean);
579
- _log(` ${FG_GREEN}✓${RST} ${FG_GRAY}Found ${matches.length} match(es) in ${filePath}${RST}`);
580
- logToolCall('search_in_file', { path: filePath, pattern }, true, 'ok');
581
- return { matches, path: filePath };
582
- } catch (error) {
583
- _log(` ${FG_RED}✗ ${error.message}${RST}`);
584
- logToolCall('search_in_file', { path: filePath, pattern }, true, 'error');
585
- return { error: error.message };
586
- }
587
- }
588
-
589
- if (action === 'replace_in_file') {
590
- const filePath = arg0;
591
- const searchStr = arg1;
592
- const replaceStr = arg2;
593
- const flags = arg3 || '';
594
- try {
595
- const data = await fsp.readFile(filePath, 'utf8');
596
- const guardErr = _checkRegexSafety(searchStr, data);
597
- if (guardErr) {
598
- logToolCall('replace_in_file', { path: filePath, search: searchStr }, true, 'error');
599
- return guardErr;
600
- }
601
- const safeFlags = flags.replace(/[^gimsuy]/g, '');
602
- const countFlags = safeFlags.includes('g') ? safeFlags : safeFlags + 'g';
603
- const count = (data.match(new RegExp(searchStr, countFlags)) || []).length;
604
- const regex = new RegExp(searchStr, safeFlags || undefined);
605
- const newData = data.replace(regex, replaceStr);
606
- await fsp.writeFile(filePath, newData);
607
- _log(` ${FG_GREEN}✓${RST} ${FG_GRAY}Replaced ${count} occurrence(s) in ${filePath}${RST}`);
608
- logToolCall('replace_in_file', { path: filePath, search: searchStr }, true, 'ok');
609
- return { status: 'ok', path: filePath, count };
610
- } catch (error) {
611
- _log(` ${FG_RED}✗ ${error.message}${RST}`);
612
- logToolCall('replace_in_file', { path: filePath, search: searchStr }, true, 'error');
613
- return { error: error.message };
614
- }
615
- }
616
-
617
- if (action === 'search_files') {
618
- const pattern = arg0;
619
- const searchDir = arg1 || '.';
620
- const startedAt = Date.now();
621
- try {
622
- let regStr = pattern.replace(/[.+^${}()|[\]\\]/g, '\\$&');
623
- regStr = regStr.replace(/\*\*/g, '\x00');
624
- regStr = regStr.replace(/\*/g, '[^/]*');
625
- regStr = regStr.replace(/\x00\//g, '(?:.*/)?');
626
- regStr = regStr.replace(/\x00/g, '.*');
627
- const regex = new RegExp(`^${regStr}$`);
628
- const matchName = !pattern.includes('/');
629
- const files = [];
630
- async function walk(dir, rel) {
631
- if (signal && signal.aborted) return;
632
- let entries;
633
- try { entries = await fsp.readdir(dir, { withFileTypes: true }); } catch { return; }
634
- for (const entry of entries) {
635
- if (signal && signal.aborted) return;
636
- const relPath = rel ? `${rel}/${entry.name}` : entry.name;
637
- if (regex.test(matchName ? entry.name : relPath)) files.push(relPath);
638
- if (entry.isDirectory()) await walk(path.join(dir, entry.name), relPath);
639
- }
640
- }
641
- await walk(searchDir, '');
642
- if (signal && signal.aborted) {
643
- logToolCall('search_files', { pattern, dir: searchDir }, true, 'aborted');
644
- return { aborted: true, elapsed_s: Math.max(0, Math.round((Date.now() - startedAt) / 1000)) };
645
- }
646
- files.sort();
647
- _log(` ${FG_GREEN}✓${RST} ${FG_GRAY}Found ${files.length} file(s) matching "${pattern}"${RST}`);
648
- logToolCall('search_files', { pattern, dir: searchDir }, true, 'ok');
649
- return { files, pattern, dir: searchDir };
650
- } catch (error) {
651
- _log(` ${FG_RED}✗ ${error.message}${RST}`);
652
- logToolCall('search_files', { pattern, dir: searchDir }, true, 'error');
653
- return { error: error.message };
654
- }
655
- }
656
-
657
- if (action === 'file_stat') {
658
- const filePath = arg0;
659
- try {
660
- const stat = await fsp.stat(filePath);
661
- const type = stat.isDirectory() ? 'directory' : stat.isSymbolicLink() ? 'symlink' : 'file';
662
- const size_kb = (stat.size / 1024).toFixed(2);
663
- const mode = '0o' + stat.mode.toString(8);
664
- const mtime = stat.mtime.toISOString();
665
- _log(` ${FG_GREEN}✓${RST} ${FG_GRAY}Stat ${filePath}${RST}`);
666
- logToolCall('file_stat', { path: filePath }, true, 'ok');
667
- return { path: filePath, size_kb, mtime, type, mode };
668
- } catch (error) {
669
- _log(` ${FG_RED}✗ ${error.message}${RST}`);
670
- logToolCall('file_stat', { path: filePath }, true, 'error');
671
- return { error: error.message };
672
- }
673
- }
674
-
675
- if (action === 'get_env') {
676
- const varName = arg0;
677
- const value = process.env[varName];
678
- _log(` ${FG_GREEN}✓${RST} ${FG_GRAY}Got env ${varName}${RST}`);
679
- logToolCall('get_env', { name: varName }, true, 'ok');
680
- return { name: varName, value: value !== undefined ? value : null };
681
- }
682
-
683
- if (action === 'set_env') {
684
- const varName = arg0;
685
- const value = arg1 || '';
686
- process.env[varName] = value;
687
- _log(` ${FG_GREEN}✓${RST} ${FG_GRAY}Set env ${varName}${RST}`);
688
- logToolCall('set_env', { name: varName }, true, 'ok');
689
- return { status: 'ok', name: varName };
690
- }
691
-
692
- if (action === 'download') {
693
- const url = arg0;
694
- if (_dryRun) {
695
- _skippedOps.push({ category: 'net', symbol: '↓', desc: `download ${url}` });
696
- logToolCall('download', { url }, false, 'dry-run');
697
- return { status: 'dry-run', message: 'dry-run: network call skipped' };
698
- }
699
- let fileName;
700
- try {
701
- fileName = path.basename(new URL(url).pathname) || 'download';
702
- } catch {
703
- fileName = 'download';
704
- }
705
- const outPath = path.join(process.cwd(), fileName);
706
- const startedAt = Date.now();
707
- return new Promise((resolve) => {
708
- let abortedByUser = false;
709
- let onAbort = null;
710
- let activeReq = null;
711
- let activeFile = null;
712
- const detachAbort = () => {
713
- if (onAbort && signal) {
714
- try { signal.removeEventListener('abort', onAbort); } catch {}
715
- onAbort = null;
716
- }
717
- };
718
- const finishAborted = () => {
719
- fs.unlink(outPath, () => {});
720
- logToolCall('download', { url }, true, 'aborted');
721
- resolve({ aborted: true, elapsed_s: Math.max(0, Math.round((Date.now() - startedAt) / 1000)) });
722
- };
723
- if (signal) {
724
- if (signal.aborted) {
725
- abortedByUser = true;
726
- finishAborted();
727
- return;
728
- }
729
- onAbort = () => {
730
- abortedByUser = true;
731
- try { if (activeReq) activeReq.destroy(new Error('Aborted')); } catch {}
732
- try { if (activeFile) activeFile.destroy(); } catch {}
733
- };
734
- signal.addEventListener('abort', onAbort, { once: true });
735
- }
736
-
737
- function doDownload(target, redirectsLeft) {
738
- const proto = target.startsWith('https') ? https : http;
739
- const req = proto.get(target, (res) => {
740
- if ([301, 302, 303, 307, 308].includes(res.statusCode) && redirectsLeft > 0 && res.headers.location) {
741
- res.resume();
742
- return doDownload(res.headers.location, redirectsLeft - 1);
743
- }
744
- if (res.statusCode >= 400) {
745
- res.resume();
746
- const msg = `HTTP ${res.statusCode}`;
747
- detachAbort();
748
- _log(` ${FG_RED}✗ ${msg}${RST}`);
749
- logToolCall('download', { url }, true, 'error');
750
- return resolve({ error: msg });
751
- }
752
- const file = fs.createWriteStream(outPath);
753
- activeFile = file;
754
- res.pipe(file);
755
- file.on('finish', () => {
756
- file.close();
757
- detachAbort();
758
- _log(` ${FG_GREEN}✓${RST} ${FG_GRAY}Downloaded to ${outPath}${RST}`);
759
- logToolCall('download', { url }, true, 'ok');
760
- resolve({ status: 'ok', path: outPath });
761
- });
762
- file.on('error', (err) => {
763
- if (abortedByUser) {
764
- detachAbort();
765
- finishAborted();
766
- return;
767
- }
768
- fs.unlink(outPath, () => {});
769
- detachAbort();
770
- _log(` ${FG_RED}✗ ${err.message}${RST}`);
771
- logToolCall('download', { url }, true, 'error');
772
- resolve({ error: err.message });
773
- });
774
- });
775
- activeReq = req;
776
- req.on('error', (err) => {
777
- if (abortedByUser) {
778
- detachAbort();
779
- finishAborted();
780
- return;
781
- }
782
- fs.unlink(outPath, () => {});
783
- detachAbort();
784
- _log(` ${FG_RED}✗ ${err.message}${RST}`);
785
- logToolCall('download', { url }, true, 'error');
786
- resolve({ error: err.message });
787
- });
788
- req.setTimeout(120000, () => {
789
- req.destroy();
790
- fs.unlink(outPath, () => {});
791
- detachAbort();
792
- logToolCall('download', { url }, true, 'error');
793
- resolve({ error: 'Request timeout' });
794
- });
795
- }
796
- doDownload(url, 5);
797
- });
798
- }
799
-
800
- if (action === 'upload') {
801
- const filePath = arg0;
802
- const encodedContent = arg1 || '';
803
- if (!isPathSafe(filePath)) {
804
- logToolCall('upload', { path: filePath }, false, 'denied');
805
- return _sandboxError(filePath);
806
- }
807
- try {
808
- const dir = path.dirname(filePath);
809
- if (dir && dir !== '.') await fsp.mkdir(dir, { recursive: true });
810
- const buffer = Buffer.from(encodedContent.trim(), 'base64');
811
- await fsp.writeFile(filePath, buffer);
812
- _log(` ${FG_GREEN}✓${RST} ${FG_GRAY}Uploaded ${buffer.length} bytes to ${filePath}${RST}`);
813
- logToolCall('upload', { path: filePath }, true, 'ok');
814
- return { status: 'ok', path: filePath, bytes: buffer.length };
815
- } catch (error) {
816
- _log(` ${FG_RED}✗ ${error.message}${RST}`);
817
- logToolCall('upload', { path: filePath }, true, 'error');
818
- return { error: error.message };
819
- }
820
- }
821
-
822
- if (action === 'http_get') {
823
- const url = arg0;
824
- if (_dryRun) {
825
- _skippedOps.push({ category: 'net', symbol: '↓', desc: `GET ${url}` });
826
- logToolCall('http_get', { url }, false, 'dry-run');
827
- return { status: 'dry-run', message: 'dry-run: network call skipped' };
828
- }
829
- const httpCfg = getConfig ? getConfig() : {};
830
- const reqTimeoutMs = Math.max(15000, httpCfg.request_timeout_ms || 15000);
831
- const maxBytes = Math.max(1024, httpCfg.http_fetch_max_bytes || 262144);
832
- const startedAt = Date.now();
833
- return new Promise((resolve) => {
834
- let abortedByUser = false;
835
- let onAbort = null;
836
- let activeReq = null;
837
- const detachAbort = () => {
838
- if (onAbort && signal) {
839
- try { signal.removeEventListener('abort', onAbort); } catch {}
840
- onAbort = null;
841
- }
842
- };
843
- const finishAborted = () => {
844
- logToolCall('http_get', { url }, true, 'aborted');
845
- resolve({ aborted: true, elapsed_s: Math.max(0, Math.round((Date.now() - startedAt) / 1000)) });
846
- };
847
- if (signal) {
848
- if (signal.aborted) {
849
- abortedByUser = true;
850
- finishAborted();
851
- return;
852
- }
853
- onAbort = () => {
854
- abortedByUser = true;
855
- try { if (activeReq) activeReq.destroy(new Error('Aborted')); } catch {}
856
- };
857
- signal.addEventListener('abort', onAbort, { once: true });
858
- }
859
-
860
- function doGet(target, redirectsLeft) {
861
- const proto = target.startsWith('https') ? https : http;
862
- const req = proto.get(target, (res) => {
863
- if ([301, 302, 303, 307, 308].includes(res.statusCode) && redirectsLeft > 0 && res.headers.location) {
864
- res.resume();
865
- return doGet(res.headers.location, redirectsLeft - 1);
866
- }
867
- const bufs = [];
868
- let totalBytes = 0;
869
- let capped = false;
870
- res.on('data', (chunk) => {
871
- totalBytes += chunk.length;
872
- if (!capped) {
873
- if (totalBytes <= maxBytes) {
874
- bufs.push(chunk);
875
- } else {
876
- const keep = maxBytes - (totalBytes - chunk.length);
877
- if (keep > 0) bufs.push(chunk.slice(0, keep));
878
- capped = true;
879
- // Keep the connection draining so totalBytes reflects reality,
880
- // but stop buffering further bytes.
881
- }
882
- }
883
- });
884
- res.on('end', () => {
885
- if (abortedByUser) return;
886
- detachAbort();
887
- const kept = Buffer.concat(bufs);
888
- const keptBytes = kept.length;
889
- let body = kept.toString('utf8');
890
- if (capped) {
891
- const origKb = (totalBytes / 1024).toFixed(0);
892
- const keptKb = (keptBytes / 1024).toFixed(0);
893
- const droppedKb = ((totalBytes - keptBytes) / 1024).toFixed(0);
894
- body += `\n\n[... truncated: original was ${origKb}KB, showing first ${keptKb}KB. The remaining ${droppedKb}KB was discarded. If you need the rest, narrow your request (e.g. fetch a specific subpage) rather than retrying this URL.]`;
895
- }
896
- _log(` ${FG_GREEN}✓${RST} ${FG_GRAY}HTTP GET ${target} (${res.statusCode}, ${totalBytes} bytes${capped ? `, truncated to ${keptBytes}` : ''})${RST}`);
897
- logToolCall('http_get', { url: target }, true, res.statusCode < 400 ? 'ok' : 'error');
898
- // `bytes` is the total transferred payload length (pre-cap);
899
- // consumers that want to know the wire size without parsing
900
- // the appended truncation note rely on this.
901
- resolve({ status_code: res.statusCode, body, bytes: totalBytes });
902
- });
903
- });
904
- activeReq = req;
905
- req.on('error', (err) => {
906
- if (abortedByUser) {
907
- detachAbort();
908
- finishAborted();
909
- return;
910
- }
911
- detachAbort();
912
- _log(` ${FG_RED}✗ ${err.message}${RST}`);
913
- logToolCall('http_get', { url: target }, true, 'error');
914
- resolve({ error: err.message, error_code: err.code });
915
- });
916
- req.setTimeout(reqTimeoutMs, () => {
917
- req.destroy();
918
- detachAbort();
919
- logToolCall('http_get', { url: target }, true, 'error');
920
- resolve({ error: 'Request timeout', error_code: 'ETIMEDOUT' });
921
- });
922
- }
923
- doGet(url, 5);
924
- });
925
- }
926
-
927
- if (action === 'ask_user') {
928
- const question = arg0;
929
- const options = _parseNumberedOptions(question);
930
- if (options.length >= 2) {
931
- const selected = await permissionManager.captureSelect({ options });
932
- logToolCall('ask_user', { question }, true, 'ok');
933
- return { question, answer: selected || options[0] };
934
- }
935
- if (!process.stdout.isTTY || process.stdin.isRaw) {
936
- writer.scrollback(`\n ${FG_YELLOW}?${RST} ${question}\n ${DIM}[auto-answering 'y']${RST}`);
937
- logToolCall('ask_user', { question }, true, 'ok');
938
- return { question, answer: 'y' };
939
- }
940
- // audit: allowed — inline prompt without trailing newline; unreachable when TUI writer is active
941
- // (process.stdin.isRaw is true while the TUI input field holds raw mode).
942
- process.stdout.write(`\n ${FG_YELLOW}?${RST} ${question}\n ${FG_GRAY}>${RST} `);
943
- const buf = Buffer.alloc(4096);
944
- let input = '';
945
- while (true) {
946
- const n = fs.readSync(0, buf, 0, 1);
947
- if (n === 0) break;
948
- const ch = buf[0];
949
- if (ch === 0x0a) break;
950
- if (ch === 0x0d) continue;
951
- input += String.fromCharCode(ch);
952
- }
953
- _log();
954
- logToolCall('ask_user', { question }, true, 'ok');
955
- return { question, answer: input };
956
- }
957
-
958
- if (action === 'store_memory') {
959
- const key = arg0;
960
- const value = arg1 || '';
961
- try {
962
- let mem = {};
963
- try { mem = JSON.parse(await fsp.readFile(MEMORY_PATH, 'utf8')); } catch {}
964
- mem[key] = value;
965
- await fsp.mkdir(path.dirname(MEMORY_PATH), { recursive: true });
966
- await fsp.writeFile(MEMORY_PATH, JSON.stringify(mem, null, 2));
967
- _log(` ${FG_GREEN}✓${RST} ${FG_GRAY}Stored memory: ${key}${RST}`);
968
- logToolCall('store_memory', { key }, true, 'ok');
969
- return { status: 'ok', key };
970
- } catch (error) {
971
- _log(` ${FG_RED}✗ ${error.message}${RST}`);
972
- logToolCall('store_memory', { key }, true, 'error');
973
- return { error: error.message };
974
- }
975
- }
976
-
977
- if (action === 'recall_memory') {
978
- const key = arg0;
979
- try {
980
- let mem = {};
981
- try { mem = JSON.parse(await fsp.readFile(MEMORY_PATH, 'utf8')); } catch {}
982
- const found = key in mem;
983
- const value = found ? mem[key] : null;
984
- _log(` ${FG_GREEN}✓${RST} ${FG_GRAY}Recalled memory: ${key}${RST}`);
985
- logToolCall('recall_memory', { key }, true, 'ok');
986
- return { key, value, found };
987
- } catch (error) {
988
- _log(` ${FG_RED}✗ ${error.message}${RST}`);
989
- logToolCall('recall_memory', { key }, true, 'error');
990
- return { error: error.message };
991
- }
992
- }
993
-
994
- if (action === 'list_memories') {
995
- try {
996
- let mem = {};
997
- try { mem = JSON.parse(await fsp.readFile(MEMORY_PATH, 'utf8')); } catch {}
998
- const keys = Object.keys(mem);
999
- _log(` ${FG_GREEN}✓${RST} ${FG_GRAY}Listed ${keys.length} memory key(s)${RST}`);
1000
- logToolCall('list_memories', {}, true, 'ok');
1001
- return { keys };
1002
- } catch (error) {
1003
- _log(` ${FG_RED}✗ ${error.message}${RST}`);
1004
- logToolCall('list_memories', {}, true, 'error');
1005
- return { error: error.message };
1006
- }
1007
- }
1008
-
1009
- if (action === 'system_info') {
1010
- const info = {
1011
- platform: os.platform(),
1012
- arch: os.arch(),
1013
- hostname: os.hostname(),
1014
- user: process.env.USER || process.env.USERNAME || '',
1015
- total_mem_mb: Math.round(os.totalmem() / 1024 / 1024),
1016
- free_mem_mb: Math.round(os.freemem() / 1024 / 1024),
1017
- node_version: process.version,
1018
- cwd: process.cwd(),
1019
- };
1020
- _log(` ${FG_GREEN}✓${RST} ${FG_GRAY}System info: ${info.platform}/${info.arch}${RST}`);
1021
- logToolCall('system_info', {}, true, 'ok');
1022
- return info;
1023
- }
1024
-
1025
- logToolCall(action, { action }, false, 'error');
1026
- return { error: `Unknown action: ${action}` };
1027
- }
410
+ // Dispatch via the tool registry (Task 1.4b). The per-action executor body
411
+ // lives on its TOOL_REGISTRY entry; ctx carries the collaborators the body
412
+ // closes over. Signature is unchanged, so agent.js stays untouched.
413
+ const entry = entryForAction(action);
414
+ if (!entry || typeof entry.execute !== 'function') {
415
+ logToolCall(action, { action }, false, 'error');
416
+ return { error: `Unknown action: ${action}` };
417
+ }
418
+
419
+ // Checkpoint capture (Task 4.3): snapshot prior state BEFORE the mutation
420
+ // (post-gate — a denied call never reaches here). Skip under --dry-run (no
421
+ // mutation will happen). beginCapture is fail-safe (returns null on error),
422
+ // and commit only fires on a status:'ok' result, so a call the executor
423
+ // itself refuses (--readonly, sandbox) produces no committed checkpoint.
424
+ let _cp = null;
425
+ if (checkpointStore && !_dryRun) {
426
+ try { _cp = await checkpointStore.beginCapture(action, args); } catch { _cp = null; }
427
+ }
428
+ const result = await entry.execute(ctx, args, { signal });
429
+ if (_cp) {
430
+ try { if (result && result.status === 'ok') _cp.commit(); } catch { /* fail-safe: never block a completed mutation */ }
431
+ }
432
+ return result;
433
+ }
434
+
435
+ // Dependency bag passed to every registry execute()/permission(). Built once
436
+ // here so the moved executor bodies see the same collaborators they had inside
437
+ // this factory closure. _uiActive is a getter so it always reflects the live
438
+ // module flag rather than a snapshot.
439
+ const ctx = {
440
+ _log,
441
+ logToolCall,
442
+ isPathSafe,
443
+ isProtectedSecretPath,
444
+ isProtectedConfigPath,
445
+ _sandboxError,
446
+ _secretReadError,
447
+ _protectedConfigWriteError,
448
+ _checkRegexSafety,
449
+ _parseNumberedOptions,
450
+ _dryRun,
451
+ _skippedOps,
452
+ MEMORY_PATH,
453
+ permissionManager,
454
+ getConfig,
455
+ webChat,
456
+ webSearch,
457
+ renderDiff,
458
+ DIFF_BUBBLE_INSET,
459
+ writer,
460
+ agentExecShell,
461
+ FG_GREEN,
462
+ FG_GRAY,
463
+ FG_RED,
464
+ FG_YELLOW,
465
+ RST,
466
+ DIM,
467
+ BOLD,
468
+ get _uiActive() { return _uiActive; },
469
+ };
1028
470
 
1029
471
  return {
1030
472
  agentExecFile,
@@ -1033,97 +475,11 @@ function createToolExecutor(permissionManager, ui, getConfig) {
1033
475
  };
1034
476
  }
1035
477
 
1036
- // Map a MiniMax-style {name, params} invocation to the internal
1037
- // [action, arg1, arg2, …] call tuple consumed by the agent loop.
478
+ // Native function-calling path: maps to the internal [action, ...args] call tuple.
479
+ // Delegates to the tool registry (lib/tool_registry.js), the single source
480
+ // shared with the XML path. Kept here (and exported) for backward compatibility.
1038
481
  function mapInvokeToCall(toolName, params) {
1039
- const name = (toolName || '').toLowerCase();
1040
- const p = params || {};
1041
- switch (name) {
1042
- case 'write_file':
1043
- case 'create_file':
1044
- return p.path ? ['write', p.path, p.content != null ? p.content : ''] : null;
1045
- case 'read_file':
1046
- return p.path ? ['read', p.path] : null;
1047
- case 'append_file':
1048
- return p.path ? ['append', p.path, p.content != null ? p.content : ''] : null;
1049
- case 'delete_file':
1050
- return p.path ? ['delete_file', p.path] : null;
1051
- case 'list_dir':
1052
- return ['list_dir', p.path || '.'];
1053
- case 'make_dir':
1054
- return p.path ? ['make_dir', p.path] : null;
1055
- case 'remove_dir':
1056
- return p.path ? ['remove_dir', p.path] : null;
1057
- case 'move_file':
1058
- return p.src && p.dst ? ['move_file', p.src, p.dst] : null;
1059
- case 'copy_file':
1060
- return p.src && p.dst ? ['copy_file', p.src, p.dst] : null;
1061
- case 'file_stat':
1062
- return p.path ? ['file_stat', p.path] : null;
1063
- case 'search_files':
1064
- return ['search_files', p.pattern || '*', p.dir || '.'];
1065
- case 'search_in_file':
1066
- return p.path && p.pattern ? ['search_in_file', p.path, p.pattern] : null;
1067
- case 'replace_in_file':
1068
- return p.path && p.search !== undefined
1069
- ? ['replace_in_file', p.path, p.search, p.replace != null ? p.replace : '', p.flags || '']
1070
- : null;
1071
- case 'edit_file':
1072
- return p.path && p.line !== undefined
1073
- ? ['edit_file', p.path, parseInt(p.line, 10), p.content != null ? p.content : '']
1074
- : null;
1075
- case 'get_env':
1076
- return p.name ? ['get_env', p.name] : null;
1077
- case 'set_env':
1078
- return p.name ? ['set_env', p.name, p.value != null ? p.value : ''] : null;
1079
- case 'download':
1080
- return p.url ? ['download', p.url] : null;
1081
- case 'upload':
1082
- return p.path ? ['upload', p.path, p.content != null ? p.content : ''] : null;
1083
- case 'http_get':
1084
- return p.url ? ['http_get', p.url] : null;
1085
- case 'ask_user':
1086
- return p.question ? ['ask_user', p.question] : null;
1087
- case 'store_memory':
1088
- return p.key ? ['store_memory', p.key, p.value != null ? p.value : ''] : null;
1089
- case 'recall_memory':
1090
- return p.key ? ['recall_memory', p.key] : null;
1091
- case 'list_memories':
1092
- return ['list_memories'];
1093
- case 'system_info':
1094
- return ['system_info'];
1095
- case 'exec':
1096
- case 'shell':
1097
- return p.command ? ['shell', p.command] : null;
1098
- default:
1099
- return null;
1100
- }
1101
- }
1102
-
1103
- // Compile a regex twice — once with double quotes, once with single — from a
1104
- // template where `Q` stands for the quote char. Matches from both variants
1105
- // are returned in a single iterable.
1106
- function _matchDual(text, template) {
1107
- const results = [];
1108
- for (const q of ['"', "'"]) {
1109
- const re = new RegExp(template.replace(/Q/g, q), 'g');
1110
- for (const m of text.matchAll(re)) results.push(m);
1111
- }
1112
- return results;
1113
- }
1114
-
1115
- // Models sometimes wrap the inline body of a single-value tool tag in a nested
1116
- // pseudo-tag, e.g. `<list_dir><path>/tmp/foo</path></list_dir>` instead of the
1117
- // documented `<list_dir>/tmp/foo</list_dir>`. When the body is exactly one
1118
- // wrapper element (no siblings, no surrounding text), unwrap it once so the
1119
- // parser recovers the intended value. Safe to call on any inline-content body
1120
- // — a plain path/command/URL won't match the regex and is returned as-is.
1121
- function _unwrapInnerTag(inner) {
1122
- if (inner == null) return inner;
1123
- const trimmed = String(inner).trim();
1124
- const m = trimmed.match(/^<(\w+)(?:\s[^>]*)?>([\s\S]*)<\/\1>$/);
1125
- if (!m) return inner;
1126
- return m[2].trim();
482
+ return fromInvoke(toolName, params);
1127
483
  }
1128
484
 
1129
485
  // MiniMax-M2 tool-call XML repair. Some inference backends — notably mlx-lm
@@ -1329,130 +685,21 @@ function extractToolCalls(text, options = {}) {
1329
685
  }
1330
686
  }
1331
687
 
1332
- for (const match of text.matchAll(/<(?:shell|exec|run_command|run)>([\s\S]*?)<\/(?:shell|exec|run_command|run)>/g)) {
1333
- calls.push(['shell', _unwrapInnerTag(match[1]).trim()]);
1334
- }
1335
-
1336
- for (const match of text.matchAll(/<read_file>([\s\S]*?)<\/read_file>/g)) {
1337
- calls.push(['read', _unwrapInnerTag(match[1]).trim()]);
1338
- }
1339
-
1340
- for (const match of _matchDual(text, '<read_file\\s+path=Q([^Q]+)Q\\s*\\/?>')) {
1341
- calls.push(['read', match[1]]);
1342
- }
1343
-
1344
- for (const match of _matchDual(text, '<write_file\\s+path=Q([^Q]+)Q>([\\s\\S]*?)<\\/write_file>')) {
1345
- calls.push(['write', match[1], match[2]]);
1346
- }
1347
-
1348
- for (const match of _matchDual(text, '<create_file\\s+path=Q([^Q]+)Q>([\\s\\S]*?)<\\/create_file>')) {
1349
- calls.push(['write', match[1], match[2]]);
1350
- }
1351
-
1352
- for (const match of _matchDual(text, '<append_file\\s+path=Q([^Q]+)Q>([\\s\\S]*?)<\\/append_file>')) {
1353
- calls.push(['append', match[1], match[2]]);
1354
- }
1355
-
1356
- for (const match of text.matchAll(/<list_dir>([\s\S]*?)<\/list_dir>/g)) {
1357
- calls.push(['list_dir', _unwrapInnerTag(match[1]).trim()]);
1358
- }
1359
-
1360
- for (const match of text.matchAll(/<search_files>([\s\S]*?)<\/search_files>/g)) {
1361
- calls.push(['search_files', _unwrapInnerTag(match[1]).trim(), '.']);
1362
- }
1363
-
1364
- for (const match of _matchDual(text, '<search_files\\s+pattern=Q([^Q]+)Q(?:\\s+dir=Q([^Q]*)Q)?\\s*(?:><\\/search_files>|\\/>)')) {
1365
- calls.push(['search_files', match[1], match[2] || '.']);
1366
- }
1367
-
1368
- for (const match of text.matchAll(/<delete_file>([\s\S]*?)<\/delete_file>/g)) {
1369
- calls.push(['delete_file', _unwrapInnerTag(match[1]).trim()]);
1370
- }
1371
-
1372
- for (const match of text.matchAll(/<make_dir>([\s\S]*?)<\/make_dir>/g)) {
1373
- calls.push(['make_dir', _unwrapInnerTag(match[1]).trim()]);
1374
- }
1375
-
1376
- for (const match of text.matchAll(/<remove_dir>([\s\S]*?)<\/remove_dir>/g)) {
1377
- calls.push(['remove_dir', _unwrapInnerTag(match[1]).trim()]);
1378
- }
1379
-
1380
- for (const match of text.matchAll(/<get_env>([\s\S]*?)<\/get_env>/g)) {
1381
- calls.push(['get_env', _unwrapInnerTag(match[1]).trim()]);
1382
- }
1383
-
1384
- for (const match of _matchDual(text, '<set_env\\s+name=Q([^Q]+)Q\\s+value=Q([^Q]*)Q\\s*(?:><\\/set_env>|\\/>)')) {
1385
- calls.push(['set_env', match[1], match[2]]);
1386
- }
1387
-
1388
- for (const match of _matchDual(text, '<move_file\\s+src=Q([^Q]+)Q\\s+dst=Q([^Q]+)Q\\s*(?:><\\/move_file>|\\/>)')) {
1389
- calls.push(['move_file', match[1], match[2]]);
1390
- }
1391
-
1392
- for (const match of _matchDual(text, '<copy_file\\s+src=Q([^Q]+)Q\\s+dst=Q([^Q]+)Q\\s*(?:><\\/copy_file>|\\/>)')) {
1393
- calls.push(['copy_file', match[1], match[2]]);
1394
- }
1395
-
1396
- for (const match of _matchDual(text, '<edit_file\\s+path=Q([^Q]+)Q\\s+line=Q(\\d+)Q>([\\s\\S]*?)<\\/edit_file>')) {
1397
- calls.push(['edit_file', match[1], parseInt(match[2], 10), match[3]]);
1398
- }
1399
-
1400
- for (const match of _matchDual(text, '<search_in_file\\s+path=Q([^Q]+)Q>([\\s\\S]*?)<\\/search_in_file>')) {
1401
- calls.push(['search_in_file', match[1], match[2].trim()]);
1402
- }
1403
-
1404
- for (const match of _matchDual(text, '<replace_in_file\\s+path=Q([^Q]+)Q\\s+search=Q([^Q]*)Q\\s+replace=Q([^Q]*)Q>([\\s\\S]*?)<\\/replace_in_file>')) {
1405
- calls.push(['replace_in_file', match[1], match[2], match[3], match[4].trim()]);
1406
- }
1407
-
1408
- for (const match of text.matchAll(/<download>([\s\S]*?)<\/download>/g)) {
1409
- calls.push(['download', _unwrapInnerTag(match[1]).trim()]);
1410
- }
1411
-
1412
- for (const match of _matchDual(text, '<upload\\s+path=Q([^Q]+)Q>([\\s\\S]*?)<\\/upload>')) {
1413
- calls.push(['upload', match[1], match[2]]);
1414
- }
1415
-
1416
- for (const match of text.matchAll(/<file_stat>([\s\S]*?)<\/file_stat>/g)) {
1417
- calls.push(['file_stat', _unwrapInnerTag(match[1]).trim()]);
1418
- }
1419
-
1420
- for (const match of text.matchAll(/<http_get\b([^>]*?)(?:><\/http_get>|\/>)/g)) {
1421
- const attrStr = match[1];
1422
- const urlMatch = attrStr.match(/url="([^"]+)"/) || attrStr.match(/url='([^']+)'/);
1423
- if (urlMatch) calls.push(['http_get', urlMatch[1]]);
1424
- }
1425
-
1426
- // Inline-content form: <http_get>URL</http_get>. Models mirror the style of
1427
- // <list_dir>, <download>, etc. even though the system prompt advertises the
1428
- // attribute form — accept both so the second tag in a multi-call response
1429
- // isn't silently dropped. Also tolerate `<http_get>url="URL"</http_get>` where
1430
- // the model put the attribute syntax in the body.
1431
- for (const match of text.matchAll(/<http_get>([\s\S]*?)<\/http_get>/g)) {
1432
- const inner = match[1].trim();
1433
- if (!inner) continue;
1434
- const urlAttr = inner.match(/url="([^"]+)"/) || inner.match(/url='([^']+)'/);
1435
- calls.push(['http_get', urlAttr ? urlAttr[1] : _unwrapInnerTag(inner).trim()]);
1436
- }
1437
-
1438
- for (const match of _matchDual(text, '<ask_user\\s+question=Q([^Q]+)Q\\s*(?:><\\/ask_user>|\\/>)')) {
1439
- calls.push(['ask_user', match[1]]);
1440
- }
1441
-
1442
- for (const match of _matchDual(text, '<store_memory\\s+key=Q([^Q]+)Q>([\\s\\S]*?)<\\/store_memory>')) {
1443
- calls.push(['store_memory', match[1], match[2]]);
1444
- }
1445
-
1446
- for (const match of _matchDual(text, '<recall_memory\\s+key=Q([^Q]+)Q\\s*(?:><\\/recall_memory>|\\/>)')) {
1447
- calls.push(['recall_memory', match[1]]);
1448
- }
1449
-
1450
- for (const match of text.matchAll(/<list_memories\s*(?:><\/list_memories>|\/>)/g)) {
1451
- calls.push(['list_memories']);
688
+ // XML/tag path: each tool's parseAttrs (parseXml) lives next to its spec in
689
+ // the tool registry. Entries run in array order, which — together with the
690
+ // wrapper/JSON/fence passes above — reproduces the exact emission order the
691
+ // characterization tests pin (test/extract-tool-calls.test.js). This replaces
692
+ // the ~25 standalone regex blocks that used to be inlined here.
693
+ for (const entry of TOOL_REGISTRY) {
694
+ if (!entry.parseXml) continue;
695
+ for (const call of entry.parseXml(text)) calls.push(call);
1452
696
  }
1453
697
 
1454
- for (const match of text.matchAll(/<system_info\s*(?:><\/system_info>|\/>)/g)) {
1455
- calls.push(['system_info']);
698
+ // Dynamic tools (MCP, Task 3.3) get the same XML pass so non-native models can
699
+ // invoke them via `<mcp__server__tool>{json args}</mcp__server__tool>`.
700
+ for (const entry of dynamicToolEntries()) {
701
+ if (!entry.parseXml) continue;
702
+ for (const call of entry.parseXml(text)) calls.push(call);
1456
703
  }
1457
704
 
1458
705
  return calls;
@@ -1481,6 +728,11 @@ module.exports = {
1481
728
  createToolExecutor,
1482
729
  extractToolCalls,
1483
730
  getSkippedOps,
731
+ // Exported for unit testing (Task 1.1). These pure path guards are otherwise
732
+ // private; exposing them changes no runtime behavior.
733
+ isPathSafe,
734
+ isProtectedSecretPath,
735
+ isProtectedConfigPath,
1484
736
  isUIActive,
1485
737
  mapInvokeToCall,
1486
738
  repairMinimaxMalformedXml,