@semalt-ai/code 1.8.4 → 1.19.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (151)
  1. package/.claude/settings.local.json +8 -1
  2. package/.github/workflows/ci.yml +69 -0
  3. package/CLAUDE.md +1588 -27
  4. package/README.md +147 -3
  5. package/TECHNICAL_DEBT.md +66 -0
  6. package/examples/embed.js +74 -0
  7. package/index.js +259 -11
  8. package/lib/agent.js +935 -181
  9. package/lib/api.js +308 -55
  10. package/lib/args.js +96 -2
  11. package/lib/audit.js +23 -1
  12. package/lib/background.js +584 -0
  13. package/lib/checkpoints.js +757 -0
  14. package/lib/commands/auth.js +94 -0
  15. package/lib/commands/chat-session.js +306 -0
  16. package/lib/commands/chat-slash.js +399 -0
  17. package/lib/commands/chat-turn.js +446 -0
  18. package/lib/commands/chat.js +403 -0
  19. package/lib/commands/custom.js +157 -0
  20. package/lib/commands/history-utils.js +66 -0
  21. package/lib/commands/index.js +268 -0
  22. package/lib/commands/mcp.js +113 -0
  23. package/lib/commands/oneshot.js +193 -0
  24. package/lib/commands/registry.js +269 -0
  25. package/lib/commands/tasks.js +89 -0
  26. package/lib/compact.js +87 -0
  27. package/lib/config.js +346 -11
  28. package/lib/constants.js +372 -3
  29. package/lib/debug.js +106 -0
  30. package/lib/deny.js +199 -0
  31. package/lib/doctor.js +160 -0
  32. package/lib/headless.js +167 -0
  33. package/lib/hooks.js +286 -0
  34. package/lib/images.js +264 -0
  35. package/lib/internals.js +49 -0
  36. package/lib/mcp/boundary.js +131 -0
  37. package/lib/mcp/client.js +270 -0
  38. package/lib/mcp/oauth.js +134 -0
  39. package/lib/memory.js +209 -0
  40. package/lib/metrics.js +37 -2
  41. package/lib/payload.js +54 -0
  42. package/lib/permission-rules.js +401 -0
  43. package/lib/permissions.js +100 -10
  44. package/lib/pricing.js +67 -0
  45. package/lib/proc.js +158 -0
  46. package/lib/prompts.js +88 -8
  47. package/lib/sandbox.js +568 -0
  48. package/lib/sdk.js +328 -0
  49. package/lib/secrets.js +211 -0
  50. package/lib/skills.js +223 -0
  51. package/lib/subagents.js +516 -0
  52. package/lib/tool_registry.js +2558 -0
  53. package/lib/tool_specs.js +236 -9
  54. package/lib/tools.js +370 -944
  55. package/lib/ui/chat-history.js +19 -1
  56. package/lib/ui/format.js +101 -6
  57. package/lib/ui/input-field.js +16 -7
  58. package/lib/ui/status-bar.js +79 -11
  59. package/lib/ui/terminal.js +10 -4
  60. package/lib/ui/theme.js +1 -0
  61. package/lib/ui/web-activity.js +218 -0
  62. package/lib/ui/writer.js +7 -9
  63. package/lib/verify.js +229 -0
  64. package/lib/web-extract.js +213 -0
  65. package/lib/web-summarize.js +68 -0
  66. package/package.json +19 -4
  67. package/scripts/lint.js +57 -0
  68. package/test/agent-loop.test.js +389 -0
  69. package/test/background.test.js +414 -0
  70. package/test/chat.test.js +114 -0
  71. package/test/checkpoints-agent.test.js +181 -0
  72. package/test/checkpoints.test.js +650 -0
  73. package/test/command-registry.test.js +160 -0
  74. package/test/compact.test.js +116 -0
  75. package/test/completion-lazy.test.js +52 -0
  76. package/test/config-merge.test.js +324 -0
  77. package/test/config-quarantine.test.js +128 -0
  78. package/test/config-write-guard-allow-anywhere.test.js +56 -0
  79. package/test/config-write-guard-skip.test.js +46 -0
  80. package/test/config-write-guard.test.js +153 -0
  81. package/test/context-split.test.js +215 -0
  82. package/test/cost-doctor.test.js +142 -0
  83. package/test/custom-commands-chat.test.js +106 -0
  84. package/test/custom-commands.test.js +230 -0
  85. package/test/deny-windows.test.js +120 -0
  86. package/test/deny.test.js +83 -0
  87. package/test/download-allow-anywhere.test.js +66 -0
  88. package/test/download-confine.test.js +153 -0
  89. package/test/executors.test.js +362 -0
  90. package/test/extract-tool-calls.test.js +315 -0
  91. package/test/fetch-url-validation.test.js +219 -0
  92. package/test/fixtures/tool-calls.js +57 -0
  93. package/test/fixtures/web-page.js +91 -0
  94. package/test/git-tools.test.js +384 -0
  95. package/test/grep-glob-serialize.test.js +242 -0
  96. package/test/grep-glob.test.js +268 -0
  97. package/test/harness/README.md +57 -0
  98. package/test/harness/chat-harness.js +142 -0
  99. package/test/harness/memwarn-headless-child.js +65 -0
  100. package/test/harness/mock-llm.js +120 -0
  101. package/test/harness/mock-mcp-server.js +142 -0
  102. package/test/harness/sse-server.js +69 -0
  103. package/test/headless.test.js +203 -0
  104. package/test/history-utils.test.js +88 -0
  105. package/test/hooks-agent.test.js +238 -0
  106. package/test/hooks-verify-sandbox.test.js +232 -0
  107. package/test/hooks.test.js +216 -0
  108. package/test/http-get-user-agent.test.js +142 -0
  109. package/test/images-api.test.js +208 -0
  110. package/test/images.test.js +238 -0
  111. package/test/max-iterations.test.js +216 -0
  112. package/test/mcp-boundary.test.js +57 -0
  113. package/test/mcp-client.test.js +267 -0
  114. package/test/mcp-oauth.test.js +86 -0
  115. package/test/memory-truncation-warning.test.js +222 -0
  116. package/test/memory.test.js +198 -0
  117. package/test/native-dispatch.test.js +356 -0
  118. package/test/output-chokepoint.test.js +188 -0
  119. package/test/path-guards.test.js +134 -0
  120. package/test/payload.test.js +99 -0
  121. package/test/permission-rules-agent.test.js +210 -0
  122. package/test/permission-rules.test.js +297 -0
  123. package/test/permissions.test.js +163 -0
  124. package/test/plan-mode.test.js +167 -0
  125. package/test/read-paginate.test.js +275 -0
  126. package/test/readonly-tools.test.js +177 -0
  127. package/test/result-cap.test.js +233 -0
  128. package/test/sandbox-agent.test.js +147 -0
  129. package/test/sandbox-integration.test.js +216 -0
  130. package/test/sandbox.test.js +408 -0
  131. package/test/sdk.test.js +234 -0
  132. package/test/shell-output-cap.test.js +181 -0
  133. package/test/skills-chat.test.js +110 -0
  134. package/test/skills.test.js +295 -0
  135. package/test/smoke.test.js +68 -0
  136. package/test/status-bar-pause.test.js +164 -0
  137. package/test/stream-parser.test.js +147 -0
  138. package/test/subagents-agent.test.js +178 -0
  139. package/test/subagents.test.js +222 -0
  140. package/test/tool-registry.test.js +85 -0
  141. package/test/trim-budget.test.js +101 -0
  142. package/test/verify-agent.test.js +317 -0
  143. package/test/verify.test.js +141 -0
  144. package/test/web-activity-ordering.test.js +194 -0
  145. package/test/web-activity.test.js +207 -0
  146. package/test/web-data-extraction-guidance.test.js +71 -0
  147. package/test/web-extract.test.js +185 -0
  148. package/test/web-fetch-agent.test.js +291 -0
  149. package/test/web-fetch-mode.test.js +193 -0
  150. package/test/web-search.test.js +380 -0
  151. package/lib/commands.js +0 -1288
package/lib/tools.js CHANGED
@@ -1,20 +1,34 @@
1
1
  'use strict';
2
2
 
3
- const fs = require('fs');
4
- const fsp = require('fs/promises');
5
- const http = require('http');
6
- const https = require('https');
7
3
  const os = require('os');
8
4
  const path = require('path');
5
+ // Note: fs / fs/promises / http / https are no longer required here — the file
6
+ // and network executors moved to lib/tool_registry.js in Task 1.4b. tools.js now
7
+ // keeps only the path/sandbox guards, the shell executor, and the parse glue.
9
8
  const { spawn } = require('child_process');
10
9
 
11
10
  const { logToolCall } = require('./audit');
11
+ const { spawnWithGroup, killTreeEscalating } = require('./proc');
12
+ const { classifyShellCommand } = require('./deny');
13
+ const { resolveSandboxedSpawn } = require('./sandbox');
14
+ const { TOOL_REGISTRY, fromInvoke, entryForAction, dynamicToolEntries } = require('./tool_registry');
12
15
  const writer = require('./ui/writer');
16
+ const { CONFIG_PATH, protectedConfigDirs } = require('./constants');
13
17
 
14
18
  const MEMORY_PATH = path.join(os.homedir(), '.semalt-ai', 'memory.json');
15
19
 
16
20
  const _dryRun = process.argv.includes('--dry-run');
17
21
  const _allowAnywhere = process.argv.includes('--allow-anywhere');
22
+ // Binary network isolation (Task 4.4b): the human-typed --no-network flag forces
23
+ // kernel-level no-network for sandboxed commands. Read once at module load — a
24
+ // human-only signal the model can never reach (the model controls only the
25
+ // command string). sandbox.network in config does the same via decideSandbox.
26
+ const _noNetwork = process.argv.includes('--no-network');
27
+ // The single, explicit opt-out of ALL safety. When set, the destructive-command
28
+ // deny-list, the secret-file read guard, the protected-config write guard, and the OS sandbox are bypassed. This is the only flag
29
+ // that does so — --allow-* tier flags grant approval but never disable the
30
+ // deny-list. See lib/deny.js and Task 0.1.
31
+ const _skipPermissions = process.argv.includes('--dangerously-skip-permissions');
18
32
  const _skippedOps = [];
19
33
  function getSkippedOps() { return _skippedOps.slice(); }
20
34
 
@@ -55,6 +69,59 @@ function _sandboxError(filePath) {
55
69
  return { error: `Path outside allowed area: ${filePath}. Use --allow-anywhere to override.` };
56
70
  }
57
71
 
72
+ // Files that hold secrets or sensitive history and must NEVER be read back into
73
+ // the model context — the API key / auth token live in config.json, the memory
74
+ // store may contain secrets, and the audit log records past tool inputs. This
75
+ // guard is intentionally NOT gated on --allow-anywhere: --allow-anywhere widens
76
+ // where the agent may write, it does not unlock secret exfiltration. The only
77
+ // override is --dangerously-skip-permissions (opting out of all safety).
78
+ const _AUDIT_LOG_PATH = path.join(os.homedir(), '.semalt-ai', 'audit.log');
79
+ const PROTECTED_READ_PATHS = new Set([
80
+ path.resolve(CONFIG_PATH),
81
+ path.resolve(MEMORY_PATH),
82
+ path.resolve(_AUDIT_LOG_PATH),
83
+ ]);
84
+
85
+ function isProtectedSecretPath(filePath) {
86
+ if (_skipPermissions) return false;
87
+ if (typeof filePath !== 'string' || !filePath) return false;
88
+ return PROTECTED_READ_PATHS.has(path.resolve(filePath));
89
+ }
90
+
91
+ function _secretReadError(filePath) {
92
+ return { error: `Refused: ${filePath} holds secrets/credentials and cannot be read by the agent. (This guard is not overridable with --allow-anywhere.)` };
93
+ }
94
+
95
+ // Config/execution-driving paths that the AGENT must never WRITE — the write-side
96
+ // companion to the read guard above (Pre-Task 5.0b). Covers the protected-config
97
+ // set (lib/constants.js protectedConfigDirs): the whole ~/.semalt-ai dir AND
98
+ // every project .semalt dir from the CWD up to the repo root, INCLUDING files
99
+ // that do not yet exist (a missing config.json / agents/*.md / hook is inside a
100
+ // protected DIR, so it is refused regardless of existence — the CVE-2026-25725
101
+ // lesson). Directory-prefix matched on the resolved path, mirroring isPathSafe.
102
+ //
103
+ // Policy parity with isProtectedSecretPath: NOT gated on --allow-anywhere
104
+ // (--allow-anywhere widens WHERE the agent may write, it does not unlock writing
105
+ // the config surfaces that drive host-privileged execution). The only override is
106
+ // --dangerously-skip-permissions (opting out of all safety). A human editing
107
+ // their own config in an editor is unaffected — this guards the agent's tools and
108
+ // the sandboxed shell, not the human.
109
+ function isProtectedConfigPath(filePath) {
110
+ if (_skipPermissions) return false;
111
+ if (typeof filePath !== 'string' || !filePath) return false;
112
+ const resolved = path.resolve(filePath);
113
+ for (const dir of protectedConfigDirs()) {
114
+ const base = path.resolve(dir);
115
+ const prefix = base.endsWith(path.sep) ? base : base + path.sep;
116
+ if (resolved === base || resolved.startsWith(prefix)) return true;
117
+ }
118
+ return false;
119
+ }
120
+
121
+ function _protectedConfigWriteError(filePath) {
122
+ return { error: `Refused: ${filePath} is a protected config path (under ~/.semalt-ai or a project .semalt dir) that drives execution and cannot be written by the agent. (This guard is not overridable with --allow-anywhere.)` };
123
+ }
124
+
58
125
  // Cheap ReDoS guard. Rejects pathologically long patterns, common
59
126
  // catastrophic-backtracking anti-patterns, and pattern×data sizes large
60
127
  // enough to hang the regex engine.
@@ -73,8 +140,34 @@ function _checkRegexSafety(pattern, data) {
73
140
  return null;
74
141
  }
75
142
 
76
- function createToolExecutor(permissionManager, ui, getConfig) {
143
+ function createToolExecutor(permissionManager, ui, getConfig, options = {}) {
77
144
  const { BOLD, DIM, FG_DARK, FG_GRAY, FG_GREEN, FG_RED, FG_YELLOW, RST, renderDiff } = ui;
145
+ // Checkpoints & rewind (Task 4.3). When a store is wired, the prior state of a
146
+ // file is snapshotted in agentExecFile AFTER the permission gate approves and
147
+ // BEFORE the executor mutates. Optional — undefined in tests/headless paths
148
+ // that don't opt in. Subagents reuse this same agentExecFile, so a child's
149
+ // mutations are checkpointed into the parent session for free.
150
+ const checkpointStore = options.checkpointStore || null;
151
+ // OS sandbox fallback approver (Task 4.4). When the sandbox is unavailable in
152
+ // `auto` mode, agentExecShell asks a HUMAN before running a command
153
+ // unsandboxed via this callback (it returns true to allow, false to refuse).
154
+ // Undefined in non-TTY/headless/test paths → the command is REFUSED (never a
155
+ // silent unsandboxed run). This is injected by the executor owner (index.js),
156
+ // never reachable by the model, so the agent can't approve its own escape.
157
+ const onUnsandboxed = typeof options.onUnsandboxed === 'function' ? options.onUnsandboxed : null;
158
+ // Web-fetch secondary summarizer (Task W.1). An injected async LLM call
159
+ // `(messages, { model, signal }) => Promise<string>` (the api client's
160
+ // chatComplete) used by http_get to summarize extracted page content in a
161
+ // separate call — only the summary enters the main context. Optional: when
162
+ // absent (headless/oneshot paths without an api client), http_get returns the
163
+ // extracted Markdown instead of summarizing, never the raw page.
164
+ const webChat = typeof options.webChat === 'function' ? options.webChat : null;
165
+ // Web search backend (Task W.2b). An injected async call
166
+ // `(query, { count }) => Promise<{ results: [{title,url,snippet}], … }>` (the
167
+ // api client's dashboardSearch, which hits the backend POST /api/search).
168
+ // Optional: when absent (headless/oneshot paths without an api client) the
169
+ // web_search tool degrades to a clean tool error, never a crash.
170
+ const webSearch = typeof options.webSearch === 'function' ? options.webSearch : null;
78
171
  // Continuation lines in a system-message bubble (chat-history.js else branch)
79
172
  // are indented by 5 spaces. Let the diff renderer reserve those columns so
80
173
  // its lines don't auto-wrap inside the bubble.
@@ -89,7 +182,95 @@ function createToolExecutor(permissionManager, ui, getConfig) {
89
182
  return options.length >= 2 ? options : [];
90
183
  }
91
184
 
92
- async function agentExecShell(command) {
185
+ // Build the permission descriptor for a [action, ...args] call tuple.
186
+ // Returns null when no permission gate is needed — read-only ops, dry-run
187
+ // skips, and --readonly blocks short-circuit through the executor's own
188
+ // error path. When non-null the caller (the agent loop) feeds the three
189
+ // fields straight into permissionManager.askPermission.
190
+ //
191
+ // Side effects, intentionally hosted here so they fire pre-picker:
192
+ // - write/append: render the file diff. In CLI mode the diff is
193
+ // emitted to scrollback; in TUI mode it's appended to `description`
194
+ // so it renders inside the picker bubble.
195
+ // - delete_file / move_file: emit a CLI warning line above the picker.
196
+ // Centralizing these means the executor body stays purely about the
197
+ // operation itself.
198
+ async function describePermission(call) {
199
+ if (!Array.isArray(call) || call.length === 0) return null;
200
+ const [action, ...args] = call;
201
+
202
+ // Dry-run skips the gate for every tool whose executor has a dry-run
203
+ // early return — asking the user to authorize an op that won't run is
204
+ // confusing UX. write/append are handled inside their own case so the
205
+ // diff still renders before the skip. Tools without dry-run support
206
+ // (delete_file, make_dir, etc.) fall through and get gated normally.
207
+ if (_dryRun && (action === 'shell' || action === 'exec' || action === 'download' || action === 'http_get')) {
208
+ return null;
209
+ }
210
+
211
+ // --readonly blocks the op deterministically; no point prompting first.
212
+ // The executor's own readonlyBlock() check produces the user-facing
213
+ // error message on the next dispatch step.
214
+ const READONLY_TAG = {
215
+ write: 'write_file',
216
+ append: 'append_file',
217
+ delete_file: 'delete_file',
218
+ move_file: 'move_file',
219
+ copy_file: 'copy_file',
220
+ download: 'download',
221
+ edit_file: 'edit_file',
222
+ replace_in_file: 'replace_in_file',
223
+ make_dir: 'make_dir',
224
+ remove_dir: 'remove_dir',
225
+ upload: 'upload',
226
+ };
227
+ const roTag = READONLY_TAG[action];
228
+ if (roTag && permissionManager.readonlyBlock(roTag)) return null;
229
+
230
+ // Per-tool descriptor now lives on the registry entry (Task 1.4b). Read-only
231
+ // ops resolve to a permission() that returns null (no gate). The side effects
232
+ // that used to live in the switch cases (write/append diff render,
233
+ // delete/move warning lines) moved into those permission() bodies unchanged.
234
+ const entry = entryForAction(action);
235
+ if (entry && typeof entry.permission === 'function') return entry.permission(ctx, args);
236
+ return null;
237
+ }
238
+
239
+ async function agentExecShell(command, options = {}) {
240
+ // Destructive-command deny-list. Enforced for EVERY shell call regardless
241
+ // of approval mode (interactive, non-TTY, or any --allow-* flag). This is
242
+ // the unbypassable chokepoint: all shell execution funnels through here.
243
+ // The only escape hatch is --dangerously-skip-permissions.
244
+ //
245
+ // The `initiator` distinguishes agent-initiated calls (the model asked) from
246
+ // user-initiated ones (a human typed `!cmd` / `semalt-code shell`). Agent
247
+ // calls keep the hard block. User calls are exempt from the block, except for
248
+ // the catastrophic subset (disk wipe / fork bomb), which gets a one-time y/N
249
+ // confirmation via options.confirm as a typo guard. See lib/deny.js.
250
+ const initiator = options.initiator === 'user' ? 'user' : 'agent';
251
+ if (!_skipPermissions) {
252
+ const verdict = classifyShellCommand(command, initiator);
253
+ if (verdict.action === 'block') {
254
+ const msg = `Blocked by safety deny-list: ${verdict.label}. Refuse to run: ${command}. To override, restart with --dangerously-skip-permissions.`;
255
+ _log(` ${FG_RED}✗ ${msg}${RST}`);
256
+ logToolCall('exec', { command }, false, 'denied');
257
+ return { exit_code: -1, stdout: '', stderr: msg, blocked: true };
258
+ }
259
+ if (verdict.action === 'confirm') {
260
+ let approved = false;
261
+ if (typeof options.confirm === 'function') {
262
+ try { approved = await options.confirm(verdict.label, command); }
263
+ catch { approved = false; }
264
+ }
265
+ if (!approved) {
266
+ const msg = `Cancelled (${verdict.label}): ${command}`;
267
+ _log(` ${FG_RED}✗ ${msg}${RST}`);
268
+ logToolCall('exec', { command }, false, 'cancelled');
269
+ return { exit_code: -1, stdout: '', stderr: msg, blocked: true };
270
+ }
271
+ }
272
+ }
273
+
93
274
  if (_dryRun) {
94
275
  _log(` ${FG_DARK}[dry-run] $ ${command}${RST}`);
95
276
  _skippedOps.push({ category: 'cmd', symbol: '▶', desc: command });
@@ -97,859 +278,208 @@ function createToolExecutor(permissionManager, ui, getConfig) {
97
278
  return { exit_code: 0, stdout: '', stderr: 'dry-run: command skipped' };
98
279
  }
99
280
 
100
- const approved = await permissionManager.askPermission('shell', command, 'exec');
101
- if (!approved) {
102
- logToolCall('exec', { command }, false, 'denied');
103
- return { exit_code: -1, stdout: '', stderr: 'Permission denied by user' };
104
- }
105
-
106
281
  const cfg = getConfig ? getConfig() : {};
107
282
  const timeout = cfg.command_timeout_ms || 30000;
283
+ const { signal } = options;
284
+
285
+ // ---------------------------------------------------------------------
286
+ // OS sandbox (Task 4.4; unified chokepoint Pre-Task 5.0a). EVERY shell call
287
+ // — here, self-verification, and command-type hooks — funnels through the
288
+ // SHARED resolveSandboxedSpawn shim, so the model has no path that runs a
289
+ // command outside this decision. --dangerously-skip-permissions (a
290
+ // human-only flag) opts out of all safety, sandbox included.
291
+ //
292
+ // run:true → spawn the resolved file/args (jailed when sandbox 'on';
293
+ // plain { shell:true } when 'off'/human-approved 'unavailable').
294
+ // run:false → fail-safe refusal: failIfUnavailable hard error (hard:true)
295
+ // or no/declined human approval — NEVER a silent unsandboxed run.
296
+ // ---------------------------------------------------------------------
297
+ const resolution = await resolveSandboxedSpawn({
298
+ command,
299
+ getConfig,
300
+ onUnsandboxed,
301
+ cwd: process.cwd(),
302
+ allowAnywhere: _allowAnywhere,
303
+ skipPermissions: _skipPermissions,
304
+ noNetwork: _noNetwork,
305
+ });
306
+ if (!resolution.run) {
307
+ _log(` ${FG_RED}✗ ${resolution.message}${RST}`);
308
+ logToolCall('exec', { command, sandbox: 'unavailable' }, false, resolution.hard ? 'sandbox-blocked' : 'sandbox-refused');
309
+ return { exit_code: -1, stdout: '', stderr: resolution.message, blocked: true, sandbox: 'unavailable' };
310
+ }
311
+ const spawnFile = resolution.file;
312
+ const spawnArgs = resolution.useShell ? [] : resolution.args;
313
+ const spawnOpts = resolution.useShell ? { shell: true } : {};
314
+ const sandboxStatus = resolution.sandbox;
315
+ // Binary network mode for this run (Task 4.4b): 'on' (host network) | 'off'
316
+ // (kernel-level no-network). Surfaced in the result + audit (net:on|off).
317
+ const networkStatus = resolution.network || 'on';
108
318
 
109
319
  return new Promise((resolve) => {
110
320
  let child;
111
321
  try {
112
- child = spawn(command, { shell: true });
322
+ // spawnWithGroup gives us a process-group leader on POSIX so
323
+ // killTreeEscalating can reach descendants via -pid. With shell:true
324
+ // a plain child.kill targets only the sh wrapper, leaving the real
325
+ // workload (find /, pipelines, etc.) running as orphans. When sandboxed,
326
+ // the group leader is the bwrap/sandbox-exec process — killing the group
327
+ // tears down the whole jailed subtree, so child-process confinement
328
+ // composes with the existing tree-kill plumbing.
329
+ child = spawnWithGroup(spawn, spawnFile, spawnArgs, spawnOpts);
113
330
  } catch (error) {
114
331
  _log(` ${FG_RED}✗ ${error.message}${RST}`);
115
- logToolCall('exec', { command }, true, 'error');
116
- return resolve({ exit_code: -1, stdout: '', stderr: error.message });
332
+ logToolCall('exec', { command, sandbox: sandboxStatus, network: networkStatus }, true, 'error');
333
+ return resolve({ exit_code: -1, stdout: '', stderr: error.message, sandbox: sandboxStatus, network: networkStatus });
117
334
  }
335
+ const startedAt = Date.now();
118
336
  let stdout = '';
119
337
  let stderr = '';
120
338
  let killed = false;
339
+ let abortedByUser = false;
340
+
121
341
  const timer = setTimeout(() => {
122
342
  killed = true;
123
- try { child.kill('SIGTERM'); } catch {}
343
+ killTreeEscalating(child);
124
344
  }, timeout);
345
+
346
+ let onAbort = null;
347
+ const detachAbort = () => {
348
+ if (onAbort && signal) {
349
+ signal.removeEventListener('abort', onAbort);
350
+ onAbort = null;
351
+ }
352
+ };
353
+ if (signal) {
354
+ if (signal.aborted) {
355
+ abortedByUser = true;
356
+ killTreeEscalating(child);
357
+ } else {
358
+ onAbort = () => {
359
+ abortedByUser = true;
360
+ killTreeEscalating(child);
361
+ };
362
+ signal.addEventListener('abort', onAbort, { once: true });
363
+ }
364
+ }
365
+
125
366
  child.stdout.setEncoding('utf8');
126
367
  child.stderr.setEncoding('utf8');
127
368
  child.stdout.on('data', (c) => { stdout += c; });
128
369
  child.stderr.on('data', (c) => { stderr += c; });
129
370
  child.on('error', (error) => {
130
371
  clearTimeout(timer);
372
+ detachAbort();
131
373
  _log(` ${FG_RED}✗ ${error.message}${RST}`);
132
- logToolCall('exec', { command }, true, 'error');
133
- resolve({ exit_code: -1, stdout, stderr: stderr || error.message });
374
+ logToolCall('exec', { command, sandbox: sandboxStatus, network: networkStatus }, true, 'error');
375
+ resolve({ exit_code: -1, stdout, stderr: stderr || error.message, sandbox: sandboxStatus, network: networkStatus });
134
376
  });
135
- child.on('close', (code, signal) => {
377
+ child.on('close', (code, sigName) => {
136
378
  clearTimeout(timer);
379
+ detachAbort();
380
+ if (abortedByUser) {
381
+ const elapsed_s = Math.max(0, Math.round((Date.now() - startedAt) / 1000));
382
+ const note = `[user interrupted after ${elapsed_s}s]`;
383
+ stderr += (stderr ? '\n' : '') + note;
384
+ logToolCall('exec', { command, sandbox: sandboxStatus, network: networkStatus }, true, 'aborted');
385
+ resolve({ exit_code: -1, stdout, stderr, aborted: true, elapsed_s, sandbox: sandboxStatus, network: networkStatus });
386
+ return;
387
+ }
137
388
  if (killed) stderr += (stderr ? '\n' : '') + `[timed out after ${timeout}ms]`;
138
- const exit_code = killed ? -1 : (code != null ? code : (signal ? -1 : 0));
139
- logToolCall('exec', { command }, true, exit_code === 0 ? 'ok' : 'error');
140
- resolve({ exit_code, stdout, stderr });
389
+ const exit_code = killed ? -1 : (code != null ? code : (sigName ? -1 : 0));
390
+ logToolCall('exec', { command, sandbox: sandboxStatus, network: networkStatus }, true, exit_code === 0 ? 'ok' : 'error');
391
+ resolve({ exit_code, stdout, stderr, sandbox: sandboxStatus, network: networkStatus });
141
392
  });
142
393
  });
143
394
  }
144
395
 
145
- async function agentExecFile(action, ...args) {
146
- const [arg0 = null, arg1 = null, arg2 = null, arg3 = null] = args;
147
-
148
- if (action === 'read') {
149
- const filePath = arg0;
150
- const stat = await fsp.stat(filePath).catch(() => null);
151
- if (stat) {
152
- const cfg = getConfig ? getConfig() : {};
153
- const maxBytes = (cfg.max_file_size_kb || 512) * 1024;
154
- if (stat.size > maxBytes) {
155
- const kb = (stat.size / 1024).toFixed(0);
156
- logToolCall('read_file', { path: filePath }, false, 'error');
157
- return { error: `File too large: ${kb} KB exceeds max_file_size_kb=${cfg.max_file_size_kb || 512}` };
158
- }
159
- }
160
- try {
161
- const data = await fsp.readFile(filePath, 'utf8');
162
- const lines = data.split('\n').length;
163
- if (lines > 10) {
164
- _log(` ${FG_GREEN}✓${RST} ${FG_GRAY}Read ${filePath} (${lines} lines, ${data.length} chars)${RST}`);
165
- } else {
166
- _log(` ${FG_GREEN}✓${RST} ${FG_GRAY}Read ${filePath}${RST}`);
167
- }
168
- logToolCall('read_file', { path: filePath }, true, 'ok');
169
- return { content: data, path: filePath, bytes: Buffer.byteLength(data, 'utf8') };
170
- } catch (error) {
171
- _log(` ${FG_RED}✗ ${error.message}${RST}`);
172
- logToolCall('read_file', { path: filePath }, true, 'error');
173
- return { error: error.message };
174
- }
175
- }
176
-
177
- if (action === 'write' || action === 'append') {
178
- const filePath = arg0;
179
- const content = arg1;
180
- const tag = action === 'write' ? 'write_file' : 'append_file';
181
-
182
- const blocked = permissionManager.readonlyBlock(tag);
183
- if (blocked) {
184
- logToolCall(tag, { path: filePath, content }, false, 'denied');
185
- return blocked;
186
- }
187
-
188
- if (!isPathSafe(filePath)) {
189
- logToolCall(tag, { path: filePath }, false, 'denied');
190
- return _sandboxError(filePath);
191
- }
192
-
193
- // Read existing content for diff display
194
- let existing = '';
195
- try { existing = await fsp.readFile(filePath, 'utf8'); } catch {}
196
-
197
- // For append the final state is existing + new content
198
- const finalContent = action === 'write' ? (content || '') : (existing + (content || ''));
199
-
200
- // In CLI mode, print the diff inline. In TUI mode, direct stdout writes
201
- // collide with the live chat-history/status-bar redraw, so we route the
202
- // diff into the permission description instead (where it renders inside
203
- // the permission bubble and is safely truncated by MAX_DESC_LINES).
204
- const diffOutput = _uiActive
205
- ? renderDiff(existing, finalContent, filePath, { inset: DIFF_BUBBLE_INSET })
206
- : renderDiff(existing, finalContent, filePath);
207
- if (!_uiActive) writer.scrollback(diffOutput);
208
-
209
- // Dry-run: record the skipped op and return without writing
210
- if (_dryRun) {
211
- const verb = action === 'write' ? 'write' : 'append';
212
- _skippedOps.push({ category: 'file', symbol: '✎', desc: `${verb} ${filePath}` });
213
- logToolCall(tag, { path: filePath }, false, 'dry-run');
214
- return { status: 'dry-run', message: 'dry-run: write skipped', path: filePath };
215
- }
216
-
217
- // Permission check routes through TUI dialog in chat mode, interactiveSelect in non-TUI flows
218
- let desc = `${action === 'write' ? 'Write' : 'Append to'} ${filePath}`;
219
- if (content) desc += ` (${content.length} chars)`;
220
- if (_uiActive) desc = `${desc}\n${diffOutput}`;
221
- const approved = await permissionManager.askPermission('file', desc, tag);
222
- if (!approved) {
223
- logToolCall(tag, { path: filePath, content }, false, 'denied');
224
- return { error: 'Permission denied' };
225
- }
226
- try {
227
- const dir = path.dirname(filePath);
228
- if (dir && dir !== '.') await fsp.mkdir(dir, { recursive: true });
229
- if (action === 'write') await fsp.writeFile(filePath, content || '');
230
- else await fsp.appendFile(filePath, content || '');
231
- const verb = action === 'write' ? 'Wrote' : 'Appended to';
232
- _log(` ${FG_GREEN}✓${RST} ${FG_GRAY}${verb} ${filePath}${RST}`);
233
- logToolCall(tag, { path: filePath, content }, true, 'ok');
234
- return { status: 'ok', path: filePath, bytes: (content || '').length };
235
- } catch (error) {
236
- _log(` ${FG_RED}✗ ${error.message}${RST}`);
237
- logToolCall(tag, { path: filePath, content }, true, 'error');
238
- return { error: error.message };
239
- }
240
- }
241
-
242
- if (action === 'list_dir') {
243
- const dirPath = arg0;
244
- try {
245
- const entries = await fsp.readdir(dirPath, { withFileTypes: true });
246
- const items = entries.map((e) => {
247
- if (e.isSymbolicLink()) return `[L] ${e.name}`;
248
- if (e.isDirectory()) return `[D] ${e.name}`;
249
- return `[F] ${e.name}`;
250
- });
251
- _log(` ${FG_GREEN}✓${RST} ${FG_GRAY}Listed ${dirPath} (${items.length} items)${RST}`);
252
- logToolCall('list_dir', { path: dirPath }, true, 'ok');
253
- return { items, path: dirPath };
254
- } catch (error) {
255
- _log(` ${FG_RED}✗ ${error.message}${RST}`);
256
- logToolCall('list_dir', { path: dirPath }, true, 'error');
257
- return { error: error.message };
258
- }
259
- }
260
-
261
- if (action === 'delete_file') {
262
- const filePath = arg0;
263
-
264
- const blocked = permissionManager.readonlyBlock('delete_file');
265
- if (blocked) {
266
- logToolCall('delete_file', { path: filePath }, false, 'denied');
267
- return blocked;
268
- }
269
-
270
- if (!isPathSafe(filePath)) {
271
- logToolCall('delete_file', { path: filePath }, false, 'denied');
272
- return _sandboxError(filePath);
273
- }
274
-
275
- _log(` ${FG_YELLOW}${BOLD}⚠ Deleting: ${filePath}${RST}`);
276
-
277
- const approved = await permissionManager.askPermission('file', `Delete ${filePath}`, 'delete_file');
278
- if (!approved) {
279
- logToolCall('delete_file', { path: filePath }, false, 'denied');
280
- return { error: 'Permission denied' };
281
- }
282
- try {
283
- await fsp.unlink(filePath);
284
- _log(` ${FG_GREEN}✓${RST} ${FG_GRAY}Deleted ${filePath}${RST}`);
285
- logToolCall('delete_file', { path: filePath }, true, 'ok');
286
- return { status: 'ok', path: filePath };
287
- } catch (error) {
288
- _log(` ${FG_RED}✗ ${error.message}${RST}`);
289
- logToolCall('delete_file', { path: filePath }, true, 'error');
290
- return { error: error.message };
291
- }
292
- }
293
-
294
- if (action === 'make_dir') {
295
- const dirPath = arg0;
296
- if (!isPathSafe(dirPath)) {
297
- logToolCall('make_dir', { path: dirPath }, false, 'denied');
298
- return _sandboxError(dirPath);
299
- }
300
- const approved = await permissionManager.askPermission('file', `Create directory ${dirPath}`, 'make_dir');
301
- if (!approved) {
302
- logToolCall('make_dir', { path: dirPath }, false, 'denied');
303
- return { error: 'Permission denied' };
304
- }
305
- try {
306
- await fsp.mkdir(dirPath, { recursive: true });
307
- _log(` ${FG_GREEN}✓${RST} ${FG_GRAY}Created directory ${dirPath}${RST}`);
308
- logToolCall('make_dir', { path: dirPath }, true, 'ok');
309
- return { status: 'ok', path: dirPath };
310
- } catch (error) {
311
- _log(` ${FG_RED}✗ ${error.message}${RST}`);
312
- logToolCall('make_dir', { path: dirPath }, true, 'error');
313
- return { error: error.message };
314
- }
315
- }
316
-
317
- if (action === 'remove_dir') {
318
- const dirPath = arg0;
319
- if (!isPathSafe(dirPath)) {
320
- logToolCall('remove_dir', { path: dirPath }, false, 'denied');
321
- return _sandboxError(dirPath);
322
- }
323
- const approved = await permissionManager.askPermission('file', `Remove directory ${dirPath}`, 'remove_dir');
324
- if (!approved) {
325
- logToolCall('remove_dir', { path: dirPath }, false, 'denied');
326
- return { error: 'Permission denied' };
327
- }
328
- try {
329
- await fsp.rm(dirPath, { recursive: true, force: true });
330
- _log(` ${FG_GREEN}✓${RST} ${FG_GRAY}Removed directory ${dirPath}${RST}`);
331
- logToolCall('remove_dir', { path: dirPath }, true, 'ok');
332
- return { status: 'ok', path: dirPath };
333
- } catch (error) {
334
- _log(` ${FG_RED}✗ ${error.message}${RST}`);
335
- logToolCall('remove_dir', { path: dirPath }, true, 'error');
336
- return { error: error.message };
337
- }
338
- }
339
-
340
- if (action === 'move_file') {
341
- const src = arg0;
342
- const dst = arg1;
343
-
344
- const blocked = permissionManager.readonlyBlock('move_file');
345
- if (blocked) {
346
- logToolCall('move_file', { src, dst }, false, 'denied');
347
- return blocked;
348
- }
349
-
350
- if (!isPathSafe(dst)) {
351
- logToolCall('move_file', { src, dst }, false, 'denied');
352
- return _sandboxError(dst);
353
- }
354
-
355
- _log(` ${FG_YELLOW}${BOLD}⚠ Moving: ${src} → ${dst}${RST}`);
356
-
357
- const approved = await permissionManager.askPermission('file', `Move ${src} to ${dst}`, 'move_file');
358
- if (!approved) {
359
- logToolCall('move_file', { src, dst }, false, 'denied');
360
- return { error: 'Permission denied' };
361
- }
362
- try {
363
- const dstDir = path.dirname(dst);
364
- if (dstDir && dstDir !== '.') await fsp.mkdir(dstDir, { recursive: true });
365
- try {
366
- await fsp.rename(src, dst);
367
- } catch (renameErr) {
368
- if (renameErr.code !== 'EXDEV') throw renameErr;
369
- // Cross-device rename not supported — copy then remove
370
- await fsp.cp(src, dst, { recursive: true });
371
- await fsp.rm(src, { recursive: true, force: true });
372
- }
373
- _log(` ${FG_GREEN}✓${RST} ${FG_GRAY}Moved ${src} → ${dst}${RST}`);
374
- logToolCall('move_file', { src, dst }, true, 'ok');
375
- return { status: 'ok', src, dst };
376
- } catch (error) {
377
- _log(` ${FG_RED}✗ ${error.message}${RST}`);
378
- logToolCall('move_file', { src, dst }, true, 'error');
379
- return { error: error.message };
380
- }
381
- }
382
-
383
- if (action === 'copy_file') {
384
- const src = arg0;
385
- const dst = arg1;
386
-
387
- const blocked = permissionManager.readonlyBlock('copy_file');
388
- if (blocked) {
389
- logToolCall('copy_file', { src, dst }, false, 'denied');
390
- return blocked;
391
- }
392
-
393
- if (!isPathSafe(dst)) {
394
- logToolCall('copy_file', { src, dst }, false, 'denied');
395
- return _sandboxError(dst);
396
- }
397
-
398
- const approved = await permissionManager.askPermission('file', `Copy ${src} to ${dst}`, 'copy_file');
399
- if (!approved) {
400
- logToolCall('copy_file', { src, dst }, false, 'denied');
401
- return { error: 'Permission denied' };
402
- }
403
- try {
404
- const dstDir = path.dirname(dst);
405
- if (dstDir && dstDir !== '.') await fsp.mkdir(dstDir, { recursive: true });
406
- await fsp.cp(src, dst, { recursive: true });
407
- _log(` ${FG_GREEN}✓${RST} ${FG_GRAY}Copied ${src} → ${dst}${RST}`);
408
- logToolCall('copy_file', { src, dst }, true, 'ok');
409
- return { status: 'ok', src, dst };
410
- } catch (error) {
411
- _log(` ${FG_RED}✗ ${error.message}${RST}`);
412
- logToolCall('copy_file', { src, dst }, true, 'error');
413
- return { error: error.message };
414
- }
415
- }
416
-
417
- if (action === 'edit_file') {
418
- const filePath = arg0;
419
- const lineNum = arg1;
420
- const newContent = arg2;
421
- const approved = await permissionManager.askPermission('file', `Edit line ${lineNum} in ${filePath}`, 'edit_file');
422
- if (!approved) {
423
- logToolCall('edit_file', { path: filePath, line: lineNum }, false, 'denied');
424
- return { error: 'Permission denied' };
425
- }
426
- try {
427
- const data = await fsp.readFile(filePath, 'utf8');
428
- const lines = data.split('\n');
429
- if (lineNum < 1 || lineNum > lines.length) {
430
- logToolCall('edit_file', { path: filePath, line: lineNum }, true, 'error');
431
- return { error: `Line ${lineNum} out of range (file has ${lines.length} lines)` };
432
- }
433
- lines[lineNum - 1] = newContent;
434
- await fsp.writeFile(filePath, lines.join('\n'));
435
- _log(` ${FG_GREEN}✓${RST} ${FG_GRAY}Edited line ${lineNum} in ${filePath}${RST}`);
436
- logToolCall('edit_file', { path: filePath, line: lineNum }, true, 'ok');
437
- return { status: 'ok', path: filePath, line: lineNum };
438
- } catch (error) {
439
- _log(` ${FG_RED}✗ ${error.message}${RST}`);
440
- logToolCall('edit_file', { path: filePath, line: lineNum }, true, 'error');
441
- return { error: error.message };
442
- }
443
- }
444
-
445
- if (action === 'search_in_file') {
446
- const filePath = arg0;
447
- const pattern = arg1;
448
- try {
449
- const data = await fsp.readFile(filePath, 'utf8');
450
- const guardErr = _checkRegexSafety(pattern, data);
451
- if (guardErr) {
452
- logToolCall('search_in_file', { path: filePath, pattern }, true, 'error');
453
- return guardErr;
454
- }
455
- const regex = new RegExp(pattern);
456
- const matches = data.split('\n')
457
- .map((content, idx) => regex.test(content) ? { line: idx + 1, content } : null)
458
- .filter(Boolean);
459
- _log(` ${FG_GREEN}✓${RST} ${FG_GRAY}Found ${matches.length} match(es) in ${filePath}${RST}`);
460
- logToolCall('search_in_file', { path: filePath, pattern }, true, 'ok');
461
- return { matches, path: filePath };
462
- } catch (error) {
463
- _log(` ${FG_RED}✗ ${error.message}${RST}`);
464
- logToolCall('search_in_file', { path: filePath, pattern }, true, 'error');
465
- return { error: error.message };
466
- }
467
- }
468
-
469
- if (action === 'replace_in_file') {
470
- const filePath = arg0;
471
- const searchStr = arg1;
472
- const replaceStr = arg2;
473
- const flags = arg3 || '';
474
- const approved = await permissionManager.askPermission('file', `Replace in ${filePath}`, 'replace_in_file');
475
- if (!approved) {
476
- logToolCall('replace_in_file', { path: filePath, search: searchStr }, false, 'denied');
477
- return { error: 'Permission denied' };
478
- }
479
- try {
480
- const data = await fsp.readFile(filePath, 'utf8');
481
- const guardErr = _checkRegexSafety(searchStr, data);
482
- if (guardErr) {
483
- logToolCall('replace_in_file', { path: filePath, search: searchStr }, true, 'error');
484
- return guardErr;
485
- }
486
- const safeFlags = flags.replace(/[^gimsuy]/g, '');
487
- const countFlags = safeFlags.includes('g') ? safeFlags : safeFlags + 'g';
488
- const count = (data.match(new RegExp(searchStr, countFlags)) || []).length;
489
- const regex = new RegExp(searchStr, safeFlags || undefined);
490
- const newData = data.replace(regex, replaceStr);
491
- await fsp.writeFile(filePath, newData);
492
- _log(` ${FG_GREEN}✓${RST} ${FG_GRAY}Replaced ${count} occurrence(s) in ${filePath}${RST}`);
493
- logToolCall('replace_in_file', { path: filePath, search: searchStr }, true, 'ok');
494
- return { status: 'ok', path: filePath, count };
495
- } catch (error) {
496
- _log(` ${FG_RED}✗ ${error.message}${RST}`);
497
- logToolCall('replace_in_file', { path: filePath, search: searchStr }, true, 'error');
498
- return { error: error.message };
499
- }
500
- }
501
-
502
- if (action === 'search_files') {
503
- const pattern = arg0;
504
- const searchDir = arg1 || '.';
505
- try {
506
- let regStr = pattern.replace(/[.+^${}()|[\]\\]/g, '\\$&');
507
- regStr = regStr.replace(/\*\*/g, '\x00');
508
- regStr = regStr.replace(/\*/g, '[^/]*');
509
- regStr = regStr.replace(/\x00\//g, '(?:.*/)?');
510
- regStr = regStr.replace(/\x00/g, '.*');
511
- const regex = new RegExp(`^${regStr}$`);
512
- const matchName = !pattern.includes('/');
513
- const files = [];
514
- async function walk(dir, rel) {
515
- let entries;
516
- try { entries = await fsp.readdir(dir, { withFileTypes: true }); } catch { return; }
517
- for (const entry of entries) {
518
- const relPath = rel ? `${rel}/${entry.name}` : entry.name;
519
- if (regex.test(matchName ? entry.name : relPath)) files.push(relPath);
520
- if (entry.isDirectory()) await walk(path.join(dir, entry.name), relPath);
521
- }
522
- }
523
- await walk(searchDir, '');
524
- files.sort();
525
- _log(` ${FG_GREEN}✓${RST} ${FG_GRAY}Found ${files.length} file(s) matching "${pattern}"${RST}`);
526
- logToolCall('search_files', { pattern, dir: searchDir }, true, 'ok');
527
- return { files, pattern, dir: searchDir };
528
- } catch (error) {
529
- _log(` ${FG_RED}✗ ${error.message}${RST}`);
530
- logToolCall('search_files', { pattern, dir: searchDir }, true, 'error');
531
- return { error: error.message };
532
- }
533
- }
534
-
535
- if (action === 'file_stat') {
536
- const filePath = arg0;
537
- try {
538
- const stat = await fsp.stat(filePath);
539
- const type = stat.isDirectory() ? 'directory' : stat.isSymbolicLink() ? 'symlink' : 'file';
540
- const size_kb = (stat.size / 1024).toFixed(2);
541
- const mode = '0o' + stat.mode.toString(8);
542
- const mtime = stat.mtime.toISOString();
543
- _log(` ${FG_GREEN}✓${RST} ${FG_GRAY}Stat ${filePath}${RST}`);
544
- logToolCall('file_stat', { path: filePath }, true, 'ok');
545
- return { path: filePath, size_kb, mtime, type, mode };
546
- } catch (error) {
547
- _log(` ${FG_RED}✗ ${error.message}${RST}`);
548
- logToolCall('file_stat', { path: filePath }, true, 'error');
549
- return { error: error.message };
550
- }
551
- }
552
-
553
- if (action === 'get_env') {
554
- const varName = arg0;
555
- const value = process.env[varName];
556
- _log(` ${FG_GREEN}✓${RST} ${FG_GRAY}Got env ${varName}${RST}`);
557
- logToolCall('get_env', { name: varName }, true, 'ok');
558
- return { name: varName, value: value !== undefined ? value : null };
559
- }
560
-
561
- if (action === 'set_env') {
562
- const varName = arg0;
563
- const value = arg1 || '';
564
- const approved = await permissionManager.askPermission('env', `Set env ${varName}=${value}`, 'set_env');
565
- if (!approved) {
566
- logToolCall('set_env', { name: varName }, false, 'denied');
567
- return { error: 'Permission denied' };
568
- }
569
- process.env[varName] = value;
570
- _log(` ${FG_GREEN}✓${RST} ${FG_GRAY}Set env ${varName}${RST}`);
571
- logToolCall('set_env', { name: varName }, true, 'ok');
572
- return { status: 'ok', name: varName };
573
- }
574
-
575
- if (action === 'download') {
576
- const url = arg0;
577
- if (_dryRun) {
578
- _skippedOps.push({ category: 'net', symbol: '↓', desc: `download ${url}` });
579
- logToolCall('download', { url }, false, 'dry-run');
580
- return { status: 'dry-run', message: 'dry-run: network call skipped' };
581
- }
582
- let fileName;
583
- try {
584
- fileName = path.basename(new URL(url).pathname) || 'download';
585
- } catch {
586
- fileName = 'download';
587
- }
588
- const outPath = path.join(process.cwd(), fileName);
589
- const approved = await permissionManager.askPermission('net', `Download ${url}`, 'download');
590
- if (!approved) {
591
- logToolCall('download', { url }, false, 'denied');
592
- return { error: 'Permission denied' };
593
- }
594
- return new Promise((resolve) => {
595
- function doDownload(target, redirectsLeft) {
596
- const proto = target.startsWith('https') ? https : http;
597
- const req = proto.get(target, (res) => {
598
- if ([301, 302, 303, 307, 308].includes(res.statusCode) && redirectsLeft > 0 && res.headers.location) {
599
- res.resume();
600
- return doDownload(res.headers.location, redirectsLeft - 1);
601
- }
602
- if (res.statusCode >= 400) {
603
- res.resume();
604
- const msg = `HTTP ${res.statusCode}`;
605
- _log(` ${FG_RED}✗ ${msg}${RST}`);
606
- logToolCall('download', { url }, true, 'error');
607
- return resolve({ error: msg });
608
- }
609
- const file = fs.createWriteStream(outPath);
610
- res.pipe(file);
611
- file.on('finish', () => {
612
- file.close();
613
- _log(` ${FG_GREEN}✓${RST} ${FG_GRAY}Downloaded to ${outPath}${RST}`);
614
- logToolCall('download', { url }, true, 'ok');
615
- resolve({ status: 'ok', path: outPath });
616
- });
617
- file.on('error', (err) => {
618
- fs.unlink(outPath, () => {});
619
- _log(` ${FG_RED}✗ ${err.message}${RST}`);
620
- logToolCall('download', { url }, true, 'error');
621
- resolve({ error: err.message });
622
- });
623
- });
624
- req.on('error', (err) => {
625
- fs.unlink(outPath, () => {});
626
- _log(` ${FG_RED}✗ ${err.message}${RST}`);
627
- logToolCall('download', { url }, true, 'error');
628
- resolve({ error: err.message });
629
- });
630
- req.setTimeout(120000, () => {
631
- req.destroy();
632
- fs.unlink(outPath, () => {});
633
- logToolCall('download', { url }, true, 'error');
634
- resolve({ error: 'Request timeout' });
635
- });
636
- }
637
- doDownload(url, 5);
638
- });
639
- }
640
-
641
- if (action === 'upload') {
642
- const filePath = arg0;
643
- const encodedContent = arg1 || '';
644
- if (!isPathSafe(filePath)) {
645
- logToolCall('upload', { path: filePath }, false, 'denied');
646
- return _sandboxError(filePath);
647
- }
648
- const approved = await permissionManager.askPermission('file', `Upload to ${filePath}`, 'upload');
649
- if (!approved) {
650
- logToolCall('upload', { path: filePath }, false, 'denied');
651
- return { error: 'Permission denied' };
652
- }
653
- try {
654
- const dir = path.dirname(filePath);
655
- if (dir && dir !== '.') await fsp.mkdir(dir, { recursive: true });
656
- const buffer = Buffer.from(encodedContent.trim(), 'base64');
657
- await fsp.writeFile(filePath, buffer);
658
- _log(` ${FG_GREEN}✓${RST} ${FG_GRAY}Uploaded ${buffer.length} bytes to ${filePath}${RST}`);
659
- logToolCall('upload', { path: filePath }, true, 'ok');
660
- return { status: 'ok', path: filePath, bytes: buffer.length };
661
- } catch (error) {
662
- _log(` ${FG_RED}✗ ${error.message}${RST}`);
663
- logToolCall('upload', { path: filePath }, true, 'error');
664
- return { error: error.message };
665
- }
666
- }
667
-
668
- if (action === 'http_get') {
669
- const url = arg0;
670
- if (_dryRun) {
671
- _skippedOps.push({ category: 'net', symbol: '↓', desc: `GET ${url}` });
672
- logToolCall('http_get', { url }, false, 'dry-run');
673
- return { status: 'dry-run', message: 'dry-run: network call skipped' };
674
- }
675
- const approved = await permissionManager.askPermission('net', `HTTP GET ${url}`, 'http_get');
676
- if (!approved) {
677
- logToolCall('http_get', { url }, false, 'denied');
678
- return { error: 'Permission denied' };
679
- }
680
- const httpCfg = getConfig ? getConfig() : {};
681
- const reqTimeoutMs = Math.max(15000, httpCfg.request_timeout_ms || 15000);
682
- const maxBytes = Math.max(1024, httpCfg.http_fetch_max_bytes || 262144);
683
- return new Promise((resolve) => {
684
- function doGet(target, redirectsLeft) {
685
- const proto = target.startsWith('https') ? https : http;
686
- const req = proto.get(target, (res) => {
687
- if ([301, 302, 303, 307, 308].includes(res.statusCode) && redirectsLeft > 0 && res.headers.location) {
688
- res.resume();
689
- return doGet(res.headers.location, redirectsLeft - 1);
690
- }
691
- const bufs = [];
692
- let totalBytes = 0;
693
- let capped = false;
694
- res.on('data', (chunk) => {
695
- totalBytes += chunk.length;
696
- if (!capped) {
697
- if (totalBytes <= maxBytes) {
698
- bufs.push(chunk);
699
- } else {
700
- const keep = maxBytes - (totalBytes - chunk.length);
701
- if (keep > 0) bufs.push(chunk.slice(0, keep));
702
- capped = true;
703
- // Keep the connection draining so totalBytes reflects reality,
704
- // but stop buffering further bytes.
705
- }
706
- }
707
- });
708
- res.on('end', () => {
709
- const kept = Buffer.concat(bufs);
710
- const keptBytes = kept.length;
711
- let body = kept.toString('utf8');
712
- if (capped) {
713
- const origKb = (totalBytes / 1024).toFixed(0);
714
- const keptKb = (keptBytes / 1024).toFixed(0);
715
- const droppedKb = ((totalBytes - keptBytes) / 1024).toFixed(0);
716
- body += `\n\n[... truncated: original was ${origKb}KB, showing first ${keptKb}KB. The remaining ${droppedKb}KB was discarded. If you need the rest, narrow your request (e.g. fetch a specific subpage) rather than retrying this URL.]`;
717
- }
718
- _log(` ${FG_GREEN}✓${RST} ${FG_GRAY}HTTP GET ${target} (${res.statusCode}, ${totalBytes} bytes${capped ? `, truncated to ${keptBytes}` : ''})${RST}`);
719
- logToolCall('http_get', { url: target }, true, res.statusCode < 400 ? 'ok' : 'error');
720
- // `bytes` is the total transferred payload length (pre-cap);
721
- // consumers that want to know the wire size without parsing
722
- // the appended truncation note rely on this.
723
- resolve({ status_code: res.statusCode, body, bytes: totalBytes });
724
- });
725
- });
726
- req.on('error', (err) => {
727
- _log(` ${FG_RED}✗ ${err.message}${RST}`);
728
- logToolCall('http_get', { url: target }, true, 'error');
729
- resolve({ error: err.message, error_code: err.code });
730
- });
731
- req.setTimeout(reqTimeoutMs, () => {
732
- req.destroy();
733
- logToolCall('http_get', { url: target }, true, 'error');
734
- resolve({ error: 'Request timeout', error_code: 'ETIMEDOUT' });
735
- });
736
- }
737
- doGet(url, 5);
738
- });
739
- }
740
-
741
- if (action === 'ask_user') {
742
- const question = arg0;
743
- const approved = await permissionManager.askPermission('user', `Ask user: ${question}`, 'ask_user');
744
- if (!approved) {
745
- logToolCall('ask_user', { question }, false, 'denied');
746
- return { error: 'Permission denied' };
747
- }
748
- const options = _parseNumberedOptions(question);
749
- if (options.length >= 2) {
750
- const selected = await permissionManager.captureSelect({ options });
751
- logToolCall('ask_user', { question }, true, 'ok');
752
- return { question, answer: selected || options[0] };
753
- }
754
- if (!process.stdout.isTTY || process.stdin.isRaw) {
755
- writer.scrollback(`\n ${FG_YELLOW}?${RST} ${question}\n ${DIM}[auto-answering 'y']${RST}`);
756
- logToolCall('ask_user', { question }, true, 'ok');
757
- return { question, answer: 'y' };
758
- }
759
- // audit: allowed — inline prompt without trailing newline; unreachable when TUI writer is active
760
- // (process.stdin.isRaw is true while the TUI input field holds raw mode).
761
- process.stdout.write(`\n ${FG_YELLOW}?${RST} ${question}\n ${FG_GRAY}>${RST} `);
762
- const buf = Buffer.alloc(4096);
763
- let input = '';
764
- while (true) {
765
- const n = fs.readSync(0, buf, 0, 1);
766
- if (n === 0) break;
767
- const ch = buf[0];
768
- if (ch === 0x0a) break;
769
- if (ch === 0x0d) continue;
770
- input += String.fromCharCode(ch);
771
- }
772
- _log();
773
- logToolCall('ask_user', { question }, true, 'ok');
774
- return { question, answer: input };
775
- }
776
-
777
- if (action === 'store_memory') {
778
- const key = arg0;
779
- const value = arg1 || '';
780
- const approved = await permissionManager.askPermission('memory', `Store memory: ${key}`, 'store_memory');
781
- if (!approved) {
782
- logToolCall('store_memory', { key }, false, 'denied');
783
- return { error: 'Permission denied' };
784
- }
785
- try {
786
- let mem = {};
787
- try { mem = JSON.parse(await fsp.readFile(MEMORY_PATH, 'utf8')); } catch {}
788
- mem[key] = value;
789
- await fsp.mkdir(path.dirname(MEMORY_PATH), { recursive: true });
790
- await fsp.writeFile(MEMORY_PATH, JSON.stringify(mem, null, 2));
791
- _log(` ${FG_GREEN}✓${RST} ${FG_GRAY}Stored memory: ${key}${RST}`);
792
- logToolCall('store_memory', { key }, true, 'ok');
793
- return { status: 'ok', key };
794
- } catch (error) {
795
- _log(` ${FG_RED}✗ ${error.message}${RST}`);
796
- logToolCall('store_memory', { key }, true, 'error');
797
- return { error: error.message };
798
- }
799
- }
800
-
801
- if (action === 'recall_memory') {
802
- const key = arg0;
803
- try {
804
- let mem = {};
805
- try { mem = JSON.parse(await fsp.readFile(MEMORY_PATH, 'utf8')); } catch {}
806
- const found = key in mem;
807
- const value = found ? mem[key] : null;
808
- _log(` ${FG_GREEN}✓${RST} ${FG_GRAY}Recalled memory: ${key}${RST}`);
809
- logToolCall('recall_memory', { key }, true, 'ok');
810
- return { key, value, found };
811
- } catch (error) {
812
- _log(` ${FG_RED}✗ ${error.message}${RST}`);
813
- logToolCall('recall_memory', { key }, true, 'error');
814
- return { error: error.message };
815
- }
816
- }
817
-
818
- if (action === 'list_memories') {
819
- try {
820
- let mem = {};
821
- try { mem = JSON.parse(await fsp.readFile(MEMORY_PATH, 'utf8')); } catch {}
822
- const keys = Object.keys(mem);
823
- _log(` ${FG_GREEN}✓${RST} ${FG_GRAY}Listed ${keys.length} memory key(s)${RST}`);
824
- logToolCall('list_memories', {}, true, 'ok');
825
- return { keys };
826
- } catch (error) {
827
- _log(` ${FG_RED}✗ ${error.message}${RST}`);
828
- logToolCall('list_memories', {}, true, 'error');
829
- return { error: error.message };
830
- }
831
- }
832
-
833
- if (action === 'system_info') {
834
- const info = {
835
- platform: os.platform(),
836
- arch: os.arch(),
837
- hostname: os.hostname(),
838
- user: process.env.USER || process.env.USERNAME || '',
839
- total_mem_mb: Math.round(os.totalmem() / 1024 / 1024),
840
- free_mem_mb: Math.round(os.freemem() / 1024 / 1024),
841
- node_version: process.version,
842
- cwd: process.cwd(),
843
- };
844
- _log(` ${FG_GREEN}✓${RST} ${FG_GRAY}System info: ${info.platform}/${info.arch}${RST}`);
845
- logToolCall('system_info', {}, true, 'ok');
846
- return info;
847
- }
848
-
849
- logToolCall(action, { action }, false, 'error');
850
- return { error: `Unknown action: ${action}` };
851
- }
396
+ async function agentExecFile(action, ...rest) {
397
+ // The trailing arg may be an options object `{ signal }`. Detect and peel
398
+ // it off so positional args line up with the existing per-action branches.
399
+ // All real positional args are strings or numbers, so a plain object at
400
+ // the tail is unambiguously options.
401
+ let signal = null;
402
+ let args = rest;
403
+ const last = rest[rest.length - 1];
404
+ if (last && typeof last === 'object' && !Array.isArray(last)
405
+ && Object.getPrototypeOf(last) === Object.prototype) {
406
+ signal = last.signal || null;
407
+ args = rest.slice(0, -1);
408
+ }
409
+
410
+ // Dispatch via the tool registry (Task 1.4b). The per-action executor body
411
+ // lives on its TOOL_REGISTRY entry; ctx carries the collaborators the body
412
+ // closes over. Signature is unchanged, so agent.js stays untouched.
413
+ const entry = entryForAction(action);
414
+ if (!entry || typeof entry.execute !== 'function') {
415
+ logToolCall(action, { action }, false, 'error');
416
+ return { error: `Unknown action: ${action}` };
417
+ }
418
+
419
+ // Checkpoint capture (Task 4.3): snapshot prior state BEFORE the mutation
420
+ // (post-gate, so a denied call never reaches here). Skip under --dry-run (no
421
+ // mutation will happen). beginCapture is fail-safe (returns null on error),
422
+ // and commit only fires on a status:'ok' result, so a call the executor
423
+ // itself refuses (--readonly, sandbox) produces no committed checkpoint.
424
+ let _cp = null;
425
+ if (checkpointStore && !_dryRun) {
426
+ try { _cp = await checkpointStore.beginCapture(action, args); } catch { _cp = null; }
427
+ }
428
+ const result = await entry.execute(ctx, args, { signal });
429
+ if (_cp) {
430
+ try { if (result && result.status === 'ok') _cp.commit(); } catch { /* fail-safe: never block a completed mutation */ }
431
+ }
432
+ return result;
433
+ }
434
+
435
+ // Dependency bag passed to every registry execute()/permission(). Built once
436
+ // here so the moved executor bodies see the same collaborators they had inside
437
+ // this factory closure. _uiActive is a getter so it always reflects the live
438
+ // module flag rather than a snapshot.
439
+ const ctx = {
440
+ _log,
441
+ logToolCall,
442
+ isPathSafe,
443
+ isProtectedSecretPath,
444
+ isProtectedConfigPath,
445
+ _sandboxError,
446
+ _secretReadError,
447
+ _protectedConfigWriteError,
448
+ _checkRegexSafety,
449
+ _parseNumberedOptions,
450
+ _dryRun,
451
+ _skippedOps,
452
+ MEMORY_PATH,
453
+ permissionManager,
454
+ getConfig,
455
+ webChat,
456
+ webSearch,
457
+ renderDiff,
458
+ DIFF_BUBBLE_INSET,
459
+ writer,
460
+ agentExecShell,
461
+ FG_GREEN,
462
+ FG_GRAY,
463
+ FG_RED,
464
+ FG_YELLOW,
465
+ RST,
466
+ DIM,
467
+ BOLD,
468
+ get _uiActive() { return _uiActive; },
469
+ };
852
470
 
853
471
  return {
854
472
  agentExecFile,
855
473
  agentExecShell,
474
+ describePermission,
856
475
  };
857
476
  }
858
477
 
859
- // Map a MiniMax-style {name, params} invocation to the internal
860
- // [action, arg1, arg2, …] call tuple consumed by the agent loop.
478
+ // Native function-calling path → internal [action, ...args] call tuple.
479
+ // Delegates to the tool registry (lib/tool_registry.js) — the single source
480
+ // shared with the XML path. Kept here (and exported) for backward compatibility.
861
481
  function mapInvokeToCall(toolName, params) {
862
- const name = (toolName || '').toLowerCase();
863
- const p = params || {};
864
- switch (name) {
865
- case 'write_file':
866
- case 'create_file':
867
- return p.path ? ['write', p.path, p.content != null ? p.content : ''] : null;
868
- case 'read_file':
869
- return p.path ? ['read', p.path] : null;
870
- case 'append_file':
871
- return p.path ? ['append', p.path, p.content != null ? p.content : ''] : null;
872
- case 'delete_file':
873
- return p.path ? ['delete_file', p.path] : null;
874
- case 'list_dir':
875
- return ['list_dir', p.path || p.dir || '.'];
876
- case 'make_dir':
877
- return p.path ? ['make_dir', p.path] : null;
878
- case 'remove_dir':
879
- return p.path ? ['remove_dir', p.path] : null;
880
- case 'move_file':
881
- return p.src && p.dst ? ['move_file', p.src, p.dst] : null;
882
- case 'copy_file':
883
- return p.src && p.dst ? ['copy_file', p.src, p.dst] : null;
884
- case 'file_stat':
885
- return p.path ? ['file_stat', p.path] : null;
886
- case 'search_files':
887
- return ['search_files', p.pattern || p.glob || '*', p.dir || '.'];
888
- case 'search_in_file':
889
- return p.path && p.pattern ? ['search_in_file', p.path, p.pattern] : null;
890
- case 'replace_in_file':
891
- return p.path && p.search !== undefined
892
- ? ['replace_in_file', p.path, p.search, p.replace != null ? p.replace : '', p.flags || '']
893
- : null;
894
- case 'edit_file':
895
- return p.path && p.line !== undefined
896
- ? ['edit_file', p.path, parseInt(p.line, 10), p.content != null ? p.content : '']
897
- : null;
898
- case 'get_env':
899
- return p.name ? ['get_env', p.name] : null;
900
- case 'set_env':
901
- return p.name ? ['set_env', p.name, p.value != null ? p.value : ''] : null;
902
- case 'download':
903
- return p.url ? ['download', p.url] : null;
904
- case 'upload':
905
- return p.path ? ['upload', p.path, p.content != null ? p.content : ''] : null;
906
- case 'http_get':
907
- return p.url ? ['http_get', p.url] : null;
908
- case 'ask_user':
909
- return p.question ? ['ask_user', p.question] : null;
910
- case 'store_memory':
911
- return p.key ? ['store_memory', p.key, p.value != null ? p.value : ''] : null;
912
- case 'recall_memory':
913
- return p.key ? ['recall_memory', p.key] : null;
914
- case 'list_memories':
915
- return ['list_memories'];
916
- case 'system_info':
917
- return ['system_info'];
918
- case 'exec':
919
- case 'shell':
920
- case 'run':
921
- case 'run_command':
922
- case 'bash':
923
- return p.command ? ['shell', p.command] : null;
924
- default:
925
- return null;
926
- }
927
- }
928
-
929
- // Compile a regex twice — once with double quotes, once with single — from a
930
- // template where `Q` stands for the quote char. Matches from both variants
931
- // are returned in a single iterable.
932
- function _matchDual(text, template) {
933
- const results = [];
934
- for (const q of ['"', "'"]) {
935
- const re = new RegExp(template.replace(/Q/g, q), 'g');
936
- for (const m of text.matchAll(re)) results.push(m);
937
- }
938
- return results;
939
- }
940
-
941
- // Models sometimes wrap the inline body of a single-value tool tag in a nested
942
- // pseudo-tag, e.g. `<list_dir><path>/tmp/foo</path></list_dir>` instead of the
943
- // documented `<list_dir>/tmp/foo</list_dir>`. When the body is exactly one
944
- // wrapper element (no siblings, no surrounding text), unwrap it once so the
945
- // parser recovers the intended value. Safe to call on any inline-content body
946
- // — a plain path/command/URL won't match the regex and is returned as-is.
947
- function _unwrapInnerTag(inner) {
948
- if (inner == null) return inner;
949
- const trimmed = String(inner).trim();
950
- const m = trimmed.match(/^<(\w+)(?:\s[^>]*)?>([\s\S]*)<\/\1>$/);
951
- if (!m) return inner;
952
- return m[2].trim();
482
+ return fromInvoke(toolName, params);
953
483
  }
954
484
 
955
485
  // MiniMax-M2 tool-call XML repair. Some inference backends — notably mlx-lm
@@ -1155,130 +685,21 @@ function extractToolCalls(text, options = {}) {
1155
685
  }
1156
686
  }
1157
687
 
1158
- for (const match of text.matchAll(/<(?:shell|exec|run_command|run)>([\s\S]*?)<\/(?:shell|exec|run_command|run)>/g)) {
1159
- calls.push(['shell', _unwrapInnerTag(match[1]).trim()]);
1160
- }
1161
-
1162
- for (const match of text.matchAll(/<read_file>([\s\S]*?)<\/read_file>/g)) {
1163
- calls.push(['read', _unwrapInnerTag(match[1]).trim()]);
1164
- }
1165
-
1166
- for (const match of _matchDual(text, '<read_file\\s+path=Q([^Q]+)Q\\s*\\/?>')) {
1167
- calls.push(['read', match[1]]);
1168
- }
1169
-
1170
- for (const match of _matchDual(text, '<write_file\\s+path=Q([^Q]+)Q>([\\s\\S]*?)<\\/write_file>')) {
1171
- calls.push(['write', match[1], match[2]]);
1172
- }
1173
-
1174
- for (const match of _matchDual(text, '<create_file\\s+path=Q([^Q]+)Q>([\\s\\S]*?)<\\/create_file>')) {
1175
- calls.push(['write', match[1], match[2]]);
1176
- }
1177
-
1178
- for (const match of _matchDual(text, '<append_file\\s+path=Q([^Q]+)Q>([\\s\\S]*?)<\\/append_file>')) {
1179
- calls.push(['append', match[1], match[2]]);
1180
- }
1181
-
1182
- for (const match of text.matchAll(/<list_dir>([\s\S]*?)<\/list_dir>/g)) {
1183
- calls.push(['list_dir', _unwrapInnerTag(match[1]).trim()]);
1184
- }
1185
-
1186
- for (const match of text.matchAll(/<search_files>([\s\S]*?)<\/search_files>/g)) {
1187
- calls.push(['search_files', _unwrapInnerTag(match[1]).trim(), '.']);
1188
- }
1189
-
1190
- for (const match of _matchDual(text, '<search_files\\s+pattern=Q([^Q]+)Q(?:\\s+dir=Q([^Q]*)Q)?\\s*(?:><\\/search_files>|\\/>)')) {
1191
- calls.push(['search_files', match[1], match[2] || '.']);
1192
- }
1193
-
1194
- for (const match of text.matchAll(/<delete_file>([\s\S]*?)<\/delete_file>/g)) {
1195
- calls.push(['delete_file', _unwrapInnerTag(match[1]).trim()]);
1196
- }
1197
-
1198
- for (const match of text.matchAll(/<make_dir>([\s\S]*?)<\/make_dir>/g)) {
1199
- calls.push(['make_dir', _unwrapInnerTag(match[1]).trim()]);
1200
- }
1201
-
1202
- for (const match of text.matchAll(/<remove_dir>([\s\S]*?)<\/remove_dir>/g)) {
1203
- calls.push(['remove_dir', _unwrapInnerTag(match[1]).trim()]);
1204
- }
1205
-
1206
- for (const match of text.matchAll(/<get_env>([\s\S]*?)<\/get_env>/g)) {
1207
- calls.push(['get_env', _unwrapInnerTag(match[1]).trim()]);
1208
- }
1209
-
1210
- for (const match of _matchDual(text, '<set_env\\s+name=Q([^Q]+)Q\\s+value=Q([^Q]*)Q\\s*(?:><\\/set_env>|\\/>)')) {
1211
- calls.push(['set_env', match[1], match[2]]);
1212
- }
1213
-
1214
- for (const match of _matchDual(text, '<move_file\\s+src=Q([^Q]+)Q\\s+dst=Q([^Q]+)Q\\s*(?:><\\/move_file>|\\/>)')) {
1215
- calls.push(['move_file', match[1], match[2]]);
1216
- }
1217
-
1218
- for (const match of _matchDual(text, '<copy_file\\s+src=Q([^Q]+)Q\\s+dst=Q([^Q]+)Q\\s*(?:><\\/copy_file>|\\/>)')) {
1219
- calls.push(['copy_file', match[1], match[2]]);
1220
- }
1221
-
1222
- for (const match of _matchDual(text, '<edit_file\\s+path=Q([^Q]+)Q\\s+line=Q(\\d+)Q>([\\s\\S]*?)<\\/edit_file>')) {
1223
- calls.push(['edit_file', match[1], parseInt(match[2], 10), match[3]]);
1224
- }
1225
-
1226
- for (const match of _matchDual(text, '<search_in_file\\s+path=Q([^Q]+)Q>([\\s\\S]*?)<\\/search_in_file>')) {
1227
- calls.push(['search_in_file', match[1], match[2].trim()]);
1228
- }
1229
-
1230
- for (const match of _matchDual(text, '<replace_in_file\\s+path=Q([^Q]+)Q\\s+search=Q([^Q]*)Q\\s+replace=Q([^Q]*)Q>([\\s\\S]*?)<\\/replace_in_file>')) {
1231
- calls.push(['replace_in_file', match[1], match[2], match[3], match[4].trim()]);
1232
- }
1233
-
1234
- for (const match of text.matchAll(/<download>([\s\S]*?)<\/download>/g)) {
1235
- calls.push(['download', _unwrapInnerTag(match[1]).trim()]);
1236
- }
1237
-
1238
- for (const match of _matchDual(text, '<upload\\s+path=Q([^Q]+)Q>([\\s\\S]*?)<\\/upload>')) {
1239
- calls.push(['upload', match[1], match[2]]);
1240
- }
1241
-
1242
- for (const match of text.matchAll(/<file_stat>([\s\S]*?)<\/file_stat>/g)) {
1243
- calls.push(['file_stat', _unwrapInnerTag(match[1]).trim()]);
1244
- }
1245
-
1246
- for (const match of text.matchAll(/<http_get\b([^>]*?)(?:><\/http_get>|\/>)/g)) {
1247
- const attrStr = match[1];
1248
- const urlMatch = attrStr.match(/url="([^"]+)"/) || attrStr.match(/url='([^']+)'/);
1249
- if (urlMatch) calls.push(['http_get', urlMatch[1]]);
1250
- }
1251
-
1252
- // Inline-content form: <http_get>URL</http_get>. Models mirror the style of
1253
- // <list_dir>, <download>, etc. even though the system prompt advertises the
1254
- // attribute form — accept both so the second tag in a multi-call response
1255
- // isn't silently dropped. Also tolerate `<http_get>url="URL"</http_get>` where
1256
- // the model put the attribute syntax in the body.
1257
- for (const match of text.matchAll(/<http_get>([\s\S]*?)<\/http_get>/g)) {
1258
- const inner = match[1].trim();
1259
- if (!inner) continue;
1260
- const urlAttr = inner.match(/url="([^"]+)"/) || inner.match(/url='([^']+)'/);
1261
- calls.push(['http_get', urlAttr ? urlAttr[1] : _unwrapInnerTag(inner).trim()]);
1262
- }
1263
-
1264
- for (const match of _matchDual(text, '<ask_user\\s+question=Q([^Q]+)Q\\s*(?:><\\/ask_user>|\\/>)')) {
1265
- calls.push(['ask_user', match[1]]);
1266
- }
1267
-
1268
- for (const match of _matchDual(text, '<store_memory\\s+key=Q([^Q]+)Q>([\\s\\S]*?)<\\/store_memory>')) {
1269
- calls.push(['store_memory', match[1], match[2]]);
1270
- }
1271
-
1272
- for (const match of _matchDual(text, '<recall_memory\\s+key=Q([^Q]+)Q\\s*(?:><\\/recall_memory>|\\/>)')) {
1273
- calls.push(['recall_memory', match[1]]);
1274
- }
1275
-
1276
- for (const match of text.matchAll(/<list_memories\s*(?:><\/list_memories>|\/>)/g)) {
1277
- calls.push(['list_memories']);
688
+ // XML/tag path: each tool's parseAttrs (parseXml) lives next to its spec in
689
+ // the tool registry. Entries run in array order, which — together with the
690
+ // wrapper/JSON/fence passes above — reproduces the exact emission order the
691
+ // characterization tests pin (test/extract-tool-calls.test.js). This replaces
692
+ // the ~25 standalone regex blocks that used to be inlined here.
693
+ for (const entry of TOOL_REGISTRY) {
694
+ if (!entry.parseXml) continue;
695
+ for (const call of entry.parseXml(text)) calls.push(call);
1278
696
  }
1279
697
 
1280
- for (const match of text.matchAll(/<system_info\s*(?:><\/system_info>|\/>)/g)) {
1281
- calls.push(['system_info']);
698
+ // Dynamic tools (MCP, Task 3.3) get the same XML pass so non-native models can
699
+ // invoke them via `<mcp__server__tool>{json args}</mcp__server__tool>`.
700
+ for (const entry of dynamicToolEntries()) {
701
+ if (!entry.parseXml) continue;
702
+ for (const call of entry.parseXml(text)) calls.push(call);
1282
703
  }
1283
704
 
1284
705
  return calls;
@@ -1307,6 +728,11 @@ module.exports = {
1307
728
  createToolExecutor,
1308
729
  extractToolCalls,
1309
730
  getSkippedOps,
731
+ // Exported for unit testing (Task 1.1). These pure path guards are otherwise
732
+ // private; exposing them changes no runtime behavior.
733
+ isPathSafe,
734
+ isProtectedSecretPath,
735
+ isProtectedConfigPath,
1310
736
  isUIActive,
1311
737
  mapInvokeToCall,
1312
738
  repairMinimaxMalformedXml,