npm - @semalt-ai/code - Versions diffs - 1.8.5 → 1.20.0 - Mend

@semalt-ai/code 1.8.5 → 1.20.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (192)

package/.claude/settings.local.json +7 -1
package/.github/workflows/ci.yml +69 -0
package/ARCHITECTURE.md +6 -95
package/CLAUDE.md +196 -316
package/README.md +148 -4
package/docs/ARCHITECTURE.md +1321 -0
package/docs/CONFIG.md +340 -0
package/docs/HISTORY.md +245 -0
package/examples/embed.js +74 -0
package/index.js +251 -10
package/lib/agent.js +856 -120
package/lib/api.js +239 -50
package/lib/args.js +74 -2
package/lib/audit.js +23 -1
package/lib/background.js +584 -0
package/lib/checkpoints.js +757 -0
package/lib/commands/auth.js +94 -0
package/lib/commands/chat-session.js +489 -0
package/lib/commands/chat-slash.js +415 -0
package/lib/commands/chat-turn.js +669 -0
package/lib/commands/chat.js +407 -0
package/lib/commands/custom.js +157 -0
package/lib/commands/history-utils.js +66 -0
package/lib/commands/index.js +268 -0
package/lib/commands/mcp.js +113 -0
package/lib/commands/oneshot.js +193 -0
package/lib/commands/registry.js +269 -0
package/lib/commands/tasks.js +89 -0
package/lib/compact.js +87 -0
package/lib/config.js +360 -11
package/lib/constants.js +401 -3
package/lib/deny.js +199 -0
package/lib/doctor.js +160 -0
package/lib/headless.js +202 -0
package/lib/hooks.js +286 -0
package/lib/images.js +270 -0
package/lib/internals.js +49 -0
package/lib/mcp/boundary.js +131 -0
package/lib/mcp/client.js +270 -0
package/lib/mcp/oauth.js +134 -0
package/lib/memory.js +209 -0
package/lib/metrics.js +37 -2
package/lib/payload.js +54 -0
package/lib/permission-rules.js +401 -0
package/lib/permissions.js +123 -26
package/lib/pricing.js +67 -0
package/lib/proc.js +62 -0
package/lib/prompts.js +99 -8
package/lib/sandbox.js +568 -0
package/lib/sdk.js +328 -0
package/lib/secrets.js +211 -0
package/lib/skills.js +223 -0
package/lib/subagents.js +516 -0
package/lib/tool_registry.js +2862 -0
package/lib/tool_specs.js +263 -9
package/lib/tools.js +352 -1039
package/lib/ui/anim.js +86 -0
package/lib/ui/ansi.js +17 -27
package/lib/ui/chat-history.js +253 -71
package/lib/ui/create-ui.js +67 -24
package/lib/ui/diff.js +90 -25
package/lib/ui/file-activity.js +236 -0
package/lib/ui/format.js +195 -29
package/lib/ui/input-field.js +21 -11
package/lib/ui/md-stream.js +234 -0
package/lib/ui/render-operation.js +113 -0
package/lib/ui/select.js +1 -4
package/lib/ui/status-bar.js +146 -36
package/lib/ui/stream.js +20 -13
package/lib/ui/theme.js +190 -44
package/lib/ui/tool-operation.js +190 -0
package/lib/ui/utils.js +9 -5
package/lib/ui/web-activity.js +270 -0
package/lib/ui/writer.js +159 -45
package/lib/ui.js +1 -1
package/lib/verify.js +229 -0
package/lib/web-extract.js +213 -0
package/lib/web-summarize.js +68 -0
package/package.json +19 -4
package/scripts/lint.js +57 -0
package/test/agent-loop.test.js +389 -0
package/test/anim-driver.test.js +153 -0
package/test/ask-user-display.test.js +226 -0
package/test/ask-user-gate.test.js +231 -0
package/test/background.test.js +414 -0
package/test/chat-history-nocolor.test.js +155 -0
package/test/chat-relogin.test.js +207 -0
package/test/chat.test.js +114 -0
package/test/checkpoints-agent.test.js +181 -0
package/test/checkpoints.test.js +650 -0
package/test/command-registry.test.js +160 -0
package/test/compact.test.js +116 -0
package/test/completion-lazy.test.js +52 -0
package/test/config-merge.test.js +324 -0
package/test/config-quarantine.test.js +128 -0
package/test/config-write-guard-allow-anywhere.test.js +56 -0
package/test/config-write-guard-skip.test.js +46 -0
package/test/config-write-guard.test.js +153 -0
package/test/context-split.test.js +215 -0
package/test/cost-doctor.test.js +142 -0
package/test/custom-commands-chat.test.js +106 -0
package/test/custom-commands.test.js +230 -0
package/test/defer-detail-band.test.js +403 -0
package/test/deny-windows.test.js +120 -0
package/test/deny.test.js +83 -0
package/test/detail-band-tab-flatten.test.js +242 -0
package/test/download-allow-anywhere.test.js +66 -0
package/test/download-confine.test.js +153 -0
package/test/exec-diff.test.js +268 -0
package/test/executors.test.js +599 -0
package/test/extract-tool-calls.test.js +349 -0
package/test/fetch-url-validation.test.js +219 -0
package/test/file-activity.test.js +522 -0
package/test/fixtures/tool-calls.js +57 -0
package/test/fixtures/web-page.js +91 -0
package/test/git-tools.test.js +384 -0
package/test/grep-glob-serialize.test.js +242 -0
package/test/grep-glob.test.js +268 -0
package/test/grep-path-target.test.js +227 -0
package/test/harness/README.md +57 -0
package/test/harness/chat-harness.js +143 -0
package/test/harness/memwarn-headless-child.js +65 -0
package/test/harness/mock-llm.js +120 -0
package/test/harness/mock-mcp-server.js +142 -0
package/test/harness/sse-server.js +69 -0
package/test/headless.test.js +348 -0
package/test/history-utils.test.js +88 -0
package/test/hooks-agent.test.js +238 -0
package/test/hooks-verify-sandbox.test.js +232 -0
package/test/hooks.test.js +216 -0
package/test/http-get-user-agent.test.js +142 -0
package/test/images-api.test.js +208 -0
package/test/images.test.js +238 -0
package/test/input-field-ctrl-o.test.js +37 -0
package/test/live-height-physical.test.js +281 -0
package/test/max-iterations.test.js +218 -0
package/test/mcp-boundary.test.js +57 -0
package/test/mcp-client.test.js +267 -0
package/test/mcp-oauth.test.js +86 -0
package/test/md-stream.test.js +183 -0
package/test/memory-truncation-warning.test.js +222 -0
package/test/memory.test.js +198 -0
package/test/native-dispatch.test.js +409 -0
package/test/native-live-narration.test.js +254 -0
package/test/output-chokepoint.test.js +188 -0
package/test/output-heredoc-leak.test.js +195 -0
package/test/output-preview.test.js +245 -0
package/test/path-guards.test.js +134 -0
package/test/payload.test.js +99 -0
package/test/permission-rules-agent.test.js +210 -0
package/test/permission-rules.test.js +297 -0
package/test/permissions.test.js +362 -0
package/test/plan-mode.test.js +167 -0
package/test/read-paginate.test.js +275 -0
package/test/readonly-tools.test.js +177 -0
package/test/render-operation.test.js +317 -0
package/test/replay-descriptor-xml.test.js +216 -0
package/test/replay-descriptor.test.js +189 -0
package/test/replay-web-aggregate.test.js +291 -0
package/test/replay-web-persist.test.js +241 -0
package/test/result-cap.test.js +233 -0
package/test/running-glyph-anim.test.js +111 -0
package/test/sandbox-agent.test.js +147 -0
package/test/sandbox-integration.test.js +216 -0
package/test/sandbox.test.js +408 -0
package/test/sdk.test.js +234 -0
package/test/shell-output-cap.test.js +181 -0
package/test/skills-chat.test.js +110 -0
package/test/skills.test.js +295 -0
package/test/smoke.test.js +68 -0
package/test/status-bar-driver.test.js +93 -0
package/test/status-bar-pause.test.js +164 -0
package/test/status-bar-resync.test.js +188 -0
package/test/stream-parser.test.js +171 -0
package/test/subagents-agent.test.js +178 -0
package/test/subagents.test.js +222 -0
package/test/theme-palette.test.js +166 -0
package/test/tool-registry.test.js +85 -0
package/test/trim-budget.test.js +101 -0
package/test/truncate-visible.test.js +78 -0
package/test/verify-agent.test.js +317 -0
package/test/verify.test.js +141 -0
package/test/view-image.test.js +199 -0
package/test/web-activity-ordering.test.js +203 -0
package/test/web-activity.test.js +207 -0
package/test/web-data-extraction-guidance.test.js +71 -0
package/test/web-extract.test.js +185 -0
package/test/web-fetch-agent.test.js +291 -0
package/test/web-fetch-mode.test.js +193 -0
package/test/web-search.test.js +380 -0
package/lib/commands.js +0 -1438
package/path +0 -1

package/lib/tools.js CHANGED

@@ -1,21 +1,34 @@
 'use strict';
-const fs = require('fs');
-const fsp = require('fs/promises');
-const http = require('http');
-const https = require('https');
 const os = require('os');
 const path = require('path');
+// Note: fs / fs/promises / http / https are no longer required here — the file
+// and network executors moved to lib/tool_registry.js in Task 1.4b. tools.js now
+// keeps only the path/sandbox guards, the shell executor, and the parse glue.
 const { spawn } = require('child_process');
 const { logToolCall } = require('./audit');
 const { spawnWithGroup, killTreeEscalating } = require('./proc');
+const { classifyShellCommand } = require('./deny');
+const { resolveSandboxedSpawn } = require('./sandbox');
+const { TOOL_REGISTRY, fromInvoke, entryForAction, dynamicToolEntries } = require('./tool_registry');
 const writer = require('./ui/writer');
+const { CONFIG_PATH, protectedConfigDirs } = require('./constants');
 const MEMORY_PATH = path.join(os.homedir(), '.semalt-ai', 'memory.json');
 const _dryRun = process.argv.includes('--dry-run');
 const _allowAnywhere = process.argv.includes('--allow-anywhere');
+// Binary network isolation (Task 4.4b): the human-typed --no-network flag forces
+// kernel-level no-network for sandboxed commands. Read once at module load — a
+// human-only signal the model can never reach (the model controls only the
+// command string). sandbox.network in config does the same via decideSandbox.
+const _noNetwork = process.argv.includes('--no-network');
+// The single, explicit opt-out of ALL safety. When set, the destructive-command
+// deny-list and the config-file read guard are bypassed. This is the only flag
+// that does so — --allow-* tier flags grant approval but never disable the
+// deny-list. See lib/deny.js and Task 0.1.
+const _skipPermissions = process.argv.includes('--dangerously-skip-permissions');
 const _skippedOps = [];
 function getSkippedOps() { return _skippedOps.slice(); }
@@ -56,38 +69,166 @@ function _sandboxError(filePath) {
   return { error: `Path outside allowed area: ${filePath}. Use --allow-anywhere to override.` };
 }
-// Cheap ReDoS guard. Rejects pathologically long patterns, common
-// catastrophic-backtracking anti-patterns, and pattern×data sizes large
-// enough to hang the regex engine.
-function _checkRegexSafety(pattern, data) {
+// Files that hold secrets or sensitive history and must NEVER be read back into
+// the model context — the API key / auth token live in config.json, the memory
+// store may contain secrets, and the audit log records past tool inputs. This
+// guard is intentionally NOT gated on --allow-anywhere: --allow-anywhere widens
+// where the agent may write, it does not unlock secret exfiltration. The only
+// override is --dangerously-skip-permissions (opting out of all safety).
+const _AUDIT_LOG_PATH = path.join(os.homedir(), '.semalt-ai', 'audit.log');
+const PROTECTED_READ_PATHS = new Set([
+  path.resolve(CONFIG_PATH),
+  path.resolve(MEMORY_PATH),
+  path.resolve(_AUDIT_LOG_PATH),
+]);
+function isProtectedSecretPath(filePath) {
+  if (_skipPermissions) return false;
+  if (typeof filePath !== 'string' || !filePath) return false;
+  return PROTECTED_READ_PATHS.has(path.resolve(filePath));
+}
+function _secretReadError(filePath) {
+  return { error: `Refused: ${filePath} holds secrets/credentials and cannot be read by the agent. (This guard is not overridable with --allow-anywhere.)` };
+}
+// Config/execution-driving paths that the AGENT must never WRITE — the write-side
+// companion to the read guard above (Pre-Task 5.0b). Covers the protected-config
+// set (lib/constants.js protectedConfigDirs): the whole ~/.semalt-ai dir AND
+// every project .semalt dir from the CWD up to the repo root, INCLUDING files
+// that do not yet exist (a missing config.json / agents/*.md / hook is inside a
+// protected DIR, so it is refused regardless of existence — the CVE-2026-25725
+// lesson). Directory-prefix matched on the resolved path, mirroring isPathSafe.
+//
+// Policy parity with isProtectedSecretPath: NOT gated on --allow-anywhere
+// (--allow-anywhere widens WHERE the agent may write, it does not unlock writing
+// the config surfaces that drive host-privileged execution). The only override is
+// --dangerously-skip-permissions (opting out of all safety). A human editing
+// their own config in an editor is unaffected — this guards the agent's tools and
+// the sandboxed shell, not the human.
+function isProtectedConfigPath(filePath) {
+  if (_skipPermissions) return false;
+  if (typeof filePath !== 'string' || !filePath) return false;
+  const resolved = path.resolve(filePath);
+  for (const dir of protectedConfigDirs()) {
+    const base = path.resolve(dir);
+    const prefix = base.endsWith(path.sep) ? base : base + path.sep;
+    if (resolved === base || resolved.startsWith(prefix)) return true;
+  }
+  return false;
+}
+function _protectedConfigWriteError(filePath) {
+  return { error: `Refused: ${filePath} is a protected config path (under ~/.semalt-ai or a project .semalt dir) that drives execution and cannot be written by the agent. (This guard is not overridable with --allow-anywhere.)` };
+}
+// Active (unescaped) regex metacharacters. A search pattern that contains NONE
+// of these — or one the caller explicitly marks `literal` — is a plain literal:
+// matching it (via split/join or indexOf) is O(dataLen) and CANNOT backtrack, so
+// the regex-ReDoS bounds below DO NOT apply. This is what makes the intended
+// copy-a-block-then-replace workflow work at any length (read_file defaults line
+// numbers OFF specifically to keep snippets copyable, lib/agent.js): a long
+// literal block is never rejected for its length.
+const _REGEX_META = new Set(['.', '*', '+', '?', '^', '$', '{', '}', '(', ')', '|', '[', ']']);
+function _hasActiveRegexMeta(pattern) {
+  if (typeof pattern !== 'string') return false;
+  for (let i = 0; i < pattern.length; i++) {
+    const ch = pattern[i];
+    if (ch === '\\') { i++; continue; } // the escaped next char is inert, skip it
+    if (_REGEX_META.has(ch)) return true;
+  }
+  return false;
+}
+// Decide literal vs regex. A pattern is matched literally when the caller forces
+// it (`literal: true` — for copied code blocks that legitimately contain
+// regex-special chars like `(` or `[`), or when auto-detection finds no active
+// regex metacharacter at all (the pasted plain-text-block case).
+function _isLiteralPattern(pattern, literal) {
+  if (literal === true) return true;
+  return !_hasActiveRegexMeta(pattern);
+}
+// ReDoS guard for the REGEX path only. Literals bypass it entirely — they cannot
+// backtrack, so their length is irrelevant. Catastrophic backtracking comes from
+// nested quantifiers (Check B below), NOT from pattern length: the old
+// `dataLen * pattern.length` proxy (Check C) is gone because it penalized exactly
+// the safe dimension — it rejected long *literals* (e.g. any block over ~250
+// chars on a 40 KB file) while a short bomb like `(a+)+$` (length 6) sailed
+// straight past it. For a genuine regex we keep two real protections:
+//   • a sanity length cap — a multi-thousand-char metacharacter-heavy pattern is
+//     suspicious and serves no legitimate purpose (literals use `literal:true`);
+//   • the nested-quantifier detector, which is the actual backtracking guard.
+function _checkRegexSafety(pattern, data, literal) {
   if (typeof pattern !== 'string') return null;
+  if (_isLiteralPattern(pattern, literal)) return null; // literal: O(dataLen), unbounded by length
   if (pattern.length > 1000) {
-    return { error: 'Pattern rejected: length exceeds 1000 chars' };
+    return { error: 'Regex rejected: length exceeds 1000 chars (use literal:true to match a long block verbatim)' };
   }
   if (/(\(.*[+*].*\).*[+*])|(\[.*\].*[+*].*[+*])/.test(pattern)) {
     return { error: 'Pattern rejected: potentially catastrophic backtracking' };
   }
-  const dataLen = typeof data === 'string' ? data.length : 0;
-  if (dataLen * pattern.length > 10_000_000) {
-    return { error: 'Pattern too complex for input size' };
-  }
   return null;
 }
-function createToolExecutor(permissionManager, ui, getConfig) {
+// The single authority for splitting an ask_user question into its menu. A line
+// matching `^\s*\d+[.)]\s+(.+)$` is a numbered OPTION; every other line is
+// PROMPT prose. Returns { prompt, options } where `prompt` is the non-numbered
+// lines joined (trimmed) and `options` is the option labels — but ONLY when
+// there are ≥2 of them (a lone "1." is prose, not a menu), matching the prior
+// _parseNumberedOptions contract. Display-only: the caller still hands the FULL
+// original question to the model. Pure; safe on null/non-string (auto-answer
+// paths pass arbitrary text).
+function parseAskMenu(text) {
+  const options = [];
+  const promptLines = [];
+  for (const line of String(text == null ? '' : text).split('\n')) {
+    const m = line.match(/^\s*\d+[.)]\s+(.+)$/);
+    if (m) options.push(m[1].trim());
+    else promptLines.push(line);
+  }
+  return {
+    prompt: promptLines.join('\n').trim(),
+    options: options.length >= 2 ? options : [],
+  };
+}
+function createToolExecutor(permissionManager, ui, getConfig, options = {}) {
   const { BOLD, DIM, FG_DARK, FG_GRAY, FG_GREEN, FG_RED, FG_YELLOW, RST, renderDiff } = ui;
+  // Checkpoints & rewind (Task 4.3). When a store is wired, the prior state of a
+  // file is snapshotted in agentExecFile AFTER the permission gate approves and
+  // BEFORE the executor mutates. Optional — undefined in tests/headless paths
+  // that don't opt in. Subagents reuse this same agentExecFile, so a child's
+  // mutations are checkpointed into the parent session for free.
+  const checkpointStore = options.checkpointStore || null;
+  // OS sandbox fallback approver (Task 4.4). When the sandbox is unavailable in
+  // `auto` mode, agentExecShell asks a HUMAN before running a command
+  // unsandboxed via this callback (it returns true to allow, false to refuse).
+  // Undefined in non-TTY/headless/test paths → the command is REFUSED (never a
+  // silent unsandboxed run). This is injected by the executor owner (index.js),
+  // never reachable by the model, so the agent can't approve its own escape.
+  const onUnsandboxed = typeof options.onUnsandboxed === 'function' ? options.onUnsandboxed : null;
+  // Web-fetch secondary summarizer (Task W.1). An injected async LLM call
+  // `(messages, { model, signal }) => Promise<string>` (the api client's
+  // chatComplete) used by http_get to summarize extracted page content in a
+  // separate call — only the summary enters the main context. Optional: when
+  // absent (headless/oneshot paths without an api client), http_get returns the
+  // extracted Markdown instead of summarizing, never the raw page.
+  const webChat = typeof options.webChat === 'function' ? options.webChat : null;
+  // Web search backend (Task W.2b). An injected async call
+  // `(query, { count }) => Promise<{ results: [{title,url,snippet}], … }>` (the
+  // api client's dashboardSearch, which hits the backend POST /api/search).
+  // Optional: when absent (headless/oneshot paths without an api client) the
+  // web_search tool degrades to a clean tool error, never a crash.
+  const webSearch = typeof options.webSearch === 'function' ? options.webSearch : null;
   // Continuation lines in a system-message bubble (chat-history.js else branch)
   // are indented by 5 spaces. Let the diff renderer reserve those columns so
   // its lines don't auto-wrap inside the bubble.
   const DIFF_BUBBLE_INSET = 5;
   function _parseNumberedOptions(text) {
-    const options = [];
-    for (const line of text.split('\n')) {
-      const m = line.match(/^\s*\d+[.)]\s+(.+)$/);
-      if (m) options.push(m[1].trim());
-    }
-    return options.length >= 2 ? options : [];
+    return parseAskMenu(text).options;
   }
   // Build the permission descriptor for a [action, ...args] call tuple.
@@ -125,95 +266,60 @@ function createToolExecutor(permissionManager, ui, getConfig) {
       delete_file: 'delete_file',
       move_file: 'move_file',
       copy_file: 'copy_file',
+      download: 'download',
+      edit_file: 'edit_file',
+      replace_in_file: 'replace_in_file',
+      make_dir: 'make_dir',
+      remove_dir: 'remove_dir',
+      upload: 'upload',
     };
     const roTag = READONLY_TAG[action];
     if (roTag && permissionManager.readonlyBlock(roTag)) return null;
-    switch (action) {
-      case 'shell':
-      case 'exec':
-        return { actionType: 'shell', description: args[0] || '', tag: 'exec' };
-      case 'write':
-      case 'append': {
-        const filePath = args[0];
-        const content = args[1];
-        const tag = action === 'write' ? 'write_file' : 'append_file';
-        let existing = '';
-        try { existing = await fsp.readFile(filePath, 'utf8'); } catch {}
-        const finalContent = action === 'write' ? (content || '') : (existing + (content || ''));
-        const diffOutput = _uiActive
-          ? renderDiff(existing, finalContent, filePath, { inset: DIFF_BUBBLE_INSET })
-          : renderDiff(existing, finalContent, filePath);
-        if (!_uiActive) writer.scrollback(diffOutput);
-        // Dry-run renders the diff (above) but skips the picker — the
-        // executor's dry-run early return reports the skip.
-        if (_dryRun) return null;
-        let desc = `${action === 'write' ? 'Write' : 'Append to'} ${filePath}`;
-        if (content) desc += ` (${content.length} chars)`;
-        if (_uiActive) desc = `${desc}\n${diffOutput}`;
-        return { actionType: 'file', description: desc, tag };
-      }
-      case 'delete_file': {
-        const filePath = args[0];
-        _log(`  ${FG_YELLOW}${BOLD}⚠ Deleting: ${filePath}${RST}`);
-        return { actionType: 'file', description: `Delete ${filePath}`, tag: 'delete_file' };
-      }
-      case 'make_dir':
-        return { actionType: 'file', description: `Create directory ${args[0]}`, tag: 'make_dir' };
-      case 'remove_dir':
-        return { actionType: 'file', description: `Remove directory ${args[0]}`, tag: 'remove_dir' };
+    // Per-tool descriptor now lives on the registry entry (Task 1.4b). Read-only
+    // ops resolve to a permission() that returns null (no gate). The side effects
+    // that used to live in the switch cases (write/append diff render,
+    // delete/move warning lines) moved into those permission() bodies unchanged.
+    const entry = entryForAction(action);
+    if (entry && typeof entry.permission === 'function') return entry.permission(ctx, args);
+    return null;
+  }
-      case 'move_file': {
-        const src = args[0];
-        const dst = args[1];
-        _log(`  ${FG_YELLOW}${BOLD}⚠ Moving: ${src} → ${dst}${RST}`);
-        return { actionType: 'file', description: `Move ${src} to ${dst}`, tag: 'move_file' };
+  async function agentExecShell(command, options = {}) {
+    // Destructive-command deny-list. Enforced for EVERY shell call regardless
+    // of approval mode (interactive, non-TTY, or any --allow-* flag). This is
+    // the unbypassable chokepoint: all shell execution funnels through here.
+    // The only escape hatch is --dangerously-skip-permissions.
+    //
+    // The `initiator` distinguishes agent-initiated calls (the model asked) from
+    // user-initiated ones (a human typed `!cmd` / `semalt-code shell`). Agent
+    // calls keep the hard block. User calls are exempt from the block, except for
+    // the catastrophic subset (disk wipe / fork bomb), which gets a one-time y/N
+    // confirmation via options.confirm as a typo guard. See lib/deny.js.
+    const initiator = options.initiator === 'user' ? 'user' : 'agent';
+    if (!_skipPermissions) {
+      const verdict = classifyShellCommand(command, initiator);
+      if (verdict.action === 'block') {
+        const msg = `Blocked by safety deny-list: ${verdict.label}. Refuse to run: ${command}. To override, restart with --dangerously-skip-permissions.`;
+        _log(`  ${FG_RED}✗ ${msg}${RST}`);
+        logToolCall('exec', { command }, false, 'denied');
+        return { exit_code: -1, stdout: '', stderr: msg, blocked: true };
+      }
+      if (verdict.action === 'confirm') {
+        let approved = false;
+        if (typeof options.confirm === 'function') {
+          try { approved = await options.confirm(verdict.label, command); }
+          catch { approved = false; }
+        }
+        if (!approved) {
+          const msg = `Cancelled (${verdict.label}): ${command}`;
+          _log(`  ${FG_RED}✗ ${msg}${RST}`);
+          logToolCall('exec', { command }, false, 'cancelled');
+          return { exit_code: -1, stdout: '', stderr: msg, blocked: true };
+        }
       }
-      case 'copy_file':
-        return { actionType: 'file', description: `Copy ${args[0]} to ${args[1]}`, tag: 'copy_file' };
-      case 'edit_file':
-        return { actionType: 'file', description: `Edit line ${args[1]} in ${args[0]}`, tag: 'edit_file' };
-      case 'replace_in_file':
-        return { actionType: 'file', description: `Replace in ${args[0]}`, tag: 'replace_in_file' };
-      case 'set_env':
-        return { actionType: 'env', description: `Set env ${args[0]}=${args[1] || ''}`, tag: 'set_env' };
-      case 'download':
-        return { actionType: 'net', description: `Download ${args[0]}`, tag: 'download' };
-      case 'upload':
-        return { actionType: 'file', description: `Upload to ${args[0]}`, tag: 'upload' };
-      case 'http_get':
-        return { actionType: 'net', description: `HTTP GET ${args[0]}`, tag: 'http_get' };
-      // ask_user is a real gate — "do you want me to ask the user this
-      // question?" — separate from the question prompt itself (which is
-      // captureSelect or stdin further down in the executor). Lifted here
-      // so the activity bubble doesn't pre-date grant.
-      case 'ask_user':
-        return { actionType: 'user', description: `Ask user: ${args[0]}`, tag: 'ask_user' };
-      case 'store_memory':
-        return { actionType: 'memory', description: `Store memory: ${args[0]}`, tag: 'store_memory' };
-      default:
-        return null;
     }
-  }
-  async function agentExecShell(command, options = {}) {
     if (_dryRun) {
       _log(`  ${FG_DARK}[dry-run] $ ${command}${RST}`);
       _skippedOps.push({ category: 'cmd', symbol: '▶', desc: command });
@@ -225,18 +331,55 @@ function createToolExecutor(permissionManager, ui, getConfig) {
     const timeout = cfg.command_timeout_ms || 30000;
     const { signal } = options;
+    // ---------------------------------------------------------------------
+    // OS sandbox (Task 4.4; unified chokepoint Pre-Task 5.0a). EVERY shell call
+    // — here, self-verification, and command-type hooks — funnels through the
+    // SHARED resolveSandboxedSpawn shim, so the model has no path that runs a
+    // command outside this decision. --dangerously-skip-permissions (a
+    // human-only flag) opts out of all safety, sandbox included.
+    //
+    //   run:true  → spawn the resolved file/args (jailed when sandbox 'on';
+    //               plain { shell:true } when 'off'/human-approved 'unavailable').
+    //   run:false → fail-safe refusal: failIfUnavailable hard error (hard:true)
+    //               or no/declined human approval — NEVER a silent unsandboxed run.
+    // ---------------------------------------------------------------------
+    const resolution = await resolveSandboxedSpawn({
+      command,
+      getConfig,
+      onUnsandboxed,
+      cwd: process.cwd(),
+      allowAnywhere: _allowAnywhere,
+      skipPermissions: _skipPermissions,
+      noNetwork: _noNetwork,
+    });
+    if (!resolution.run) {
+      _log(`  ${FG_RED}✗ ${resolution.message}${RST}`);
+      logToolCall('exec', { command, sandbox: 'unavailable' }, false, resolution.hard ? 'sandbox-blocked' : 'sandbox-refused');
+      return { exit_code: -1, stdout: '', stderr: resolution.message, blocked: true, sandbox: 'unavailable' };
+    }
+    const spawnFile = resolution.file;
+    const spawnArgs = resolution.useShell ? [] : resolution.args;
+    const spawnOpts = resolution.useShell ? { shell: true } : {};
+    const sandboxStatus = resolution.sandbox;
+    // Binary network mode for this run (Task 4.4b): 'on' (host network) | 'off'
+    // (kernel-level no-network). Surfaced in the result + audit (net:on|off).
+    const networkStatus = resolution.network || 'on';
     return new Promise((resolve) => {
       let child;
       try {
         // spawnWithGroup gives us a process-group leader on POSIX so
         // killTreeEscalating can reach descendants via -pid. With shell:true
         // a plain child.kill targets only the sh wrapper, leaving the real
-        // workload (find /, pipelines, etc.) running as orphans.
-        child = spawnWithGroup(spawn, command, [], { shell: true });
+        // workload (find /, pipelines, etc.) running as orphans. When sandboxed,
+        // the group leader is the bwrap/sandbox-exec process — killing the group
+        // tears down the whole jailed subtree, so child-process confinement
+        // composes with the existing tree-kill plumbing.
+        child = spawnWithGroup(spawn, spawnFile, spawnArgs, spawnOpts);
       } catch (error) {
         _log(`  ${FG_RED}✗ ${error.message}${RST}`);
-        logToolCall('exec', { command }, true, 'error');
-        return resolve({ exit_code: -1, stdout: '', stderr: error.message });
+        logToolCall('exec', { command, sandbox: sandboxStatus, network: networkStatus }, true, 'error');
+        return resolve({ exit_code: -1, stdout: '', stderr: error.message, sandbox: sandboxStatus, network: networkStatus });
       }
       const startedAt = Date.now();
       let stdout = '';
@@ -277,8 +420,8 @@ function createToolExecutor(permissionManager, ui, getConfig) {
         clearTimeout(timer);
         detachAbort();
         _log(`  ${FG_RED}✗ ${error.message}${RST}`);
-        logToolCall('exec', { command }, true, 'error');
-        resolve({ exit_code: -1, stdout, stderr: stderr || error.message });
+        logToolCall('exec', { command, sandbox: sandboxStatus, network: networkStatus }, true, 'error');
+        resolve({ exit_code: -1, stdout, stderr: stderr || error.message, sandbox: sandboxStatus, network: networkStatus });
       });
       child.on('close', (code, sigName) => {
         clearTimeout(timer);
@@ -287,14 +430,14 @@ function createToolExecutor(permissionManager, ui, getConfig) {
           const elapsed_s = Math.max(0, Math.round((Date.now() - startedAt) / 1000));
           const note = `[user interrupted after ${elapsed_s}s]`;
           stderr += (stderr ? '\n' : '') + note;
-          logToolCall('exec', { command }, true, 'aborted');
-          resolve({ exit_code: -1, stdout, stderr, aborted: true, elapsed_s });
+          logToolCall('exec', { command, sandbox: sandboxStatus, network: networkStatus }, true, 'aborted');
+          resolve({ exit_code: -1, stdout, stderr, aborted: true, elapsed_s, sandbox: sandboxStatus, network: networkStatus });
           return;
         }
         if (killed) stderr += (stderr ? '\n' : '') + `[timed out after ${timeout}ms]`;
         const exit_code = killed ? -1 : (code != null ? code : (sigName ? -1 : 0));
-        logToolCall('exec', { command }, true, exit_code === 0 ? 'ok' : 'error');
-        resolve({ exit_code, stdout, stderr });
+        logToolCall('exec', { command, sandbox: sandboxStatus, network: networkStatus }, true, exit_code === 0 ? 'ok' : 'error');
+        resolve({ exit_code, stdout, stderr, sandbox: sandboxStatus, network: networkStatus });
       });
     });
   }
@@ -312,719 +455,69 @@ function createToolExecutor(permissionManager, ui, getConfig) {
       signal = last.signal || null;
       args = rest.slice(0, -1);
     }
-    const [arg0 = null, arg1 = null, arg2 = null, arg3 = null] = args;
-    if (action === 'read') {
-      const filePath = arg0;
-      const startedAt = Date.now();
-      const stat = await fsp.stat(filePath).catch(() => null);
-      if (stat) {
-        const cfg = getConfig ? getConfig() : {};
-        const maxBytes = (cfg.max_file_size_kb || 512) * 1024;
-        if (stat.size > maxBytes) {
-          const kb = (stat.size / 1024).toFixed(0);
-          logToolCall('read_file', { path: filePath }, false, 'error');
-          return { error: `File too large: ${kb} KB exceeds max_file_size_kb=${cfg.max_file_size_kb || 512}` };
-        }
-      }
-      if (signal && signal.aborted) {
-        logToolCall('read_file', { path: filePath }, true, 'aborted');
-        return { aborted: true, elapsed_s: Math.max(0, Math.round((Date.now() - startedAt) / 1000)) };
-      }
-      try {
-        const data = await fsp.readFile(filePath, { encoding: 'utf8', signal: signal || undefined });
-        const lines = data.split('\n').length;
-        if (lines > 10) {
-          _log(`  ${FG_GREEN}✓${RST} ${FG_GRAY}Read ${filePath} (${lines} lines, ${data.length} chars)${RST}`);
-        } else {
-          _log(`  ${FG_GREEN}✓${RST} ${FG_GRAY}Read ${filePath}${RST}`);
-        }
-        logToolCall('read_file', { path: filePath }, true, 'ok');
-        return { content: data, path: filePath, bytes: Buffer.byteLength(data, 'utf8') };
-      } catch (error) {
-        if (error && (error.name === 'AbortError' || error.code === 'ABORT_ERR')) {
-          logToolCall('read_file', { path: filePath }, true, 'aborted');
-          return { aborted: true, elapsed_s: Math.max(0, Math.round((Date.now() - startedAt) / 1000)) };
-        }
-        _log(`  ${FG_RED}✗ ${error.message}${RST}`);
-        logToolCall('read_file', { path: filePath }, true, 'error');
-        return { error: error.message };
-      }
-    }
-    if (action === 'write' || action === 'append') {
-      const filePath = arg0;
-      const content = arg1;
-      const tag = action === 'write' ? 'write_file' : 'append_file';
-      const blocked = permissionManager.readonlyBlock(tag);
-      if (blocked) {
-        logToolCall(tag, { path: filePath, content }, false, 'denied');
-        return blocked;
-      }
-      if (!isPathSafe(filePath)) {
-        logToolCall(tag, { path: filePath }, false, 'denied');
-        return _sandboxError(filePath);
-      }
-      // Dry-run: record the skipped op and return without writing. The diff
-      // was already rendered in describePermission ahead of this dispatch.
-      if (_dryRun) {
-        const verb = action === 'write' ? 'write' : 'append';
-        _skippedOps.push({ category: 'file', symbol: '✎', desc: `${verb} ${filePath}` });
-        logToolCall(tag, { path: filePath }, false, 'dry-run');
-        return { status: 'dry-run', message: 'dry-run: write skipped', path: filePath };
-      }
-      try {
-        const dir = path.dirname(filePath);
-        if (dir && dir !== '.') await fsp.mkdir(dir, { recursive: true });
-        if (action === 'write') await fsp.writeFile(filePath, content || '');
-        else await fsp.appendFile(filePath, content || '');
-        const verb = action === 'write' ? 'Wrote' : 'Appended to';
-        _log(`  ${FG_GREEN}✓${RST} ${FG_GRAY}${verb} ${filePath}${RST}`);
-        logToolCall(tag, { path: filePath, content }, true, 'ok');
-        return { status: 'ok', path: filePath, bytes: (content || '').length };
-      } catch (error) {
-        _log(`  ${FG_RED}✗ ${error.message}${RST}`);
-        logToolCall(tag, { path: filePath, content }, true, 'error');
-        return { error: error.message };
-      }
-    }
-    if (action === 'list_dir') {
-      const dirPath = arg0;
-      try {
-        const entries = await fsp.readdir(dirPath, { withFileTypes: true });
-        const items = entries.map((e) => {
-          if (e.isSymbolicLink()) return `[L] ${e.name}`;
-          if (e.isDirectory()) return `[D] ${e.name}`;
-          return `[F] ${e.name}`;
-        });
-        _log(`  ${FG_GREEN}✓${RST} ${FG_GRAY}Listed ${dirPath} (${items.length} items)${RST}`);
-        logToolCall('list_dir', { path: dirPath }, true, 'ok');
-        return { items, path: dirPath };
-      } catch (error) {
-        _log(`  ${FG_RED}✗ ${error.message}${RST}`);
-        logToolCall('list_dir', { path: dirPath }, true, 'error');
-        return { error: error.message };
-      }
-    }
-    if (action === 'delete_file') {
-      const filePath = arg0;
-      const blocked = permissionManager.readonlyBlock('delete_file');
-      if (blocked) {
-        logToolCall('delete_file', { path: filePath }, false, 'denied');
-        return blocked;
-      }
-      if (!isPathSafe(filePath)) {
-        logToolCall('delete_file', { path: filePath }, false, 'denied');
-        return _sandboxError(filePath);
-      }
-      try {
-        await fsp.unlink(filePath);
-        _log(`  ${FG_GREEN}✓${RST} ${FG_GRAY}Deleted ${filePath}${RST}`);
-        logToolCall('delete_file', { path: filePath }, true, 'ok');
-        return { status: 'ok', path: filePath };
-      } catch (error) {
-        _log(`  ${FG_RED}✗ ${error.message}${RST}`);
-        logToolCall('delete_file', { path: filePath }, true, 'error');
-        return { error: error.message };
-      }
-    }
-    if (action === 'make_dir') {
-      const dirPath = arg0;
-      if (!isPathSafe(dirPath)) {
-        logToolCall('make_dir', { path: dirPath }, false, 'denied');
-        return _sandboxError(dirPath);
-      }
-      try {
-        await fsp.mkdir(dirPath, { recursive: true });
-        _log(`  ${FG_GREEN}✓${RST} ${FG_GRAY}Created directory ${dirPath}${RST}`);
-        logToolCall('make_dir', { path: dirPath }, true, 'ok');
-        return { status: 'ok', path: dirPath };
-      } catch (error) {
-        _log(`  ${FG_RED}✗ ${error.message}${RST}`);
-        logToolCall('make_dir', { path: dirPath }, true, 'error');
-        return { error: error.message };
-      }
-    }
-    if (action === 'remove_dir') {
-      const dirPath = arg0;
-      if (!isPathSafe(dirPath)) {
-        logToolCall('remove_dir', { path: dirPath }, false, 'denied');
-        return _sandboxError(dirPath);
-      }
-      try {
-        await fsp.rm(dirPath, { recursive: true, force: true });
-        _log(`  ${FG_GREEN}✓${RST} ${FG_GRAY}Removed directory ${dirPath}${RST}`);
-        logToolCall('remove_dir', { path: dirPath }, true, 'ok');
-        return { status: 'ok', path: dirPath };
-      } catch (error) {
-        _log(`  ${FG_RED}✗ ${error.message}${RST}`);
-        logToolCall('remove_dir', { path: dirPath }, true, 'error');
-        return { error: error.message };
-      }
-    }
-    if (action === 'move_file') {
-      const src = arg0;
-      const dst = arg1;
-      const blocked = permissionManager.readonlyBlock('move_file');
-      if (blocked) {
-        logToolCall('move_file', { src, dst }, false, 'denied');
-        return blocked;
-      }
-      if (!isPathSafe(dst)) {
-        logToolCall('move_file', { src, dst }, false, 'denied');
-        return _sandboxError(dst);
-      }
-      try {
-        const dstDir = path.dirname(dst);
-        if (dstDir && dstDir !== '.') await fsp.mkdir(dstDir, { recursive: true });
-        try {
-          await fsp.rename(src, dst);
-        } catch (renameErr) {
-          if (renameErr.code !== 'EXDEV') throw renameErr;
-          // Cross-device rename not supported — copy then remove
-          await fsp.cp(src, dst, { recursive: true });
-          await fsp.rm(src, { recursive: true, force: true });
-        }
-        _log(`  ${FG_GREEN}✓${RST} ${FG_GRAY}Moved ${src} → ${dst}${RST}`);
-        logToolCall('move_file', { src, dst }, true, 'ok');
-        return { status: 'ok', src, dst };
-      } catch (error) {
-        _log(`  ${FG_RED}✗ ${error.message}${RST}`);
-        logToolCall('move_file', { src, dst }, true, 'error');
-        return { error: error.message };
-      }
-    }
-    if (action === 'copy_file') {
-      const src = arg0;
-      const dst = arg1;
-      const blocked = permissionManager.readonlyBlock('copy_file');
-      if (blocked) {
-        logToolCall('copy_file', { src, dst }, false, 'denied');
-        return blocked;
-      }
-      if (!isPathSafe(dst)) {
-        logToolCall('copy_file', { src, dst }, false, 'denied');
-        return _sandboxError(dst);
-      }
-      try {
-        const dstDir = path.dirname(dst);
-        if (dstDir && dstDir !== '.') await fsp.mkdir(dstDir, { recursive: true });
-        await fsp.cp(src, dst, { recursive: true });
-        _log(`  ${FG_GREEN}✓${RST} ${FG_GRAY}Copied ${src} → ${dst}${RST}`);
-        logToolCall('copy_file', { src, dst }, true, 'ok');
-        return { status: 'ok', src, dst };
-      } catch (error) {
-        _log(`  ${FG_RED}✗ ${error.message}${RST}`);
-        logToolCall('copy_file', { src, dst }, true, 'error');
-        return { error: error.message };
-      }
-    }
-    if (action === 'edit_file') {
-      const filePath = arg0;
-      const lineNum = arg1;
-      const newContent = arg2;
-      try {
-        const data = await fsp.readFile(filePath, 'utf8');
-        const lines = data.split('\n');
-        if (lineNum < 1 || lineNum > lines.length) {
-          logToolCall('edit_file', { path: filePath, line: lineNum }, true, 'error');
-          return { error: `Line ${lineNum} out of range (file has ${lines.length} lines)` };
-        }
-        lines[lineNum - 1] = newContent;
-        await fsp.writeFile(filePath, lines.join('\n'));
-        _log(`  ${FG_GREEN}✓${RST} ${FG_GRAY}Edited line ${lineNum} in ${filePath}${RST}`);
-        logToolCall('edit_file', { path: filePath, line: lineNum }, true, 'ok');
-        return { status: 'ok', path: filePath, line: lineNum };
-      } catch (error) {
-        _log(`  ${FG_RED}✗ ${error.message}${RST}`);
-        logToolCall('edit_file', { path: filePath, line: lineNum }, true, 'error');
-        return { error: error.message };
-      }
-    }
-    if (action === 'search_in_file') {
-      const filePath = arg0;
-      const pattern = arg1;
-      try {
-        const data = await fsp.readFile(filePath, 'utf8');
-        const guardErr = _checkRegexSafety(pattern, data);
-        if (guardErr) {
-          logToolCall('search_in_file', { path: filePath, pattern }, true, 'error');
-          return guardErr;
-        }
-        const regex = new RegExp(pattern);
-        const matches = data.split('\n')
-          .map((content, idx) => regex.test(content) ? { line: idx + 1, content } : null)
-          .filter(Boolean);
-        _log(`  ${FG_GREEN}✓${RST} ${FG_GRAY}Found ${matches.length} match(es) in ${filePath}${RST}`);
-        logToolCall('search_in_file', { path: filePath, pattern }, true, 'ok');
-        return { matches, path: filePath };
-      } catch (error) {
-        _log(`  ${FG_RED}✗ ${error.message}${RST}`);
-        logToolCall('search_in_file', { path: filePath, pattern }, true, 'error');
-        return { error: error.message };
-      }
-    }
-    if (action === 'replace_in_file') {
-      const filePath = arg0;
-      const searchStr = arg1;
-      const replaceStr = arg2;
-      const flags = arg3 || '';
-      try {
-        const data = await fsp.readFile(filePath, 'utf8');
-        const guardErr = _checkRegexSafety(searchStr, data);
-        if (guardErr) {
-          logToolCall('replace_in_file', { path: filePath, search: searchStr }, true, 'error');
-          return guardErr;
-        }
-        const safeFlags = flags.replace(/[^gimsuy]/g, '');
-        const countFlags = safeFlags.includes('g') ? safeFlags : safeFlags + 'g';
-        const count = (data.match(new RegExp(searchStr, countFlags)) || []).length;
-        const regex = new RegExp(searchStr, safeFlags || undefined);
-        const newData = data.replace(regex, replaceStr);
-        await fsp.writeFile(filePath, newData);
-        _log(`  ${FG_GREEN}✓${RST} ${FG_GRAY}Replaced ${count} occurrence(s) in ${filePath}${RST}`);
-        logToolCall('replace_in_file', { path: filePath, search: searchStr }, true, 'ok');
-        return { status: 'ok', path: filePath, count };
-      } catch (error) {
-        _log(`  ${FG_RED}✗ ${error.message}${RST}`);
-        logToolCall('replace_in_file', { path: filePath, search: searchStr }, true, 'error');
-        return { error: error.message };
-      }
-    }
-    if (action === 'search_files') {
-      const pattern = arg0;
-      const searchDir = arg1 || '.';
-      const startedAt = Date.now();
-      try {
-        let regStr = pattern.replace(/[.+^${}()|[\]\\]/g, '\\$&');
-        regStr = regStr.replace(/\*\*/g, '\x00');
-        regStr = regStr.replace(/\*/g, '[^/]*');
-        regStr = regStr.replace(/\x00\//g, '(?:.*/)?');
-        regStr = regStr.replace(/\x00/g, '.*');
-        const regex = new RegExp(`^${regStr}$`);
-        const matchName = !pattern.includes('/');
-        const files = [];
-        async function walk(dir, rel) {
-          if (signal && signal.aborted) return;
-          let entries;
-          try { entries = await fsp.readdir(dir, { withFileTypes: true }); } catch { return; }
-          for (const entry of entries) {
-            if (signal && signal.aborted) return;
-            const relPath = rel ? `${rel}/${entry.name}` : entry.name;
-            if (regex.test(matchName ? entry.name : relPath)) files.push(relPath);
-            if (entry.isDirectory()) await walk(path.join(dir, entry.name), relPath);
-          }
-        }
-        await walk(searchDir, '');
-        if (signal && signal.aborted) {
-          logToolCall('search_files', { pattern, dir: searchDir }, true, 'aborted');
-          return { aborted: true, elapsed_s: Math.max(0, Math.round((Date.now() - startedAt) / 1000)) };
-        }
-        files.sort();
-        _log(`  ${FG_GREEN}✓${RST} ${FG_GRAY}Found ${files.length} file(s) matching "${pattern}"${RST}`);
-        logToolCall('search_files', { pattern, dir: searchDir }, true, 'ok');
-        return { files, pattern, dir: searchDir };
-      } catch (error) {
-        _log(`  ${FG_RED}✗ ${error.message}${RST}`);
-        logToolCall('search_files', { pattern, dir: searchDir }, true, 'error');
-        return { error: error.message };
-      }
-    }
-    if (action === 'file_stat') {
-      const filePath = arg0;
-      try {
-        const stat = await fsp.stat(filePath);
-        const type = stat.isDirectory() ? 'directory' : stat.isSymbolicLink() ? 'symlink' : 'file';
-        const size_kb = (stat.size / 1024).toFixed(2);
-        const mode = '0o' + stat.mode.toString(8);
-        const mtime = stat.mtime.toISOString();
-        _log(`  ${FG_GREEN}✓${RST} ${FG_GRAY}Stat ${filePath}${RST}`);
-        logToolCall('file_stat', { path: filePath }, true, 'ok');
-        return { path: filePath, size_kb, mtime, type, mode };
-      } catch (error) {
-        _log(`  ${FG_RED}✗ ${error.message}${RST}`);
-        logToolCall('file_stat', { path: filePath }, true, 'error');
-        return { error: error.message };
-      }
-    }
-    if (action === 'get_env') {
-      const varName = arg0;
-      const value = process.env[varName];
-      _log(`  ${FG_GREEN}✓${RST} ${FG_GRAY}Got env ${varName}${RST}`);
-      logToolCall('get_env', { name: varName }, true, 'ok');
-      return { name: varName, value: value !== undefined ? value : null };
-    }
-    if (action === 'set_env') {
-      const varName = arg0;
-      const value = arg1 || '';
-      process.env[varName] = value;
-      _log(`  ${FG_GREEN}✓${RST} ${FG_GRAY}Set env ${varName}${RST}`);
-      logToolCall('set_env', { name: varName }, true, 'ok');
-      return { status: 'ok', name: varName };
-    }
-    if (action === 'download') {
-      const url = arg0;
-      if (_dryRun) {
-        _skippedOps.push({ category: 'net', symbol: '↓', desc: `download ${url}` });
-        logToolCall('download', { url }, false, 'dry-run');
-        return { status: 'dry-run', message: 'dry-run: network call skipped' };
-      }
-      let fileName;
-      try {
-        fileName = path.basename(new URL(url).pathname) || 'download';
-      } catch {
-        fileName = 'download';
-      }
-      const outPath = path.join(process.cwd(), fileName);
-      const startedAt = Date.now();
-      return new Promise((resolve) => {
-        let abortedByUser = false;
-        let onAbort = null;
-        let activeReq = null;
-        let activeFile = null;
-        const detachAbort = () => {
-          if (onAbort && signal) {
-            try { signal.removeEventListener('abort', onAbort); } catch {}
-            onAbort = null;
-          }
-        };
-        const finishAborted = () => {
-          fs.unlink(outPath, () => {});
-          logToolCall('download', { url }, true, 'aborted');
-          resolve({ aborted: true, elapsed_s: Math.max(0, Math.round((Date.now() - startedAt) / 1000)) });
-        };
-        if (signal) {
-          if (signal.aborted) {
-            abortedByUser = true;
-            finishAborted();
-            return;
-          }
-          onAbort = () => {
-            abortedByUser = true;
-            try { if (activeReq) activeReq.destroy(new Error('Aborted')); } catch {}
-            try { if (activeFile) activeFile.destroy(); } catch {}
-          };
-          signal.addEventListener('abort', onAbort, { once: true });
-        }
-        function doDownload(target, redirectsLeft) {
-          const proto = target.startsWith('https') ? https : http;
-          const req = proto.get(target, (res) => {
-            if ([301, 302, 303, 307, 308].includes(res.statusCode) && redirectsLeft > 0 && res.headers.location) {
-              res.resume();
-              return doDownload(res.headers.location, redirectsLeft - 1);
-            }
-            if (res.statusCode >= 400) {
-              res.resume();
-              const msg = `HTTP ${res.statusCode}`;
-              detachAbort();
-              _log(`  ${FG_RED}✗ ${msg}${RST}`);
-              logToolCall('download', { url }, true, 'error');
-              return resolve({ error: msg });
-            }
-            const file = fs.createWriteStream(outPath);
-            activeFile = file;
-            res.pipe(file);
-            file.on('finish', () => {
-              file.close();
-              detachAbort();
-              _log(`  ${FG_GREEN}✓${RST} ${FG_GRAY}Downloaded to ${outPath}${RST}`);
-              logToolCall('download', { url }, true, 'ok');
-              resolve({ status: 'ok', path: outPath });
-            });
-            file.on('error', (err) => {
-              if (abortedByUser) {
-                detachAbort();
-                finishAborted();
-                return;
-              }
-              fs.unlink(outPath, () => {});
-              detachAbort();
-              _log(`  ${FG_RED}✗ ${err.message}${RST}`);
-              logToolCall('download', { url }, true, 'error');
-              resolve({ error: err.message });
-            });
-          });
-          activeReq = req;
-          req.on('error', (err) => {
-            if (abortedByUser) {
-              detachAbort();
-              finishAborted();
-              return;
-            }
-            fs.unlink(outPath, () => {});
-            detachAbort();
-            _log(`  ${FG_RED}✗ ${err.message}${RST}`);
-            logToolCall('download', { url }, true, 'error');
-            resolve({ error: err.message });
-          });
-          req.setTimeout(120000, () => {
-            req.destroy();
-            fs.unlink(outPath, () => {});
-            detachAbort();
-            logToolCall('download', { url }, true, 'error');
-            resolve({ error: 'Request timeout' });
-          });
-        }
-        doDownload(url, 5);
-      });
-    }
-    if (action === 'upload') {
-      const filePath = arg0;
-      const encodedContent = arg1 || '';
-      if (!isPathSafe(filePath)) {
-        logToolCall('upload', { path: filePath }, false, 'denied');
-        return _sandboxError(filePath);
-      }
-      try {
-        const dir = path.dirname(filePath);
-        if (dir && dir !== '.') await fsp.mkdir(dir, { recursive: true });
-        const buffer = Buffer.from(encodedContent.trim(), 'base64');
-        await fsp.writeFile(filePath, buffer);
-        _log(`  ${FG_GREEN}✓${RST} ${FG_GRAY}Uploaded ${buffer.length} bytes to ${filePath}${RST}`);
-        logToolCall('upload', { path: filePath }, true, 'ok');
-        return { status: 'ok', path: filePath, bytes: buffer.length };
-      } catch (error) {
-        _log(`  ${FG_RED}✗ ${error.message}${RST}`);
-        logToolCall('upload', { path: filePath }, true, 'error');
-        return { error: error.message };
-      }
-    }
-    if (action === 'http_get') {
-      const url = arg0;
-      if (_dryRun) {
-        _skippedOps.push({ category: 'net', symbol: '↓', desc: `GET ${url}` });
-        logToolCall('http_get', { url }, false, 'dry-run');
-        return { status: 'dry-run', message: 'dry-run: network call skipped' };
-      }
-      const httpCfg = getConfig ? getConfig() : {};
-      const reqTimeoutMs = Math.max(15000, httpCfg.request_timeout_ms || 15000);
-      const maxBytes = Math.max(1024, httpCfg.http_fetch_max_bytes || 262144);
-      const startedAt = Date.now();
-      return new Promise((resolve) => {
-        let abortedByUser = false;
-        let onAbort = null;
-        let activeReq = null;
-        const detachAbort = () => {
-          if (onAbort && signal) {
-            try { signal.removeEventListener('abort', onAbort); } catch {}
-            onAbort = null;
-          }
-        };
-        const finishAborted = () => {
-          logToolCall('http_get', { url }, true, 'aborted');
-          resolve({ aborted: true, elapsed_s: Math.max(0, Math.round((Date.now() - startedAt) / 1000)) });
-        };
-        if (signal) {
-          if (signal.aborted) {
-            abortedByUser = true;
-            finishAborted();
-            return;
-          }
-          onAbort = () => {
-            abortedByUser = true;
-            try { if (activeReq) activeReq.destroy(new Error('Aborted')); } catch {}
-          };
-          signal.addEventListener('abort', onAbort, { once: true });
-        }
-        function doGet(target, redirectsLeft) {
-          const proto = target.startsWith('https') ? https : http;
-          const req = proto.get(target, (res) => {
-            if ([301, 302, 303, 307, 308].includes(res.statusCode) && redirectsLeft > 0 && res.headers.location) {
-              res.resume();
-              return doGet(res.headers.location, redirectsLeft - 1);
-            }
-            const bufs = [];
-            let totalBytes = 0;
-            let capped = false;
-            res.on('data', (chunk) => {
-              totalBytes += chunk.length;
-              if (!capped) {
-                if (totalBytes <= maxBytes) {
-                  bufs.push(chunk);
-                } else {
-                  const keep = maxBytes - (totalBytes - chunk.length);
-                  if (keep > 0) bufs.push(chunk.slice(0, keep));
-                  capped = true;
-                  // Keep the connection draining so totalBytes reflects reality,
-                  // but stop buffering further bytes.
-                }
-              }
-            });
-            res.on('end', () => {
-              if (abortedByUser) return;
-              detachAbort();
-              const kept = Buffer.concat(bufs);
-              const keptBytes = kept.length;
-              let body = kept.toString('utf8');
-              if (capped) {
-                const origKb = (totalBytes / 1024).toFixed(0);
-                const keptKb = (keptBytes / 1024).toFixed(0);
-                const droppedKb = ((totalBytes - keptBytes) / 1024).toFixed(0);
-                body += `\n\n[... truncated: original was ${origKb}KB, showing first ${keptKb}KB. The remaining ${droppedKb}KB was discarded. If you need the rest, narrow your request (e.g. fetch a specific subpage) rather than retrying this URL.]`;
-              }
-              _log(`  ${FG_GREEN}✓${RST} ${FG_GRAY}HTTP GET ${target} (${res.statusCode}, ${totalBytes} bytes${capped ? `, truncated to ${keptBytes}` : ''})${RST}`);
-              logToolCall('http_get', { url: target }, true, res.statusCode < 400 ? 'ok' : 'error');
-              // `bytes` is the total transferred payload length (pre-cap);
-              // consumers that want to know the wire size without parsing
-              // the appended truncation note rely on this.
-              resolve({ status_code: res.statusCode, body, bytes: totalBytes });
-            });
-          });
-          activeReq = req;
-          req.on('error', (err) => {
-            if (abortedByUser) {
-              detachAbort();
-              finishAborted();
-              return;
-            }
-            detachAbort();
-            _log(`  ${FG_RED}✗ ${err.message}${RST}`);
-            logToolCall('http_get', { url: target }, true, 'error');
-            resolve({ error: err.message, error_code: err.code });
-          });
-          req.setTimeout(reqTimeoutMs, () => {
-            req.destroy();
-            detachAbort();
-            logToolCall('http_get', { url: target }, true, 'error');
-            resolve({ error: 'Request timeout', error_code: 'ETIMEDOUT' });
-          });
-        }
-        doGet(url, 5);
-      });
-    }
-    if (action === 'ask_user') {
-      const question = arg0;
-      const options = _parseNumberedOptions(question);
-      if (options.length >= 2) {
-        const selected = await permissionManager.captureSelect({ options });
-        logToolCall('ask_user', { question }, true, 'ok');
-        return { question, answer: selected || options[0] };
-      }
-      if (!process.stdout.isTTY || process.stdin.isRaw) {
-        writer.scrollback(`\n  ${FG_YELLOW}?${RST} ${question}\n  ${DIM}[auto-answering 'y']${RST}`);
-        logToolCall('ask_user', { question }, true, 'ok');
-        return { question, answer: 'y' };
-      }
-      // audit: allowed — inline prompt without trailing newline; unreachable when TUI writer is active
-      // (process.stdin.isRaw is true while the TUI input field holds raw mode).
-      process.stdout.write(`\n  ${FG_YELLOW}?${RST} ${question}\n  ${FG_GRAY}>${RST} `);
-      const buf = Buffer.alloc(4096);
-      let input = '';
-      while (true) {
-        const n = fs.readSync(0, buf, 0, 1);
-        if (n === 0) break;
-        const ch = buf[0];
-        if (ch === 0x0a) break;
-        if (ch === 0x0d) continue;
-        input += String.fromCharCode(ch);
-      }
-      _log();
-      logToolCall('ask_user', { question }, true, 'ok');
-      return { question, answer: input };
-    }
-    if (action === 'store_memory') {
-      const key = arg0;
-      const value = arg1 || '';
-      try {
-        let mem = {};
-        try { mem = JSON.parse(await fsp.readFile(MEMORY_PATH, 'utf8')); } catch {}
-        mem[key] = value;
-        await fsp.mkdir(path.dirname(MEMORY_PATH), { recursive: true });
-        await fsp.writeFile(MEMORY_PATH, JSON.stringify(mem, null, 2));
-        _log(`  ${FG_GREEN}✓${RST} ${FG_GRAY}Stored memory: ${key}${RST}`);
-        logToolCall('store_memory', { key }, true, 'ok');
-        return { status: 'ok', key };
-      } catch (error) {
-        _log(`  ${FG_RED}✗ ${error.message}${RST}`);
-        logToolCall('store_memory', { key }, true, 'error');
-        return { error: error.message };
-      }
-    }
-    if (action === 'recall_memory') {
-      const key = arg0;
-      try {
-        let mem = {};
-        try { mem = JSON.parse(await fsp.readFile(MEMORY_PATH, 'utf8')); } catch {}
-        const found = key in mem;
-        const value = found ? mem[key] : null;
-        _log(`  ${FG_GREEN}✓${RST} ${FG_GRAY}Recalled memory: ${key}${RST}`);
-        logToolCall('recall_memory', { key }, true, 'ok');
-        return { key, value, found };
-      } catch (error) {
-        _log(`  ${FG_RED}✗ ${error.message}${RST}`);
-        logToolCall('recall_memory', { key }, true, 'error');
-        return { error: error.message };
-      }
-    }
-    if (action === 'list_memories') {
-      try {
-        let mem = {};
-        try { mem = JSON.parse(await fsp.readFile(MEMORY_PATH, 'utf8')); } catch {}
-        const keys = Object.keys(mem);
-        _log(`  ${FG_GREEN}✓${RST} ${FG_GRAY}Listed ${keys.length} memory key(s)${RST}`);
-        logToolCall('list_memories', {}, true, 'ok');
-        return { keys };
-      } catch (error) {
-        _log(`  ${FG_RED}✗ ${error.message}${RST}`);
-        logToolCall('list_memories', {}, true, 'error');
-        return { error: error.message };
-      }
-    }
-    if (action === 'system_info') {
-      const info = {
-        platform: os.platform(),
-        arch: os.arch(),
-        hostname: os.hostname(),
-        user: process.env.USER || process.env.USERNAME || '',
-        total_mem_mb: Math.round(os.totalmem() / 1024 / 1024),
-        free_mem_mb: Math.round(os.freemem() / 1024 / 1024),
-        node_version: process.version,
-        cwd: process.cwd(),
-      };
-      _log(`  ${FG_GREEN}✓${RST} ${FG_GRAY}System info: ${info.platform}/${info.arch}${RST}`);
-      logToolCall('system_info', {}, true, 'ok');
-      return info;
-    }
-    logToolCall(action, { action }, false, 'error');
-    return { error: `Unknown action: ${action}` };
-  }
+    // Dispatch via the tool registry (Task 1.4b). The per-action executor body
+    // lives on its TOOL_REGISTRY entry; ctx carries the collaborators the body
+    // closes over. Signature is unchanged, so agent.js stays untouched.
+    const entry = entryForAction(action);
+    if (!entry || typeof entry.execute !== 'function') {
+      logToolCall(action, { action }, false, 'error');
+      return { error: `Unknown action: ${action}` };
+    }
+    // Checkpoint capture (Task 4.3): snapshot prior state BEFORE the mutation
+    // (post-gate — a denied call never reaches here). Skip under --dry-run (no
+    // mutation will happen). beginCapture is fail-safe (returns null on error),
+    // and commit only fires on a status:'ok' result, so a call the executor
+    // itself refuses (--readonly, sandbox) produces no committed checkpoint.
+    let _cp = null;
+    if (checkpointStore && !_dryRun) {
+      try { _cp = await checkpointStore.beginCapture(action, args); } catch { _cp = null; }
+    }
+    const result = await entry.execute(ctx, args, { signal });
+    if (_cp) {
+      try { if (result && result.status === 'ok') _cp.commit(); } catch { /* fail-safe: never block a completed mutation */ }
+    }
+    return result;
+  }
+  // Dependency bag passed to every registry execute()/permission(); agentExecFile hands it to entry.execute() as the first argument.
+  // Built once here so the moved executor bodies see the same collaborators they had inside this factory closure.
+  // It is declared after agentExecFile but initialized before the factory returns, so it exists by the time a call is dispatched.
+  // _uiActive is a getter so it always reflects the live module flag rather than a snapshot.
+  const ctx = {
+    _log,
+    logToolCall,
+    isPathSafe,
+    isProtectedSecretPath,
+    isProtectedConfigPath,
+    _sandboxError,
+    _secretReadError,
+    _protectedConfigWriteError,
+    _checkRegexSafety,
+    _isLiteralPattern,
+    _parseNumberedOptions,
+    _parseAskMenu: parseAskMenu, // the only entry whose key differs from the local name it is taken from
+    _dryRun, // NOTE(review): copied by value at construction, unlike _uiActive below; confirm _dryRun never changes afterwards
+    _skippedOps, // same object as the closure's _skippedOps; presumably the array the dry-run paths push skipped ops onto (verify)
+    MEMORY_PATH,
+    permissionManager, // createToolExecutor parameter
+    getConfig, // createToolExecutor parameter
+    webChat,
+    webSearch,
+    renderDiff,
+    DIFF_BUBBLE_INSET,
+    writer,
+    agentExecShell,
+    FG_GREEN, // FG_*/RST/DIM/BOLD: styling constants interpolated into _log messages
+    FG_GRAY,
+    FG_RED,
+    FG_YELLOW,
+    RST,
+    DIM,
+    BOLD,
+    get _uiActive() { return _uiActive; }, // evaluated on every read, so consumers see the current value
+  };
   return {
     agentExecFile,
@@ -1033,97 +526,11 @@ function createToolExecutor(permissionManager, ui, getConfig) {
   };
 }
-// Map a MiniMax-style {name, params} invocation to the internal
-// [action, arg1, arg2, …] call tuple consumed by the agent loop.
+// Native function-calling path: maps a (toolName, params) invocation to the
+// internal [action, ...args] call tuple. Delegates to the tool registry
+// (lib/tool_registry.js) — the single source of truth shared with the XML
+// path. Kept here (and exported) for backward compatibility.
 function mapInvokeToCall(toolName, params) {
-  const name = (toolName || '').toLowerCase();
-  const p = params || {};
-  switch (name) {
-    case 'write_file':
-    case 'create_file':
-      return p.path ? ['write', p.path, p.content != null ? p.content : ''] : null;
-    case 'read_file':
-      return p.path ? ['read', p.path] : null;
-    case 'append_file':
-      return p.path ? ['append', p.path, p.content != null ? p.content : ''] : null;
-    case 'delete_file':
-      return p.path ? ['delete_file', p.path] : null;
-    case 'list_dir':
-      return ['list_dir', p.path || '.'];
-    case 'make_dir':
-      return p.path ? ['make_dir', p.path] : null;
-    case 'remove_dir':
-      return p.path ? ['remove_dir', p.path] : null;
-    case 'move_file':
-      return p.src && p.dst ? ['move_file', p.src, p.dst] : null;
-    case 'copy_file':
-      return p.src && p.dst ? ['copy_file', p.src, p.dst] : null;
-    case 'file_stat':
-      return p.path ? ['file_stat', p.path] : null;
-    case 'search_files':
-      return ['search_files', p.pattern || '*', p.dir || '.'];
-    case 'search_in_file':
-      return p.path && p.pattern ? ['search_in_file', p.path, p.pattern] : null;
-    case 'replace_in_file':
-      return p.path && p.search !== undefined
-        ? ['replace_in_file', p.path, p.search, p.replace != null ? p.replace : '', p.flags || '']
-        : null;
-    case 'edit_file':
-      return p.path && p.line !== undefined
-        ? ['edit_file', p.path, parseInt(p.line, 10), p.content != null ? p.content : '']
-        : null;
-    case 'get_env':
-      return p.name ? ['get_env', p.name] : null;
-    case 'set_env':
-      return p.name ? ['set_env', p.name, p.value != null ? p.value : ''] : null;
-    case 'download':
-      return p.url ? ['download', p.url] : null;
-    case 'upload':
-      return p.path ? ['upload', p.path, p.content != null ? p.content : ''] : null;
-    case 'http_get':
-      return p.url ? ['http_get', p.url] : null;
-    case 'ask_user':
-      return p.question ? ['ask_user', p.question] : null;
-    case 'store_memory':
-      return p.key ? ['store_memory', p.key, p.value != null ? p.value : ''] : null;
-    case 'recall_memory':
-      return p.key ? ['recall_memory', p.key] : null;
-    case 'list_memories':
-      return ['list_memories'];
-    case 'system_info':
-      return ['system_info'];
-    case 'exec':
-    case 'shell':
-      return p.command ? ['shell', p.command] : null;
-    default:
-      return null;
-  }
-}
-// Compile a regex twice — once with double quotes, once with single — from a
-// template where `Q` stands for the quote char. Matches from both variants
-// are returned in a single iterable.
-function _matchDual(text, template) {
-  const results = [];
-  for (const q of ['"', "'"]) {
-    const re = new RegExp(template.replace(/Q/g, q), 'g');
-    for (const m of text.matchAll(re)) results.push(m);
-  }
-  return results;
-}
-// Models sometimes wrap the inline body of a single-value tool tag in a nested
-// pseudo-tag, e.g. `<list_dir><path>/tmp/foo</path></list_dir>` instead of the
-// documented `<list_dir>/tmp/foo</list_dir>`. When the body is exactly one
-// wrapper element (no siblings, no surrounding text), unwrap it once so the
-// parser recovers the intended value. Safe to call on any inline-content body
-// — a plain path/command/URL won't match the regex and is returned as-is.
-function _unwrapInnerTag(inner) {
-  if (inner == null) return inner;
-  const trimmed = String(inner).trim();
-  const m = trimmed.match(/^<(\w+)(?:\s[^>]*)?>([\s\S]*)<\/\1>$/);
-  if (!m) return inner;
-  return m[2].trim();
+  return fromInvoke(toolName, params);
 }
 // MiniMax-M2 tool-call XML repair. Some inference backends — notably mlx-lm
@@ -1322,137 +729,37 @@ function extractToolCalls(text, options = {}) {
     }
   }
-  for (const match of text.matchAll(/```(?:shell|bash|sh)\n([\s\S]*?)```/g)) {
-    for (const line of match[1].trim().split('\n')) {
-      const cmd = line.trim();
-      if (cmd && !cmd.startsWith('#')) calls.push(['shell', cmd]);
+  // Bare-code-fence TEXT HEURISTIC: a ```bash/```sh/```shell markdown block with
+  // NO tool tag, each non-comment line inferred as a shell command. This is the
+  // ONLY mechanism that fires on untagged prose, so it is the only one gated by
+  // `skipTextHeuristics` (set on the native rail — see lib/agent.js). Every other
+  // pass in this function requires an EXPLICIT tool tag (<minimax:tool_call>,
+  // <function=…>, <tool_call>, the registered <tool> tags, MCP tags) and stays
+  // active regardless. The heuristic itself is unchanged — it is only skipped.
+  if (!options.skipTextHeuristics) {
+    for (const match of text.matchAll(/```(?:shell|bash|sh)\n([\s\S]*?)```/g)) {
+      for (const line of match[1].trim().split('\n')) {
+        const cmd = line.trim();
+        if (cmd && !cmd.startsWith('#')) calls.push(['shell', cmd]);
+      }
     }
   }
-  for (const match of text.matchAll(/<(?:shell|exec|run_command|run)>([\s\S]*?)<\/(?:shell|exec|run_command|run)>/g)) {
-    calls.push(['shell', _unwrapInnerTag(match[1]).trim()]);
-  }
-  for (const match of text.matchAll(/<read_file>([\s\S]*?)<\/read_file>/g)) {
-    calls.push(['read', _unwrapInnerTag(match[1]).trim()]);
-  }
-  for (const match of _matchDual(text, '<read_file\\s+path=Q([^Q]+)Q\\s*\\/?>')) {
-    calls.push(['read', match[1]]);
-  }
-  for (const match of _matchDual(text, '<write_file\\s+path=Q([^Q]+)Q>([\\s\\S]*?)<\\/write_file>')) {
-    calls.push(['write', match[1], match[2]]);
-  }
-  for (const match of _matchDual(text, '<create_file\\s+path=Q([^Q]+)Q>([\\s\\S]*?)<\\/create_file>')) {
-    calls.push(['write', match[1], match[2]]);
-  }
-  for (const match of _matchDual(text, '<append_file\\s+path=Q([^Q]+)Q>([\\s\\S]*?)<\\/append_file>')) {
-    calls.push(['append', match[1], match[2]]);
-  }
-  for (const match of text.matchAll(/<list_dir>([\s\S]*?)<\/list_dir>/g)) {
-    calls.push(['list_dir', _unwrapInnerTag(match[1]).trim()]);
-  }
-  for (const match of text.matchAll(/<search_files>([\s\S]*?)<\/search_files>/g)) {
-    calls.push(['search_files', _unwrapInnerTag(match[1]).trim(), '.']);
-  }
-  for (const match of _matchDual(text, '<search_files\\s+pattern=Q([^Q]+)Q(?:\\s+dir=Q([^Q]*)Q)?\\s*(?:><\\/search_files>|\\/>)')) {
-    calls.push(['search_files', match[1], match[2] || '.']);
-  }
-  for (const match of text.matchAll(/<delete_file>([\s\S]*?)<\/delete_file>/g)) {
-    calls.push(['delete_file', _unwrapInnerTag(match[1]).trim()]);
-  }
-  for (const match of text.matchAll(/<make_dir>([\s\S]*?)<\/make_dir>/g)) {
-    calls.push(['make_dir', _unwrapInnerTag(match[1]).trim()]);
-  }
-  for (const match of text.matchAll(/<remove_dir>([\s\S]*?)<\/remove_dir>/g)) {
-    calls.push(['remove_dir', _unwrapInnerTag(match[1]).trim()]);
-  }
-  for (const match of text.matchAll(/<get_env>([\s\S]*?)<\/get_env>/g)) {
-    calls.push(['get_env', _unwrapInnerTag(match[1]).trim()]);
-  }
-  for (const match of _matchDual(text, '<set_env\\s+name=Q([^Q]+)Q\\s+value=Q([^Q]*)Q\\s*(?:><\\/set_env>|\\/>)')) {
-    calls.push(['set_env', match[1], match[2]]);
-  }
-  for (const match of _matchDual(text, '<move_file\\s+src=Q([^Q]+)Q\\s+dst=Q([^Q]+)Q\\s*(?:><\\/move_file>|\\/>)')) {
-    calls.push(['move_file', match[1], match[2]]);
-  }
-  for (const match of _matchDual(text, '<copy_file\\s+src=Q([^Q]+)Q\\s+dst=Q([^Q]+)Q\\s*(?:><\\/copy_file>|\\/>)')) {
-    calls.push(['copy_file', match[1], match[2]]);
-  }
-  for (const match of _matchDual(text, '<edit_file\\s+path=Q([^Q]+)Q\\s+line=Q(\\d+)Q>([\\s\\S]*?)<\\/edit_file>')) {
-    calls.push(['edit_file', match[1], parseInt(match[2], 10), match[3]]);
-  }
-  for (const match of _matchDual(text, '<search_in_file\\s+path=Q([^Q]+)Q>([\\s\\S]*?)<\\/search_in_file>')) {
-    calls.push(['search_in_file', match[1], match[2].trim()]);
-  }
-  for (const match of _matchDual(text, '<replace_in_file\\s+path=Q([^Q]+)Q\\s+search=Q([^Q]*)Q\\s+replace=Q([^Q]*)Q>([\\s\\S]*?)<\\/replace_in_file>')) {
-    calls.push(['replace_in_file', match[1], match[2], match[3], match[4].trim()]);
-  }
-  for (const match of text.matchAll(/<download>([\s\S]*?)<\/download>/g)) {
-    calls.push(['download', _unwrapInnerTag(match[1]).trim()]);
-  }
-  for (const match of _matchDual(text, '<upload\\s+path=Q([^Q]+)Q>([\\s\\S]*?)<\\/upload>')) {
-    calls.push(['upload', match[1], match[2]]);
-  }
-  for (const match of text.matchAll(/<file_stat>([\s\S]*?)<\/file_stat>/g)) {
-    calls.push(['file_stat', _unwrapInnerTag(match[1]).trim()]);
-  }
-  for (const match of text.matchAll(/<http_get\b([^>]*?)(?:><\/http_get>|\/>)/g)) {
-    const attrStr = match[1];
-    const urlMatch = attrStr.match(/url="([^"]+)"/) || attrStr.match(/url='([^']+)'/);
-    if (urlMatch) calls.push(['http_get', urlMatch[1]]);
-  }
-  // Inline-content form: <http_get>URL</http_get>. Models mirror the style of
-  // <list_dir>, <download>, etc. even though the system prompt advertises the
-  // attribute form — accept both so the second tag in a multi-call response
-  // isn't silently dropped. Also tolerate `<http_get>url="URL"</http_get>` where
-  // the model put the attribute syntax in the body.
-  for (const match of text.matchAll(/<http_get>([\s\S]*?)<\/http_get>/g)) {
-    const inner = match[1].trim();
-    if (!inner) continue;
-    const urlAttr = inner.match(/url="([^"]+)"/) || inner.match(/url='([^']+)'/);
-    calls.push(['http_get', urlAttr ? urlAttr[1] : _unwrapInnerTag(inner).trim()]);
-  }
-  for (const match of _matchDual(text, '<ask_user\\s+question=Q([^Q]+)Q\\s*(?:><\\/ask_user>|\\/>)')) {
-    calls.push(['ask_user', match[1]]);
-  }
-  for (const match of _matchDual(text, '<store_memory\\s+key=Q([^Q]+)Q>([\\s\\S]*?)<\\/store_memory>')) {
-    calls.push(['store_memory', match[1], match[2]]);
-  }
-  for (const match of _matchDual(text, '<recall_memory\\s+key=Q([^Q]+)Q\\s*(?:><\\/recall_memory>|\\/>)')) {
-    calls.push(['recall_memory', match[1]]);
-  }
-  for (const match of text.matchAll(/<list_memories\s*(?:><\/list_memories>|\/>)/g)) {
-    calls.push(['list_memories']);
+  // XML/tag path: each tool's parseAttrs (parseXml) lives next to its spec in
+  // the tool registry. Entries run in array order, which — together with the
+  // wrapper/JSON/fence passes above — reproduces the exact emission order the
+  // characterization tests pin (test/extract-tool-calls.test.js). This replaces
+  // the ~25 standalone regex blocks that used to be inlined here.
+  for (const entry of TOOL_REGISTRY) {
+    if (!entry.parseXml) continue;
+    for (const call of entry.parseXml(text)) calls.push(call);
   }
-  for (const match of text.matchAll(/<system_info\s*(?:><\/system_info>|\/>)/g)) {
-    calls.push(['system_info']);
+  // Dynamic tools (MCP, Task 3.3) get the same XML pass so non-native models can
+  // invoke them via `<mcp__server__tool>{json args}</mcp__server__tool>`.
+  for (const entry of dynamicToolEntries()) {
+    if (!entry.parseXml) continue;
+    for (const call of entry.parseXml(text)) calls.push(call);
   }
   return calls;
@@ -1481,8 +788,14 @@ module.exports = {
   createToolExecutor,
   extractToolCalls,
   getSkippedOps,
+  // Exported for unit testing (Task 1.1). These pure path guards are otherwise
+  // private; exposing them changes no runtime behavior.
+  isPathSafe,
+  isProtectedSecretPath,
+  isProtectedConfigPath,
   isUIActive,
   mapInvokeToCall,
+  parseAskMenu,
   repairMinimaxMalformedXml,
   setUIActive,
 };