npm - @stigmer/runner - Versions diffs - 3.0.2 → 3.0.4 - Mend

@stigmer/runner 3.0.2 → 3.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (27)

package/src/activities/execute-cursor/approval-state.ts — CHANGED

@@ -8,27 +8,29 @@
  * State file format (JSON):
  * {
  *   "autoApproveAll": false,
- *   "builtInGatedList": ["Write", "StrReplace", "Shell", ...],
  *   "mcpToolPolicies": {
  *     "apply_cloud_resource": { "requiresApproval": true, "message": "..." }
  *   },
- *   "approvedGrants": [{ "toolName": "Write", "mcpServerSlug": "", "argKey": "a.txt" }],
- *   "approvedGrantTokens": ["V3JpdGUKYS50eHQ="]
+ *   "approvedGrants": [{ "toolName": "edit", "mcpServerSlug": "", "key": "write", "salient": "a.txt" }],
+ *   "approvedGrantTokens": ["d3JpdGUKYS50eHQ="]
  * }
  *
- * The hook gates only the explicitly dangerous set (builtInGatedList) and the
- * MCP tools that require approval (mcpToolPolicies, which by construction holds
- * only require-approval entries); every other tool is allowed. This mirrors the
- * native harness and avoids denying auto-approved MCP tools, which are absent
- * from the policy map and indistinguishable from unknown tools by name.
+ * The hook gates the dangerous built-in set and the MCP tools that require
+ * approval (mcpToolPolicies, which by construction holds only require-approval
+ * entries); every other tool is allowed. The gated built-in set and its
+ * name->category mapping are baked into the generated hook script (from
+ * approval-policy.ts), not carried in the state file — only the dynamic inputs
+ * (autoApproveAll, mcpToolPolicies, approvedGrantTokens) live here. This mirrors
+ * the native harness and avoids denying auto-approved MCP tools, which are
+ * absent from the policy map and indistinguishable from unknown tools by name.
  *
  * Why grants instead of tool-call ids: a resumed Cursor agent re-issues the
  * approved tool with a BRAND NEW call id, so matching on the original call id
- * can never let the re-attempt through. Instead we grant by tool identity —
- * tool name plus a "salient" argument (the file path for Write, the command for
- * Shell, …; see extractArgKey). On reinvocation the hook allows a tool call
- * only if its (name, salient-arg) matches an approved grant; rejected/skipped
- * tools and any newly proposed dangerous tool are re-gated.
+ * can never let the re-attempt through. Instead we grant by canonical tool
+ * identity — the approval category plus a "salient" resource value (the file
+ * path, the shell command; see {@link toolIdentity}). On reinvocation the hook
+ * allows a tool call only if its (category, salient) matches an approved grant;
+ * rejected/skipped tools and any newly proposed dangerous tool are re-gated.
  *
  * Tokens: the hook is a self-contained bash script, so it cannot parse an array
  * of grant objects. `approvedGrantTokens` is the flat, base64-encoded form of
@@ -56,30 +58,66 @@ import { PendingApprovalSchema } from "@stigmer/protos/ai/stigmer/agentic/agente
 import type { PendingApproval } from "@stigmer/protos/ai/stigmer/agentic/agentexecution/v1/approval_pb";
 import type { AgentMessage } from "@stigmer/protos/ai/stigmer/agentic/agentexecution/v1/message_pb";
 import type { MergedToolPolicy } from "./approval-policy.js";
-import { getBuiltInGatedList, extractArgKey } from "./approval-policy.js";
+import { extractArgKey, approvalCategory } from "./approval-policy.js";
 export interface McpToolPolicyEntry {
   requiresApproval: boolean;
   message?: string;
 }
+/**
+ * The canonical, taxonomy-agnostic identity of a tool call.
+ *
+ * The Cursor preToolUse hook and the SDK stream name the same operation
+ * differently (hook `Write`/`Shell`/`Delete`; stream `edit`/`shell`/`delete`),
+ * so the raw tool name cannot be a cross-layer identity. Instead:
+ * - `key` is the {@link approvalCategory} (`write`/`delete`/`shell`) for gated
+ *   built-ins, and the tool name for MCP tools (whose name is consistent across
+ *   layers). It is the part that survives the name divergence.
+ * - `salient` is the resource the tool acts on (the absolute file path or the
+ *   shell command) — identical on both sides because it is the argument VALUE,
+ *   not the field name. Empty for MCP tools, matched by `key` alone.
+ *
+ * The denial ledger (hook) and the stream reconciliation (runner) both reduce a
+ * tool call to this identity, so they correlate exactly; an approval grant uses
+ * the same identity so the agent's re-attempt is allowed on reinvocation even
+ * though it carries a fresh tool-call id and a different-taxonomy name.
+ */
+export interface ToolIdentity {
+  key: string;
+  salient: string;
+}
+export function toolIdentity(
+  toolName: string,
+  mcpServerSlug: string,
+  args: Record<string, unknown> | undefined,
+): ToolIdentity {
+  if (mcpServerSlug) {
+    return { key: toolName, salient: "" };
+  }
+  const category = approvalCategory(toolName);
+  // A gated built-in keys on its category; an unknown/non-gated tool falls back
+  // to its own name (harmless — it is not gated, so it never enters the ledger).
+  return { key: category ?? toolName, salient: extractArgKey(args) };
+}
 /**
  * The identity of an approved tool call, stable across agent resume.
  *
- * - argKey is the salient argument (path/command/…) for built-in tools; matched
- *   exactly so only the approved resource is allowed through on the resumed turn.
- * - argKey is empty for MCP tools (and built-in tools with no salient field);
- *   the grant then matches by name alone, since the user approved that tool.
+ * - `key`/`salient` are the canonical {@link ToolIdentity} the hook matches on.
+ * - `toolName`/`mcpServerSlug` are retained for readability, debugging, and the
+ *   structured-vs-token cross-check (the two are always generated together).
  */
 export interface ApprovalGrant {
   toolName: string;
   mcpServerSlug: string;
-  argKey: string;
+  key: string;
+  salient: string;
 }
 export interface ApprovalStateFile {
   autoApproveAll: boolean;
-  builtInGatedList: string[];
   mcpToolPolicies: Record<string, McpToolPolicyEntry>;
   approvedGrants: ApprovalGrant[];
   approvedGrantTokens: string[];
@@ -87,17 +125,19 @@ export interface ApprovalStateFile {
 /**
  * Compute the flat token the bash hook matches on. The hook recomputes the same
- * token from the incoming tool call (`base64(toolName \n salientArg)`), so the
- * encoding here must stay byte-identical to the hook script in hook-script.ts.
+ * token from the incoming tool call (`base64(key \n salient)` — see
+ * {@link toolIdentity}), so the encoding here must stay byte-identical to the
+ * hook script in hook-script.ts.
  */
-export function grantToken(toolName: string, argKey: string): string {
-  return Buffer.from(`${toolName}\n${argKey}`, "utf-8").toString("base64");
+export function grantToken(key: string, salient: string): string {
+  return Buffer.from(`${key}\n${salient}`, "utf-8").toString("base64");
 }
 /**
  * Build approval grants from the pending approvals the user adjudicated and
- * their decisions. Only APPROVE decisions produce grants. Built-in tools are
- * keyed by their salient argument; MCP tools are keyed by name only.
+ * their decisions. Only APPROVE / APPROVE_ALL decisions produce grants. Each
+ * grant carries the canonical {@link ToolIdentity} (category + salient resource)
+ * so the hook allows the exact approved resource on the resumed turn.
  */
 export function buildApprovalGrants(
   pendingApprovals: PendingApproval[],
@@ -113,11 +153,12 @@ export function buildApprovalGrants(
     const decision = decisions.get(pa.toolCallId);
     if (decision !== ApprovalAction.APPROVE && decision !== ApprovalAction.APPROVE_ALL) continue;
-    const argKey = pa.mcpServerSlug ? "" : extractArgKey(parseArgs(pa.argsPreview));
+    const id = toolIdentity(pa.toolName, pa.mcpServerSlug, parseArgs(pa.argsPreview));
     grants.push({
       toolName: pa.toolName,
       mcpServerSlug: pa.mcpServerSlug,
-      argKey,
+      key: id.key,
+      salient: id.salient,
     });
   }
   return grants;
@@ -137,11 +178,13 @@ function parseArgs(argsPreview: string): Record<string, unknown> | undefined {
  * Build the approval state file content from merged policies and any approval
  * grants from a previous HITL cycle.
  *
- * The state file drives the hook script's allow/deny decisions:
- * - builtInGatedList: dangerous built-in tools the hook denies (unless granted)
+ * The state file carries the hook script's DYNAMIC inputs:
  * - mcpToolPolicies: per-tool policy for MCP tools requiring approval
  * - approvedGrants / approvedGrantTokens: tools approved in the current HITL
  *   cycle, allowed through on reinvocation
+ *
+ * The static gated built-in set and its category mapping are baked into the
+ * generated hook script (from approval-policy.ts), not carried here.
  */
 export function buildApprovalState(
   mergedPolicies: Map<string, MergedToolPolicy>,
@@ -160,10 +203,9 @@ export function buildApprovalState(
   return {
     autoApproveAll,
-    builtInGatedList: getBuiltInGatedList(),
     mcpToolPolicies,
     approvedGrants,
-    approvedGrantTokens: approvedGrants.map((g) => grantToken(g.toolName, g.argKey)),
+    approvedGrantTokens: approvedGrants.map((g) => grantToken(g.key, g.salient)),
   };
 }

package/src/activities/execute-cursor/hook-script.ts — CHANGED

@@ -16,66 +16,178 @@
  *    so its ledger is the authoritative record of what was gated this turn
  * 5. Returns { "permission": "allow" } or { "permission": "deny" } on stdout
  *
- * The script is self-contained (no Node.js required) for portability. It uses
- * bash + grep/cut for lightweight JSON field extraction. All policy decisions
- * are pre-computed by the runner into the state file; the hook only performs
- * mechanical field extraction and string lookups — the policy itself is
- * authored once in TypeScript (approval-policy.ts / approval-state.ts).
+ * Identity extraction runs on the SAME Node.js binary as the runner (its
+ * absolute path — process.execPath — is baked into the script at generation
+ * time), because the identity token must be byte-identical to the one the
+ * runner computes from the parsed stream event. The original grep/cut
+ * extraction is kept only as a best-effort fallback if that binary cannot run:
+ * grep's `"command":"[^"]*"` truncates at the first JSON-escaped quote, so for
+ * a shell command like `printf '%s' "x" > file` the fallback token will NOT
+ * match the runner's — the call is still denied (the gate holds) but the
+ * denial cannot be overlaid onto the real streamed tool call and a grant for
+ * it will not match on reinvocation. All policy decisions are pre-computed by
+ * the runner into the state file (and into this generated script); the hook
+ * only performs mechanical field extraction and string lookups — the policy
+ * itself is authored once in TypeScript (approval-policy.ts /
+ * approval-state.ts).
+ *
+ * Cross-taxonomy identity (the crux):
+ * The preToolUse hook and the SDK event stream name the same operation
+ * differently — the hook receives PascalCase `tool_name` (`Write` for any file
+ * create/edit, `Shell`, `Delete`) while the stream emits lowercase `event.name`
+ * (`edit`, `shell`, `delete`). They also name the salient argument differently
+ * (`file_path` in the hook input vs `path` in the stream). So the hook and the
+ * runner cannot correlate on the raw name. Instead both reduce a tool call to a
+ * canonical identity — `base64(category \n salient)` — where `category` is the
+ * approval category (`write`/`delete`/`shell`, baked into the case statement
+ * below from approval-policy.ts) and `salient` is the resource VALUE (the file
+ * path or shell command), which is identical on both sides. The runner mirrors
+ * this exactly in approval-state.ts (toolIdentity + grantToken), so a denial
+ * recorded here correlates to the streamed tool call, and an approval grant
+ * matches the agent's re-attempt on reinvocation.
  *
  * Policy evaluation order (first match wins). The model is "gate the dangerous
  * set, allow the rest" — matching the native harness and avoiding denial of
  * auto-approved MCP tools (which are absent from mcpToolPolicies):
  * 1. autoApproveAll → allow
- * 2. Matches an approved grant token → allow (reinvocation after approval)
- * 3. Tool name in builtInGatedList → deny
- * 4. Tool name in mcpToolPolicies (require-approval) → deny
- * 5. Everything else (read-only built-ins, auto-approved MCP, unknown) → allow
+ * 2. Gated built-in (category non-empty):
+ *    a. identity token in approvedGrantTokens → allow (reinvocation grant)
+ *    b. otherwise → record denial, deny
+ * 3. MCP tool present in mcpToolPolicies (require-approval):
+ *    a. name token in approvedGrantTokens → allow
+ *    b. otherwise → record denial, deny
+ * 4. Everything else (read-only built-ins, auto-approved MCP, unknown) → allow
  */
-import { SALIENT_ARG_FIELDS } from "./approval-policy.js";
+import { SALIENT_ARG_FIELDS, getBuiltInGatedCategories } from "./approval-policy.js";
 const APPROVAL_REQUIRED_AGENT_MESSAGE =
   "STIGMER_APPROVAL_REQUIRED: This tool call requires user approval before " +
-  "execution. Do not attempt alternative approaches or workarounds. The " +
-  "execution will resume after the user reviews and approves this tool call.";
+  "execution. Do not attempt alternative approaches or workarounds (including " +
+  "shell commands). Stop and wait — the execution will resume after the user " +
+  "reviews and approves this tool call.";
+/**
+ * Build the bash `case` arms that map an incoming hook `tool_name` to its
+ * canonical approval category. Generated from approval-policy.ts so the hook and
+ * the runner never disagree on which built-ins are gated or how they categorize.
+ */
+function buildCategoryCaseArms(): string {
+  const byCategory = new Map<string, string[]>();
+  for (const [name, category] of getBuiltInGatedCategories()) {
+    const names = byCategory.get(category) ?? [];
+    names.push(name);
+    byCategory.set(category, names);
+  }
+  const arms: string[] = [];
+  for (const [category, names] of byCategory) {
+    const pattern = names.map((n) => `"${n}"`).join("|");
+    arms.push(`      ${pattern}) CATEGORY="${category}" ;;`);
+  }
+  return arms.join("\n");
+}
+/**
+ * Build the inline Node.js identity extractor embedded in the hook script.
+ *
+ * Parses the hook's stdin JSON properly (the bash fallback's grep truncates
+ * string values at the first escaped quote) and emits four lines:
+ * tool_name, canonical category, identity token, and MCP name-token. The token
+ * encodings must stay byte-identical to grantToken() in approval-state.ts.
+ *
+ * Authored as a single-quoted bash string, so the JS must not contain single
+ * quotes. The category map and salient field list are baked from
+ * approval-policy.ts — the same source the runner uses — so the two sides can
+ * never disagree.
+ */
+function buildNodeIdentityScript(): string {
+  const categoryMap: Record<string, string> = {};
+  for (const [name, category] of getBuiltInGatedCategories()) {
+    categoryMap[name] = category;
+  }
+  const categories = JSON.stringify(categoryMap);
+  const fields = JSON.stringify(SALIENT_ARG_FIELDS);
+  return [
+    `const t=JSON.parse(require("fs").readFileSync(0,"utf8"));`,
+    `const name=typeof t.tool_name==="string"?t.tool_name:"";`,
+    `const cat=(${categories})[name]||"";`,
+    `const a=(t.tool_input&&typeof t.tool_input==="object")?t.tool_input:{};`,
+    `let s="";`,
+    `for(const f of ${fields}){const v=a[f];if(typeof v==="string"&&v){s=v;break;}}`,
+    `const b=(x)=>Buffer.from(x,"utf8").toString("base64");`,
+    `process.stdout.write(name+"\\n"+cat+"\\n"+b(cat+"\\n"+s)+"\\n"+b(name+"\\n"));`,
+  ].join("");
+}
 /**
  * Generates the bash hook script content.
  *
  * The script reads a JSON state file written by the cursor-runner before
  * each agent.send() call. The state file is the single source of truth
- * for all approval decisions.
+ * for the dynamic approval inputs (autoApproveAll, mcpToolPolicies,
+ * approvedGrantTokens). The static policy (which built-ins are gated and their
+ * categories, and which arg fields are salient) is baked into the script at
+ * generation time from approval-policy.ts.
  *
- * Approved grants are matched by a base64 token of `toolName \n salientArg`,
- * recomputed here from the incoming tool call. The salient-arg field list is
- * injected from SALIENT_ARG_FIELDS so the runner and the hook never disagree on
- * which argument identifies the resource. The encoding must stay byte-identical
+ * The identity token encoding (`base64(key \n salient)`) must stay byte-identical
  * to grantToken() in approval-state.ts.
  */
 export function generateHookScript(stateFilePath: string, ledgerFilePath: string): string {
   const salientFields = SALIENT_ARG_FIELDS.join(" ");
+  const categoryCaseArms = buildCategoryCaseArms();
+  const nodeIdentityScript = buildNodeIdentityScript();
   return `#!/bin/bash
 # Stigmer HITL approval hook for Cursor preToolUse
 # Generated by cursor-runner — do not edit manually.
 #
-# Reads tool call from stdin (JSON), checks approval state file,
-# returns permission decision on stdout (JSON). On a deny, appends the call's
+# Reads tool call from stdin (JSON), checks approval state file, returns a
+# permission decision on stdout (JSON). On a deny, appends the call's canonical
 # identity token to the denial ledger so the runner can mark the gated tool call
-# as WAITING_APPROVAL.
+# as WAITING_APPROVAL. See hook-script.ts for the cross-taxonomy identity design.
 set -euo pipefail
 INPUT=$(cat)
-# Extract tool_name from the hook input JSON.
-# Cursor sends the actual tool name (e.g. "search_services" for MCP tools).
-# Every extraction ends with '|| true': under 'set -e' a non-matching grep would
-# otherwise abort the script and emit no decision.
-TOOL_NAME=$(echo "$INPUT" | grep -o '"tool_name":"[^"]*"' | head -1 | cut -d'"' -f4 || true)
 STATE_FILE="${stateFilePath}"
 LEDGER_FILE="${ledgerFilePath}"
+# --- Canonical identity: tool_name / category / identity token / MCP token ---
+# Computed by the same Node.js binary that runs the cursor-runner (absolute path
+# baked at generation time) so JSON string values — file paths and especially
+# shell commands containing quotes, newlines, or unicode escapes — decode to the
+# exact bytes the runner sees in the stream event. ELECTRON_RUN_AS_NODE makes
+# the invocation safe when the runner is embedded in an Electron app (where
+# process.execPath is the Electron binary).
+NODE_BIN="${process.execPath}"
+IDENTITY=$(printf '%s' "$INPUT" | ELECTRON_RUN_AS_NODE=1 "$NODE_BIN" -e '${nodeIdentityScript}' 2>/dev/null || true)
+if [ -n "$IDENTITY" ]; then
+  TOOL_NAME=$(printf '%s\\n' "$IDENTITY" | sed -n 1p)
+  CATEGORY=$(printf '%s\\n' "$IDENTITY" | sed -n 2p)
+  TOKEN=$(printf '%s\\n' "$IDENTITY" | sed -n 3p)
+  MCP_TOKEN=$(printf '%s\\n' "$IDENTITY" | sed -n 4p)
+else
+  # Fallback when the Node binary cannot run: grep/cut extraction. Best-effort
+  # only — '"field":"[^"]*"' truncates at the first JSON-escaped quote, so the
+  # token may not match the runner's for values containing escapes. Gating still
+  # holds (deny goes out); only denial correlation and grant precision degrade.
+  # Every extraction ends with '|| true': under 'set -e' a non-matching grep
+  # would otherwise abort the script and emit no decision.
+  TOOL_NAME=$(echo "$INPUT" | grep -o '"tool_name":"[^"]*"' | head -1 | cut -d'"' -f4 || true)
+  SALIENT=""
+  for field in ${salientFields}; do
+    v=$(echo "$INPUT" | grep -o "\\"$field\\":\\"[^\\"]*\\"" | head -1 | cut -d'"' -f4 || true)
+    if [ -n "$v" ]; then SALIENT="$v"; break; fi
+  done
+  CATEGORY=""
+  case "$TOOL_NAME" in
+${categoryCaseArms}
+      *) CATEGORY="" ;;
+  esac
+  TOKEN=$(printf '%s\\n%s' "$CATEGORY" "$SALIENT" | base64 | tr -d '\\n')
+  MCP_TOKEN=$(printf '%s\\n' "$TOOL_NAME" | base64 | tr -d '\\n')
+fi
 # --- Failsafe: missing state file → deny (fail-closed) ---
 if [ ! -f "$STATE_FILE" ]; then
   echo '{"permission":"deny","agent_message":"${APPROVAL_REQUIRED_AGENT_MESSAGE}","user_message":"Tool requires approval: '"$TOOL_NAME"'"}'
@@ -90,66 +202,48 @@ if echo "$STATE" | grep -q '"autoApproveAll":true'; then
   exit 0
 fi
-# --- 2. Approved grants (reinvocation after SubmitApproval) ---
-# Build the same base64 token the runner stored for an approved tool call and
-# match it against approvedGrantTokens. Match by (name + salient arg); fall back
-# to name-only for grants with no salient arg (MCP tools). Salient-arg field
-# order is injected from SALIENT_ARG_FIELDS (single source of truth).
-TOKEN_NAME=$(printf '%s\\n' "$TOOL_NAME" | base64 | tr -d '\\n')
-if echo "$STATE" | grep -q "\\"$TOKEN_NAME\\""; then
-  echo '{"permission":"allow"}'
-  exit 0
-fi
-SALIENT=""
-for field in ${salientFields}; do
-  v=$(echo "$INPUT" | grep -o "\\"$field\\":\\"[^\\"]*\\"" | head -1 | cut -d'"' -f4 || true)
-  if [ -n "$v" ]; then SALIENT="$v"; break; fi
-done
-if [ -n "$SALIENT" ]; then
-  TOKEN_SALIENT=$(printf '%s\\n%s' "$TOOL_NAME" "$SALIENT" | base64 | tr -d '\\n')
-  if echo "$STATE" | grep -q "\\"$TOKEN_SALIENT\\""; then
-    echo '{"permission":"allow"}'
-    exit 0
-  fi
-fi
-# Identity token recorded on a deny so the runner can correlate the gated call
-# back to its streamed tool call. Prefer the salient-arg token (identifies the
-# specific resource); fall back to name-only. Byte-identical to grantToken().
-if [ -n "$SALIENT" ]; then DENY_TOKEN="$TOKEN_SALIENT"; else DENY_TOKEN="$TOKEN_NAME"; fi
 # Append a denial record to the ledger. Best-effort: a ledger write failure must
 # never abort the decision (the deny still goes out on stdout). toolName is raw
 # for human-readable debugging; token drives correlation in the runner.
 record_denial() {
-  echo '{"toolName":"'"$TOOL_NAME"'","token":"'"$DENY_TOKEN"'"}' >> "$LEDGER_FILE" 2>/dev/null || true
+  echo '{"toolName":"'"$TOOL_NAME"'","token":"'"$1"'"}' >> "$LEDGER_FILE" 2>/dev/null || true
 }
-# --- 3. Gated built-in tools (Write, StrReplace, Shell, ...) → deny ---
-GATED_LIST=$(echo "$STATE" | grep -o '"builtInGatedList":\\[[^]]*\\]' | head -1 || true)
-if [ -n "$GATED_LIST" ] && [ -n "$TOOL_NAME" ] && echo "$GATED_LIST" | grep -q "\\"$TOOL_NAME\\""; then
-  record_denial
+# --- 2. Gated built-in tools (category non-empty) ---
+if [ -n "$CATEGORY" ]; then
+  # Reinvocation grant: this exact resource was approved earlier → allow.
+  if echo "$STATE" | grep -qF "\\"$TOKEN\\""; then
+    echo '{"permission":"allow"}'
+    exit 0
+  fi
+  record_denial "$TOKEN"
   echo '{"permission":"deny","agent_message":"${APPROVAL_REQUIRED_AGENT_MESSAGE}","user_message":"Tool requires approval: '"$TOOL_NAME"'"}'
   exit 0
 fi
-# --- 4. MCP tools that require approval → deny ---
+# --- 3. MCP tools that require approval → deny ---
 # mcpToolPolicies holds only require-approval tools (auto-approved MCP tools are
-# absent), so presence means "deny" unless an entry is explicitly false.
+# absent), so presence means "deny" unless an entry is explicitly false. MCP tool
+# names are consistent across the hook and the stream, so the identity token is
+# name-only: base64("$TOOL_NAME\\n").
 if echo "$STATE" | grep -q "\\"mcpToolPolicies\\"" && [ -n "$TOOL_NAME" ]; then
   TOOL_POLICY=$(echo "$STATE" | grep -o "\\"$TOOL_NAME\\":{[^}]*}" | head -1 || true)
   if [ -n "$TOOL_POLICY" ] && ! echo "$TOOL_POLICY" | grep -q '"requiresApproval":false'; then
+    if echo "$STATE" | grep -qF "\\"$MCP_TOKEN\\""; then
+      echo '{"permission":"allow"}'
+      exit 0
+    fi
     MSG=$(echo "$TOOL_POLICY" | grep -o '"message":"[^"]*"' | head -1 | cut -d'"' -f4 || true)
     if [ -z "$MSG" ]; then
       MSG="Tool requires approval: $TOOL_NAME"
     fi
-    record_denial
+    record_denial "$MCP_TOKEN"
     echo '{"permission":"deny","agent_message":"${APPROVAL_REQUIRED_AGENT_MESSAGE}","user_message":"'"$MSG"'"}'
     exit 0
   fi
 fi
-# --- 5. Everything else → allow ---
+# --- 4. Everything else → allow ---
 # Read-only built-ins, auto-approved MCP tools, and anything not explicitly
 # gated. Fail-open mirrors the native harness (gate the dangerous set, allow the
 # rest) and prevents denying auto-approved MCP tools the state cannot enumerate.