npm - @stigmer/runner - Versions diffs - 3.0.2-dev.20260609093630 → 3.0.3 - Mend

@stigmer/runner 3.0.2-dev.20260609093630 → 3.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (22)

package/src/activities/execute-cursor/__tests__/approval-gate.test.ts CHANGED

@@ -1,14 +1,15 @@
 /**
  * Unit tests for the Cursor-harness HITL approval gate logic.
  *
- * Covers the pure policy/grant/prompt builders that drive the preToolUse hook:
- * - built-in tool gating (mutating gated, read-only + unknown allowed)
- * - salient-arg extraction (grant matching key)
- * - grant building from adjudicated approvals
- * - approval-state file content (gated list, MCP policies, grant tokens)
- * - the human-meaningful reinvocation prompt
+ * The crux this suite guards: the Cursor preToolUse hook and the SDK event
+ * stream use DIFFERENT tool taxonomies for the same operation (hook
+ * `Write`/`Shell`/`Delete` with `file_path`/`command`; stream
+ * `edit`/`shell`/`delete` with `path`/`command`). Correlation therefore keys on
+ * a canonical {@link approvalCategory} + the salient resource VALUE, not the raw
+ * tool name. These tests assert that invariant against BOTH taxonomies so a
+ * future SDK tool rename fails loudly instead of silently disabling the gate.
  *
- * These are deterministic and need no Cursor API key.
+ * Deterministic; no Cursor API key required.
  */
 import { describe, it, expect } from "vitest";
@@ -18,9 +19,11 @@ import type { PendingApproval } from "@stigmer/protos/ai/stigmer/agentic/agentex
 import { ApprovalAction } from "@stigmer/protos/ai/stigmer/agentic/agentexecution/v1/enum_pb";
 import {
+  approvalCategory,
   builtInRequiresApproval,
   getBuiltInApprovalMessage,
   getBuiltInGatedList,
+  getBuiltInGatedCategories,
   extractArgKey,
 } from "../approval-policy.js";
 import type { MergedToolPolicy } from "../approval-policy.js";
@@ -28,13 +31,14 @@ import {
   buildApprovalGrants,
   buildApprovalState,
   grantToken,
+  toolIdentity,
 } from "../approval-state.js";
 import { buildReinvocationPrompt } from "../prompt-builder.js";
 function pending(overrides: Partial<PendingApproval>): PendingApproval {
   return create(PendingApprovalSchema, {
     toolCallId: "call-1",
-    toolName: "Write",
+    toolName: "edit",
     message: "",
     argsPreview: "",
     mcpServerSlug: "",
@@ -42,15 +46,47 @@ function pending(overrides: Partial<PendingApproval>): PendingApproval {
   });
 }
+// The real ground-truth taxonomies (captured from @cursor/sdk via live probe).
+const HOOK_NAMES = { write: "Write", shell: "Shell", del: "Delete", read: "Read" };
+const STREAM_NAMES = { write: "edit", shell: "shell", del: "delete", read: "read" };
+describe("approvalCategory (cross-taxonomy drift-guard)", () => {
+  it("maps the HOOK taxonomy (PascalCase) to canonical categories", () => {
+    expect(approvalCategory("Write")).toBe("write");
+    expect(approvalCategory("StrReplace")).toBe("write");
+    expect(approvalCategory("EditNotebook")).toBe("write");
+    expect(approvalCategory("Delete")).toBe("delete");
+    expect(approvalCategory("Shell")).toBe("shell");
+  });
+  it("maps the STREAM taxonomy (lowercase) to the SAME categories", () => {
+    expect(approvalCategory("write")).toBe("write");
+    expect(approvalCategory("edit")).toBe("write");
+    expect(approvalCategory("delete")).toBe("delete");
+    expect(approvalCategory("shell")).toBe("shell");
+    expect(approvalCategory("execute")).toBe("shell");
+  });
+  it("a file mutation collapses to `write` on BOTH sides (hook Write == stream edit)", () => {
+    expect(approvalCategory(HOOK_NAMES.write)).toBe(approvalCategory(STREAM_NAMES.write));
+  });
+  it("returns undefined for read-only / non-gated tools", () => {
+    for (const t of ["read", "Read", "glob", "Glob", "grep", "Grep", "ls", "think", "task"]) {
+      expect(approvalCategory(t)).toBeUndefined();
+    }
+  });
+});
 describe("builtInRequiresApproval", () => {
-  it("gates mutating/destructive built-in tools", () => {
-    for (const t of ["Write", "StrReplace", "EditNotebook", "Shell", "Delete"]) {
+  it("gates mutating/destructive tools in BOTH taxonomies", () => {
+    for (const t of ["Write", "StrReplace", "EditNotebook", "Shell", "Delete", "edit", "shell", "delete", "execute", "write"]) {
       expect(builtInRequiresApproval(t)).toBe(true);
     }
   });
   it("allows read-only built-in tools", () => {
-    for (const t of ["Read", "Grep", "Glob", "SemanticSearch", "WebFetch", "ReadLints"]) {
+    for (const t of ["Read", "read", "Grep", "grep", "Glob", "glob", "ls", "think", "task"]) {
       expect(builtInRequiresApproval(t)).toBe(false);
     }
   });
@@ -60,27 +96,38 @@ describe("builtInRequiresApproval", () => {
     expect(builtInRequiresApproval("search_services")).toBe(false);
   });
-  it("exposes the gated set", () => {
+  it("exposes the gated set in the HOOK taxonomy (what the hook matches)", () => {
     expect(getBuiltInGatedList()).toEqual(
       expect.arrayContaining(["Write", "StrReplace", "EditNotebook", "Shell", "Delete"]),
     );
   });
+  it("every gated built-in resolves to a category (no ungated hole)", () => {
+    for (const name of getBuiltInGatedList()) {
+      expect(approvalCategory(name)).toBeDefined();
+    }
+    // The injected hook map covers exactly the gated set.
+    expect(getBuiltInGatedCategories().map(([n]) => n).sort()).toEqual(getBuiltInGatedList().sort());
+  });
 });
 describe("getBuiltInApprovalMessage", () => {
-  it("returns a template for gated tools and undefined otherwise", () => {
+  it("returns a category template for gated tools in EITHER taxonomy", () => {
     expect(getBuiltInApprovalMessage("Write")).toContain("{{args.path}}");
+    expect(getBuiltInApprovalMessage("edit")).toContain("{{args.path}}");
     expect(getBuiltInApprovalMessage("Shell")).toContain("{{args.command}}");
+    expect(getBuiltInApprovalMessage("shell")).toContain("{{args.command}}");
     expect(getBuiltInApprovalMessage("Read")).toBeUndefined();
+    expect(getBuiltInApprovalMessage("read")).toBeUndefined();
   });
 });
-describe("extractArgKey", () => {
-  it("extracts the salient field by priority (path > command > target_notebook)", () => {
-    expect(extractArgKey({ path: "a.txt" })).toBe("a.txt");
+describe("extractArgKey (spans both taxonomies' field names)", () => {
+  it("extracts the salient value regardless of field name (file_path or path)", () => {
+    expect(extractArgKey({ file_path: "a.txt" })).toBe("a.txt"); // hook shape
+    expect(extractArgKey({ path: "a.txt" })).toBe("a.txt"); // stream shape
     expect(extractArgKey({ command: "ls -la" })).toBe("ls -la");
     expect(extractArgKey({ target_notebook: "nb.ipynb" })).toBe("nb.ipynb");
-    expect(extractArgKey({ path: "a.txt", command: "ls" })).toBe("a.txt");
   });
   it("returns empty string when no salient field is present", () => {
@@ -90,24 +137,33 @@ describe("extractArgKey", () => {
   });
 });
-describe("grantToken", () => {
-  it("is byte-identical to base64(toolName \\n argKey)", () => {
-    expect(grantToken("Write", "gated.txt")).toBe(
-      Buffer.from("Write\ngated.txt", "utf-8").toString("base64"),
-    );
-    expect(grantToken("search_services", "")).toBe(
-      Buffer.from("search_services\n", "utf-8").toString("base64"),
+describe("toolIdentity + grantToken (canonical, taxonomy-agnostic)", () => {
+  it("a hook Write and a stream edit on the SAME path produce the SAME token", () => {
+    const hook = toolIdentity("Write", "", { file_path: "/x/a.txt" });
+    const stream = toolIdentity("edit", "", { path: "/x/a.txt" });
+    expect(hook).toEqual({ key: "write", salient: "/x/a.txt" });
+    expect(stream).toEqual({ key: "write", salient: "/x/a.txt" });
+    expect(grantToken(hook.key, hook.salient)).toBe(grantToken(stream.key, stream.salient));
+  });
+  it("encodes as base64(key \\n salient)", () => {
+    expect(grantToken("write", "/x/a.txt")).toBe(
+      Buffer.from("write\n/x/a.txt", "utf-8").toString("base64"),
     );
   });
+  it("MCP tools key on name only (consistent across layers)", () => {
+    expect(toolIdentity("apply_x", "planton", { path: "ignored" })).toEqual({ key: "apply_x", salient: "" });
+  });
 });
 describe("buildApprovalGrants", () => {
-  it("creates an arg-keyed grant for an approved built-in tool", () => {
+  it("creates an exact-resource grant for an approved built-in (stream-named) tool", () => {
     const grants = buildApprovalGrants(
-      [pending({ toolCallId: "c1", toolName: "Write", argsPreview: JSON.stringify({ path: "gated.txt" }) })],
+      [pending({ toolCallId: "c1", toolName: "edit", argsPreview: JSON.stringify({ path: "/x/gated.txt" }) })],
       new Map([["c1", ApprovalAction.APPROVE]]),
     );
-    expect(grants).toEqual([{ toolName: "Write", mcpServerSlug: "", argKey: "gated.txt" }]);
+    expect(grants).toEqual([{ toolName: "edit", mcpServerSlug: "", key: "write", salient: "/x/gated.txt" }]);
   });
   it("creates a name-only grant for an approved MCP tool", () => {
@@ -115,14 +171,14 @@ describe("buildApprovalGrants", () => {
       [pending({ toolCallId: "c1", toolName: "apply_x", mcpServerSlug: "planton", argsPreview: JSON.stringify({ path: "ignored" }) })],
       new Map([["c1", ApprovalAction.APPROVE]]),
     );
-    expect(grants).toEqual([{ toolName: "apply_x", mcpServerSlug: "planton", argKey: "" }]);
+    expect(grants).toEqual([{ toolName: "apply_x", mcpServerSlug: "planton", key: "apply_x", salient: "" }]);
   });
   it("ignores skipped and rejected approvals", () => {
     const grants = buildApprovalGrants(
       [
-        pending({ toolCallId: "c1", toolName: "Write", argsPreview: JSON.stringify({ path: "a" }) }),
-        pending({ toolCallId: "c2", toolName: "Shell", argsPreview: JSON.stringify({ command: "rm" }) }),
+        pending({ toolCallId: "c1", toolName: "edit", argsPreview: JSON.stringify({ path: "a" }) }),
+        pending({ toolCallId: "c2", toolName: "shell", argsPreview: JSON.stringify({ command: "rm" }) }),
       ],
       new Map([
         ["c1", ApprovalAction.SKIP],
@@ -138,14 +194,15 @@ describe("buildApprovalState", () => {
     ["planton/apply_x", { toolName: "apply_x", mcpServerSlug: "planton", requiresApproval: true, approvalMessage: "Apply X" }],
   ]);
-  it("carries the gated list, MCP policies, and grant tokens", () => {
-    const grants = [{ toolName: "Write", mcpServerSlug: "", argKey: "gated.txt" }];
+  it("carries MCP policies and exact-resource grant tokens (gated set is baked into the hook, not the state)", () => {
+    const grants = [{ toolName: "edit", mcpServerSlug: "", key: "write", salient: "/x/gated.txt" }];
     const state = buildApprovalState(mcpPolicies, false, grants);
     expect(state.autoApproveAll).toBe(false);
-    expect(state.builtInGatedList).toEqual(expect.arrayContaining(["Write", "Shell"]));
     expect(state.mcpToolPolicies.apply_x).toEqual({ requiresApproval: true, message: "Apply X" });
-    expect(state.approvedGrantTokens).toEqual([grantToken("Write", "gated.txt")]);
+    expect(state.approvedGrantTokens).toEqual([grantToken("write", "/x/gated.txt")]);
+    // builtInGatedList is no longer part of the state file (baked into the hook).
+    expect((state as Record<string, unknown>).builtInGatedList).toBeUndefined();
   });
   it("defaults grants to empty when none provided", () => {
@@ -160,8 +217,8 @@ describe("buildReinvocationPrompt", () => {
   it("describes approved and skipped actions in human terms, not opaque ids", () => {
     const prompt = buildReinvocationPrompt(
       [
-        pending({ toolCallId: "c1", toolName: "Write", message: "Write file: gated.txt" }),
-        pending({ toolCallId: "c2", toolName: "Shell", message: "Run command: rm -rf build" }),
+        pending({ toolCallId: "c1", toolName: "edit", message: "Write file: gated.txt" }),
+        pending({ toolCallId: "c2", toolName: "shell", message: "Run command: rm -rf build" }),
       ],
       new Map([
         ["c1", ApprovalAction.APPROVE],
@@ -173,7 +230,6 @@ describe("buildReinvocationPrompt", () => {
     expect(prompt).toContain("Write file: gated.txt");
     expect(prompt).toContain("SKIPPED");
     expect(prompt).toContain("Run command: rm -rf build");
-    // No opaque tool-call ids leak into the prompt.
     expect(prompt).not.toContain("c1");
     expect(prompt).not.toContain("c2");
   });

package/src/activities/execute-cursor/__tests__/hitl-ledger.test.ts CHANGED

@@ -63,10 +63,14 @@ function makeWorkspace(): string {
   return dir;
 }
+// Stream tool calls use the lowercase SDK taxonomy (edit/shell/delete); the
+// denial ledger uses the hook taxonomy (Write/Shell/Delete) + a canonical
+// category+salient token. The two correlate via approvalCategory — that cross-
+// taxonomy match is exactly what these tests pin.
 function toolCall(overrides: Partial<ToolCall>): ToolCall {
   return create(ToolCallSchema, {
     id: "call-1",
-    name: "Write",
+    name: "edit",
     status: ToolCallStatus.TOOL_CALL_COMPLETED,
     ...overrides,
   });
@@ -96,8 +100,8 @@ describe("denial ledger reset/read", () => {
   it("parses appended JSONL denials and tolerates blank/partial lines", async () => {
     const ws = makeWorkspace();
     await resetDenialLedger(ws);
-    const writeToken = grantToken("Write", "gated.txt");
-    const shellToken = grantToken("Shell", "rm -rf build");
+    const writeToken = grantToken("write", "gated.txt");
+    const shellToken = grantToken("shell", "rm -rf build");
     // Simulate the hook appending records, including a trailing partial line.
     await writeFile(
       denialLedgerPath(ws),
@@ -117,10 +121,13 @@ describe("denial ledger reset/read", () => {
 });
 describe("reconcileDeniedToolCalls", () => {
-  it("overlays WAITING_APPROVAL onto a denied tool reported as completed (the green-check bug)", () => {
+  it("overlays WAITING_APPROVAL onto the REAL denied tool reported as completed (the green-check bug)", () => {
+    // Stream reports the file mutation as `edit` (RUNNING/COMPLETED); the hook
+    // denied it as `Write`. The category+salient token bridges the two so the
+    // overlay lands on this exact streamed tool call — no synthesized placeholder.
     const tc = toolCall({
       id: "c1",
-      name: "Write",
+      name: "edit",
       status: ToolCallStatus.TOOL_CALL_COMPLETED,
       completedAt: "2026-06-07T00:00:00Z",
       result: "wrote file",
@@ -130,10 +137,14 @@ describe("reconcileDeniedToolCalls", () => {
     const messages = [aiMessageWith([tc])];
     const reconciled = reconcileDeniedToolCalls(messages, [
-      { toolName: "Write", token: grantToken("Write", "gated.txt") },
+      { toolName: "Write", token: grantToken("write", "gated.txt") },
     ]);
     expect(reconciled).toHaveLength(1);
+    // The overlay marked the REAL streamed tool call — no synthesized placeholder
+    // and no orphan was appended.
+    expect(reconciled[0]).toBe(tc);
+    expect(messages[0].toolCalls).toHaveLength(1);
     expect(tc.status).toBe(ToolCallStatus.TOOL_CALL_WAITING_APPROVAL);
     expect(tc.requiresApproval).toBe(true);
     expect(tc.approvalMessage).toContain("gated.txt");
@@ -162,7 +173,7 @@ describe("reconcileDeniedToolCalls", () => {
       }],
     ]);
-    // MCP tools are keyed name-only (mirrors the grant convention).
+    // MCP tools are keyed name-only (their name is consistent across layers).
     reconcileDeniedToolCalls(messages, [
       { toolName: "apply_x", token: grantToken("apply_x", "") },
     ], policies);
@@ -174,20 +185,20 @@ describe("reconcileDeniedToolCalls", () => {
   it("leaves non-denied tool calls untouched while overlaying the denied one", () => {
     const denied = toolCall({
       id: "c1",
-      name: "Write",
+      name: "edit",
       status: ToolCallStatus.TOOL_CALL_COMPLETED,
       args: { path: "gated.txt" },
     });
     const allowed = toolCall({
       id: "c2",
-      name: "Read",
+      name: "read",
       status: ToolCallStatus.TOOL_CALL_COMPLETED,
       args: { path: "readme.md" },
     });
     const messages = [aiMessageWith([denied, allowed])];
     const reconciled = reconcileDeniedToolCalls(messages, [
-      { toolName: "Write", token: grantToken("Write", "gated.txt") },
+      { toolName: "Write", token: grantToken("write", "gated.txt") },
     ]);
     // Only the denied call is gated; the read-only call keeps its status and no
@@ -199,12 +210,12 @@ describe("reconcileDeniedToolCalls", () => {
   });
   it("collapses repeated denials of the same resource to a single approval", () => {
-    const first = toolCall({ id: "c1", name: "Write", args: { path: "gated.txt" } });
-    const second = toolCall({ id: "c2", name: "Write", args: { path: "gated.txt" } });
+    const first = toolCall({ id: "c1", name: "edit", args: { path: "gated.txt" } });
+    const second = toolCall({ id: "c2", name: "edit", args: { path: "gated.txt" } });
     const messages = [aiMessageWith([first, second])];
     const reconciled = reconcileDeniedToolCalls(messages, [
-      { toolName: "Write", token: grantToken("Write", "gated.txt") },
+      { toolName: "Write", token: grantToken("write", "gated.txt") },
     ]);
     // One approval anchor (so the backend gate resolves cleanly on one decision).
@@ -217,15 +228,19 @@ describe("reconcileDeniedToolCalls", () => {
     const messages = [aiMessageWith([])];
     const reconciled = reconcileDeniedToolCalls(messages, [
-      { toolName: "Shell", token: grantToken("Shell", "rm -rf build") },
+      { toolName: "Shell", token: grantToken("shell", "rm -rf build") },
     ]);
     expect(reconciled).toHaveLength(1);
     const synthesized = messages[0].toolCalls[0];
     expect(synthesized.status).toBe(ToolCallStatus.TOOL_CALL_WAITING_APPROVAL);
     expect(synthesized.requiresApproval).toBe(true);
+    // The synthesized fallback shows the hook's raw tool name for display...
     expect(synthesized.name).toBe("Shell");
     expect(synthesized.approvalMessage).toContain("rm -rf build");
+    // ...and carries the salient so the grant rebuilt from it keys on the same
+    // resource the hook will see on the re-attempt.
+    expect(synthesized.argsPreview).toContain("rm -rf build");
   });
   it("is a no-op when the ledger is empty", () => {
@@ -240,7 +255,7 @@ describe("reconstructAdjudicatedApprovals", () => {
   it("reads decisions and rebuilds pending approvals from adjudicated tool calls", () => {
     const approved = toolCall({
       id: "c1",
-      name: "Write",
+      name: "edit",
       status: ToolCallStatus.TOOL_CALL_WAITING_APPROVAL,
       approvalAction: ApprovalAction.APPROVE,
       approvalMessage: "Write file: gated.txt",
@@ -248,13 +263,13 @@ describe("reconstructAdjudicatedApprovals", () => {
     });
     const undecided = toolCall({
       id: "c2",
-      name: "Shell",
+      name: "shell",
       status: ToolCallStatus.TOOL_CALL_WAITING_APPROVAL,
       approvalAction: ApprovalAction.UNSPECIFIED,
     });
     const unrelated = toolCall({
       id: "c3",
-      name: "Read",
+      name: "read",
       status: ToolCallStatus.TOOL_CALL_COMPLETED,
       approvalAction: ApprovalAction.APPROVE,
     });
@@ -265,7 +280,7 @@ describe("reconstructAdjudicatedApprovals", () => {
     expect([...decisions.entries()]).toEqual([["c1", ApprovalAction.APPROVE]]);
     expect(pendingApprovals).toHaveLength(1);
     expect(pendingApprovals[0].toolCallId).toBe("c1");
-    expect(pendingApprovals[0].toolName).toBe("Write");
+    expect(pendingApprovals[0].toolName).toBe("edit");
     expect(pendingApprovals[0].argsPreview).toBe(JSON.stringify({ path: "gated.txt" }));
   });

package/src/activities/execute-cursor/__tests__/hook-script.test.ts ADDED

@@ -0,0 +1,149 @@
+/**
+ * Behavior tests for the generated preToolUse bash hook.
+ *
+ * These run the ACTUAL bash script the runner writes into the workspace, feeding
+ * it the REAL hook-input shape captured from @cursor/sdk (PascalCase
+ * `tool_name`; `file_path`/`command` in `tool_input`). They are the strongest
+ * guard against the regression this work fixes: a gated built-in must be denied,
+ * its denial must be recorded with a token byte-identical to the runner's
+ * grantToken, and an exact-resource grant must allow only that resource.
+ *
+ * Skipped automatically where bash is unavailable.
+ */
+import { describe, it, expect, beforeAll, afterEach } from "vitest";
+import { execFileSync, execSync } from "node:child_process";
+import { mkdtempSync, mkdirSync, writeFileSync, rmSync, readFileSync, existsSync } from "node:fs";
+import { tmpdir } from "node:os";
+import { join } from "node:path";
+import { generateHookScript } from "../hook-script.js";
+import { buildApprovalState, grantToken, toolIdentity, type ApprovalGrant } from "../approval-state.js";
+import type { McpToolPolicyEntry } from "../approval-state.js";
+let hasBash = false;
+try {
+  execSync("bash -c 'exit 0'", { stdio: "ignore" });
+  hasBash = true;
+} catch {
+  hasBash = false;
+}
+const d = hasBash ? describe : describe.skip;
+const tempDirs: string[] = [];
+afterEach(() => {
+  for (const dir of tempDirs.splice(0)) rmSync(dir, { recursive: true, force: true });
+});
+interface Harness {
+  decide(input: object): { permission: string; raw: string };
+  ledger(): Array<{ toolName: string; token: string }>;
+  resetLedger(): void;
+}
+function setup(opts: {
+  autoApproveAll?: boolean;
+  grants?: ApprovalGrant[];
+  mcpPolicies?: Record<string, McpToolPolicyEntry>;
+  noStateFile?: boolean;
+}): Harness {
+  const ws = mkdtempSync(join(tmpdir(), "hook-script-"));
+  tempDirs.push(ws);
+  const dir = join(ws, ".cursor", "hooks");
+  mkdirSync(dir, { recursive: true });
+  const statePath = join(dir, "state.json");
+  const ledgerPath = join(dir, "denials.jsonl");
+  const scriptPath = join(dir, "hook.sh");
+  writeFileSync(scriptPath, generateHookScript(statePath, ledgerPath), "utf-8");
+  if (!opts.noStateFile) {
+    const policies = new Map(
+      Object.entries(opts.mcpPolicies ?? {}).map(([name, p]) => [
+        `srv/${name}`,
+        { toolName: name, mcpServerSlug: "srv", requiresApproval: p.requiresApproval, approvalMessage: p.message ?? "" },
+      ]),
+    );
+    const state = buildApprovalState(policies, opts.autoApproveAll ?? false, opts.grants);
+    writeFileSync(statePath, JSON.stringify(state), "utf-8");
+  }
+  return {
+    decide(input: object) {
+      const raw = execFileSync("bash", [scriptPath], { input: JSON.stringify(input) }).toString();
+      const permission = raw.includes('"permission":"deny"') ? "deny" : raw.includes('"permission":"allow"') ? "allow" : "?";
+      return { permission, raw };
+    },
+    ledger() {
+      if (!existsSync(ledgerPath)) return [];
+      return readFileSync(ledgerPath, "utf-8").split("\n").filter(Boolean).map((l) => JSON.parse(l));
+    },
+    resetLedger() {
+      writeFileSync(ledgerPath, "", "utf-8");
+    },
+  };
+}
+// Real hook-input shapes (PascalCase name, file_path/command in tool_input).
+const hookWrite = (filePath: string) => ({ tool_name: "Write", tool_input: { file_path: filePath, content: "x" } });
+const hookShell = (command: string) => ({ tool_name: "Shell", tool_input: { command, cwd: "/x", timeout: 30000 } });
+const hookDelete = (filePath: string) => ({ tool_name: "Delete", tool_input: { file_path: filePath } });
+const hookRead = (filePath: string) => ({ tool_name: "Read", tool_input: { file_path: filePath } });
+d("generated preToolUse hook", () => {
+  it("denies gated built-ins (Write/Shell/Delete) and records a category+salient token", () => {
+    const h = setup({});
+    for (const [input, category, salient] of [
+      [hookWrite("/x/a.txt"), "write", "/x/a.txt"],
+      [hookShell("rm -rf build"), "shell", "rm -rf build"],
+      [hookDelete("/x/b.txt"), "delete", "/x/b.txt"],
+    ] as const) {
+      h.resetLedger();
+      expect(h.decide(input).permission).toBe("deny");
+      const ledger = h.ledger();
+      expect(ledger).toHaveLength(1);
+      // Byte-identical to the runner's grantToken(category, salient).
+      expect(ledger[0].token).toBe(grantToken(category, salient));
+    }
+  });
+  it("allows read-only built-ins", () => {
+    const h = setup({});
+    expect(h.decide(hookRead("/x/a.txt")).permission).toBe("allow");
+    expect(h.ledger()).toEqual([]);
+  });
+  it("auto-approve-all allows even gated built-ins", () => {
+    const h = setup({ autoApproveAll: true });
+    expect(h.decide(hookWrite("/x/a.txt")).permission).toBe("allow");
+  });
+  it("allows the EXACT granted resource and re-gates any other (no name-only over-grant)", () => {
+    const id = toolIdentity("edit", "", { path: "/x/a.txt" });
+    const h = setup({ grants: [{ toolName: "edit", mcpServerSlug: "", key: id.key, salient: id.salient }] });
+    // Same resource the user approved -> allowed on the resumed turn.
+    expect(h.decide(hookWrite("/x/a.txt")).permission).toBe("allow");
+    // A different file is NOT covered by the grant -> still gated.
+    expect(h.decide(hookWrite("/x/OTHER.txt")).permission).toBe("deny");
+  });
+  it("denies require-approval MCP tools and allows them once granted (name-only)", () => {
+    const mcpPolicies = { apply_x: { requiresApproval: true, message: "Apply X" } };
+    const denyH = setup({ mcpPolicies });
+    expect(denyH.decide({ tool_name: "apply_x", tool_input: {} }).permission).toBe("deny");
+    expect(denyH.ledger()[0].token).toBe(grantToken("apply_x", ""));
+    const grantH = setup({
+      mcpPolicies,
+      grants: [{ toolName: "apply_x", mcpServerSlug: "srv", key: "apply_x", salient: "" }],
+    });
+    expect(grantH.decide({ tool_name: "apply_x", tool_input: {} }).permission).toBe("allow");
+  });
+  it("fails closed (deny) when the state file is missing", () => {
+    const h = setup({ noStateFile: true });
+    expect(h.decide(hookWrite("/x/a.txt")).permission).toBe("deny");
+  });
+});

package/src/activities/execute-cursor/__tests__/message-translator.test.ts CHANGED

@@ -617,6 +617,99 @@ describe("MessageAccumulator tool call status transitions", () => {
     });
   });
+  // The Cursor SDK can emit the lifecycle for one call_id more than once.
+  // Observed in production: two "running" events ~0.5s apart for a task/edit
+  // tool produced two ToolCall entries with the SAME id (a "thin" copy with no
+  // result and a "full" copy), rendering the same call two or three times in
+  // the UI. The accumulator must upsert by call_id so a call maps to exactly
+  // one ToolCall.
+  describe("tool call idempotency (one ToolCall per call_id)", () => {
+    it("duplicate running events for one call_id create a single ToolCall", () => {
+      const messages: AgentMessage[] = [];
+      const acc = new MessageAccumulator(messages);
+      acc.processEvent(assistantEvent("r1", "Editing a file."));
+      acc.processEvent(toolCallEvent("tc-dup", "edit", "running", "r1", { args: { path: "a.ts" } }));
+      acc.processEvent(toolCallEvent("tc-dup", "edit", "running", "r1", { args: { path: "a.ts" } }));
+      expect(countToolCallsWithId(messages, "tc-dup")).toBe(1);
+      expect(findToolCallById(messages, "tc-dup")!.status).toBe(ToolCallStatus.TOOL_CALL_RUNNING);
+    });
+    it("running -> completed -> running re-emit keeps a single COMPLETED ToolCall", () => {
+      const messages: AgentMessage[] = [];
+      const acc = new MessageAccumulator(messages);
+      acc.processEvent(assistantEvent("r1", "Running a tool."));
+      acc.processEvent(toolCallEvent("tc-1", "Shell", "running", "r1"));
+      acc.processEvent(toolCallEvent("tc-1", "Shell", "completed", "r1", { result: "OK" }));
+      // A late "running" re-emit must not regress the terminal status.
+      acc.processEvent(toolCallEvent("tc-1", "Shell", "running", "r1"));
+      expect(countToolCallsWithId(messages, "tc-1")).toBe(1);
+      const tc = findToolCallById(messages, "tc-1")!;
+      expect(tc.status).toBe(ToolCallStatus.TOOL_CALL_COMPLETED);
+      expect(tc.result).toBe("OK");
+      expect(tc.completedAt).toBeTruthy();
+    });
+    it("thin-then-full: a result-bearing completion populates the single ToolCall created by an empty running", () => {
+      const messages: AgentMessage[] = [];
+      const acc = new MessageAccumulator(messages);
+      // Reproduces the production pattern: two running events, then one
+      // completion that carries the full result.
+      acc.processEvent(assistantEvent("r1", "Delegating work."));
+      acc.processEvent(toolCallEvent("tc-task", "task", "running", "r1", { result: "" }));
+      acc.processEvent(toolCallEvent("tc-task", "task", "running", "r1", { result: "" }));
+      acc.processEvent(toolCallEvent("tc-task", "task", "completed", "r1", { result: "full result blob" }));
+      expect(countToolCallsWithId(messages, "tc-task")).toBe(1);
+      const tc = findToolCallById(messages, "tc-task")!;
+      expect(tc.status).toBe(ToolCallStatus.TOOL_CALL_COMPLETED);
+      expect(tc.result).toBe("full result blob");
+    });
+    it("a result-less re-emit after completion does not wipe the captured result", () => {
+      const messages: AgentMessage[] = [];
+      const acc = new MessageAccumulator(messages);
+      acc.processEvent(assistantEvent("r1", "Running a tool."));
+      acc.processEvent(toolCallEvent("tc-1", "read", "running", "r1"));
+      acc.processEvent(toolCallEvent("tc-1", "read", "completed", "r1", { result: "file contents" }));
+      acc.processEvent(toolCallEvent("tc-1", "read", "completed", "r1", { result: "" }));
+      expect(countToolCallsWithId(messages, "tc-1")).toBe(1);
+      expect(findToolCallById(messages, "tc-1")!.result).toBe("file contents");
+    });
+    it("duplicate task running events yield one task ToolCall and one sub-agent (production repro)", () => {
+      const messages: AgentMessage[] = [];
+      const acc = new MessageAccumulator(messages);
+      // Mirror the ExecuteCursor stream loop: every task tool_call event is fed
+      // to both processEvent() (tool call) and trackSubAgentExecution().
+      acc.processEvent(assistantEvent("r1", "I'll explore the repo."));
+      const args = { subagentType: { kind: "explore" }, description: "Explore repo structure and docs", prompt: "Go" };
+      const run1 = toolCallEvent("tc-explore", "task", "running", "r1", { args, result: "" });
+      acc.processEvent(run1);
+      acc.trackSubAgentExecution(run1);
+      const run2 = toolCallEvent("tc-explore", "task", "running", "r1", { args, result: "" });
+      acc.processEvent(run2);
+      acc.trackSubAgentExecution(run2);
+      const done = toolCallEvent("tc-explore", "task", "completed", "r1", { result: "explored" });
+      acc.processEvent(done);
+      acc.trackSubAgentExecution(done);
+      expect(countToolCallsWithId(messages, "tc-explore")).toBe(1);
+      expect(acc.subAgentExecutions).toHaveLength(1);
+      expect(acc.subAgentExecutions[0].id).toBe("tc-explore");
+    });
+  });
   describe("cancelInProgressSubAgentProtos standalone", () => {
     it("cancels IN_PROGRESS/PENDING protos in place and reports whether anything changed", () => {
       const running = create(SubAgentExecutionSchema, {