@stigmer/runner 3.0.2 → 3.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (27)
  1. package/dist/.build-fingerprint +1 -1
  2. package/dist/activities/execute-cursor/approval-policy.d.ts +55 -16
  3. package/dist/activities/execute-cursor/approval-policy.js +93 -31
  4. package/dist/activities/execute-cursor/approval-policy.js.map +1 -1
  5. package/dist/activities/execute-cursor/approval-state.d.ts +54 -26
  6. package/dist/activities/execute-cursor/approval-state.js +41 -26
  7. package/dist/activities/execute-cursor/approval-state.js.map +1 -1
  8. package/dist/activities/execute-cursor/hook-script.d.ts +41 -14
  9. package/dist/activities/execute-cursor/hook-script.js +155 -63
  10. package/dist/activities/execute-cursor/hook-script.js.map +1 -1
  11. package/dist/activities/execute-cursor/message-translator.d.ts +23 -0
  12. package/dist/activities/execute-cursor/message-translator.js +100 -54
  13. package/dist/activities/execute-cursor/message-translator.js.map +1 -1
  14. package/dist/activities/execute-cursor/session-lifecycle.d.ts +9 -0
  15. package/dist/activities/execute-cursor/session-lifecycle.js +11 -3
  16. package/dist/activities/execute-cursor/session-lifecycle.js.map +1 -1
  17. package/package.json +2 -2
  18. package/src/activities/execute-cursor/__tests__/approval-gate.test.ts +93 -37
  19. package/src/activities/execute-cursor/__tests__/hitl-ledger.test.ts +33 -18
  20. package/src/activities/execute-cursor/__tests__/hook-script.test.ts +204 -0
  21. package/src/activities/execute-cursor/__tests__/message-translator.test.ts +93 -0
  22. package/src/activities/execute-cursor/__tests__/session-lifecycle.test.ts +73 -2
  23. package/src/activities/execute-cursor/approval-policy.ts +113 -31
  24. package/src/activities/execute-cursor/approval-state.ts +74 -32
  25. package/src/activities/execute-cursor/hook-script.ts +157 -63
  26. package/src/activities/execute-cursor/message-translator.ts +114 -57
  27. package/src/activities/execute-cursor/session-lifecycle.ts +21 -3
@@ -0,0 +1,204 @@
1
+ /**
2
+ * Behavior tests for the generated preToolUse bash hook.
3
+ *
4
+ * These run the ACTUAL bash script the runner writes into the workspace, feeding
5
+ * it the REAL hook-input shape captured from @cursor/sdk (PascalCase
6
+ * `tool_name`; `file_path`/`command` in `tool_input`). They are the strongest
7
+ * guard against the regression this work fixes: a gated built-in must be denied,
8
+ * its denial must be recorded with a token byte-identical to the runner's
9
+ * grantToken, and an exact-resource grant must allow only that resource.
10
+ *
11
+ * Skipped automatically where bash is unavailable.
12
+ */
13
+
14
+ import { describe, it, expect, beforeAll, afterEach } from "vitest";
15
+ import { execFileSync, execSync } from "node:child_process";
16
+ import { mkdtempSync, mkdirSync, writeFileSync, rmSync, readFileSync, existsSync } from "node:fs";
17
+ import { tmpdir } from "node:os";
18
+ import { join } from "node:path";
19
+
20
+ import { generateHookScript } from "../hook-script.js";
21
+ import { buildApprovalState, grantToken, toolIdentity, type ApprovalGrant } from "../approval-state.js";
22
+ import type { McpToolPolicyEntry } from "../approval-state.js";
23
+
24
+ let hasBash = false;
25
+ try {
26
+ execSync("bash -c 'exit 0'", { stdio: "ignore" });
27
+ hasBash = true;
28
+ } catch {
29
+ hasBash = false;
30
+ }
31
+
32
+ const d = hasBash ? describe : describe.skip;
33
+
34
+ const tempDirs: string[] = [];
35
+ afterEach(() => {
36
+ for (const dir of tempDirs.splice(0)) rmSync(dir, { recursive: true, force: true });
37
+ });
38
+
39
+ interface Harness {
40
+ decide(input: object): { permission: string; raw: string };
41
+ ledger(): Array<{ toolName: string; token: string }>;
42
+ resetLedger(): void;
43
+ }
44
+
45
+ function setup(opts: {
46
+ autoApproveAll?: boolean;
47
+ grants?: ApprovalGrant[];
48
+ mcpPolicies?: Record<string, McpToolPolicyEntry>;
49
+ noStateFile?: boolean;
50
+ }): Harness {
51
+ const ws = mkdtempSync(join(tmpdir(), "hook-script-"));
52
+ tempDirs.push(ws);
53
+ const dir = join(ws, ".cursor", "hooks");
54
+ mkdirSync(dir, { recursive: true });
55
+ const statePath = join(dir, "state.json");
56
+ const ledgerPath = join(dir, "denials.jsonl");
57
+ const scriptPath = join(dir, "hook.sh");
58
+ writeFileSync(scriptPath, generateHookScript(statePath, ledgerPath), "utf-8");
59
+
60
+ if (!opts.noStateFile) {
61
+ const policies = new Map(
62
+ Object.entries(opts.mcpPolicies ?? {}).map(([name, p]) => [
63
+ `srv/${name}`,
64
+ { toolName: name, mcpServerSlug: "srv", requiresApproval: p.requiresApproval, approvalMessage: p.message ?? "" },
65
+ ]),
66
+ );
67
+ const state = buildApprovalState(policies, opts.autoApproveAll ?? false, opts.grants);
68
+ writeFileSync(statePath, JSON.stringify(state), "utf-8");
69
+ }
70
+
71
+ return {
72
+ decide(input: object) {
73
+ const raw = execFileSync("bash", [scriptPath], { input: JSON.stringify(input) }).toString();
74
+ const permission = raw.includes('"permission":"deny"') ? "deny" : raw.includes('"permission":"allow"') ? "allow" : "?";
75
+ return { permission, raw };
76
+ },
77
+ ledger() {
78
+ if (!existsSync(ledgerPath)) return [];
79
+ return readFileSync(ledgerPath, "utf-8").split("\n").filter(Boolean).map((l) => JSON.parse(l));
80
+ },
81
+ resetLedger() {
82
+ writeFileSync(ledgerPath, "", "utf-8");
83
+ },
84
+ };
85
+ }
86
+
87
+ // Real hook-input shapes (PascalCase name, file_path/command in tool_input).
88
+ const hookWrite = (filePath: string) => ({ tool_name: "Write", tool_input: { file_path: filePath, content: "x" } });
89
+ const hookShell = (command: string) => ({ tool_name: "Shell", tool_input: { command, cwd: "/x", timeout: 30000 } });
90
+ const hookDelete = (filePath: string) => ({ tool_name: "Delete", tool_input: { file_path: filePath } });
91
+ const hookRead = (filePath: string) => ({ tool_name: "Read", tool_input: { file_path: filePath } });
92
+
93
+ d("generated preToolUse hook", () => {
94
+ it("denies gated built-ins (Write/Shell/Delete) and records a category+salient token", () => {
95
+ const h = setup({});
96
+
97
+ for (const [input, category, salient] of [
98
+ [hookWrite("/x/a.txt"), "write", "/x/a.txt"],
99
+ [hookShell("rm -rf build"), "shell", "rm -rf build"],
100
+ [hookDelete("/x/b.txt"), "delete", "/x/b.txt"],
101
+ ] as const) {
102
+ h.resetLedger();
103
+ expect(h.decide(input).permission).toBe("deny");
104
+ const ledger = h.ledger();
105
+ expect(ledger).toHaveLength(1);
106
+ // Byte-identical to the runner's grantToken(category, salient).
107
+ expect(ledger[0].token).toBe(grantToken(category, salient));
108
+ }
109
+ });
110
+
111
+ it("allows read-only built-ins", () => {
112
+ const h = setup({});
113
+ expect(h.decide(hookRead("/x/a.txt")).permission).toBe("allow");
114
+ expect(h.ledger()).toEqual([]);
115
+ });
116
+
117
+ it("auto-approve-all allows even gated built-ins", () => {
118
+ const h = setup({ autoApproveAll: true });
119
+ expect(h.decide(hookWrite("/x/a.txt")).permission).toBe("allow");
120
+ });
121
+
122
+ it("allows the EXACT granted resource and re-gates any other (no name-only over-grant)", () => {
123
+ const id = toolIdentity("edit", "", { path: "/x/a.txt" });
124
+ const h = setup({ grants: [{ toolName: "edit", mcpServerSlug: "", key: id.key, salient: id.salient }] });
125
+
126
+ // Same resource the user approved -> allowed on the resumed turn.
127
+ expect(h.decide(hookWrite("/x/a.txt")).permission).toBe("allow");
128
+ // A different file is NOT covered by the grant -> still gated.
129
+ expect(h.decide(hookWrite("/x/OTHER.txt")).permission).toBe("deny");
130
+ });
131
+
132
+ it("denies require-approval MCP tools and allows them once granted (name-only)", () => {
133
+ const mcpPolicies = { apply_x: { requiresApproval: true, message: "Apply X" } };
134
+ const denyH = setup({ mcpPolicies });
135
+ expect(denyH.decide({ tool_name: "apply_x", tool_input: {} }).permission).toBe("deny");
136
+ expect(denyH.ledger()[0].token).toBe(grantToken("apply_x", ""));
137
+
138
+ const grantH = setup({
139
+ mcpPolicies,
140
+ grants: [{ toolName: "apply_x", mcpServerSlug: "srv", key: "apply_x", salient: "" }],
141
+ });
142
+ expect(grantH.decide({ tool_name: "apply_x", tool_input: {} }).permission).toBe("allow");
143
+ });
144
+
145
+ it("fails closed (deny) when the state file is missing", () => {
146
+ const h = setup({ noStateFile: true });
147
+ expect(h.decide(hookWrite("/x/a.txt")).permission).toBe("deny");
148
+ });
149
+
150
+ // Regression: the original grep-based extraction truncated string values at
151
+ // the first JSON-escaped character, so a shell command containing double
152
+ // quotes (e.g. `printf '%s' 'x' > "file"`) produced a ledger token that never
153
+ // matched the runner's grantToken — the denied call stayed COMPLETED in the
154
+ // persisted messages and a grant for it was re-denied on reinvocation.
155
+ // (Observed live in TestCursorHarness_HITL_ResumedTurn_StillGated.)
156
+ it("records a byte-identical token for commands with quotes, escapes, and newlines", () => {
157
+ const commands = [
158
+ 'printf \'%s\' \'hello\' > "/tmp/a dir/resumed-gate.txt"',
159
+ 'echo "double \\"nested\\" quotes" && echo done',
160
+ "line1\nline2\twith\ttabs",
161
+ 'unicode: caf\u00e9 \u2014 emoji \u{1F600}',
162
+ ];
163
+ for (const command of commands) {
164
+ const h = setup({});
165
+ expect(h.decide(hookShell(command)).permission).toBe("deny");
166
+ const ledger = h.ledger();
167
+ expect(ledger).toHaveLength(1);
168
+ expect(ledger[0].token).toBe(grantToken("shell", command));
169
+ }
170
+ });
171
+
172
+ it("allows the exact granted shell command even when it contains quotes", () => {
173
+ const command = 'printf \'%s\' \'hello-resume\' > "/x/resumed-gate.txt"';
174
+ const id = toolIdentity("shell", "", { command });
175
+ const h = setup({ grants: [{ toolName: "shell", mcpServerSlug: "", key: id.key, salient: id.salient }] });
176
+
177
+ expect(h.decide(hookShell(command)).permission).toBe("allow");
178
+ // A different command is NOT covered by the grant -> still gated.
179
+ expect(h.decide(hookShell('rm -rf "/x"')).permission).toBe("deny");
180
+ });
181
+
182
+ it("still denies gated tools via the bash fallback when the Node binary is unavailable", () => {
183
+ const ws = mkdtempSync(join(tmpdir(), "hook-script-fallback-"));
184
+ tempDirs.push(ws);
185
+ const dir = join(ws, ".cursor", "hooks");
186
+ mkdirSync(dir, { recursive: true });
187
+ const statePath = join(dir, "state.json");
188
+ const ledgerPath = join(dir, "denials.jsonl");
189
+ const scriptPath = join(dir, "hook.sh");
190
+ // Break the baked Node path to force the grep/cut fallback.
191
+ const script = generateHookScript(statePath, ledgerPath)
192
+ .replace(`NODE_BIN="${process.execPath}"`, 'NODE_BIN="/nonexistent/node"');
193
+ writeFileSync(scriptPath, script, "utf-8");
194
+ writeFileSync(statePath, JSON.stringify(buildApprovalState(new Map(), false)), "utf-8");
195
+
196
+ const raw = execFileSync("bash", [scriptPath], {
197
+ input: JSON.stringify(hookWrite("/x/a.txt")),
198
+ }).toString();
199
+ expect(raw).toContain('"permission":"deny"');
200
+ const ledger = readFileSync(ledgerPath, "utf-8").split("\n").filter(Boolean).map((l) => JSON.parse(l));
201
+ expect(ledger).toHaveLength(1);
202
+ expect(ledger[0].token).toBe(grantToken("write", "/x/a.txt"));
203
+ });
204
+ });
@@ -617,6 +617,99 @@ describe("MessageAccumulator tool call status transitions", () => {
617
617
  });
618
618
  });
619
619
 
620
+ // The Cursor SDK can emit the lifecycle for one call_id more than once.
621
+ // Observed in production: two "running" events ~0.5s apart for a task/edit
622
+ // tool produced two ToolCall entries with the SAME id (a "thin" copy with no
623
+ // result and a "full" copy), rendering the same call two or three times in
624
+ // the UI. The accumulator must upsert by call_id so a call maps to exactly
625
+ // one ToolCall.
626
+ describe("tool call idempotency (one ToolCall per call_id)", () => {
627
+ it("duplicate running events for one call_id create a single ToolCall", () => {
628
+ const messages: AgentMessage[] = [];
629
+ const acc = new MessageAccumulator(messages);
630
+
631
+ acc.processEvent(assistantEvent("r1", "Editing a file."));
632
+ acc.processEvent(toolCallEvent("tc-dup", "edit", "running", "r1", { args: { path: "a.ts" } }));
633
+ acc.processEvent(toolCallEvent("tc-dup", "edit", "running", "r1", { args: { path: "a.ts" } }));
634
+
635
+ expect(countToolCallsWithId(messages, "tc-dup")).toBe(1);
636
+ expect(findToolCallById(messages, "tc-dup")!.status).toBe(ToolCallStatus.TOOL_CALL_RUNNING);
637
+ });
638
+
639
+ it("running -> completed -> running re-emit keeps a single COMPLETED ToolCall", () => {
640
+ const messages: AgentMessage[] = [];
641
+ const acc = new MessageAccumulator(messages);
642
+
643
+ acc.processEvent(assistantEvent("r1", "Running a tool."));
644
+ acc.processEvent(toolCallEvent("tc-1", "Shell", "running", "r1"));
645
+ acc.processEvent(toolCallEvent("tc-1", "Shell", "completed", "r1", { result: "OK" }));
646
+ // A late "running" re-emit must not regress the terminal status.
647
+ acc.processEvent(toolCallEvent("tc-1", "Shell", "running", "r1"));
648
+
649
+ expect(countToolCallsWithId(messages, "tc-1")).toBe(1);
650
+ const tc = findToolCallById(messages, "tc-1")!;
651
+ expect(tc.status).toBe(ToolCallStatus.TOOL_CALL_COMPLETED);
652
+ expect(tc.result).toBe("OK");
653
+ expect(tc.completedAt).toBeTruthy();
654
+ });
655
+
656
+ it("thin-then-full: a result-bearing completion populates the single ToolCall created by an empty running", () => {
657
+ const messages: AgentMessage[] = [];
658
+ const acc = new MessageAccumulator(messages);
659
+
660
+ // Reproduces the production pattern: two running events, then one
661
+ // completion that carries the full result.
662
+ acc.processEvent(assistantEvent("r1", "Delegating work."));
663
+ acc.processEvent(toolCallEvent("tc-task", "task", "running", "r1", { result: "" }));
664
+ acc.processEvent(toolCallEvent("tc-task", "task", "running", "r1", { result: "" }));
665
+ acc.processEvent(toolCallEvent("tc-task", "task", "completed", "r1", { result: "full result blob" }));
666
+
667
+ expect(countToolCallsWithId(messages, "tc-task")).toBe(1);
668
+ const tc = findToolCallById(messages, "tc-task")!;
669
+ expect(tc.status).toBe(ToolCallStatus.TOOL_CALL_COMPLETED);
670
+ expect(tc.result).toBe("full result blob");
671
+ });
672
+
673
+ it("a result-less re-emit after completion does not wipe the captured result", () => {
674
+ const messages: AgentMessage[] = [];
675
+ const acc = new MessageAccumulator(messages);
676
+
677
+ acc.processEvent(assistantEvent("r1", "Running a tool."));
678
+ acc.processEvent(toolCallEvent("tc-1", "read", "running", "r1"));
679
+ acc.processEvent(toolCallEvent("tc-1", "read", "completed", "r1", { result: "file contents" }));
680
+ acc.processEvent(toolCallEvent("tc-1", "read", "completed", "r1", { result: "" }));
681
+
682
+ expect(countToolCallsWithId(messages, "tc-1")).toBe(1);
683
+ expect(findToolCallById(messages, "tc-1")!.result).toBe("file contents");
684
+ });
685
+
686
+ it("duplicate task running events yield one task ToolCall and one sub-agent (production repro)", () => {
687
+ const messages: AgentMessage[] = [];
688
+ const acc = new MessageAccumulator(messages);
689
+
690
+ // Mirror the ExecuteCursor stream loop: every task tool_call event is fed
691
+ // to both processEvent() (tool call) and trackSubAgentExecution().
692
+ acc.processEvent(assistantEvent("r1", "I'll explore the repo."));
693
+ const args = { subagentType: { kind: "explore" }, description: "Explore repo structure and docs", prompt: "Go" };
694
+
695
+ const run1 = toolCallEvent("tc-explore", "task", "running", "r1", { args, result: "" });
696
+ acc.processEvent(run1);
697
+ acc.trackSubAgentExecution(run1);
698
+
699
+ const run2 = toolCallEvent("tc-explore", "task", "running", "r1", { args, result: "" });
700
+ acc.processEvent(run2);
701
+ acc.trackSubAgentExecution(run2);
702
+
703
+ const done = toolCallEvent("tc-explore", "task", "completed", "r1", { result: "explored" });
704
+ acc.processEvent(done);
705
+ acc.trackSubAgentExecution(done);
706
+
707
+ expect(countToolCallsWithId(messages, "tc-explore")).toBe(1);
708
+ expect(acc.subAgentExecutions).toHaveLength(1);
709
+ expect(acc.subAgentExecutions[0].id).toBe("tc-explore");
710
+ });
711
+ });
712
+
620
713
  describe("cancelInProgressSubAgentProtos standalone", () => {
621
714
  it("cancels IN_PROGRESS/PENDING protos in place and reports whether anything changed", () => {
622
715
  const running = create(SubAgentExecutionSchema, {
@@ -7,12 +7,20 @@
7
7
  * collide. These invariants are correctness-critical, hence the explicit tests.
8
8
  */
9
9
 
10
- import { describe, it, expect, afterEach } from "vitest";
10
+ import { describe, it, expect, afterEach, vi } from "vitest";
11
11
  import { mkdtempSync, rmSync, existsSync } from "node:fs";
12
12
  import { tmpdir } from "node:os";
13
13
  import { join } from "node:path";
14
14
 
15
- import { resolvePlatformOptions } from "../session-lifecycle.js";
15
+ vi.mock("@cursor/sdk", () => ({
16
+ Agent: {
17
+ create: vi.fn(async () => ({ agentId: "agent-created" })),
18
+ resume: vi.fn(async () => ({ agentId: "agent-resumed" })),
19
+ },
20
+ }));
21
+
22
+ import { Agent } from "@cursor/sdk";
23
+ import { resolvePlatformOptions, createAgent, resumeAgent } from "../session-lifecycle.js";
16
24
 
17
25
  const tempRoots: string[] = [];
18
26
 
@@ -63,3 +71,66 @@ describe("resolvePlatformOptions", () => {
63
71
  expect(() => resolvePlatformOptions("ses-123", "")).toThrow(/workspaceRootDir is required/);
64
72
  });
65
73
  });
74
+
75
+ // ---------------------------------------------------------------------------
76
+ // Workspace binding across create/resume
77
+ //
78
+ // Regression: Agent.resume() does not persist local.cwd. When resumeAgent()
79
+ // omitted it, the SDK fell back to process.cwd() — re-rooting the resumed
80
+ // agent in the runner's own working directory and loading the "project"
81
+ // setting source (the .cursor/hooks.json carrying the HITL approval hook)
82
+ // from that wrong directory. Result: on every resumed turn, file edits and
83
+ // shell commands ran unguarded with no approval card (observed in production
84
+ // execution aex_01ktr5na07f5xtmn0dz3mfjtdp).
85
+ // ---------------------------------------------------------------------------
86
+
87
+ describe("workspace binding on create/resume", () => {
88
+ const baseOptions = {
89
+ apiKey: "key",
90
+ sessionId: "ses-cwd-test",
91
+ model: "gpt-test",
92
+ };
93
+
94
+ it("createAgent passes the single workspace dir as local.cwd", async () => {
95
+ const workspaceRootDir = freshWorkspaceRoot();
96
+ await createAgent({
97
+ ...baseOptions,
98
+ workspaceDirs: ["/work/repo-a"],
99
+ workspaceRootDir,
100
+ });
101
+
102
+ const callOptions = vi.mocked(Agent.create).mock.calls.at(-1)![0] as any;
103
+ expect(callOptions.local.cwd).toBe("/work/repo-a");
104
+ expect(callOptions.local.settingSources).toContain("project");
105
+ });
106
+
107
+ it("resumeAgent re-supplies local.cwd (not persisted by Agent.resume)", async () => {
108
+ const workspaceRootDir = freshWorkspaceRoot();
109
+ await resumeAgent({
110
+ ...baseOptions,
111
+ agentId: "agent-123",
112
+ workspaceDirs: ["/work/repo-a"],
113
+ workspaceRootDir,
114
+ });
115
+
116
+ const [agentId, callOptions] = vi.mocked(Agent.resume).mock.calls.at(-1)! as [string, any];
117
+ expect(agentId).toBe("agent-123");
118
+ // The load-bearing assertion: without cwd the SDK re-roots the agent at
119
+ // process.cwd() and the project HITL hook never loads on resumed turns.
120
+ expect(callOptions.local.cwd).toBe("/work/repo-a");
121
+ expect(callOptions.local.settingSources).toContain("project");
122
+ });
123
+
124
+ it("resumeAgent passes multiple workspace dirs as an array cwd", async () => {
125
+ const workspaceRootDir = freshWorkspaceRoot();
126
+ await resumeAgent({
127
+ ...baseOptions,
128
+ agentId: "agent-456",
129
+ workspaceDirs: ["/work/repo-a", "/work/repo-b"],
130
+ workspaceRootDir,
131
+ });
132
+
133
+ const callOptions = vi.mocked(Agent.resume).mock.calls.at(-1)![1] as any;
134
+ expect(callOptions.local.cwd).toEqual(["/work/repo-a", "/work/repo-b"]);
135
+ });
136
+ });
@@ -18,7 +18,9 @@
18
18
 
19
19
  import type { ToolApprovalPolicy } from "@stigmer/protos/ai/stigmer/agentic/mcpserver/v1/spec_pb";
20
20
  import type { ToolApprovalOverride } from "@stigmer/protos/ai/stigmer/agentic/agent/v1/spec_pb";
21
+ import { ToolKind } from "@stigmer/protos/ai/stigmer/agentic/agentexecution/v1/enum_pb";
21
22
  import type { ResolvedMcpServer } from "./mcp-resolver.js";
23
+ import { classifyTool } from "../../shared/tool-kind.js";
22
24
 
23
25
  /**
24
26
  * A single tool's merged approval decision after evaluating all policy layers.
@@ -31,61 +33,141 @@ export interface MergedToolPolicy {
31
33
  }
32
34
 
33
35
  /**
34
- * Built-in (non-MCP) Cursor tools that mutate the workspace or execute
35
- * commands. These require approval when auto_approve_all is false, mirroring
36
- * the native harness's DANGEROUS_PLATFORM_TOOLS (write/edit/create/delete/
37
- * execute/shell). Each value is an approval-message template resolved against
38
- * the tool args (see resolveApprovalMessage); its placeholder names the same
39
- * field the grant matcher keys on (path/command/target_notebook).
36
+ * Built-in Cursor tools the preToolUse hook gates, named as the hook receives
37
+ * them.
38
+ *
39
+ * Critical: the Cursor preToolUse hook and the SDK event stream use DIFFERENT
40
+ * tool taxonomies for the same operation. The hook's `tool_name` is PascalCase
41
+ * (`Write` for any file create/edit, `Shell`, `Delete`); the stream's
42
+ * `event.name` is lowercase (`edit`, `shell`, `delete`). This set is the HOOK
43
+ * taxonomy because it is consulted only to build the hook's gated set and its
44
+ * name->category mapping. Cross-layer correlation never compares these raw
45
+ * names — it uses {@link approvalCategory} (see below).
40
46
  */
41
- const BUILT_IN_GATED = new Map<string, string>([
42
- ["Write", "Write file: {{args.path}}"],
43
- ["StrReplace", "Edit file: {{args.path}}"],
44
- ["EditNotebook", "Edit notebook: {{args.target_notebook}}"],
45
- ["Shell", "Run command: {{args.command}}"],
46
- ["Delete", "Delete: {{args.path}}"],
47
+ const BUILT_IN_GATED: ReadonlySet<string> = new Set([
48
+ "Write",
49
+ "StrReplace",
50
+ "EditNotebook",
51
+ "Shell",
52
+ "Delete",
47
53
  ]);
48
54
 
55
+ /**
56
+ * Canonical approval category for a gated tool, derived from EITHER taxonomy's
57
+ * name via the shared {@link classifyTool}.
58
+ *
59
+ * The hook (`Write`/`Shell`/`Delete`) and the stream (`edit`/`shell`/`delete`)
60
+ * name the same operation differently, so neither raw name is a stable
61
+ * cross-layer identity. The category collapses both onto one value so the denial
62
+ * ledger (recorded by the hook) correlates to the streamed tool call (read by
63
+ * the runner) and so an approval grant matches the agent's re-attempt on
64
+ * reinvocation regardless of which taxonomy named it. `FILE_WRITE` and
65
+ * `FILE_EDIT` both map to `write` because the Cursor hook reports every file
66
+ * mutation — create or edit — as `Write`.
67
+ *
68
+ * Returns undefined for non-gated tools (read-only built-ins, MCP tools, and
69
+ * anything `classifyTool` does not place in a mutating kind).
70
+ */
71
+ export type ApprovalCategory = "write" | "delete" | "shell";
72
+
73
+ export function approvalCategory(toolName: string): ApprovalCategory | undefined {
74
+ switch (classifyTool(toolName)) {
75
+ case ToolKind.FILE_WRITE:
76
+ case ToolKind.FILE_EDIT:
77
+ return "write";
78
+ case ToolKind.FILE_DELETE:
79
+ return "delete";
80
+ case ToolKind.SHELL:
81
+ return "shell";
82
+ default:
83
+ return undefined;
84
+ }
85
+ }
86
+
87
+ /**
88
+ * Human-readable approval-message template per canonical category. Keyed by
89
+ * category (not raw tool name) so a denial surfaced from either taxonomy renders
90
+ * the same message. Placeholders resolve against the tool args via
91
+ * {@link resolveApprovalMessage}; `{{args.path}}` and `{{args.command}}` are the
92
+ * stream-side field names (the runner builds the approval surface from the
93
+ * streamed tool call, whose args use `path`/`command`).
94
+ */
95
+ const CATEGORY_APPROVAL_MESSAGE: Record<ApprovalCategory, string> = {
96
+ write: "Write file: {{args.path}}",
97
+ delete: "Delete: {{args.path}}",
98
+ shell: "Run command: {{args.command}}",
99
+ };
100
+
49
101
  /**
50
102
  * Top-level tool-argument fields, in priority order, that identify the specific
51
- * resource a built-in tool acts on. Used to render approval messages and to key
52
- * HITL approval grants (see approval-state.ts). Authored here once and injected
53
- * into the generated preToolUse hook script so the runner and the hook always
54
- * agree on which field to match.
103
+ * resource a built-in tool acts on. The list deliberately spans BOTH taxonomies'
104
+ * arg shapes: the hook input names a file `file_path` and the stream names it
105
+ * `path`; both name a shell command `command`. Extracting the same resource
106
+ * VALUE on both sides (the absolute path / the command string) is what lets the
107
+ * hook-recorded denial token equal the stream-computed token. Authored here once
108
+ * and injected into the generated preToolUse hook script so the runner and the
109
+ * hook never disagree on which field to match.
55
110
  */
56
- export const SALIENT_ARG_FIELDS = ["path", "command", "target_notebook"] as const;
111
+ export const SALIENT_ARG_FIELDS = ["file_path", "path", "target_notebook", "command"] as const;
57
112
 
58
113
  /**
59
114
  * Check whether a built-in (non-MCP) Cursor tool requires user approval.
60
115
  *
61
- * Only the explicitly gated, mutating/destructive tools require approval;
62
- * everything else (read-only built-ins, and at the hook layer auto-approved
63
- * MCP tools) is allowed. This "gate the dangerous set, allow the rest" model
64
- * mirrors the native harness's resolveToolApproval, which also defaults
65
- * unlisted tools to no-approval. It is deliberately fail-OPEN for unknown
66
- * tools: the merged MCP policy map carries only the tools that REQUIRE
67
- * approval, so a fail-closed default would wrongly deny every auto-approved
68
- * MCP tool, which the hook cannot distinguish from an unknown built-in by name.
116
+ * Resolved via {@link approvalCategory} so it answers correctly for BOTH
117
+ * taxonomies — the hook's `Write`/`Shell`/`Delete` and the stream's
118
+ * `edit`/`shell`/`delete` all return true. Only mutating/destructive tools are
119
+ * gated; everything else (read-only built-ins, and at the hook layer —
120
+ * auto-approved MCP tools) is allowed. This "gate the dangerous set, allow the
121
+ * rest" model mirrors the native harness's resolveToolApproval. It is
122
+ * deliberately fail-OPEN for unknown tools: the merged MCP policy map carries
123
+ * only the tools that REQUIRE approval, so a fail-closed default would wrongly
124
+ * deny every auto-approved MCP tool, which the hook cannot distinguish from an
125
+ * unknown built-in by name.
69
126
  */
70
127
  export function builtInRequiresApproval(toolName: string): boolean {
71
- return BUILT_IN_GATED.has(toolName);
128
+ return approvalCategory(toolName) !== undefined;
72
129
  }
73
130
 
74
131
  /**
75
132
  * Returns the built-in tool names that require approval (the gated set the
76
133
  * preToolUse hook denies unless auto-approved or granted on reinvocation).
134
+ *
135
+ * These are HOOK-taxonomy names (PascalCase), because the hook matches its own
136
+ * `tool_name`. See {@link approvalCategory} for the cross-layer identity.
77
137
  */
78
138
  export function getBuiltInGatedList(): string[] {
79
- return [...BUILT_IN_GATED.keys()];
139
+ return [...BUILT_IN_GATED];
140
+ }
141
+
142
+ /**
143
+ * Returns the gated built-in tools as `(hookToolName, category)` pairs.
144
+ *
145
+ * Injected into the generated preToolUse hook so the bash script can map its
146
+ * incoming `tool_name` to the canonical category used for the denial/grant
147
+ * token — the same category the runner computes from the stream side via
148
+ * {@link approvalCategory}. Authoring it here keeps the mapping single-sourced;
149
+ * a gated built-in with no category would be a programming error, so it is
150
+ * filtered out (and would simply not be gated rather than crash the hook).
151
+ */
152
+ export function getBuiltInGatedCategories(): Array<[string, ApprovalCategory]> {
153
+ const pairs: Array<[string, ApprovalCategory]> = [];
154
+ for (const name of BUILT_IN_GATED) {
155
+ const category = approvalCategory(name);
156
+ if (category) pairs.push([name, category]);
157
+ }
158
+ return pairs;
80
159
  }
81
160
 
82
161
  /**
83
- * Approval-message template for a gated built-in tool, or undefined when the
84
- * tool is not a known gated built-in. Callers resolve the placeholders against
85
- * the tool args via resolveApprovalMessage.
162
+ * Approval-message template for a gated built-in tool (either taxonomy), or
163
+ * undefined when the tool is not gated. Resolved via {@link approvalCategory}
164
+ * so stream-side names (`edit`/`shell`/`delete`) and hook-side names
165
+ * (`Write`/`Shell`/`Delete`) both map to the same template. Callers resolve the
166
+ * placeholders against the tool args via resolveApprovalMessage.
86
167
  */
87
168
  export function getBuiltInApprovalMessage(toolName: string): string | undefined {
88
- return BUILT_IN_GATED.get(toolName);
169
+ const category = approvalCategory(toolName);
170
+ return category ? CATEGORY_APPROVAL_MESSAGE[category] : undefined;
89
171
  }
90
172
 
91
173
  /**