@stigmer/runner 3.0.2-dev.20260609093630 → 3.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,14 +1,15 @@
1
1
  /**
2
2
  * Unit tests for the Cursor-harness HITL approval gate logic.
3
3
  *
4
- * Covers the pure policy/grant/prompt builders that drive the preToolUse hook:
5
- * - built-in tool gating (mutating gated, read-only + unknown allowed)
6
- * - salient-arg extraction (grant matching key)
7
- * - grant building from adjudicated approvals
8
- * - approval-state file content (gated list, MCP policies, grant tokens)
9
- * - the human-meaningful reinvocation prompt
4
+ * The crux this suite guards: the Cursor preToolUse hook and the SDK event
5
+ * stream use DIFFERENT tool taxonomies for the same operation (hook
6
+ * `Write`/`Shell`/`Delete` with `file_path`/`command`; stream
7
+ * `edit`/`shell`/`delete` with `path`/`command`). Correlation therefore keys on
8
+ * a canonical {@link approvalCategory} + the salient resource VALUE, not the raw
9
+ * tool name. These tests assert that invariant against BOTH taxonomies so a
10
+ * future SDK tool rename fails loudly instead of silently disabling the gate.
10
11
  *
11
- * These are deterministic and need no Cursor API key.
12
+ * Deterministic; no Cursor API key required.
12
13
  */
13
14
 
14
15
  import { describe, it, expect } from "vitest";
@@ -18,9 +19,11 @@ import type { PendingApproval } from "@stigmer/protos/ai/stigmer/agentic/agentex
18
19
  import { ApprovalAction } from "@stigmer/protos/ai/stigmer/agentic/agentexecution/v1/enum_pb";
19
20
 
20
21
  import {
22
+ approvalCategory,
21
23
  builtInRequiresApproval,
22
24
  getBuiltInApprovalMessage,
23
25
  getBuiltInGatedList,
26
+ getBuiltInGatedCategories,
24
27
  extractArgKey,
25
28
  } from "../approval-policy.js";
26
29
  import type { MergedToolPolicy } from "../approval-policy.js";
@@ -28,13 +31,14 @@ import {
28
31
  buildApprovalGrants,
29
32
  buildApprovalState,
30
33
  grantToken,
34
+ toolIdentity,
31
35
  } from "../approval-state.js";
32
36
  import { buildReinvocationPrompt } from "../prompt-builder.js";
33
37
 
34
38
  function pending(overrides: Partial<PendingApproval>): PendingApproval {
35
39
  return create(PendingApprovalSchema, {
36
40
  toolCallId: "call-1",
37
- toolName: "Write",
41
+ toolName: "edit",
38
42
  message: "",
39
43
  argsPreview: "",
40
44
  mcpServerSlug: "",
@@ -42,15 +46,47 @@ function pending(overrides: Partial<PendingApproval>): PendingApproval {
42
46
  });
43
47
  }
44
48
 
49
+ // The real ground-truth taxonomies (captured from @cursor/sdk via live probe).
50
+ const HOOK_NAMES = { write: "Write", shell: "Shell", del: "Delete", read: "Read" };
51
+ const STREAM_NAMES = { write: "edit", shell: "shell", del: "delete", read: "read" };
52
+
53
+ describe("approvalCategory (cross-taxonomy drift-guard)", () => {
54
+ it("maps the HOOK taxonomy (PascalCase) to canonical categories", () => {
55
+ expect(approvalCategory("Write")).toBe("write");
56
+ expect(approvalCategory("StrReplace")).toBe("write");
57
+ expect(approvalCategory("EditNotebook")).toBe("write");
58
+ expect(approvalCategory("Delete")).toBe("delete");
59
+ expect(approvalCategory("Shell")).toBe("shell");
60
+ });
61
+
62
+ it("maps the STREAM taxonomy (lowercase) to the SAME categories", () => {
63
+ expect(approvalCategory("write")).toBe("write");
64
+ expect(approvalCategory("edit")).toBe("write");
65
+ expect(approvalCategory("delete")).toBe("delete");
66
+ expect(approvalCategory("shell")).toBe("shell");
67
+ expect(approvalCategory("execute")).toBe("shell");
68
+ });
69
+
70
+ it("a file mutation collapses to `write` on BOTH sides (hook Write == stream edit)", () => {
71
+ expect(approvalCategory(HOOK_NAMES.write)).toBe(approvalCategory(STREAM_NAMES.write));
72
+ });
73
+
74
+ it("returns undefined for read-only / non-gated tools", () => {
75
+ for (const t of ["read", "Read", "glob", "Glob", "grep", "Grep", "ls", "think", "task"]) {
76
+ expect(approvalCategory(t)).toBeUndefined();
77
+ }
78
+ });
79
+ });
80
+
45
81
  describe("builtInRequiresApproval", () => {
46
- it("gates mutating/destructive built-in tools", () => {
47
- for (const t of ["Write", "StrReplace", "EditNotebook", "Shell", "Delete"]) {
82
+ it("gates mutating/destructive tools in BOTH taxonomies", () => {
83
+ for (const t of ["Write", "StrReplace", "EditNotebook", "Shell", "Delete", "edit", "shell", "delete", "execute", "write"]) {
48
84
  expect(builtInRequiresApproval(t)).toBe(true);
49
85
  }
50
86
  });
51
87
 
52
88
  it("allows read-only built-in tools", () => {
53
- for (const t of ["Read", "Grep", "Glob", "SemanticSearch", "WebFetch", "ReadLints"]) {
89
+ for (const t of ["Read", "read", "Grep", "grep", "Glob", "glob", "ls", "think", "task"]) {
54
90
  expect(builtInRequiresApproval(t)).toBe(false);
55
91
  }
56
92
  });
@@ -60,27 +96,38 @@ describe("builtInRequiresApproval", () => {
60
96
  expect(builtInRequiresApproval("search_services")).toBe(false);
61
97
  });
62
98
 
63
- it("exposes the gated set", () => {
99
+ it("exposes the gated set in the HOOK taxonomy (what the hook matches)", () => {
64
100
  expect(getBuiltInGatedList()).toEqual(
65
101
  expect.arrayContaining(["Write", "StrReplace", "EditNotebook", "Shell", "Delete"]),
66
102
  );
67
103
  });
104
+
105
+ it("every gated built-in resolves to a category (no ungated hole)", () => {
106
+ for (const name of getBuiltInGatedList()) {
107
+ expect(approvalCategory(name)).toBeDefined();
108
+ }
109
+ // The injected hook map covers exactly the gated set.
110
+ expect(getBuiltInGatedCategories().map(([n]) => n).sort()).toEqual(getBuiltInGatedList().sort());
111
+ });
68
112
  });
69
113
 
70
114
  describe("getBuiltInApprovalMessage", () => {
71
- it("returns a template for gated tools and undefined otherwise", () => {
115
+ it("returns a category template for gated tools in EITHER taxonomy", () => {
72
116
  expect(getBuiltInApprovalMessage("Write")).toContain("{{args.path}}");
117
+ expect(getBuiltInApprovalMessage("edit")).toContain("{{args.path}}");
73
118
  expect(getBuiltInApprovalMessage("Shell")).toContain("{{args.command}}");
119
+ expect(getBuiltInApprovalMessage("shell")).toContain("{{args.command}}");
74
120
  expect(getBuiltInApprovalMessage("Read")).toBeUndefined();
121
+ expect(getBuiltInApprovalMessage("read")).toBeUndefined();
75
122
  });
76
123
  });
77
124
 
78
- describe("extractArgKey", () => {
79
- it("extracts the salient field by priority (path > command > target_notebook)", () => {
80
- expect(extractArgKey({ path: "a.txt" })).toBe("a.txt");
125
+ describe("extractArgKey (spans both taxonomies' field names)", () => {
126
+ it("extracts the salient value regardless of field name (file_path or path)", () => {
127
+ expect(extractArgKey({ file_path: "a.txt" })).toBe("a.txt"); // hook shape
128
+ expect(extractArgKey({ path: "a.txt" })).toBe("a.txt"); // stream shape
81
129
  expect(extractArgKey({ command: "ls -la" })).toBe("ls -la");
82
130
  expect(extractArgKey({ target_notebook: "nb.ipynb" })).toBe("nb.ipynb");
83
- expect(extractArgKey({ path: "a.txt", command: "ls" })).toBe("a.txt");
84
131
  });
85
132
 
86
133
  it("returns empty string when no salient field is present", () => {
@@ -90,24 +137,33 @@ describe("extractArgKey", () => {
90
137
  });
91
138
  });
92
139
 
93
- describe("grantToken", () => {
94
- it("is byte-identical to base64(toolName \\n argKey)", () => {
95
- expect(grantToken("Write", "gated.txt")).toBe(
96
- Buffer.from("Write\ngated.txt", "utf-8").toString("base64"),
97
- );
98
- expect(grantToken("search_services", "")).toBe(
99
- Buffer.from("search_services\n", "utf-8").toString("base64"),
140
+ describe("toolIdentity + grantToken (canonical, taxonomy-agnostic)", () => {
141
+ it("a hook Write and a stream edit on the SAME path produce the SAME token", () => {
142
+ const hook = toolIdentity("Write", "", { file_path: "/x/a.txt" });
143
+ const stream = toolIdentity("edit", "", { path: "/x/a.txt" });
144
+ expect(hook).toEqual({ key: "write", salient: "/x/a.txt" });
145
+ expect(stream).toEqual({ key: "write", salient: "/x/a.txt" });
146
+ expect(grantToken(hook.key, hook.salient)).toBe(grantToken(stream.key, stream.salient));
147
+ });
148
+
149
+ it("encodes as base64(key \\n salient)", () => {
150
+ expect(grantToken("write", "/x/a.txt")).toBe(
151
+ Buffer.from("write\n/x/a.txt", "utf-8").toString("base64"),
100
152
  );
101
153
  });
154
+
155
+ it("MCP tools key on name only (consistent across layers)", () => {
156
+ expect(toolIdentity("apply_x", "planton", { path: "ignored" })).toEqual({ key: "apply_x", salient: "" });
157
+ });
102
158
  });
103
159
 
104
160
  describe("buildApprovalGrants", () => {
105
- it("creates an arg-keyed grant for an approved built-in tool", () => {
161
+ it("creates an exact-resource grant for an approved built-in (stream-named) tool", () => {
106
162
  const grants = buildApprovalGrants(
107
- [pending({ toolCallId: "c1", toolName: "Write", argsPreview: JSON.stringify({ path: "gated.txt" }) })],
163
+ [pending({ toolCallId: "c1", toolName: "edit", argsPreview: JSON.stringify({ path: "/x/gated.txt" }) })],
108
164
  new Map([["c1", ApprovalAction.APPROVE]]),
109
165
  );
110
- expect(grants).toEqual([{ toolName: "Write", mcpServerSlug: "", argKey: "gated.txt" }]);
166
+ expect(grants).toEqual([{ toolName: "edit", mcpServerSlug: "", key: "write", salient: "/x/gated.txt" }]);
111
167
  });
112
168
 
113
169
  it("creates a name-only grant for an approved MCP tool", () => {
@@ -115,14 +171,14 @@ describe("buildApprovalGrants", () => {
115
171
  [pending({ toolCallId: "c1", toolName: "apply_x", mcpServerSlug: "planton", argsPreview: JSON.stringify({ path: "ignored" }) })],
116
172
  new Map([["c1", ApprovalAction.APPROVE]]),
117
173
  );
118
- expect(grants).toEqual([{ toolName: "apply_x", mcpServerSlug: "planton", argKey: "" }]);
174
+ expect(grants).toEqual([{ toolName: "apply_x", mcpServerSlug: "planton", key: "apply_x", salient: "" }]);
119
175
  });
120
176
 
121
177
  it("ignores skipped and rejected approvals", () => {
122
178
  const grants = buildApprovalGrants(
123
179
  [
124
- pending({ toolCallId: "c1", toolName: "Write", argsPreview: JSON.stringify({ path: "a" }) }),
125
- pending({ toolCallId: "c2", toolName: "Shell", argsPreview: JSON.stringify({ command: "rm" }) }),
180
+ pending({ toolCallId: "c1", toolName: "edit", argsPreview: JSON.stringify({ path: "a" }) }),
181
+ pending({ toolCallId: "c2", toolName: "shell", argsPreview: JSON.stringify({ command: "rm" }) }),
126
182
  ],
127
183
  new Map([
128
184
  ["c1", ApprovalAction.SKIP],
@@ -138,14 +194,15 @@ describe("buildApprovalState", () => {
138
194
  ["planton/apply_x", { toolName: "apply_x", mcpServerSlug: "planton", requiresApproval: true, approvalMessage: "Apply X" }],
139
195
  ]);
140
196
 
141
- it("carries the gated list, MCP policies, and grant tokens", () => {
142
- const grants = [{ toolName: "Write", mcpServerSlug: "", argKey: "gated.txt" }];
197
+ it("carries MCP policies and exact-resource grant tokens (gated set is baked into the hook, not the state)", () => {
198
+ const grants = [{ toolName: "edit", mcpServerSlug: "", key: "write", salient: "/x/gated.txt" }];
143
199
  const state = buildApprovalState(mcpPolicies, false, grants);
144
200
 
145
201
  expect(state.autoApproveAll).toBe(false);
146
- expect(state.builtInGatedList).toEqual(expect.arrayContaining(["Write", "Shell"]));
147
202
  expect(state.mcpToolPolicies.apply_x).toEqual({ requiresApproval: true, message: "Apply X" });
148
- expect(state.approvedGrantTokens).toEqual([grantToken("Write", "gated.txt")]);
203
+ expect(state.approvedGrantTokens).toEqual([grantToken("write", "/x/gated.txt")]);
204
+ // builtInGatedList is no longer part of the state file (baked into the hook).
205
+ expect((state as Record<string, unknown>).builtInGatedList).toBeUndefined();
149
206
  });
150
207
 
151
208
  it("defaults grants to empty when none provided", () => {
@@ -160,8 +217,8 @@ describe("buildReinvocationPrompt", () => {
160
217
  it("describes approved and skipped actions in human terms, not opaque ids", () => {
161
218
  const prompt = buildReinvocationPrompt(
162
219
  [
163
- pending({ toolCallId: "c1", toolName: "Write", message: "Write file: gated.txt" }),
164
- pending({ toolCallId: "c2", toolName: "Shell", message: "Run command: rm -rf build" }),
220
+ pending({ toolCallId: "c1", toolName: "edit", message: "Write file: gated.txt" }),
221
+ pending({ toolCallId: "c2", toolName: "shell", message: "Run command: rm -rf build" }),
165
222
  ],
166
223
  new Map([
167
224
  ["c1", ApprovalAction.APPROVE],
@@ -173,7 +230,6 @@ describe("buildReinvocationPrompt", () => {
173
230
  expect(prompt).toContain("Write file: gated.txt");
174
231
  expect(prompt).toContain("SKIPPED");
175
232
  expect(prompt).toContain("Run command: rm -rf build");
176
- // No opaque tool-call ids leak into the prompt.
177
233
  expect(prompt).not.toContain("c1");
178
234
  expect(prompt).not.toContain("c2");
179
235
  });
@@ -63,10 +63,14 @@ function makeWorkspace(): string {
63
63
  return dir;
64
64
  }
65
65
 
66
+ // Stream tool calls use the lowercase SDK taxonomy (edit/shell/delete); the
67
+ // denial ledger uses the hook taxonomy (Write/Shell/Delete) + a canonical
68
+ // category+salient token. The two correlate via approvalCategory — that cross-
69
+ // taxonomy match is exactly what these tests pin.
66
70
  function toolCall(overrides: Partial<ToolCall>): ToolCall {
67
71
  return create(ToolCallSchema, {
68
72
  id: "call-1",
69
- name: "Write",
73
+ name: "edit",
70
74
  status: ToolCallStatus.TOOL_CALL_COMPLETED,
71
75
  ...overrides,
72
76
  });
@@ -96,8 +100,8 @@ describe("denial ledger reset/read", () => {
96
100
  it("parses appended JSONL denials and tolerates blank/partial lines", async () => {
97
101
  const ws = makeWorkspace();
98
102
  await resetDenialLedger(ws);
99
- const writeToken = grantToken("Write", "gated.txt");
100
- const shellToken = grantToken("Shell", "rm -rf build");
103
+ const writeToken = grantToken("write", "gated.txt");
104
+ const shellToken = grantToken("shell", "rm -rf build");
101
105
  // Simulate the hook appending records, including a trailing partial line.
102
106
  await writeFile(
103
107
  denialLedgerPath(ws),
@@ -117,10 +121,13 @@ describe("denial ledger reset/read", () => {
117
121
  });
118
122
 
119
123
  describe("reconcileDeniedToolCalls", () => {
120
- it("overlays WAITING_APPROVAL onto a denied tool reported as completed (the green-check bug)", () => {
124
+ it("overlays WAITING_APPROVAL onto the REAL denied tool reported as completed (the green-check bug)", () => {
125
+ // Stream reports the file mutation as `edit` (RUNNING/COMPLETED); the hook
126
+ // denied it as `Write`. The category+salient token bridges the two so the
127
+ // overlay lands on this exact streamed tool call — no synthesized placeholder.
121
128
  const tc = toolCall({
122
129
  id: "c1",
123
- name: "Write",
130
+ name: "edit",
124
131
  status: ToolCallStatus.TOOL_CALL_COMPLETED,
125
132
  completedAt: "2026-06-07T00:00:00Z",
126
133
  result: "wrote file",
@@ -130,10 +137,14 @@ describe("reconcileDeniedToolCalls", () => {
130
137
  const messages = [aiMessageWith([tc])];
131
138
 
132
139
  const reconciled = reconcileDeniedToolCalls(messages, [
133
- { toolName: "Write", token: grantToken("Write", "gated.txt") },
140
+ { toolName: "Write", token: grantToken("write", "gated.txt") },
134
141
  ]);
135
142
 
136
143
  expect(reconciled).toHaveLength(1);
144
+ // The overlay marked the REAL streamed tool call — no synthesized placeholder
145
+ // and no orphan was appended.
146
+ expect(reconciled[0]).toBe(tc);
147
+ expect(messages[0].toolCalls).toHaveLength(1);
137
148
  expect(tc.status).toBe(ToolCallStatus.TOOL_CALL_WAITING_APPROVAL);
138
149
  expect(tc.requiresApproval).toBe(true);
139
150
  expect(tc.approvalMessage).toContain("gated.txt");
@@ -162,7 +173,7 @@ describe("reconcileDeniedToolCalls", () => {
162
173
  }],
163
174
  ]);
164
175
 
165
- // MCP tools are keyed name-only (mirrors the grant convention).
176
+ // MCP tools are keyed name-only (their name is consistent across layers).
166
177
  reconcileDeniedToolCalls(messages, [
167
178
  { toolName: "apply_x", token: grantToken("apply_x", "") },
168
179
  ], policies);
@@ -174,20 +185,20 @@ describe("reconcileDeniedToolCalls", () => {
174
185
  it("leaves non-denied tool calls untouched while overlaying the denied one", () => {
175
186
  const denied = toolCall({
176
187
  id: "c1",
177
- name: "Write",
188
+ name: "edit",
178
189
  status: ToolCallStatus.TOOL_CALL_COMPLETED,
179
190
  args: { path: "gated.txt" },
180
191
  });
181
192
  const allowed = toolCall({
182
193
  id: "c2",
183
- name: "Read",
194
+ name: "read",
184
195
  status: ToolCallStatus.TOOL_CALL_COMPLETED,
185
196
  args: { path: "readme.md" },
186
197
  });
187
198
  const messages = [aiMessageWith([denied, allowed])];
188
199
 
189
200
  const reconciled = reconcileDeniedToolCalls(messages, [
190
- { toolName: "Write", token: grantToken("Write", "gated.txt") },
201
+ { toolName: "Write", token: grantToken("write", "gated.txt") },
191
202
  ]);
192
203
 
193
204
  // Only the denied call is gated; the read-only call keeps its status and no
@@ -199,12 +210,12 @@ describe("reconcileDeniedToolCalls", () => {
199
210
  });
200
211
 
201
212
  it("collapses repeated denials of the same resource to a single approval", () => {
202
- const first = toolCall({ id: "c1", name: "Write", args: { path: "gated.txt" } });
203
- const second = toolCall({ id: "c2", name: "Write", args: { path: "gated.txt" } });
213
+ const first = toolCall({ id: "c1", name: "edit", args: { path: "gated.txt" } });
214
+ const second = toolCall({ id: "c2", name: "edit", args: { path: "gated.txt" } });
204
215
  const messages = [aiMessageWith([first, second])];
205
216
 
206
217
  const reconciled = reconcileDeniedToolCalls(messages, [
207
- { toolName: "Write", token: grantToken("Write", "gated.txt") },
218
+ { toolName: "Write", token: grantToken("write", "gated.txt") },
208
219
  ]);
209
220
 
210
221
  // One approval anchor (so the backend gate resolves cleanly on one decision).
@@ -217,15 +228,19 @@ describe("reconcileDeniedToolCalls", () => {
217
228
  const messages = [aiMessageWith([])];
218
229
 
219
230
  const reconciled = reconcileDeniedToolCalls(messages, [
220
- { toolName: "Shell", token: grantToken("Shell", "rm -rf build") },
231
+ { toolName: "Shell", token: grantToken("shell", "rm -rf build") },
221
232
  ]);
222
233
 
223
234
  expect(reconciled).toHaveLength(1);
224
235
  const synthesized = messages[0].toolCalls[0];
225
236
  expect(synthesized.status).toBe(ToolCallStatus.TOOL_CALL_WAITING_APPROVAL);
226
237
  expect(synthesized.requiresApproval).toBe(true);
238
+ // The synthesized fallback shows the hook's raw tool name for display...
227
239
  expect(synthesized.name).toBe("Shell");
228
240
  expect(synthesized.approvalMessage).toContain("rm -rf build");
241
+ // ...and carries the salient so the grant rebuilt from it keys on the same
242
+ // resource the hook will see on the re-attempt.
243
+ expect(synthesized.argsPreview).toContain("rm -rf build");
229
244
  });
230
245
 
231
246
  it("is a no-op when the ledger is empty", () => {
@@ -240,7 +255,7 @@ describe("reconstructAdjudicatedApprovals", () => {
240
255
  it("reads decisions and rebuilds pending approvals from adjudicated tool calls", () => {
241
256
  const approved = toolCall({
242
257
  id: "c1",
243
- name: "Write",
258
+ name: "edit",
244
259
  status: ToolCallStatus.TOOL_CALL_WAITING_APPROVAL,
245
260
  approvalAction: ApprovalAction.APPROVE,
246
261
  approvalMessage: "Write file: gated.txt",
@@ -248,13 +263,13 @@ describe("reconstructAdjudicatedApprovals", () => {
248
263
  });
249
264
  const undecided = toolCall({
250
265
  id: "c2",
251
- name: "Shell",
266
+ name: "shell",
252
267
  status: ToolCallStatus.TOOL_CALL_WAITING_APPROVAL,
253
268
  approvalAction: ApprovalAction.UNSPECIFIED,
254
269
  });
255
270
  const unrelated = toolCall({
256
271
  id: "c3",
257
- name: "Read",
272
+ name: "read",
258
273
  status: ToolCallStatus.TOOL_CALL_COMPLETED,
259
274
  approvalAction: ApprovalAction.APPROVE,
260
275
  });
@@ -265,7 +280,7 @@ describe("reconstructAdjudicatedApprovals", () => {
265
280
  expect([...decisions.entries()]).toEqual([["c1", ApprovalAction.APPROVE]]);
266
281
  expect(pendingApprovals).toHaveLength(1);
267
282
  expect(pendingApprovals[0].toolCallId).toBe("c1");
268
- expect(pendingApprovals[0].toolName).toBe("Write");
283
+ expect(pendingApprovals[0].toolName).toBe("edit");
269
284
  expect(pendingApprovals[0].argsPreview).toBe(JSON.stringify({ path: "gated.txt" }));
270
285
  });
271
286
 
@@ -0,0 +1,149 @@
1
+ /**
2
+ * Behavior tests for the generated preToolUse bash hook.
3
+ *
4
+ * These run the ACTUAL bash script the runner writes into the workspace, feeding
5
+ * it the REAL hook-input shape captured from @cursor/sdk (PascalCase
6
+ * `tool_name`; `file_path`/`command` in `tool_input`). They are the strongest
7
+ * guard against the regression this work fixes: a gated built-in must be denied,
8
+ * its denial must be recorded with a token byte-identical to the runner's
9
+ * grantToken, and an exact-resource grant must allow only that resource.
10
+ *
11
+ * Skipped automatically where bash is unavailable.
12
+ */
13
+
14
+ import { describe, it, expect, beforeAll, afterEach } from "vitest";
15
+ import { execFileSync, execSync } from "node:child_process";
16
+ import { mkdtempSync, mkdirSync, writeFileSync, rmSync, readFileSync, existsSync } from "node:fs";
17
+ import { tmpdir } from "node:os";
18
+ import { join } from "node:path";
19
+
20
+ import { generateHookScript } from "../hook-script.js";
21
+ import { buildApprovalState, grantToken, toolIdentity, type ApprovalGrant } from "../approval-state.js";
22
+ import type { McpToolPolicyEntry } from "../approval-state.js";
23
+
24
+ let hasBash = false;
25
+ try {
26
+ execSync("bash -c 'exit 0'", { stdio: "ignore" });
27
+ hasBash = true;
28
+ } catch {
29
+ hasBash = false;
30
+ }
31
+
32
+ const d = hasBash ? describe : describe.skip;
33
+
34
+ const tempDirs: string[] = [];
35
+ afterEach(() => {
36
+ for (const dir of tempDirs.splice(0)) rmSync(dir, { recursive: true, force: true });
37
+ });
38
+
39
+ interface Harness {
40
+ decide(input: object): { permission: string; raw: string };
41
+ ledger(): Array<{ toolName: string; token: string }>;
42
+ resetLedger(): void;
43
+ }
44
+
45
+ function setup(opts: {
46
+ autoApproveAll?: boolean;
47
+ grants?: ApprovalGrant[];
48
+ mcpPolicies?: Record<string, McpToolPolicyEntry>;
49
+ noStateFile?: boolean;
50
+ }): Harness {
51
+ const ws = mkdtempSync(join(tmpdir(), "hook-script-"));
52
+ tempDirs.push(ws);
53
+ const dir = join(ws, ".cursor", "hooks");
54
+ mkdirSync(dir, { recursive: true });
55
+ const statePath = join(dir, "state.json");
56
+ const ledgerPath = join(dir, "denials.jsonl");
57
+ const scriptPath = join(dir, "hook.sh");
58
+ writeFileSync(scriptPath, generateHookScript(statePath, ledgerPath), "utf-8");
59
+
60
+ if (!opts.noStateFile) {
61
+ const policies = new Map(
62
+ Object.entries(opts.mcpPolicies ?? {}).map(([name, p]) => [
63
+ `srv/${name}`,
64
+ { toolName: name, mcpServerSlug: "srv", requiresApproval: p.requiresApproval, approvalMessage: p.message ?? "" },
65
+ ]),
66
+ );
67
+ const state = buildApprovalState(policies, opts.autoApproveAll ?? false, opts.grants);
68
+ writeFileSync(statePath, JSON.stringify(state), "utf-8");
69
+ }
70
+
71
+ return {
72
+ decide(input: object) {
73
+ const raw = execFileSync("bash", [scriptPath], { input: JSON.stringify(input) }).toString();
74
+ const permission = raw.includes('"permission":"deny"') ? "deny" : raw.includes('"permission":"allow"') ? "allow" : "?";
75
+ return { permission, raw };
76
+ },
77
+ ledger() {
78
+ if (!existsSync(ledgerPath)) return [];
79
+ return readFileSync(ledgerPath, "utf-8").split("\n").filter(Boolean).map((l) => JSON.parse(l));
80
+ },
81
+ resetLedger() {
82
+ writeFileSync(ledgerPath, "", "utf-8");
83
+ },
84
+ };
85
+ }
86
+
87
+ // Real hook-input shapes (PascalCase name, file_path/command in tool_input).
88
+ const hookWrite = (filePath: string) => ({ tool_name: "Write", tool_input: { file_path: filePath, content: "x" } });
89
+ const hookShell = (command: string) => ({ tool_name: "Shell", tool_input: { command, cwd: "/x", timeout: 30000 } });
90
+ const hookDelete = (filePath: string) => ({ tool_name: "Delete", tool_input: { file_path: filePath } });
91
+ const hookRead = (filePath: string) => ({ tool_name: "Read", tool_input: { file_path: filePath } });
92
+
93
+ d("generated preToolUse hook", () => {
94
+ it("denies gated built-ins (Write/Shell/Delete) and records a category+salient token", () => {
95
+ const h = setup({});
96
+
97
+ for (const [input, category, salient] of [
98
+ [hookWrite("/x/a.txt"), "write", "/x/a.txt"],
99
+ [hookShell("rm -rf build"), "shell", "rm -rf build"],
100
+ [hookDelete("/x/b.txt"), "delete", "/x/b.txt"],
101
+ ] as const) {
102
+ h.resetLedger();
103
+ expect(h.decide(input).permission).toBe("deny");
104
+ const ledger = h.ledger();
105
+ expect(ledger).toHaveLength(1);
106
+ // Byte-identical to the runner's grantToken(category, salient).
107
+ expect(ledger[0].token).toBe(grantToken(category, salient));
108
+ }
109
+ });
110
+
111
+ it("allows read-only built-ins", () => {
112
+ const h = setup({});
113
+ expect(h.decide(hookRead("/x/a.txt")).permission).toBe("allow");
114
+ expect(h.ledger()).toEqual([]);
115
+ });
116
+
117
+ it("auto-approve-all allows even gated built-ins", () => {
118
+ const h = setup({ autoApproveAll: true });
119
+ expect(h.decide(hookWrite("/x/a.txt")).permission).toBe("allow");
120
+ });
121
+
122
+ it("allows the EXACT granted resource and re-gates any other (no name-only over-grant)", () => {
123
+ const id = toolIdentity("edit", "", { path: "/x/a.txt" });
124
+ const h = setup({ grants: [{ toolName: "edit", mcpServerSlug: "", key: id.key, salient: id.salient }] });
125
+
126
+ // Same resource the user approved -> allowed on the resumed turn.
127
+ expect(h.decide(hookWrite("/x/a.txt")).permission).toBe("allow");
128
+ // A different file is NOT covered by the grant -> still gated.
129
+ expect(h.decide(hookWrite("/x/OTHER.txt")).permission).toBe("deny");
130
+ });
131
+
132
+ it("denies require-approval MCP tools and allows them once granted (name-only)", () => {
133
+ const mcpPolicies = { apply_x: { requiresApproval: true, message: "Apply X" } };
134
+ const denyH = setup({ mcpPolicies });
135
+ expect(denyH.decide({ tool_name: "apply_x", tool_input: {} }).permission).toBe("deny");
136
+ expect(denyH.ledger()[0].token).toBe(grantToken("apply_x", ""));
137
+
138
+ const grantH = setup({
139
+ mcpPolicies,
140
+ grants: [{ toolName: "apply_x", mcpServerSlug: "srv", key: "apply_x", salient: "" }],
141
+ });
142
+ expect(grantH.decide({ tool_name: "apply_x", tool_input: {} }).permission).toBe("allow");
143
+ });
144
+
145
+ it("fails closed (deny) when the state file is missing", () => {
146
+ const h = setup({ noStateFile: true });
147
+ expect(h.decide(hookWrite("/x/a.txt")).permission).toBe("deny");
148
+ });
149
+ });
@@ -617,6 +617,99 @@ describe("MessageAccumulator tool call status transitions", () => {
617
617
  });
618
618
  });
619
619
 
620
+ // The Cursor SDK can emit the lifecycle for one call_id more than once.
621
+ // Observed in production: two "running" events ~0.5s apart for a task/edit
622
+ // tool produced two ToolCall entries with the SAME id (a "thin" copy with no
623
+ // result and a "full" copy), rendering the same call two or three times in
624
+ // the UI. The accumulator must upsert by call_id so a call maps to exactly
625
+ // one ToolCall.
626
+ describe("tool call idempotency (one ToolCall per call_id)", () => {
627
+ it("duplicate running events for one call_id create a single ToolCall", () => {
628
+ const messages: AgentMessage[] = [];
629
+ const acc = new MessageAccumulator(messages);
630
+
631
+ acc.processEvent(assistantEvent("r1", "Editing a file."));
632
+ acc.processEvent(toolCallEvent("tc-dup", "edit", "running", "r1", { args: { path: "a.ts" } }));
633
+ acc.processEvent(toolCallEvent("tc-dup", "edit", "running", "r1", { args: { path: "a.ts" } }));
634
+
635
+ expect(countToolCallsWithId(messages, "tc-dup")).toBe(1);
636
+ expect(findToolCallById(messages, "tc-dup")!.status).toBe(ToolCallStatus.TOOL_CALL_RUNNING);
637
+ });
638
+
639
+ it("running -> completed -> running re-emit keeps a single COMPLETED ToolCall", () => {
640
+ const messages: AgentMessage[] = [];
641
+ const acc = new MessageAccumulator(messages);
642
+
643
+ acc.processEvent(assistantEvent("r1", "Running a tool."));
644
+ acc.processEvent(toolCallEvent("tc-1", "Shell", "running", "r1"));
645
+ acc.processEvent(toolCallEvent("tc-1", "Shell", "completed", "r1", { result: "OK" }));
646
+ // A late "running" re-emit must not regress the terminal status.
647
+ acc.processEvent(toolCallEvent("tc-1", "Shell", "running", "r1"));
648
+
649
+ expect(countToolCallsWithId(messages, "tc-1")).toBe(1);
650
+ const tc = findToolCallById(messages, "tc-1")!;
651
+ expect(tc.status).toBe(ToolCallStatus.TOOL_CALL_COMPLETED);
652
+ expect(tc.result).toBe("OK");
653
+ expect(tc.completedAt).toBeTruthy();
654
+ });
655
+
656
+ it("thin-then-full: a result-bearing completion populates the single ToolCall created by an empty running", () => {
657
+ const messages: AgentMessage[] = [];
658
+ const acc = new MessageAccumulator(messages);
659
+
660
+ // Reproduces the production pattern: two running events, then one
661
+ // completion that carries the full result.
662
+ acc.processEvent(assistantEvent("r1", "Delegating work."));
663
+ acc.processEvent(toolCallEvent("tc-task", "task", "running", "r1", { result: "" }));
664
+ acc.processEvent(toolCallEvent("tc-task", "task", "running", "r1", { result: "" }));
665
+ acc.processEvent(toolCallEvent("tc-task", "task", "completed", "r1", { result: "full result blob" }));
666
+
667
+ expect(countToolCallsWithId(messages, "tc-task")).toBe(1);
668
+ const tc = findToolCallById(messages, "tc-task")!;
669
+ expect(tc.status).toBe(ToolCallStatus.TOOL_CALL_COMPLETED);
670
+ expect(tc.result).toBe("full result blob");
671
+ });
672
+
673
+ it("a result-less re-emit after completion does not wipe the captured result", () => {
674
+ const messages: AgentMessage[] = [];
675
+ const acc = new MessageAccumulator(messages);
676
+
677
+ acc.processEvent(assistantEvent("r1", "Running a tool."));
678
+ acc.processEvent(toolCallEvent("tc-1", "read", "running", "r1"));
679
+ acc.processEvent(toolCallEvent("tc-1", "read", "completed", "r1", { result: "file contents" }));
680
+ acc.processEvent(toolCallEvent("tc-1", "read", "completed", "r1", { result: "" }));
681
+
682
+ expect(countToolCallsWithId(messages, "tc-1")).toBe(1);
683
+ expect(findToolCallById(messages, "tc-1")!.result).toBe("file contents");
684
+ });
685
+
686
+ it("duplicate task running events yield one task ToolCall and one sub-agent (production repro)", () => {
687
+ const messages: AgentMessage[] = [];
688
+ const acc = new MessageAccumulator(messages);
689
+
690
+ // Mirror the ExecuteCursor stream loop: every task tool_call event is fed
691
+ // to both processEvent() (tool call) and trackSubAgentExecution().
692
+ acc.processEvent(assistantEvent("r1", "I'll explore the repo."));
693
+ const args = { subagentType: { kind: "explore" }, description: "Explore repo structure and docs", prompt: "Go" };
694
+
695
+ const run1 = toolCallEvent("tc-explore", "task", "running", "r1", { args, result: "" });
696
+ acc.processEvent(run1);
697
+ acc.trackSubAgentExecution(run1);
698
+
699
+ const run2 = toolCallEvent("tc-explore", "task", "running", "r1", { args, result: "" });
700
+ acc.processEvent(run2);
701
+ acc.trackSubAgentExecution(run2);
702
+
703
+ const done = toolCallEvent("tc-explore", "task", "completed", "r1", { result: "explored" });
704
+ acc.processEvent(done);
705
+ acc.trackSubAgentExecution(done);
706
+
707
+ expect(countToolCallsWithId(messages, "tc-explore")).toBe(1);
708
+ expect(acc.subAgentExecutions).toHaveLength(1);
709
+ expect(acc.subAgentExecutions[0].id).toBe("tc-explore");
710
+ });
711
+ });
712
+
620
713
  describe("cancelInProgressSubAgentProtos standalone", () => {
621
714
  it("cancels IN_PROGRESS/PENDING protos in place and reports whether anything changed", () => {
622
715
  const running = create(SubAgentExecutionSchema, {