@stigmer/runner 3.0.2-dev.20260609093630 → 3.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/.build-fingerprint +1 -1
- package/dist/activities/execute-cursor/approval-policy.d.ts +55 -16
- package/dist/activities/execute-cursor/approval-policy.js +93 -31
- package/dist/activities/execute-cursor/approval-policy.js.map +1 -1
- package/dist/activities/execute-cursor/approval-state.d.ts +54 -26
- package/dist/activities/execute-cursor/approval-state.js +41 -26
- package/dist/activities/execute-cursor/approval-state.js.map +1 -1
- package/dist/activities/execute-cursor/hook-script.d.ts +31 -12
- package/dist/activities/execute-cursor/hook-script.js +93 -52
- package/dist/activities/execute-cursor/hook-script.js.map +1 -1
- package/dist/activities/execute-cursor/message-translator.d.ts +23 -0
- package/dist/activities/execute-cursor/message-translator.js +100 -54
- package/dist/activities/execute-cursor/message-translator.js.map +1 -1
- package/package.json +2 -2
- package/src/activities/execute-cursor/__tests__/approval-gate.test.ts +93 -37
- package/src/activities/execute-cursor/__tests__/hitl-ledger.test.ts +33 -18
- package/src/activities/execute-cursor/__tests__/hook-script.test.ts +149 -0
- package/src/activities/execute-cursor/__tests__/message-translator.test.ts +93 -0
- package/src/activities/execute-cursor/approval-policy.ts +113 -31
- package/src/activities/execute-cursor/approval-state.ts +74 -32
- package/src/activities/execute-cursor/hook-script.ts +94 -52
- package/src/activities/execute-cursor/message-translator.ts +114 -57
|
@@ -1,14 +1,15 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Unit tests for the Cursor-harness HITL approval gate logic.
|
|
3
3
|
*
|
|
4
|
-
*
|
|
5
|
-
*
|
|
6
|
-
*
|
|
7
|
-
*
|
|
8
|
-
*
|
|
9
|
-
*
|
|
4
|
+
* The crux this suite guards: the Cursor preToolUse hook and the SDK event
|
|
5
|
+
* stream use DIFFERENT tool taxonomies for the same operation (hook
|
|
6
|
+
* `Write`/`Shell`/`Delete` with `file_path`/`command`; stream
|
|
7
|
+
* `edit`/`shell`/`delete` with `path`/`command`). Correlation therefore keys on
|
|
8
|
+
* a canonical {@link approvalCategory} + the salient resource VALUE, not the raw
|
|
9
|
+
* tool name. These tests assert that invariant against BOTH taxonomies so a
|
|
10
|
+
* future SDK tool rename fails loudly instead of silently disabling the gate.
|
|
10
11
|
*
|
|
11
|
-
*
|
|
12
|
+
* Deterministic; no Cursor API key required.
|
|
12
13
|
*/
|
|
13
14
|
|
|
14
15
|
import { describe, it, expect } from "vitest";
|
|
@@ -18,9 +19,11 @@ import type { PendingApproval } from "@stigmer/protos/ai/stigmer/agentic/agentex
|
|
|
18
19
|
import { ApprovalAction } from "@stigmer/protos/ai/stigmer/agentic/agentexecution/v1/enum_pb";
|
|
19
20
|
|
|
20
21
|
import {
|
|
22
|
+
approvalCategory,
|
|
21
23
|
builtInRequiresApproval,
|
|
22
24
|
getBuiltInApprovalMessage,
|
|
23
25
|
getBuiltInGatedList,
|
|
26
|
+
getBuiltInGatedCategories,
|
|
24
27
|
extractArgKey,
|
|
25
28
|
} from "../approval-policy.js";
|
|
26
29
|
import type { MergedToolPolicy } from "../approval-policy.js";
|
|
@@ -28,13 +31,14 @@ import {
|
|
|
28
31
|
buildApprovalGrants,
|
|
29
32
|
buildApprovalState,
|
|
30
33
|
grantToken,
|
|
34
|
+
toolIdentity,
|
|
31
35
|
} from "../approval-state.js";
|
|
32
36
|
import { buildReinvocationPrompt } from "../prompt-builder.js";
|
|
33
37
|
|
|
34
38
|
function pending(overrides: Partial<PendingApproval>): PendingApproval {
|
|
35
39
|
return create(PendingApprovalSchema, {
|
|
36
40
|
toolCallId: "call-1",
|
|
37
|
-
toolName: "
|
|
41
|
+
toolName: "edit",
|
|
38
42
|
message: "",
|
|
39
43
|
argsPreview: "",
|
|
40
44
|
mcpServerSlug: "",
|
|
@@ -42,15 +46,47 @@ function pending(overrides: Partial<PendingApproval>): PendingApproval {
|
|
|
42
46
|
});
|
|
43
47
|
}
|
|
44
48
|
|
|
49
|
+
// The real ground-truth taxonomies (captured from @cursor/sdk via live probe).
|
|
50
|
+
const HOOK_NAMES = { write: "Write", shell: "Shell", del: "Delete", read: "Read" };
|
|
51
|
+
const STREAM_NAMES = { write: "edit", shell: "shell", del: "delete", read: "read" };
|
|
52
|
+
|
|
53
|
+
describe("approvalCategory (cross-taxonomy drift-guard)", () => {
|
|
54
|
+
it("maps the HOOK taxonomy (PascalCase) to canonical categories", () => {
|
|
55
|
+
expect(approvalCategory("Write")).toBe("write");
|
|
56
|
+
expect(approvalCategory("StrReplace")).toBe("write");
|
|
57
|
+
expect(approvalCategory("EditNotebook")).toBe("write");
|
|
58
|
+
expect(approvalCategory("Delete")).toBe("delete");
|
|
59
|
+
expect(approvalCategory("Shell")).toBe("shell");
|
|
60
|
+
});
|
|
61
|
+
|
|
62
|
+
it("maps the STREAM taxonomy (lowercase) to the SAME categories", () => {
|
|
63
|
+
expect(approvalCategory("write")).toBe("write");
|
|
64
|
+
expect(approvalCategory("edit")).toBe("write");
|
|
65
|
+
expect(approvalCategory("delete")).toBe("delete");
|
|
66
|
+
expect(approvalCategory("shell")).toBe("shell");
|
|
67
|
+
expect(approvalCategory("execute")).toBe("shell");
|
|
68
|
+
});
|
|
69
|
+
|
|
70
|
+
it("a file mutation collapses to `write` on BOTH sides (hook Write == stream edit)", () => {
|
|
71
|
+
expect(approvalCategory(HOOK_NAMES.write)).toBe(approvalCategory(STREAM_NAMES.write));
|
|
72
|
+
});
|
|
73
|
+
|
|
74
|
+
it("returns undefined for read-only / non-gated tools", () => {
|
|
75
|
+
for (const t of ["read", "Read", "glob", "Glob", "grep", "Grep", "ls", "think", "task"]) {
|
|
76
|
+
expect(approvalCategory(t)).toBeUndefined();
|
|
77
|
+
}
|
|
78
|
+
});
|
|
79
|
+
});
|
|
80
|
+
|
|
45
81
|
describe("builtInRequiresApproval", () => {
|
|
46
|
-
it("gates mutating/destructive
|
|
47
|
-
for (const t of ["Write", "StrReplace", "EditNotebook", "Shell", "Delete"]) {
|
|
82
|
+
it("gates mutating/destructive tools in BOTH taxonomies", () => {
|
|
83
|
+
for (const t of ["Write", "StrReplace", "EditNotebook", "Shell", "Delete", "edit", "shell", "delete", "execute", "write"]) {
|
|
48
84
|
expect(builtInRequiresApproval(t)).toBe(true);
|
|
49
85
|
}
|
|
50
86
|
});
|
|
51
87
|
|
|
52
88
|
it("allows read-only built-in tools", () => {
|
|
53
|
-
for (const t of ["Read", "Grep", "Glob", "
|
|
89
|
+
for (const t of ["Read", "read", "Grep", "grep", "Glob", "glob", "ls", "think", "task"]) {
|
|
54
90
|
expect(builtInRequiresApproval(t)).toBe(false);
|
|
55
91
|
}
|
|
56
92
|
});
|
|
@@ -60,27 +96,38 @@ describe("builtInRequiresApproval", () => {
|
|
|
60
96
|
expect(builtInRequiresApproval("search_services")).toBe(false);
|
|
61
97
|
});
|
|
62
98
|
|
|
63
|
-
it("exposes the gated set", () => {
|
|
99
|
+
it("exposes the gated set in the HOOK taxonomy (what the hook matches)", () => {
|
|
64
100
|
expect(getBuiltInGatedList()).toEqual(
|
|
65
101
|
expect.arrayContaining(["Write", "StrReplace", "EditNotebook", "Shell", "Delete"]),
|
|
66
102
|
);
|
|
67
103
|
});
|
|
104
|
+
|
|
105
|
+
it("every gated built-in resolves to a category (no ungated hole)", () => {
|
|
106
|
+
for (const name of getBuiltInGatedList()) {
|
|
107
|
+
expect(approvalCategory(name)).toBeDefined();
|
|
108
|
+
}
|
|
109
|
+
// The injected hook map covers exactly the gated set.
|
|
110
|
+
expect(getBuiltInGatedCategories().map(([n]) => n).sort()).toEqual(getBuiltInGatedList().sort());
|
|
111
|
+
});
|
|
68
112
|
});
|
|
69
113
|
|
|
70
114
|
describe("getBuiltInApprovalMessage", () => {
|
|
71
|
-
it("returns a template for gated tools
|
|
115
|
+
it("returns a category template for gated tools in EITHER taxonomy", () => {
|
|
72
116
|
expect(getBuiltInApprovalMessage("Write")).toContain("{{args.path}}");
|
|
117
|
+
expect(getBuiltInApprovalMessage("edit")).toContain("{{args.path}}");
|
|
73
118
|
expect(getBuiltInApprovalMessage("Shell")).toContain("{{args.command}}");
|
|
119
|
+
expect(getBuiltInApprovalMessage("shell")).toContain("{{args.command}}");
|
|
74
120
|
expect(getBuiltInApprovalMessage("Read")).toBeUndefined();
|
|
121
|
+
expect(getBuiltInApprovalMessage("read")).toBeUndefined();
|
|
75
122
|
});
|
|
76
123
|
});
|
|
77
124
|
|
|
78
|
-
describe("extractArgKey", () => {
|
|
79
|
-
it("extracts the salient
|
|
80
|
-
expect(extractArgKey({
|
|
125
|
+
describe("extractArgKey (spans both taxonomies' field names)", () => {
|
|
126
|
+
it("extracts the salient value regardless of field name (file_path or path)", () => {
|
|
127
|
+
expect(extractArgKey({ file_path: "a.txt" })).toBe("a.txt"); // hook shape
|
|
128
|
+
expect(extractArgKey({ path: "a.txt" })).toBe("a.txt"); // stream shape
|
|
81
129
|
expect(extractArgKey({ command: "ls -la" })).toBe("ls -la");
|
|
82
130
|
expect(extractArgKey({ target_notebook: "nb.ipynb" })).toBe("nb.ipynb");
|
|
83
|
-
expect(extractArgKey({ path: "a.txt", command: "ls" })).toBe("a.txt");
|
|
84
131
|
});
|
|
85
132
|
|
|
86
133
|
it("returns empty string when no salient field is present", () => {
|
|
@@ -90,24 +137,33 @@ describe("extractArgKey", () => {
|
|
|
90
137
|
});
|
|
91
138
|
});
|
|
92
139
|
|
|
93
|
-
describe("grantToken", () => {
|
|
94
|
-
it("
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
);
|
|
98
|
-
expect(
|
|
99
|
-
|
|
140
|
+
describe("toolIdentity + grantToken (canonical, taxonomy-agnostic)", () => {
|
|
141
|
+
it("a hook Write and a stream edit on the SAME path produce the SAME token", () => {
|
|
142
|
+
const hook = toolIdentity("Write", "", { file_path: "/x/a.txt" });
|
|
143
|
+
const stream = toolIdentity("edit", "", { path: "/x/a.txt" });
|
|
144
|
+
expect(hook).toEqual({ key: "write", salient: "/x/a.txt" });
|
|
145
|
+
expect(stream).toEqual({ key: "write", salient: "/x/a.txt" });
|
|
146
|
+
expect(grantToken(hook.key, hook.salient)).toBe(grantToken(stream.key, stream.salient));
|
|
147
|
+
});
|
|
148
|
+
|
|
149
|
+
it("encodes as base64(key \\n salient)", () => {
|
|
150
|
+
expect(grantToken("write", "/x/a.txt")).toBe(
|
|
151
|
+
Buffer.from("write\n/x/a.txt", "utf-8").toString("base64"),
|
|
100
152
|
);
|
|
101
153
|
});
|
|
154
|
+
|
|
155
|
+
it("MCP tools key on name only (consistent across layers)", () => {
|
|
156
|
+
expect(toolIdentity("apply_x", "planton", { path: "ignored" })).toEqual({ key: "apply_x", salient: "" });
|
|
157
|
+
});
|
|
102
158
|
});
|
|
103
159
|
|
|
104
160
|
describe("buildApprovalGrants", () => {
|
|
105
|
-
it("creates an
|
|
161
|
+
it("creates an exact-resource grant for an approved built-in (stream-named) tool", () => {
|
|
106
162
|
const grants = buildApprovalGrants(
|
|
107
|
-
[pending({ toolCallId: "c1", toolName: "
|
|
163
|
+
[pending({ toolCallId: "c1", toolName: "edit", argsPreview: JSON.stringify({ path: "/x/gated.txt" }) })],
|
|
108
164
|
new Map([["c1", ApprovalAction.APPROVE]]),
|
|
109
165
|
);
|
|
110
|
-
expect(grants).toEqual([{ toolName: "
|
|
166
|
+
expect(grants).toEqual([{ toolName: "edit", mcpServerSlug: "", key: "write", salient: "/x/gated.txt" }]);
|
|
111
167
|
});
|
|
112
168
|
|
|
113
169
|
it("creates a name-only grant for an approved MCP tool", () => {
|
|
@@ -115,14 +171,14 @@ describe("buildApprovalGrants", () => {
|
|
|
115
171
|
[pending({ toolCallId: "c1", toolName: "apply_x", mcpServerSlug: "planton", argsPreview: JSON.stringify({ path: "ignored" }) })],
|
|
116
172
|
new Map([["c1", ApprovalAction.APPROVE]]),
|
|
117
173
|
);
|
|
118
|
-
expect(grants).toEqual([{ toolName: "apply_x", mcpServerSlug: "planton",
|
|
174
|
+
expect(grants).toEqual([{ toolName: "apply_x", mcpServerSlug: "planton", key: "apply_x", salient: "" }]);
|
|
119
175
|
});
|
|
120
176
|
|
|
121
177
|
it("ignores skipped and rejected approvals", () => {
|
|
122
178
|
const grants = buildApprovalGrants(
|
|
123
179
|
[
|
|
124
|
-
pending({ toolCallId: "c1", toolName: "
|
|
125
|
-
pending({ toolCallId: "c2", toolName: "
|
|
180
|
+
pending({ toolCallId: "c1", toolName: "edit", argsPreview: JSON.stringify({ path: "a" }) }),
|
|
181
|
+
pending({ toolCallId: "c2", toolName: "shell", argsPreview: JSON.stringify({ command: "rm" }) }),
|
|
126
182
|
],
|
|
127
183
|
new Map([
|
|
128
184
|
["c1", ApprovalAction.SKIP],
|
|
@@ -138,14 +194,15 @@ describe("buildApprovalState", () => {
|
|
|
138
194
|
["planton/apply_x", { toolName: "apply_x", mcpServerSlug: "planton", requiresApproval: true, approvalMessage: "Apply X" }],
|
|
139
195
|
]);
|
|
140
196
|
|
|
141
|
-
it("carries
|
|
142
|
-
const grants = [{ toolName: "
|
|
197
|
+
it("carries MCP policies and exact-resource grant tokens (gated set is baked into the hook, not the state)", () => {
|
|
198
|
+
const grants = [{ toolName: "edit", mcpServerSlug: "", key: "write", salient: "/x/gated.txt" }];
|
|
143
199
|
const state = buildApprovalState(mcpPolicies, false, grants);
|
|
144
200
|
|
|
145
201
|
expect(state.autoApproveAll).toBe(false);
|
|
146
|
-
expect(state.builtInGatedList).toEqual(expect.arrayContaining(["Write", "Shell"]));
|
|
147
202
|
expect(state.mcpToolPolicies.apply_x).toEqual({ requiresApproval: true, message: "Apply X" });
|
|
148
|
-
expect(state.approvedGrantTokens).toEqual([grantToken("
|
|
203
|
+
expect(state.approvedGrantTokens).toEqual([grantToken("write", "/x/gated.txt")]);
|
|
204
|
+
// builtInGatedList is no longer part of the state file (baked into the hook).
|
|
205
|
+
expect((state as Record<string, unknown>).builtInGatedList).toBeUndefined();
|
|
149
206
|
});
|
|
150
207
|
|
|
151
208
|
it("defaults grants to empty when none provided", () => {
|
|
@@ -160,8 +217,8 @@ describe("buildReinvocationPrompt", () => {
|
|
|
160
217
|
it("describes approved and skipped actions in human terms, not opaque ids", () => {
|
|
161
218
|
const prompt = buildReinvocationPrompt(
|
|
162
219
|
[
|
|
163
|
-
pending({ toolCallId: "c1", toolName: "
|
|
164
|
-
pending({ toolCallId: "c2", toolName: "
|
|
220
|
+
pending({ toolCallId: "c1", toolName: "edit", message: "Write file: gated.txt" }),
|
|
221
|
+
pending({ toolCallId: "c2", toolName: "shell", message: "Run command: rm -rf build" }),
|
|
165
222
|
],
|
|
166
223
|
new Map([
|
|
167
224
|
["c1", ApprovalAction.APPROVE],
|
|
@@ -173,7 +230,6 @@ describe("buildReinvocationPrompt", () => {
|
|
|
173
230
|
expect(prompt).toContain("Write file: gated.txt");
|
|
174
231
|
expect(prompt).toContain("SKIPPED");
|
|
175
232
|
expect(prompt).toContain("Run command: rm -rf build");
|
|
176
|
-
// No opaque tool-call ids leak into the prompt.
|
|
177
233
|
expect(prompt).not.toContain("c1");
|
|
178
234
|
expect(prompt).not.toContain("c2");
|
|
179
235
|
});
|
|
@@ -63,10 +63,14 @@ function makeWorkspace(): string {
|
|
|
63
63
|
return dir;
|
|
64
64
|
}
|
|
65
65
|
|
|
66
|
+
// Stream tool calls use the lowercase SDK taxonomy (edit/shell/delete); the
|
|
67
|
+
// denial ledger uses the hook taxonomy (Write/Shell/Delete) + a canonical
|
|
68
|
+
// category+salient token. The two correlate via approvalCategory — that cross-
|
|
69
|
+
// taxonomy match is exactly what these tests pin.
|
|
66
70
|
function toolCall(overrides: Partial<ToolCall>): ToolCall {
|
|
67
71
|
return create(ToolCallSchema, {
|
|
68
72
|
id: "call-1",
|
|
69
|
-
name: "
|
|
73
|
+
name: "edit",
|
|
70
74
|
status: ToolCallStatus.TOOL_CALL_COMPLETED,
|
|
71
75
|
...overrides,
|
|
72
76
|
});
|
|
@@ -96,8 +100,8 @@ describe("denial ledger reset/read", () => {
|
|
|
96
100
|
it("parses appended JSONL denials and tolerates blank/partial lines", async () => {
|
|
97
101
|
const ws = makeWorkspace();
|
|
98
102
|
await resetDenialLedger(ws);
|
|
99
|
-
const writeToken = grantToken("
|
|
100
|
-
const shellToken = grantToken("
|
|
103
|
+
const writeToken = grantToken("write", "gated.txt");
|
|
104
|
+
const shellToken = grantToken("shell", "rm -rf build");
|
|
101
105
|
// Simulate the hook appending records, including a trailing partial line.
|
|
102
106
|
await writeFile(
|
|
103
107
|
denialLedgerPath(ws),
|
|
@@ -117,10 +121,13 @@ describe("denial ledger reset/read", () => {
|
|
|
117
121
|
});
|
|
118
122
|
|
|
119
123
|
describe("reconcileDeniedToolCalls", () => {
|
|
120
|
-
it("overlays WAITING_APPROVAL onto
|
|
124
|
+
it("overlays WAITING_APPROVAL onto the REAL denied tool reported as completed (the green-check bug)", () => {
|
|
125
|
+
// Stream reports the file mutation as `edit` (RUNNING/COMPLETED); the hook
|
|
126
|
+
// denied it as `Write`. The category+salient token bridges the two so the
|
|
127
|
+
// overlay lands on this exact streamed tool call — no synthesized placeholder.
|
|
121
128
|
const tc = toolCall({
|
|
122
129
|
id: "c1",
|
|
123
|
-
name: "
|
|
130
|
+
name: "edit",
|
|
124
131
|
status: ToolCallStatus.TOOL_CALL_COMPLETED,
|
|
125
132
|
completedAt: "2026-06-07T00:00:00Z",
|
|
126
133
|
result: "wrote file",
|
|
@@ -130,10 +137,14 @@ describe("reconcileDeniedToolCalls", () => {
|
|
|
130
137
|
const messages = [aiMessageWith([tc])];
|
|
131
138
|
|
|
132
139
|
const reconciled = reconcileDeniedToolCalls(messages, [
|
|
133
|
-
{ toolName: "Write", token: grantToken("
|
|
140
|
+
{ toolName: "Write", token: grantToken("write", "gated.txt") },
|
|
134
141
|
]);
|
|
135
142
|
|
|
136
143
|
expect(reconciled).toHaveLength(1);
|
|
144
|
+
// The overlay marked the REAL streamed tool call — no synthesized placeholder
|
|
145
|
+
// and no orphan was appended.
|
|
146
|
+
expect(reconciled[0]).toBe(tc);
|
|
147
|
+
expect(messages[0].toolCalls).toHaveLength(1);
|
|
137
148
|
expect(tc.status).toBe(ToolCallStatus.TOOL_CALL_WAITING_APPROVAL);
|
|
138
149
|
expect(tc.requiresApproval).toBe(true);
|
|
139
150
|
expect(tc.approvalMessage).toContain("gated.txt");
|
|
@@ -162,7 +173,7 @@ describe("reconcileDeniedToolCalls", () => {
|
|
|
162
173
|
}],
|
|
163
174
|
]);
|
|
164
175
|
|
|
165
|
-
// MCP tools are keyed name-only (
|
|
176
|
+
// MCP tools are keyed name-only (their name is consistent across layers).
|
|
166
177
|
reconcileDeniedToolCalls(messages, [
|
|
167
178
|
{ toolName: "apply_x", token: grantToken("apply_x", "") },
|
|
168
179
|
], policies);
|
|
@@ -174,20 +185,20 @@ describe("reconcileDeniedToolCalls", () => {
|
|
|
174
185
|
it("leaves non-denied tool calls untouched while overlaying the denied one", () => {
|
|
175
186
|
const denied = toolCall({
|
|
176
187
|
id: "c1",
|
|
177
|
-
name: "
|
|
188
|
+
name: "edit",
|
|
178
189
|
status: ToolCallStatus.TOOL_CALL_COMPLETED,
|
|
179
190
|
args: { path: "gated.txt" },
|
|
180
191
|
});
|
|
181
192
|
const allowed = toolCall({
|
|
182
193
|
id: "c2",
|
|
183
|
-
name: "
|
|
194
|
+
name: "read",
|
|
184
195
|
status: ToolCallStatus.TOOL_CALL_COMPLETED,
|
|
185
196
|
args: { path: "readme.md" },
|
|
186
197
|
});
|
|
187
198
|
const messages = [aiMessageWith([denied, allowed])];
|
|
188
199
|
|
|
189
200
|
const reconciled = reconcileDeniedToolCalls(messages, [
|
|
190
|
-
{ toolName: "Write", token: grantToken("
|
|
201
|
+
{ toolName: "Write", token: grantToken("write", "gated.txt") },
|
|
191
202
|
]);
|
|
192
203
|
|
|
193
204
|
// Only the denied call is gated; the read-only call keeps its status and no
|
|
@@ -199,12 +210,12 @@ describe("reconcileDeniedToolCalls", () => {
|
|
|
199
210
|
});
|
|
200
211
|
|
|
201
212
|
it("collapses repeated denials of the same resource to a single approval", () => {
|
|
202
|
-
const first = toolCall({ id: "c1", name: "
|
|
203
|
-
const second = toolCall({ id: "c2", name: "
|
|
213
|
+
const first = toolCall({ id: "c1", name: "edit", args: { path: "gated.txt" } });
|
|
214
|
+
const second = toolCall({ id: "c2", name: "edit", args: { path: "gated.txt" } });
|
|
204
215
|
const messages = [aiMessageWith([first, second])];
|
|
205
216
|
|
|
206
217
|
const reconciled = reconcileDeniedToolCalls(messages, [
|
|
207
|
-
{ toolName: "Write", token: grantToken("
|
|
218
|
+
{ toolName: "Write", token: grantToken("write", "gated.txt") },
|
|
208
219
|
]);
|
|
209
220
|
|
|
210
221
|
// One approval anchor (so the backend gate resolves cleanly on one decision).
|
|
@@ -217,15 +228,19 @@ describe("reconcileDeniedToolCalls", () => {
|
|
|
217
228
|
const messages = [aiMessageWith([])];
|
|
218
229
|
|
|
219
230
|
const reconciled = reconcileDeniedToolCalls(messages, [
|
|
220
|
-
{ toolName: "Shell", token: grantToken("
|
|
231
|
+
{ toolName: "Shell", token: grantToken("shell", "rm -rf build") },
|
|
221
232
|
]);
|
|
222
233
|
|
|
223
234
|
expect(reconciled).toHaveLength(1);
|
|
224
235
|
const synthesized = messages[0].toolCalls[0];
|
|
225
236
|
expect(synthesized.status).toBe(ToolCallStatus.TOOL_CALL_WAITING_APPROVAL);
|
|
226
237
|
expect(synthesized.requiresApproval).toBe(true);
|
|
238
|
+
// The synthesized fallback shows the hook's raw tool name for display...
|
|
227
239
|
expect(synthesized.name).toBe("Shell");
|
|
228
240
|
expect(synthesized.approvalMessage).toContain("rm -rf build");
|
|
241
|
+
// ...and carries the salient so the grant rebuilt from it keys on the same
|
|
242
|
+
// resource the hook will see on the re-attempt.
|
|
243
|
+
expect(synthesized.argsPreview).toContain("rm -rf build");
|
|
229
244
|
});
|
|
230
245
|
|
|
231
246
|
it("is a no-op when the ledger is empty", () => {
|
|
@@ -240,7 +255,7 @@ describe("reconstructAdjudicatedApprovals", () => {
|
|
|
240
255
|
it("reads decisions and rebuilds pending approvals from adjudicated tool calls", () => {
|
|
241
256
|
const approved = toolCall({
|
|
242
257
|
id: "c1",
|
|
243
|
-
name: "
|
|
258
|
+
name: "edit",
|
|
244
259
|
status: ToolCallStatus.TOOL_CALL_WAITING_APPROVAL,
|
|
245
260
|
approvalAction: ApprovalAction.APPROVE,
|
|
246
261
|
approvalMessage: "Write file: gated.txt",
|
|
@@ -248,13 +263,13 @@ describe("reconstructAdjudicatedApprovals", () => {
|
|
|
248
263
|
});
|
|
249
264
|
const undecided = toolCall({
|
|
250
265
|
id: "c2",
|
|
251
|
-
name: "
|
|
266
|
+
name: "shell",
|
|
252
267
|
status: ToolCallStatus.TOOL_CALL_WAITING_APPROVAL,
|
|
253
268
|
approvalAction: ApprovalAction.UNSPECIFIED,
|
|
254
269
|
});
|
|
255
270
|
const unrelated = toolCall({
|
|
256
271
|
id: "c3",
|
|
257
|
-
name: "
|
|
272
|
+
name: "read",
|
|
258
273
|
status: ToolCallStatus.TOOL_CALL_COMPLETED,
|
|
259
274
|
approvalAction: ApprovalAction.APPROVE,
|
|
260
275
|
});
|
|
@@ -265,7 +280,7 @@ describe("reconstructAdjudicatedApprovals", () => {
|
|
|
265
280
|
expect([...decisions.entries()]).toEqual([["c1", ApprovalAction.APPROVE]]);
|
|
266
281
|
expect(pendingApprovals).toHaveLength(1);
|
|
267
282
|
expect(pendingApprovals[0].toolCallId).toBe("c1");
|
|
268
|
-
expect(pendingApprovals[0].toolName).toBe("
|
|
283
|
+
expect(pendingApprovals[0].toolName).toBe("edit");
|
|
269
284
|
expect(pendingApprovals[0].argsPreview).toBe(JSON.stringify({ path: "gated.txt" }));
|
|
270
285
|
});
|
|
271
286
|
|
|
@@ -0,0 +1,149 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Behavior tests for the generated preToolUse bash hook.
|
|
3
|
+
*
|
|
4
|
+
* These run the ACTUAL bash script the runner writes into the workspace, feeding
|
|
5
|
+
* it the REAL hook-input shape captured from @cursor/sdk (PascalCase
|
|
6
|
+
* `tool_name`; `file_path`/`command` in `tool_input`). They are the strongest
|
|
7
|
+
* guard against the regression this work fixes: a gated built-in must be denied,
|
|
8
|
+
* its denial must be recorded with a token byte-identical to the runner's
|
|
9
|
+
* grantToken, and an exact-resource grant must allow only that resource.
|
|
10
|
+
*
|
|
11
|
+
* Skipped automatically where bash is unavailable.
|
|
12
|
+
*/
|
|
13
|
+
|
|
14
|
+
import { describe, it, expect, beforeAll, afterEach } from "vitest";
|
|
15
|
+
import { execFileSync, execSync } from "node:child_process";
|
|
16
|
+
import { mkdtempSync, mkdirSync, writeFileSync, rmSync, readFileSync, existsSync } from "node:fs";
|
|
17
|
+
import { tmpdir } from "node:os";
|
|
18
|
+
import { join } from "node:path";
|
|
19
|
+
|
|
20
|
+
import { generateHookScript } from "../hook-script.js";
|
|
21
|
+
import { buildApprovalState, grantToken, toolIdentity, type ApprovalGrant } from "../approval-state.js";
|
|
22
|
+
import type { McpToolPolicyEntry } from "../approval-state.js";
|
|
23
|
+
|
|
24
|
+
let hasBash = false;
|
|
25
|
+
try {
|
|
26
|
+
execSync("bash -c 'exit 0'", { stdio: "ignore" });
|
|
27
|
+
hasBash = true;
|
|
28
|
+
} catch {
|
|
29
|
+
hasBash = false;
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
const d = hasBash ? describe : describe.skip;
|
|
33
|
+
|
|
34
|
+
const tempDirs: string[] = [];
|
|
35
|
+
afterEach(() => {
|
|
36
|
+
for (const dir of tempDirs.splice(0)) rmSync(dir, { recursive: true, force: true });
|
|
37
|
+
});
|
|
38
|
+
|
|
39
|
+
interface Harness {
|
|
40
|
+
decide(input: object): { permission: string; raw: string };
|
|
41
|
+
ledger(): Array<{ toolName: string; token: string }>;
|
|
42
|
+
resetLedger(): void;
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
function setup(opts: {
|
|
46
|
+
autoApproveAll?: boolean;
|
|
47
|
+
grants?: ApprovalGrant[];
|
|
48
|
+
mcpPolicies?: Record<string, McpToolPolicyEntry>;
|
|
49
|
+
noStateFile?: boolean;
|
|
50
|
+
}): Harness {
|
|
51
|
+
const ws = mkdtempSync(join(tmpdir(), "hook-script-"));
|
|
52
|
+
tempDirs.push(ws);
|
|
53
|
+
const dir = join(ws, ".cursor", "hooks");
|
|
54
|
+
mkdirSync(dir, { recursive: true });
|
|
55
|
+
const statePath = join(dir, "state.json");
|
|
56
|
+
const ledgerPath = join(dir, "denials.jsonl");
|
|
57
|
+
const scriptPath = join(dir, "hook.sh");
|
|
58
|
+
writeFileSync(scriptPath, generateHookScript(statePath, ledgerPath), "utf-8");
|
|
59
|
+
|
|
60
|
+
if (!opts.noStateFile) {
|
|
61
|
+
const policies = new Map(
|
|
62
|
+
Object.entries(opts.mcpPolicies ?? {}).map(([name, p]) => [
|
|
63
|
+
`srv/${name}`,
|
|
64
|
+
{ toolName: name, mcpServerSlug: "srv", requiresApproval: p.requiresApproval, approvalMessage: p.message ?? "" },
|
|
65
|
+
]),
|
|
66
|
+
);
|
|
67
|
+
const state = buildApprovalState(policies, opts.autoApproveAll ?? false, opts.grants);
|
|
68
|
+
writeFileSync(statePath, JSON.stringify(state), "utf-8");
|
|
69
|
+
}
|
|
70
|
+
|
|
71
|
+
return {
|
|
72
|
+
decide(input: object) {
|
|
73
|
+
const raw = execFileSync("bash", [scriptPath], { input: JSON.stringify(input) }).toString();
|
|
74
|
+
const permission = raw.includes('"permission":"deny"') ? "deny" : raw.includes('"permission":"allow"') ? "allow" : "?";
|
|
75
|
+
return { permission, raw };
|
|
76
|
+
},
|
|
77
|
+
ledger() {
|
|
78
|
+
if (!existsSync(ledgerPath)) return [];
|
|
79
|
+
return readFileSync(ledgerPath, "utf-8").split("\n").filter(Boolean).map((l) => JSON.parse(l));
|
|
80
|
+
},
|
|
81
|
+
resetLedger() {
|
|
82
|
+
writeFileSync(ledgerPath, "", "utf-8");
|
|
83
|
+
},
|
|
84
|
+
};
|
|
85
|
+
}
|
|
86
|
+
|
|
87
|
+
// Real hook-input shapes (PascalCase name, file_path/command in tool_input).
|
|
88
|
+
const hookWrite = (filePath: string) => ({ tool_name: "Write", tool_input: { file_path: filePath, content: "x" } });
|
|
89
|
+
const hookShell = (command: string) => ({ tool_name: "Shell", tool_input: { command, cwd: "/x", timeout: 30000 } });
|
|
90
|
+
const hookDelete = (filePath: string) => ({ tool_name: "Delete", tool_input: { file_path: filePath } });
|
|
91
|
+
const hookRead = (filePath: string) => ({ tool_name: "Read", tool_input: { file_path: filePath } });
|
|
92
|
+
|
|
93
|
+
d("generated preToolUse hook", () => {
|
|
94
|
+
it("denies gated built-ins (Write/Shell/Delete) and records a category+salient token", () => {
|
|
95
|
+
const h = setup({});
|
|
96
|
+
|
|
97
|
+
for (const [input, category, salient] of [
|
|
98
|
+
[hookWrite("/x/a.txt"), "write", "/x/a.txt"],
|
|
99
|
+
[hookShell("rm -rf build"), "shell", "rm -rf build"],
|
|
100
|
+
[hookDelete("/x/b.txt"), "delete", "/x/b.txt"],
|
|
101
|
+
] as const) {
|
|
102
|
+
h.resetLedger();
|
|
103
|
+
expect(h.decide(input).permission).toBe("deny");
|
|
104
|
+
const ledger = h.ledger();
|
|
105
|
+
expect(ledger).toHaveLength(1);
|
|
106
|
+
// Byte-identical to the runner's grantToken(category, salient).
|
|
107
|
+
expect(ledger[0].token).toBe(grantToken(category, salient));
|
|
108
|
+
}
|
|
109
|
+
});
|
|
110
|
+
|
|
111
|
+
it("allows read-only built-ins", () => {
|
|
112
|
+
const h = setup({});
|
|
113
|
+
expect(h.decide(hookRead("/x/a.txt")).permission).toBe("allow");
|
|
114
|
+
expect(h.ledger()).toEqual([]);
|
|
115
|
+
});
|
|
116
|
+
|
|
117
|
+
it("auto-approve-all allows even gated built-ins", () => {
|
|
118
|
+
const h = setup({ autoApproveAll: true });
|
|
119
|
+
expect(h.decide(hookWrite("/x/a.txt")).permission).toBe("allow");
|
|
120
|
+
});
|
|
121
|
+
|
|
122
|
+
it("allows the EXACT granted resource and re-gates any other (no name-only over-grant)", () => {
|
|
123
|
+
const id = toolIdentity("edit", "", { path: "/x/a.txt" });
|
|
124
|
+
const h = setup({ grants: [{ toolName: "edit", mcpServerSlug: "", key: id.key, salient: id.salient }] });
|
|
125
|
+
|
|
126
|
+
// Same resource the user approved -> allowed on the resumed turn.
|
|
127
|
+
expect(h.decide(hookWrite("/x/a.txt")).permission).toBe("allow");
|
|
128
|
+
// A different file is NOT covered by the grant -> still gated.
|
|
129
|
+
expect(h.decide(hookWrite("/x/OTHER.txt")).permission).toBe("deny");
|
|
130
|
+
});
|
|
131
|
+
|
|
132
|
+
it("denies require-approval MCP tools and allows them once granted (name-only)", () => {
|
|
133
|
+
const mcpPolicies = { apply_x: { requiresApproval: true, message: "Apply X" } };
|
|
134
|
+
const denyH = setup({ mcpPolicies });
|
|
135
|
+
expect(denyH.decide({ tool_name: "apply_x", tool_input: {} }).permission).toBe("deny");
|
|
136
|
+
expect(denyH.ledger()[0].token).toBe(grantToken("apply_x", ""));
|
|
137
|
+
|
|
138
|
+
const grantH = setup({
|
|
139
|
+
mcpPolicies,
|
|
140
|
+
grants: [{ toolName: "apply_x", mcpServerSlug: "srv", key: "apply_x", salient: "" }],
|
|
141
|
+
});
|
|
142
|
+
expect(grantH.decide({ tool_name: "apply_x", tool_input: {} }).permission).toBe("allow");
|
|
143
|
+
});
|
|
144
|
+
|
|
145
|
+
it("fails closed (deny) when the state file is missing", () => {
|
|
146
|
+
const h = setup({ noStateFile: true });
|
|
147
|
+
expect(h.decide(hookWrite("/x/a.txt")).permission).toBe("deny");
|
|
148
|
+
});
|
|
149
|
+
});
|
|
@@ -617,6 +617,99 @@ describe("MessageAccumulator tool call status transitions", () => {
|
|
|
617
617
|
});
|
|
618
618
|
});
|
|
619
619
|
|
|
620
|
+
// The Cursor SDK can emit the lifecycle for one call_id more than once.
|
|
621
|
+
// Observed in production: two "running" events ~0.5s apart for a task/edit
|
|
622
|
+
// tool produced two ToolCall entries with the SAME id (a "thin" copy with no
|
|
623
|
+
// result and a "full" copy), rendering the same call two or three times in
|
|
624
|
+
// the UI. The accumulator must upsert by call_id so a call maps to exactly
|
|
625
|
+
// one ToolCall.
|
|
626
|
+
describe("tool call idempotency (one ToolCall per call_id)", () => {
|
|
627
|
+
it("duplicate running events for one call_id create a single ToolCall", () => {
|
|
628
|
+
const messages: AgentMessage[] = [];
|
|
629
|
+
const acc = new MessageAccumulator(messages);
|
|
630
|
+
|
|
631
|
+
acc.processEvent(assistantEvent("r1", "Editing a file."));
|
|
632
|
+
acc.processEvent(toolCallEvent("tc-dup", "edit", "running", "r1", { args: { path: "a.ts" } }));
|
|
633
|
+
acc.processEvent(toolCallEvent("tc-dup", "edit", "running", "r1", { args: { path: "a.ts" } }));
|
|
634
|
+
|
|
635
|
+
expect(countToolCallsWithId(messages, "tc-dup")).toBe(1);
|
|
636
|
+
expect(findToolCallById(messages, "tc-dup")!.status).toBe(ToolCallStatus.TOOL_CALL_RUNNING);
|
|
637
|
+
});
|
|
638
|
+
|
|
639
|
+
it("running -> completed -> running re-emit keeps a single COMPLETED ToolCall", () => {
|
|
640
|
+
const messages: AgentMessage[] = [];
|
|
641
|
+
const acc = new MessageAccumulator(messages);
|
|
642
|
+
|
|
643
|
+
acc.processEvent(assistantEvent("r1", "Running a tool."));
|
|
644
|
+
acc.processEvent(toolCallEvent("tc-1", "Shell", "running", "r1"));
|
|
645
|
+
acc.processEvent(toolCallEvent("tc-1", "Shell", "completed", "r1", { result: "OK" }));
|
|
646
|
+
// A late "running" re-emit must not regress the terminal status.
|
|
647
|
+
acc.processEvent(toolCallEvent("tc-1", "Shell", "running", "r1"));
|
|
648
|
+
|
|
649
|
+
expect(countToolCallsWithId(messages, "tc-1")).toBe(1);
|
|
650
|
+
const tc = findToolCallById(messages, "tc-1")!;
|
|
651
|
+
expect(tc.status).toBe(ToolCallStatus.TOOL_CALL_COMPLETED);
|
|
652
|
+
expect(tc.result).toBe("OK");
|
|
653
|
+
expect(tc.completedAt).toBeTruthy();
|
|
654
|
+
});
|
|
655
|
+
|
|
656
|
+
it("thin-then-full: a result-bearing completion populates the single ToolCall created by an empty running", () => {
|
|
657
|
+
const messages: AgentMessage[] = [];
|
|
658
|
+
const acc = new MessageAccumulator(messages);
|
|
659
|
+
|
|
660
|
+
// Reproduces the production pattern: two running events, then one
|
|
661
|
+
// completion that carries the full result.
|
|
662
|
+
acc.processEvent(assistantEvent("r1", "Delegating work."));
|
|
663
|
+
acc.processEvent(toolCallEvent("tc-task", "task", "running", "r1", { result: "" }));
|
|
664
|
+
acc.processEvent(toolCallEvent("tc-task", "task", "running", "r1", { result: "" }));
|
|
665
|
+
acc.processEvent(toolCallEvent("tc-task", "task", "completed", "r1", { result: "full result blob" }));
|
|
666
|
+
|
|
667
|
+
expect(countToolCallsWithId(messages, "tc-task")).toBe(1);
|
|
668
|
+
const tc = findToolCallById(messages, "tc-task")!;
|
|
669
|
+
expect(tc.status).toBe(ToolCallStatus.TOOL_CALL_COMPLETED);
|
|
670
|
+
expect(tc.result).toBe("full result blob");
|
|
671
|
+
});
|
|
672
|
+
|
|
673
|
+
it("a result-less re-emit after completion does not wipe the captured result", () => {
|
|
674
|
+
const messages: AgentMessage[] = [];
|
|
675
|
+
const acc = new MessageAccumulator(messages);
|
|
676
|
+
|
|
677
|
+
acc.processEvent(assistantEvent("r1", "Running a tool."));
|
|
678
|
+
acc.processEvent(toolCallEvent("tc-1", "read", "running", "r1"));
|
|
679
|
+
acc.processEvent(toolCallEvent("tc-1", "read", "completed", "r1", { result: "file contents" }));
|
|
680
|
+
acc.processEvent(toolCallEvent("tc-1", "read", "completed", "r1", { result: "" }));
|
|
681
|
+
|
|
682
|
+
expect(countToolCallsWithId(messages, "tc-1")).toBe(1);
|
|
683
|
+
expect(findToolCallById(messages, "tc-1")!.result).toBe("file contents");
|
|
684
|
+
});
|
|
685
|
+
|
|
686
|
+
it("duplicate task running events yield one task ToolCall and one sub-agent (production repro)", () => {
|
|
687
|
+
const messages: AgentMessage[] = [];
|
|
688
|
+
const acc = new MessageAccumulator(messages);
|
|
689
|
+
|
|
690
|
+
// Mirror the ExecuteCursor stream loop: every task tool_call event is fed
|
|
691
|
+
// to both processEvent() (tool call) and trackSubAgentExecution().
|
|
692
|
+
acc.processEvent(assistantEvent("r1", "I'll explore the repo."));
|
|
693
|
+
const args = { subagentType: { kind: "explore" }, description: "Explore repo structure and docs", prompt: "Go" };
|
|
694
|
+
|
|
695
|
+
const run1 = toolCallEvent("tc-explore", "task", "running", "r1", { args, result: "" });
|
|
696
|
+
acc.processEvent(run1);
|
|
697
|
+
acc.trackSubAgentExecution(run1);
|
|
698
|
+
|
|
699
|
+
const run2 = toolCallEvent("tc-explore", "task", "running", "r1", { args, result: "" });
|
|
700
|
+
acc.processEvent(run2);
|
|
701
|
+
acc.trackSubAgentExecution(run2);
|
|
702
|
+
|
|
703
|
+
const done = toolCallEvent("tc-explore", "task", "completed", "r1", { result: "explored" });
|
|
704
|
+
acc.processEvent(done);
|
|
705
|
+
acc.trackSubAgentExecution(done);
|
|
706
|
+
|
|
707
|
+
expect(countToolCallsWithId(messages, "tc-explore")).toBe(1);
|
|
708
|
+
expect(acc.subAgentExecutions).toHaveLength(1);
|
|
709
|
+
expect(acc.subAgentExecutions[0].id).toBe("tc-explore");
|
|
710
|
+
});
|
|
711
|
+
});
|
|
712
|
+
|
|
620
713
|
describe("cancelInProgressSubAgentProtos standalone", () => {
|
|
621
714
|
it("cancels IN_PROGRESS/PENDING protos in place and reports whether anything changed", () => {
|
|
622
715
|
const running = create(SubAgentExecutionSchema, {
|