@stigmer/runner 3.0.2 → 3.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/.build-fingerprint +1 -1
- package/dist/activities/execute-cursor/approval-policy.d.ts +55 -16
- package/dist/activities/execute-cursor/approval-policy.js +93 -31
- package/dist/activities/execute-cursor/approval-policy.js.map +1 -1
- package/dist/activities/execute-cursor/approval-state.d.ts +54 -26
- package/dist/activities/execute-cursor/approval-state.js +41 -26
- package/dist/activities/execute-cursor/approval-state.js.map +1 -1
- package/dist/activities/execute-cursor/hook-script.d.ts +41 -14
- package/dist/activities/execute-cursor/hook-script.js +155 -63
- package/dist/activities/execute-cursor/hook-script.js.map +1 -1
- package/dist/activities/execute-cursor/message-translator.d.ts +23 -0
- package/dist/activities/execute-cursor/message-translator.js +100 -54
- package/dist/activities/execute-cursor/message-translator.js.map +1 -1
- package/dist/activities/execute-cursor/session-lifecycle.d.ts +9 -0
- package/dist/activities/execute-cursor/session-lifecycle.js +11 -3
- package/dist/activities/execute-cursor/session-lifecycle.js.map +1 -1
- package/package.json +2 -2
- package/src/activities/execute-cursor/__tests__/approval-gate.test.ts +93 -37
- package/src/activities/execute-cursor/__tests__/hitl-ledger.test.ts +33 -18
- package/src/activities/execute-cursor/__tests__/hook-script.test.ts +204 -0
- package/src/activities/execute-cursor/__tests__/message-translator.test.ts +93 -0
- package/src/activities/execute-cursor/__tests__/session-lifecycle.test.ts +73 -2
- package/src/activities/execute-cursor/approval-policy.ts +113 -31
- package/src/activities/execute-cursor/approval-state.ts +74 -32
- package/src/activities/execute-cursor/hook-script.ts +157 -63
- package/src/activities/execute-cursor/message-translator.ts +114 -57
- package/src/activities/execute-cursor/session-lifecycle.ts +21 -3
|
@@ -16,32 +16,59 @@
|
|
|
16
16
|
* so its ledger is the authoritative record of what was gated this turn
|
|
17
17
|
* 5. Returns { "permission": "allow" } or { "permission": "deny" } on stdout
|
|
18
18
|
*
|
|
19
|
-
*
|
|
20
|
-
*
|
|
21
|
-
*
|
|
22
|
-
*
|
|
23
|
-
*
|
|
19
|
+
* Identity extraction runs on the SAME Node.js binary as the runner (its
|
|
20
|
+
* absolute path — process.execPath — is baked into the script at generation
|
|
21
|
+
* time), because the identity token must be byte-identical to the one the
|
|
22
|
+
* runner computes from the parsed stream event. The original grep/cut
|
|
23
|
+
* extraction is kept only as a best-effort fallback if that binary cannot run:
|
|
24
|
+
* grep's `"command":"[^"]*"` truncates at the first JSON-escaped quote, so for
|
|
25
|
+
* a shell command like `printf '%s' "x" > file` the fallback token will NOT
|
|
26
|
+
* match the runner's — the call is still denied (the gate holds) but the
|
|
27
|
+
* denial cannot be overlaid onto the real streamed tool call and a grant for
|
|
28
|
+
* it will not match on reinvocation. All policy decisions are pre-computed by
|
|
29
|
+
* the runner into the state file (and into this generated script); the hook
|
|
30
|
+
* only performs mechanical field extraction and string lookups — the policy
|
|
31
|
+
* itself is authored once in TypeScript (approval-policy.ts /
|
|
32
|
+
* approval-state.ts).
|
|
33
|
+
*
|
|
34
|
+
* Cross-taxonomy identity (the crux):
|
|
35
|
+
* The preToolUse hook and the SDK event stream name the same operation
|
|
36
|
+
* differently — the hook receives PascalCase `tool_name` (`Write` for any file
|
|
37
|
+
* create/edit, `Shell`, `Delete`) while the stream emits lowercase `event.name`
|
|
38
|
+
* (`edit`, `shell`, `delete`). They also name the salient argument differently
|
|
39
|
+
* (`file_path` in the hook input vs `path` in the stream). So the hook and the
|
|
40
|
+
* runner cannot correlate on the raw name. Instead both reduce a tool call to a
|
|
41
|
+
* canonical identity — `base64(category \n salient)` — where `category` is the
|
|
42
|
+
* approval category (`write`/`delete`/`shell`, baked into the case statement
|
|
43
|
+
* below from approval-policy.ts) and `salient` is the resource VALUE (the file
|
|
44
|
+
* path or shell command), which is identical on both sides. The runner mirrors
|
|
45
|
+
* this exactly in approval-state.ts (toolIdentity + grantToken), so a denial
|
|
46
|
+
* recorded here correlates to the streamed tool call, and an approval grant
|
|
47
|
+
* matches the agent's re-attempt on reinvocation.
|
|
24
48
|
*
|
|
25
49
|
* Policy evaluation order (first match wins). The model is "gate the dangerous
|
|
26
50
|
* set, allow the rest" — matching the native harness and avoiding denial of
|
|
27
51
|
* auto-approved MCP tools (which are absent from mcpToolPolicies):
|
|
28
52
|
* 1. autoApproveAll → allow
|
|
29
|
-
* 2.
|
|
30
|
-
*
|
|
31
|
-
*
|
|
32
|
-
*
|
|
53
|
+
* 2. Gated built-in (category non-empty):
|
|
54
|
+
* a. identity token in approvedGrantTokens → allow (reinvocation grant)
|
|
55
|
+
* b. otherwise → record denial, deny
|
|
56
|
+
* 3. MCP tool present in mcpToolPolicies (require-approval):
|
|
57
|
+
* a. name token in approvedGrantTokens → allow
|
|
58
|
+
* b. otherwise → record denial, deny
|
|
59
|
+
* 4. Everything else (read-only built-ins, auto-approved MCP, unknown) → allow
|
|
33
60
|
*/
|
|
34
61
|
/**
|
|
35
62
|
* Generates the bash hook script content.
|
|
36
63
|
*
|
|
37
64
|
* The script reads a JSON state file written by the cursor-runner before
|
|
38
65
|
* each agent.send() call. The state file is the single source of truth
|
|
39
|
-
* for
|
|
66
|
+
* for the dynamic approval inputs (autoApproveAll, mcpToolPolicies,
|
|
67
|
+
* approvedGrantTokens). The static policy (which built-ins are gated and their
|
|
68
|
+
* categories, and which arg fields are salient) is baked into the script at
|
|
69
|
+
* generation time from approval-policy.ts.
|
|
40
70
|
*
|
|
41
|
-
*
|
|
42
|
-
* recomputed here from the incoming tool call. The salient-arg field list is
|
|
43
|
-
* injected from SALIENT_ARG_FIELDS so the runner and the hook never disagree on
|
|
44
|
-
* which argument identifies the resource. The encoding must stay byte-identical
|
|
71
|
+
* The identity token encoding (`base64(key \n salient)`) must stay byte-identical
|
|
45
72
|
* to grantToken() in approval-state.ts.
|
|
46
73
|
*/
|
|
47
74
|
export declare function generateHookScript(stateFilePath: string, ledgerFilePath: string): string;
|
|
@@ -16,62 +16,172 @@
|
|
|
16
16
|
* so its ledger is the authoritative record of what was gated this turn
|
|
17
17
|
* 5. Returns { "permission": "allow" } or { "permission": "deny" } on stdout
|
|
18
18
|
*
|
|
19
|
-
*
|
|
20
|
-
*
|
|
21
|
-
*
|
|
22
|
-
*
|
|
23
|
-
*
|
|
19
|
+
* Identity extraction runs on the SAME Node.js binary as the runner (its
|
|
20
|
+
* absolute path — process.execPath — is baked into the script at generation
|
|
21
|
+
* time), because the identity token must be byte-identical to the one the
|
|
22
|
+
* runner computes from the parsed stream event. The original grep/cut
|
|
23
|
+
* extraction is kept only as a best-effort fallback if that binary cannot run:
|
|
24
|
+
* grep's `"command":"[^"]*"` truncates at the first JSON-escaped quote, so for
|
|
25
|
+
* a shell command like `printf '%s' "x" > file` the fallback token will NOT
|
|
26
|
+
* match the runner's — the call is still denied (the gate holds) but the
|
|
27
|
+
* denial cannot be overlaid onto the real streamed tool call and a grant for
|
|
28
|
+
* it will not match on reinvocation. All policy decisions are pre-computed by
|
|
29
|
+
* the runner into the state file (and into this generated script); the hook
|
|
30
|
+
* only performs mechanical field extraction and string lookups — the policy
|
|
31
|
+
* itself is authored once in TypeScript (approval-policy.ts /
|
|
32
|
+
* approval-state.ts).
|
|
33
|
+
*
|
|
34
|
+
* Cross-taxonomy identity (the crux):
|
|
35
|
+
* The preToolUse hook and the SDK event stream name the same operation
|
|
36
|
+
* differently — the hook receives PascalCase `tool_name` (`Write` for any file
|
|
37
|
+
* create/edit, `Shell`, `Delete`) while the stream emits lowercase `event.name`
|
|
38
|
+
* (`edit`, `shell`, `delete`). They also name the salient argument differently
|
|
39
|
+
* (`file_path` in the hook input vs `path` in the stream). So the hook and the
|
|
40
|
+
* runner cannot correlate on the raw name. Instead both reduce a tool call to a
|
|
41
|
+
* canonical identity — `base64(category \n salient)` — where `category` is the
|
|
42
|
+
* approval category (`write`/`delete`/`shell`, baked into the case statement
|
|
43
|
+
* below from approval-policy.ts) and `salient` is the resource VALUE (the file
|
|
44
|
+
* path or shell command), which is identical on both sides. The runner mirrors
|
|
45
|
+
* this exactly in approval-state.ts (toolIdentity + grantToken), so a denial
|
|
46
|
+
* recorded here correlates to the streamed tool call, and an approval grant
|
|
47
|
+
* matches the agent's re-attempt on reinvocation.
|
|
24
48
|
*
|
|
25
49
|
* Policy evaluation order (first match wins). The model is "gate the dangerous
|
|
26
50
|
* set, allow the rest" — matching the native harness and avoiding denial of
|
|
27
51
|
* auto-approved MCP tools (which are absent from mcpToolPolicies):
|
|
28
52
|
* 1. autoApproveAll → allow
|
|
29
|
-
* 2.
|
|
30
|
-
*
|
|
31
|
-
*
|
|
32
|
-
*
|
|
53
|
+
* 2. Gated built-in (category non-empty):
|
|
54
|
+
* a. identity token in approvedGrantTokens → allow (reinvocation grant)
|
|
55
|
+
* b. otherwise → record denial, deny
|
|
56
|
+
* 3. MCP tool present in mcpToolPolicies (require-approval):
|
|
57
|
+
* a. name token in approvedGrantTokens → allow
|
|
58
|
+
* b. otherwise → record denial, deny
|
|
59
|
+
* 4. Everything else (read-only built-ins, auto-approved MCP, unknown) → allow
|
|
33
60
|
*/
|
|
34
|
-
import { SALIENT_ARG_FIELDS } from "./approval-policy.js";
|
|
61
|
+
import { SALIENT_ARG_FIELDS, getBuiltInGatedCategories } from "./approval-policy.js";
|
|
35
62
|
const APPROVAL_REQUIRED_AGENT_MESSAGE = "STIGMER_APPROVAL_REQUIRED: This tool call requires user approval before " +
|
|
36
|
-
"execution. Do not attempt alternative approaches or workarounds
|
|
37
|
-
"execution will resume after the user
|
|
63
|
+
"execution. Do not attempt alternative approaches or workarounds (including " +
|
|
64
|
+
"shell commands). Stop and wait — the execution will resume after the user " +
|
|
65
|
+
"reviews and approves this tool call.";
|
|
66
|
+
/**
 * Render the bash `case` arms that translate a hook `tool_name` into its
 * approval category.
 *
 * Tool names are grouped per category (in first-seen order) and each group
 * becomes one arm of the form `"A"|"B") CATEGORY="<category>" ;;`. The pairs
 * come from getBuiltInGatedCategories(), so the generated script uses the same
 * name → category table as the rest of the approval policy.
 *
 * @returns {string} The arms joined with newlines ("" when nothing is gated).
 */
function buildCategoryCaseArms() {
    // category → tool names, preserving the order categories are first seen.
    const toolsByCategory = new Map();
    for (const [toolName, category] of getBuiltInGatedCategories()) {
        if (!toolsByCategory.has(category)) {
            toolsByCategory.set(category, []);
        }
        toolsByCategory.get(category).push(toolName);
    }
    return Array.from(toolsByCategory, ([category, toolNames]) => {
        // Quote every name so bash matches it literally, not as a glob.
        const alternation = toolNames.map((toolName) => `"${toolName}"`).join("|");
        return ` ${alternation}) CATEGORY="${category}" ;;`;
    }).join("\n");
}
|
|
85
|
+
/**
 * Build the inline Node.js identity extractor embedded in the hook script.
 *
 * The generated program reads the hook's JSON from stdin (fd 0), parses it
 * with JSON.parse (the bash fallback's grep truncates string values at the
 * first escaped quote) and writes four newline-separated values to stdout:
 *   1. tool_name ("" when it is not a string)
 *   2. canonical category ("" when the tool is not a gated built-in)
 *   3. identity token  — base64(category + "\n" + salient)
 *   4. MCP name token  — base64(tool_name + "\n")
 * `salient` is the first field of SALIENT_ARG_FIELDS whose value in
 * `tool_input` is a non-empty string. The token encodings must stay
 * byte-identical to grantToken() in approval-state.ts.
 *
 * The result is embedded in a single-quoted bash string, so the JS must not
 * contain single quotes. The category map and salient field list are baked
 * from approval-policy.ts.
 *
 * @returns {string} Single-line JavaScript source for `node -e`.
 */
function buildNodeIdentityScript() {
    const categoryMap = {};
    for (const [name, category] of getBuiltInGatedCategories()) {
        categoryMap[name] = category;
    }
    const categories = JSON.stringify(categoryMap);
    const fields = JSON.stringify(SALIENT_ARG_FIELDS);
    return [
        `const t=JSON.parse(require("fs").readFileSync(0,"utf8"));`,
        `const name=typeof t.tool_name==="string"?t.tool_name:"";`,
        // Own-property lookup only: a plain `map[name]` would resolve inherited
        // members (constructor, toString, ...) to functions, giving a non-empty
        // "category" and gating tools that the bash fallback would let through.
        `const m=${categories};`,
        `const cat=Object.prototype.hasOwnProperty.call(m,name)?(m[name]||""):"";`,
        `const a=(t.tool_input&&typeof t.tool_input==="object")?t.tool_input:{};`,
        `let s="";`,
        `for(const f of ${fields}){const v=a[f];if(typeof v==="string"&&v){s=v;break;}}`,
        `const b=(x)=>Buffer.from(x,"utf8").toString("base64");`,
        `process.stdout.write(name+"\\n"+cat+"\\n"+b(cat+"\\n"+s)+"\\n"+b(name+"\\n"));`,
    ].join("");
}
|
|
38
116
|
/**
|
|
39
117
|
* Generates the bash hook script content.
|
|
40
118
|
*
|
|
41
119
|
* The script reads a JSON state file written by the cursor-runner before
|
|
42
120
|
* each agent.send() call. The state file is the single source of truth
|
|
43
|
-
* for
|
|
121
|
+
* for the dynamic approval inputs (autoApproveAll, mcpToolPolicies,
|
|
122
|
+
* approvedGrantTokens). The static policy (which built-ins are gated and their
|
|
123
|
+
* categories, and which arg fields are salient) is baked into the script at
|
|
124
|
+
* generation time from approval-policy.ts.
|
|
44
125
|
*
|
|
45
|
-
*
|
|
46
|
-
* recomputed here from the incoming tool call. The salient-arg field list is
|
|
47
|
-
* injected from SALIENT_ARG_FIELDS so the runner and the hook never disagree on
|
|
48
|
-
* which argument identifies the resource. The encoding must stay byte-identical
|
|
126
|
+
* The identity token encoding (`base64(key \n salient)`) must stay byte-identical
|
|
49
127
|
* to grantToken() in approval-state.ts.
|
|
50
128
|
*/
|
|
51
129
|
export function generateHookScript(stateFilePath, ledgerFilePath) {
|
|
52
130
|
const salientFields = SALIENT_ARG_FIELDS.join(" ");
|
|
131
|
+
const categoryCaseArms = buildCategoryCaseArms();
|
|
132
|
+
const nodeIdentityScript = buildNodeIdentityScript();
|
|
53
133
|
return `#!/bin/bash
|
|
54
134
|
# Stigmer HITL approval hook for Cursor preToolUse
|
|
55
135
|
# Generated by cursor-runner — do not edit manually.
|
|
56
136
|
#
|
|
57
|
-
# Reads tool call from stdin (JSON), checks approval state file,
|
|
58
|
-
#
|
|
137
|
+
# Reads tool call from stdin (JSON), checks approval state file, returns a
|
|
138
|
+
# permission decision on stdout (JSON). On a deny, appends the call's canonical
|
|
59
139
|
# identity token to the denial ledger so the runner can mark the gated tool call
|
|
60
|
-
# as WAITING_APPROVAL.
|
|
140
|
+
# as WAITING_APPROVAL. See hook-script.ts for the cross-taxonomy identity design.
|
|
61
141
|
|
|
62
142
|
set -euo pipefail
|
|
63
143
|
|
|
64
144
|
INPUT=$(cat)
|
|
65
145
|
|
|
66
|
-
# Extract tool_name from the hook input JSON.
|
|
67
|
-
# Cursor sends the actual tool name (e.g. "search_services" for MCP tools).
|
|
68
|
-
# Every extraction ends with '|| true': under 'set -e' a non-matching grep would
|
|
69
|
-
# otherwise abort the script and emit no decision.
|
|
70
|
-
TOOL_NAME=$(echo "$INPUT" | grep -o '"tool_name":"[^"]*"' | head -1 | cut -d'"' -f4 || true)
|
|
71
|
-
|
|
72
146
|
STATE_FILE="${stateFilePath}"
|
|
73
147
|
LEDGER_FILE="${ledgerFilePath}"
|
|
74
148
|
|
|
149
|
+
# --- Canonical identity: tool_name / category / identity token / MCP token ---
|
|
150
|
+
# Computed by the same Node.js binary that runs the cursor-runner (absolute path
|
|
151
|
+
# baked at generation time) so JSON string values — file paths and especially
|
|
152
|
+
# shell commands containing quotes, newlines, or unicode escapes — decode to the
|
|
153
|
+
# exact bytes the runner sees in the stream event. ELECTRON_RUN_AS_NODE makes
|
|
154
|
+
# the invocation safe when the runner is embedded in an Electron app (where
|
|
155
|
+
# process.execPath is the Electron binary).
|
|
156
|
+
NODE_BIN="${process.execPath}"
|
|
157
|
+
IDENTITY=$(printf '%s' "$INPUT" | ELECTRON_RUN_AS_NODE=1 "$NODE_BIN" -e '${nodeIdentityScript}' 2>/dev/null || true)
|
|
158
|
+
if [ -n "$IDENTITY" ]; then
|
|
159
|
+
TOOL_NAME=$(printf '%s\\n' "$IDENTITY" | sed -n 1p)
|
|
160
|
+
CATEGORY=$(printf '%s\\n' "$IDENTITY" | sed -n 2p)
|
|
161
|
+
TOKEN=$(printf '%s\\n' "$IDENTITY" | sed -n 3p)
|
|
162
|
+
MCP_TOKEN=$(printf '%s\\n' "$IDENTITY" | sed -n 4p)
|
|
163
|
+
else
|
|
164
|
+
# Fallback when the Node binary cannot run: grep/cut extraction. Best-effort
|
|
165
|
+
# only — '"field":"[^"]*"' truncates at the first JSON-escaped quote, so the
|
|
166
|
+
# token may not match the runner's for values containing escapes. Gating still
|
|
167
|
+
# holds (deny goes out); only denial correlation and grant precision degrade.
|
|
168
|
+
# Every extraction ends with '|| true': under 'set -e' a non-matching grep
|
|
169
|
+
# would otherwise abort the script and emit no decision.
|
|
170
|
+
TOOL_NAME=$(echo "$INPUT" | grep -o '"tool_name":"[^"]*"' | head -1 | cut -d'"' -f4 || true)
|
|
171
|
+
SALIENT=""
|
|
172
|
+
for field in ${salientFields}; do
|
|
173
|
+
v=$(echo "$INPUT" | grep -o "\\"$field\\":\\"[^\\"]*\\"" | head -1 | cut -d'"' -f4 || true)
|
|
174
|
+
if [ -n "$v" ]; then SALIENT="$v"; break; fi
|
|
175
|
+
done
|
|
176
|
+
CATEGORY=""
|
|
177
|
+
case "$TOOL_NAME" in
|
|
178
|
+
${categoryCaseArms}
|
|
179
|
+
*) CATEGORY="" ;;
|
|
180
|
+
esac
|
|
181
|
+
TOKEN=$(printf '%s\\n%s' "$CATEGORY" "$SALIENT" | base64 | tr -d '\\n')
|
|
182
|
+
MCP_TOKEN=$(printf '%s\\n' "$TOOL_NAME" | base64 | tr -d '\\n')
|
|
183
|
+
fi
|
|
184
|
+
|
|
75
185
|
# --- Failsafe: missing state file → deny (fail-closed) ---
|
|
76
186
|
if [ ! -f "$STATE_FILE" ]; then
|
|
77
187
|
echo '{"permission":"deny","agent_message":"${APPROVAL_REQUIRED_AGENT_MESSAGE}","user_message":"Tool requires approval: '"$TOOL_NAME"'"}'
|
|
@@ -86,66 +196,48 @@ if echo "$STATE" | grep -q '"autoApproveAll":true'; then
|
|
|
86
196
|
exit 0
|
|
87
197
|
fi
|
|
88
198
|
|
|
89
|
-
# --- 2. Approved grants (reinvocation after SubmitApproval) ---
|
|
90
|
-
# Build the same base64 token the runner stored for an approved tool call and
|
|
91
|
-
# match it against approvedGrantTokens. Match by (name + salient arg); fall back
|
|
92
|
-
# to name-only for grants with no salient arg (MCP tools). Salient-arg field
|
|
93
|
-
# order is injected from SALIENT_ARG_FIELDS (single source of truth).
|
|
94
|
-
TOKEN_NAME=$(printf '%s\\n' "$TOOL_NAME" | base64 | tr -d '\\n')
|
|
95
|
-
if echo "$STATE" | grep -q "\\"$TOKEN_NAME\\""; then
|
|
96
|
-
echo '{"permission":"allow"}'
|
|
97
|
-
exit 0
|
|
98
|
-
fi
|
|
99
|
-
SALIENT=""
|
|
100
|
-
for field in ${salientFields}; do
|
|
101
|
-
v=$(echo "$INPUT" | grep -o "\\"$field\\":\\"[^\\"]*\\"" | head -1 | cut -d'"' -f4 || true)
|
|
102
|
-
if [ -n "$v" ]; then SALIENT="$v"; break; fi
|
|
103
|
-
done
|
|
104
|
-
if [ -n "$SALIENT" ]; then
|
|
105
|
-
TOKEN_SALIENT=$(printf '%s\\n%s' "$TOOL_NAME" "$SALIENT" | base64 | tr -d '\\n')
|
|
106
|
-
if echo "$STATE" | grep -q "\\"$TOKEN_SALIENT\\""; then
|
|
107
|
-
echo '{"permission":"allow"}'
|
|
108
|
-
exit 0
|
|
109
|
-
fi
|
|
110
|
-
fi
|
|
111
|
-
|
|
112
|
-
# Identity token recorded on a deny so the runner can correlate the gated call
|
|
113
|
-
# back to its streamed tool call. Prefer the salient-arg token (identifies the
|
|
114
|
-
# specific resource); fall back to name-only. Byte-identical to grantToken().
|
|
115
|
-
if [ -n "$SALIENT" ]; then DENY_TOKEN="$TOKEN_SALIENT"; else DENY_TOKEN="$TOKEN_NAME"; fi
|
|
116
|
-
|
|
117
199
|
# Append a denial record to the ledger. Best-effort: a ledger write failure must
|
|
118
200
|
# never abort the decision (the deny still goes out on stdout). toolName is raw
|
|
119
201
|
# for human-readable debugging; token drives correlation in the runner.
|
|
120
202
|
record_denial() {
|
|
121
|
-
echo '{"toolName":"'"$TOOL_NAME"'","token":"'"$
|
|
203
|
+
echo '{"toolName":"'"$TOOL_NAME"'","token":"'"$1"'"}' >> "$LEDGER_FILE" 2>/dev/null || true
|
|
122
204
|
}
|
|
123
205
|
|
|
124
|
-
# ---
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
206
|
+
# --- 2. Gated built-in tools (category non-empty) ---
|
|
207
|
+
if [ -n "$CATEGORY" ]; then
|
|
208
|
+
# Reinvocation grant: this exact resource was approved earlier → allow.
|
|
209
|
+
if echo "$STATE" | grep -qF "\\"$TOKEN\\""; then
|
|
210
|
+
echo '{"permission":"allow"}'
|
|
211
|
+
exit 0
|
|
212
|
+
fi
|
|
213
|
+
record_denial "$TOKEN"
|
|
128
214
|
echo '{"permission":"deny","agent_message":"${APPROVAL_REQUIRED_AGENT_MESSAGE}","user_message":"Tool requires approval: '"$TOOL_NAME"'"}'
|
|
129
215
|
exit 0
|
|
130
216
|
fi
|
|
131
217
|
|
|
132
|
-
# ---
|
|
218
|
+
# --- 3. MCP tools that require approval → deny ---
|
|
133
219
|
# mcpToolPolicies holds only require-approval tools (auto-approved MCP tools are
|
|
134
|
-
# absent), so presence means "deny" unless an entry is explicitly false.
|
|
220
|
+
# absent), so presence means "deny" unless an entry is explicitly false. MCP tool
|
|
221
|
+
# names are consistent across the hook and the stream, so the identity token is
|
|
222
|
+
# name-only: base64("$TOOL_NAME\\n").
|
|
135
223
|
if echo "$STATE" | grep -q "\\"mcpToolPolicies\\"" && [ -n "$TOOL_NAME" ]; then
|
|
136
224
|
TOOL_POLICY=$(echo "$STATE" | grep -o "\\"$TOOL_NAME\\":{[^}]*}" | head -1 || true)
|
|
137
225
|
if [ -n "$TOOL_POLICY" ] && ! echo "$TOOL_POLICY" | grep -q '"requiresApproval":false'; then
|
|
226
|
+
if echo "$STATE" | grep -qF "\\"$MCP_TOKEN\\""; then
|
|
227
|
+
echo '{"permission":"allow"}'
|
|
228
|
+
exit 0
|
|
229
|
+
fi
|
|
138
230
|
MSG=$(echo "$TOOL_POLICY" | grep -o '"message":"[^"]*"' | head -1 | cut -d'"' -f4 || true)
|
|
139
231
|
if [ -z "$MSG" ]; then
|
|
140
232
|
MSG="Tool requires approval: $TOOL_NAME"
|
|
141
233
|
fi
|
|
142
|
-
record_denial
|
|
234
|
+
record_denial "$MCP_TOKEN"
|
|
143
235
|
echo '{"permission":"deny","agent_message":"${APPROVAL_REQUIRED_AGENT_MESSAGE}","user_message":"'"$MSG"'"}'
|
|
144
236
|
exit 0
|
|
145
237
|
fi
|
|
146
238
|
fi
|
|
147
239
|
|
|
148
|
-
# ---
|
|
240
|
+
# --- 4. Everything else → allow ---
|
|
149
241
|
# Read-only built-ins, auto-approved MCP tools, and anything not explicitly
|
|
150
242
|
# gated. Fail-open mirrors the native harness (gate the dangerous set, allow the
|
|
151
243
|
# rest) and prevents denying auto-approved MCP tools the state cannot enumerate.
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"hook-script.js","sourceRoot":"","sources":["../../../src/activities/execute-cursor/hook-script.ts"],"names":[],"mappings":"AAAA
|
|
1
|
+
{"version":3,"file":"hook-script.js","sourceRoot":"","sources":["../../../src/activities/execute-cursor/hook-script.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA2DG;AAEH,OAAO,EAAE,kBAAkB,EAAE,yBAAyB,EAAE,MAAM,sBAAsB,CAAC;AAErF,MAAM,+BAA+B,GACnC,0EAA0E;IAC1E,6EAA6E;IAC7E,4EAA4E;IAC5E,sCAAsC,CAAC;AAEzC;;;;GAIG;AACH,SAAS,qBAAqB;IAC5B,MAAM,UAAU,GAAG,IAAI,GAAG,EAAoB,CAAC;IAC/C,KAAK,MAAM,CAAC,IAAI,EAAE,QAAQ,CAAC,IAAI,yBAAyB,EAAE,EAAE,CAAC;QAC3D,MAAM,KAAK,GAAG,UAAU,CAAC,GAAG,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAC;QAC7C,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACjB,UAAU,CAAC,GAAG,CAAC,QAAQ,EAAE,KAAK,CAAC,CAAC;IAClC,CAAC;IACD,MAAM,IAAI,GAAa,EAAE,CAAC;IAC1B,KAAK,MAAM,CAAC,QAAQ,EAAE,KAAK,CAAC,IAAI,UAAU,EAAE,CAAC;QAC3C,MAAM,OAAO,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QACrD,IAAI,CAAC,IAAI,CAAC,SAAS,OAAO,eAAe,QAAQ,MAAM,CAAC,CAAC;IAC3D,CAAC;IACD,OAAO,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AACzB,CAAC;AAED;;;;;;;;;;;;GAYG;AACH,SAAS,uBAAuB;IAC9B,MAAM,WAAW,GAA2B,EAAE,CAAC;IAC/C,KAAK,MAAM,CAAC,IAAI,EAAE,QAAQ,CAAC,IAAI,yBAAyB,EAAE,EAAE,CAAC;QAC3D,WAAW,CAAC,IAAI,CAAC,GAAG,QAAQ,CAAC;IAC/B,CAAC;IACD,MAAM,UAAU,GAAG,IAAI,CAAC,SAAS,CAAC,WAAW,CAAC,CAAC;IAC/C,MAAM,MAAM,GAAG,IAAI,CAAC,SAAS,CAAC,kBAAkB,CAAC,CAAC;IAClD,OAAO;QACL,2DAA2D;QAC3D,0DAA0D;QAC1D,cAAc,UAAU,cAAc;QACtC,yEAAyE;QACzE,WAAW;QACX,kBAAkB,MAAM,wDAAwD;QAChF,wDAAwD;QACxD,gFAAgF;KACjF,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;AACb,CAAC;AAED;;;;;;;;;;;;GAYG;AACH,MAAM,UAAU,kBAAkB,CAAC,aAAqB,EAAE,cAAsB;IAC9E,MAAM,aAAa,GAAG,kBAAkB,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;IACnD,MAAM,gBAAgB,GAAG,qBAAqB,EAAE,CAAC;IACjD,MAAM,kBAAkB,GAAG,uBAAuB,EAAE,CAAC;IACrD,OAAO;;;;;;;;;;;;;cAaK,aAAa;eACZ,cAAc;;;;;;;;;YASjB,OAAO,CAAC,QAAQ;2EAC+C,kBAAkB;;;;;;;;;;;;;;;iBAe5E,aAAa;;;;;;EAM5B,gBAAgB;;;;;;;;;gDAS8B,+BAA+B;;;;;;;;;;;;;;;;;;;;;;;;;;;gDA2B/B,+BAA+B;;;;;;;;;;;;;;;;;;;;;kDAqB7B,+BAA+B;;;;;;;;;;;CAWhF,CAAC;AACF,CAAC"}
|
|
@@ -174,7 +174,30 @@ export declare class MessageAccumulator {
|
|
|
174
174
|
cancelInProgressSubAgents(): void;
|
|
175
175
|
processEvent(event: SDKMessage): void;
|
|
176
176
|
finalize(): void;
|
|
177
|
+
/**
|
|
178
|
+
* Attach a tool call to the current AI message, upserting by `call_id` so a
|
|
179
|
+
* single call maps to at most ONE ToolCall across all messages.
|
|
180
|
+
*
|
|
181
|
+
* The Cursor SDK can emit the lifecycle for one `call_id` more than once —
|
|
182
|
+
* observed in production as two "running" events ~0.5s apart for task/edit
|
|
183
|
+
* tools, which previously appended a duplicate ToolCall (the same call
|
|
184
|
+
* rendered two or three times in the UI). We therefore index by `call_id`
|
|
185
|
+
* and merge subsequent events into the existing proto, mirroring how
|
|
186
|
+
* trackSubAgentExecution() upserts via subAgentMap. The first event for a
|
|
187
|
+
* `call_id` (running or terminal) creates the proto on the last AI message;
|
|
188
|
+
* the index keeps pointing at it even after later assistant text starts a
|
|
189
|
+
* new AI message, so cross-message completions still land on the original.
|
|
190
|
+
*/
|
|
177
191
|
private attachToolCallToLastAi;
|
|
192
|
+
/**
|
|
193
|
+
* Merge a repeated tool_call event into the ToolCall already tracked for this
|
|
194
|
+
* `call_id`. The merge is defensive because a re-emitted event may carry less
|
|
195
|
+
* information than an earlier one (a late "running" after "completed", or a
|
|
196
|
+
* completion with an empty result): status only advances toward terminal,
|
|
197
|
+
* timestamps are stamped once, and a populated result/args is never clobbered
|
|
198
|
+
* by an empty one.
|
|
199
|
+
*/
|
|
200
|
+
private mergeToolCallEvent;
|
|
178
201
|
private findOrCreateLastAiMessage;
|
|
179
202
|
trackSubAgentExecution(event: Extract<SDKMessage, {
|
|
180
203
|
type: "tool_call";
|
|
@@ -33,8 +33,8 @@ import { create } from "@bufbuild/protobuf";
|
|
|
33
33
|
import { AgentMessageSchema, ToolCallSchema } from "@stigmer/protos/ai/stigmer/agentic/agentexecution/v1/message_pb";
|
|
34
34
|
import { SubAgentExecutionSchema } from "@stigmer/protos/ai/stigmer/agentic/agentexecution/v1/subagent_pb";
|
|
35
35
|
import { MessageType, ToolCallStatus, SubAgentStatus } from "@stigmer/protos/ai/stigmer/agentic/agentexecution/v1/enum_pb";
|
|
36
|
-
import { lookupMcpToolPolicy, resolveApprovalMessage, builtInRequiresApproval, getBuiltInApprovalMessage
|
|
37
|
-
import { grantToken } from "./approval-state.js";
|
|
36
|
+
import { lookupMcpToolPolicy, resolveApprovalMessage, builtInRequiresApproval, getBuiltInApprovalMessage } from "./approval-policy.js";
|
|
37
|
+
import { grantToken, toolIdentity } from "./approval-state.js";
|
|
38
38
|
import { utcTimestamp } from "../../shared/status.js";
|
|
39
39
|
import { classifyTool } from "../../shared/tool-kind.js";
|
|
40
40
|
export { utcTimestamp };
|
|
@@ -260,6 +260,16 @@ function safeString(obj, key) {
|
|
|
260
260
|
}
|
|
261
261
|
return "";
|
|
262
262
|
}
|
|
263
|
+
/**
 * Normalize a tool_call event result into a string for the ToolCall proto.
 *
 * - `null` / `undefined` → "" so callers can treat "no result yet" and
 *   "empty result" uniformly (e.g. to avoid clobbering a captured result).
 * - strings are returned unchanged.
 * - everything else is JSON-encoded. When JSON encoding is impossible
 *   (circular structure, BigInt) or yields `undefined` (functions, symbols),
 *   the value falls back to `String(result)` so the return value is always a
 *   string and a malformed result never throws out of event processing.
 *
 * @param {unknown} result Raw `result` field from the event.
 * @returns {string} String form of the result.
 */
function toResultString(result) {
    if (result == null)
        return "";
    if (typeof result === "string")
        return result;
    try {
        // JSON.stringify returns undefined (not a string) for functions/symbols.
        return JSON.stringify(result) ?? String(result);
    }
    catch {
        // Circular references and BigInt make JSON.stringify throw.
        return String(result);
    }
}
|
|
263
273
|
/**
|
|
264
274
|
* Parse the task tool's completed result into AgentMessages.
|
|
265
275
|
*
|
|
@@ -478,49 +488,74 @@ export class MessageAccumulator {
|
|
|
478
488
|
this.activeAiByRunId.clear();
|
|
479
489
|
this.activeThinkingByRunId.clear();
|
|
480
490
|
}
|
|
491
|
+
/**
|
|
492
|
+
* Attach a tool call to the current AI message, upserting by `call_id` so a
|
|
493
|
+
* single call maps to at most ONE ToolCall across all messages.
|
|
494
|
+
*
|
|
495
|
+
* The Cursor SDK can emit the lifecycle for one `call_id` more than once —
|
|
496
|
+
* observed in production as two "running" events ~0.5s apart for task/edit
|
|
497
|
+
* tools, which previously appended a duplicate ToolCall (the same call
|
|
498
|
+
* rendered two or three times in the UI). We therefore index by `call_id`
|
|
499
|
+
* and merge subsequent events into the existing proto, mirroring how
|
|
500
|
+
* trackSubAgentExecution() upserts via subAgentMap. The first event for a
|
|
501
|
+
* `call_id` (running or terminal) creates the proto on the last AI message;
|
|
502
|
+
* the index keeps pointing at it even after later assistant text starts a
|
|
503
|
+
* new AI message, so cross-message completions still land on the original.
|
|
504
|
+
*/
|
|
481
505
|
attachToolCallToLastAi(event) {
|
|
482
506
|
if (SUPPRESSED_TOOL_NAMES.has(event.name))
|
|
483
507
|
return;
|
|
484
|
-
const
|
|
485
|
-
if (
|
|
486
|
-
const aiMsg = this.findOrCreateLastAiMessage();
|
|
508
|
+
const existing = this.toolCallIndex.get(event.call_id);
|
|
509
|
+
if (!existing) {
|
|
487
510
|
const tc = buildToolCallProto(event, this.mergedPolicies);
|
|
488
|
-
|
|
511
|
+
this.findOrCreateLastAiMessage().toolCalls.push(tc);
|
|
489
512
|
this.toolCallIndex.set(event.call_id, tc);
|
|
513
|
+
return;
|
|
490
514
|
}
|
|
491
|
-
|
|
492
|
-
|
|
493
|
-
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
|
|
498
|
-
|
|
499
|
-
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
|
|
506
|
-
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
|
|
515
|
-
|
|
515
|
+
this.mergeToolCallEvent(existing, event);
|
|
516
|
+
}
|
|
517
|
+
/**
|
|
518
|
+
* Merge a repeated tool_call event into the ToolCall already tracked for this
|
|
519
|
+
* `call_id`. The merge is defensive because a re-emitted event may carry less
|
|
520
|
+
* information than an earlier one (a late "running" after "completed", or a
|
|
521
|
+
* completion with an empty result): status only advances toward terminal,
|
|
522
|
+
* timestamps are stamped once, and a populated result/args is never clobbered
|
|
523
|
+
* by an empty one.
|
|
524
|
+
*/
|
|
525
|
+
mergeToolCallEvent(existing, event) {
|
|
526
|
+
const status = mapToolCallStatus(event.status);
|
|
527
|
+
// Status advances monotonically: once terminal (completed/failed/skipped)
|
|
528
|
+
// a later "running" re-emit must not regress it back to RUNNING.
|
|
529
|
+
if (!isTerminalToolStatus(existing.status)) {
|
|
530
|
+
existing.status = status;
|
|
531
|
+
}
|
|
532
|
+
if (isTerminalToolStatus(status) && !existing.completedAt) {
|
|
533
|
+
existing.completedAt = utcTimestamp();
|
|
534
|
+
}
|
|
535
|
+
if (!existing.startedAt && status === ToolCallStatus.TOOL_CALL_RUNNING) {
|
|
536
|
+
existing.startedAt = utcTimestamp();
|
|
537
|
+
}
|
|
538
|
+
// Only a non-empty incoming result overwrites; a result-less "running"
|
|
539
|
+
// re-emit must not wipe a result captured on completion (or vice versa).
|
|
540
|
+
const incomingResult = toResultString(event.result);
|
|
541
|
+
if (incomingResult) {
|
|
542
|
+
existing.result = incomingResult;
|
|
543
|
+
}
|
|
544
|
+
if (status === ToolCallStatus.TOOL_CALL_FAILED) {
|
|
545
|
+
if (!existing.error) {
|
|
546
|
+
existing.error = typeof event.result === "string"
|
|
547
|
+
? event.result
|
|
548
|
+
: "Tool call failed";
|
|
516
549
|
}
|
|
517
|
-
|
|
518
|
-
|
|
519
|
-
const tc = buildToolCallProto(event, this.mergedPolicies);
|
|
520
|
-
aiMsg.toolCalls.push(tc);
|
|
521
|
-
this.toolCallIndex.set(event.call_id, tc);
|
|
550
|
+
if (existing.requiresApproval && !existing.approvalRequestedAt) {
|
|
551
|
+
existing.approvalRequestedAt = utcTimestamp();
|
|
522
552
|
}
|
|
523
553
|
}
|
|
554
|
+
if (event.args != null && !existing.argsPreview) {
|
|
555
|
+
existing.argsPreview = typeof event.args === "string"
|
|
556
|
+
? event.args
|
|
557
|
+
: JSON.stringify(event.args);
|
|
558
|
+
}
|
|
524
559
|
}
|
|
525
560
|
findOrCreateLastAiMessage() {
|
|
526
561
|
for (let i = this.messages.length - 1; i >= 0; i--) {
|
|
@@ -666,13 +701,18 @@ export function reconcileDeniedToolCalls(messages, ledger, mergedPolicies) {
|
|
|
666
701
|
}
|
|
667
702
|
}
|
|
668
703
|
// 2. Synthesize a tool call for any denial that never produced a stream event.
|
|
704
|
+
// Rare with correct correlation (Cursor emits a tool_call for every attempt),
|
|
705
|
+
// so this is a defensive net that still surfaces the gate rather than letting
|
|
706
|
+
// a denied tool render as a silent success.
|
|
669
707
|
for (const entry of ledger) {
|
|
670
708
|
if (matched.has(entry.token))
|
|
671
709
|
continue;
|
|
672
710
|
const decoded = decodeIdentityToken(entry.token);
|
|
673
|
-
|
|
674
|
-
|
|
675
|
-
const
|
|
711
|
+
// Display the hook's raw tool name; carry the decoded salient so the grant
|
|
712
|
+
// rebuilt from this tool call on reinvocation keys on the same resource.
|
|
713
|
+
const displayName = entry.toolName || decoded?.key || "tool";
|
|
714
|
+
const salient = decoded?.salient ?? "";
|
|
715
|
+
const tc = synthesizeWaitingApprovalToolCall(displayName, salient, entry.token, mergedPolicies);
|
|
676
716
|
appendToolCallToLastAiMessage(messages, tc);
|
|
677
717
|
matched.add(entry.token);
|
|
678
718
|
result.push(tc);
|
|
@@ -680,23 +720,24 @@ export function reconcileDeniedToolCalls(messages, ledger, mergedPolicies) {
|
|
|
680
720
|
return result;
|
|
681
721
|
}
|
|
682
722
|
/**
|
|
683
|
-
* Compute a tool call's identity token in the same space the
|
|
684
|
-
*
|
|
685
|
-
*
|
|
686
|
-
*
|
|
723
|
+
* Compute a streamed tool call's identity token in the same canonical space the
|
|
724
|
+
* preToolUse hook records denials in (see {@link toolIdentity} and grantToken).
|
|
725
|
+
* The token keys on the cross-taxonomy category + salient resource, so a stream
|
|
726
|
+
* `edit` (token `base64("write\n/path")`) correlates to the hook's `Write` deny
|
|
727
|
+
* for the same path, even though the two layers name the tool differently.
|
|
687
728
|
*/
|
|
688
729
|
function toolCallIdentityToken(tc) {
|
|
689
|
-
const
|
|
690
|
-
return grantToken(
|
|
730
|
+
const id = toolIdentity(tc.name, tc.mcpServerSlug, toolCallArgs(tc));
|
|
731
|
+
return grantToken(id.key, id.salient);
|
|
691
732
|
}
|
|
692
|
-
/** Decode a
|
|
733
|
+
/** Decode a grantToken back into its (key, salient) for the synthesis fallback. */
|
|
693
734
|
function decodeIdentityToken(token) {
|
|
694
735
|
try {
|
|
695
736
|
const decoded = Buffer.from(token, "base64").toString("utf-8");
|
|
696
737
|
const nl = decoded.indexOf("\n");
|
|
697
738
|
if (nl < 0)
|
|
698
739
|
return undefined;
|
|
699
|
-
return {
|
|
740
|
+
return { key: decoded.slice(0, nl), salient: decoded.slice(nl + 1) };
|
|
700
741
|
}
|
|
701
742
|
catch {
|
|
702
743
|
return undefined;
|
|
@@ -735,19 +776,24 @@ function markWaitingApproval(tc, mergedPolicies) {
|
|
|
735
776
|
tc.error = "";
|
|
736
777
|
tc.result = "";
|
|
737
778
|
}
|
|
738
|
-
function synthesizeWaitingApprovalToolCall(
|
|
779
|
+
function synthesizeWaitingApprovalToolCall(displayName, salient, token, mergedPolicies) {
|
|
739
780
|
const tc = create(ToolCallSchema, {
|
|
740
|
-
id: `approval:${
|
|
741
|
-
name,
|
|
781
|
+
id: `approval:${token}`,
|
|
782
|
+
name: displayName,
|
|
742
783
|
status: ToolCallStatus.TOOL_CALL_WAITING_APPROVAL,
|
|
743
784
|
requiresApproval: true,
|
|
744
785
|
startedAt: utcTimestamp(),
|
|
745
786
|
approvalRequestedAt: utcTimestamp(),
|
|
746
|
-
toolKind: classifyTool(
|
|
787
|
+
toolKind: classifyTool(displayName),
|
|
747
788
|
});
|
|
748
|
-
|
|
749
|
-
|
|
750
|
-
|
|
789
|
+
// Carry the salient resource so reconstructAdjudicatedApprovals -> the grant
|
|
790
|
+
// builder keys on the same resource the hook will see on the re-attempt.
|
|
791
|
+
if (salient) {
|
|
792
|
+
tc.argsPreview = JSON.stringify({ path: salient });
|
|
793
|
+
}
|
|
794
|
+
tc.approvalMessage = salient
|
|
795
|
+
? `Tool requires approval: ${displayName} (${salient})`
|
|
796
|
+
: resolveDeniedApprovalMessage(displayName, "", {}, mergedPolicies);
|
|
751
797
|
return tc;
|
|
752
798
|
}
|
|
753
799
|
/**
|