@stigmer/runner 3.0.2 → 3.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (27) hide show
  1. package/dist/.build-fingerprint +1 -1
  2. package/dist/activities/execute-cursor/approval-policy.d.ts +55 -16
  3. package/dist/activities/execute-cursor/approval-policy.js +93 -31
  4. package/dist/activities/execute-cursor/approval-policy.js.map +1 -1
  5. package/dist/activities/execute-cursor/approval-state.d.ts +54 -26
  6. package/dist/activities/execute-cursor/approval-state.js +41 -26
  7. package/dist/activities/execute-cursor/approval-state.js.map +1 -1
  8. package/dist/activities/execute-cursor/hook-script.d.ts +41 -14
  9. package/dist/activities/execute-cursor/hook-script.js +155 -63
  10. package/dist/activities/execute-cursor/hook-script.js.map +1 -1
  11. package/dist/activities/execute-cursor/message-translator.d.ts +23 -0
  12. package/dist/activities/execute-cursor/message-translator.js +100 -54
  13. package/dist/activities/execute-cursor/message-translator.js.map +1 -1
  14. package/dist/activities/execute-cursor/session-lifecycle.d.ts +9 -0
  15. package/dist/activities/execute-cursor/session-lifecycle.js +11 -3
  16. package/dist/activities/execute-cursor/session-lifecycle.js.map +1 -1
  17. package/package.json +2 -2
  18. package/src/activities/execute-cursor/__tests__/approval-gate.test.ts +93 -37
  19. package/src/activities/execute-cursor/__tests__/hitl-ledger.test.ts +33 -18
  20. package/src/activities/execute-cursor/__tests__/hook-script.test.ts +204 -0
  21. package/src/activities/execute-cursor/__tests__/message-translator.test.ts +93 -0
  22. package/src/activities/execute-cursor/__tests__/session-lifecycle.test.ts +73 -2
  23. package/src/activities/execute-cursor/approval-policy.ts +113 -31
  24. package/src/activities/execute-cursor/approval-state.ts +74 -32
  25. package/src/activities/execute-cursor/hook-script.ts +157 -63
  26. package/src/activities/execute-cursor/message-translator.ts +114 -57
  27. package/src/activities/execute-cursor/session-lifecycle.ts +21 -3
@@ -16,32 +16,59 @@
16
16
  * so its ledger is the authoritative record of what was gated this turn
17
17
  * 5. Returns { "permission": "allow" } or { "permission": "deny" } on stdout
18
18
  *
19
- * The script is self-contained (no Node.js required) for portability. It uses
20
- * bash + grep/cut for lightweight JSON field extraction. All policy decisions
21
- * are pre-computed by the runner into the state file; the hook only performs
22
- * mechanical field extraction and string lookups — the policy itself is
23
- * authored once in TypeScript (approval-policy.ts / approval-state.ts).
19
+ * Identity extraction runs on the SAME Node.js binary as the runner (its
20
+ * absolute path process.execPath is baked into the script at generation
21
+ * time), because the identity token must be byte-identical to the one the
22
+ * runner computes from the parsed stream event. The original grep/cut
23
+ * extraction is kept only as a best-effort fallback if that binary cannot run:
24
+ * grep's `"command":"[^"]*"` truncates at the first JSON-escaped quote, so for
25
+ * a shell command like `printf '%s' "x" > file` the fallback token will NOT
26
+ * match the runner's — the call is still denied (the gate holds) but the
27
+ * denial cannot be overlaid onto the real streamed tool call and a grant for
28
+ * it will not match on reinvocation. All policy decisions are pre-computed by
29
+ * the runner into the state file (and into this generated script); the hook
30
+ * only performs mechanical field extraction and string lookups — the policy
31
+ * itself is authored once in TypeScript (approval-policy.ts /
32
+ * approval-state.ts).
33
+ *
34
+ * Cross-taxonomy identity (the crux):
35
+ * The preToolUse hook and the SDK event stream name the same operation
36
+ * differently — the hook receives PascalCase `tool_name` (`Write` for any file
37
+ * create/edit, `Shell`, `Delete`) while the stream emits lowercase `event.name`
38
+ * (`edit`, `shell`, `delete`). They also name the salient argument differently
39
+ * (`file_path` in the hook input vs `path` in the stream). So the hook and the
40
+ * runner cannot correlate on the raw name. Instead both reduce a tool call to a
41
+ * canonical identity — `base64(category \n salient)` — where `category` is the
42
+ * approval category (`write`/`delete`/`shell`, baked into the case statement
43
+ * below from approval-policy.ts) and `salient` is the resource VALUE (the file
44
+ * path or shell command), which is identical on both sides. The runner mirrors
45
+ * this exactly in approval-state.ts (toolIdentity + grantToken), so a denial
46
+ * recorded here correlates to the streamed tool call, and an approval grant
47
+ * matches the agent's re-attempt on reinvocation.
24
48
  *
25
49
  * Policy evaluation order (first match wins). The model is "gate the dangerous
26
50
  * set, allow the rest" — matching the native harness and avoiding denial of
27
51
  * auto-approved MCP tools (which are absent from mcpToolPolicies):
28
52
  * 1. autoApproveAll → allow
29
- * 2. Matches an approved grant token → allow (reinvocation after approval)
30
- * 3. Tool name in builtInGatedList → deny
31
- * 4. Tool name in mcpToolPolicies (require-approval) → deny
32
- * 5. Everything else (read-only built-ins, auto-approved MCP, unknown) → allow
53
+ * 2. Gated built-in (category non-empty):
54
+ * a. identity token in approvedGrantTokens → allow (reinvocation grant)
55
+ * b. otherwise → record denial, deny
56
+ * 3. MCP tool present in mcpToolPolicies (require-approval):
57
+ * a. name token in approvedGrantTokens → allow
58
+ * b. otherwise → record denial, deny
59
+ * 4. Everything else (read-only built-ins, auto-approved MCP, unknown) → allow
33
60
  */
34
61
  /**
35
62
  * Generates the bash hook script content.
36
63
  *
37
64
  * The script reads a JSON state file written by the cursor-runner before
38
65
  * each agent.send() call. The state file is the single source of truth
39
- * for all approval decisions.
66
+ * for the dynamic approval inputs (autoApproveAll, mcpToolPolicies,
67
+ * approvedGrantTokens). The static policy (which built-ins are gated and their
68
+ * categories, and which arg fields are salient) is baked into the script at
69
+ * generation time from approval-policy.ts.
40
70
  *
41
- * Approved grants are matched by a base64 token of `toolName \n salientArg`,
42
- * recomputed here from the incoming tool call. The salient-arg field list is
43
- * injected from SALIENT_ARG_FIELDS so the runner and the hook never disagree on
44
- * which argument identifies the resource. The encoding must stay byte-identical
71
+ * The identity token encoding (`base64(key \n salient)`) must stay byte-identical
45
72
  * to grantToken() in approval-state.ts.
46
73
  */
47
74
  export declare function generateHookScript(stateFilePath: string, ledgerFilePath: string): string;
@@ -16,62 +16,172 @@
16
16
  * so its ledger is the authoritative record of what was gated this turn
17
17
  * 5. Returns { "permission": "allow" } or { "permission": "deny" } on stdout
18
18
  *
19
- * The script is self-contained (no Node.js required) for portability. It uses
20
- * bash + grep/cut for lightweight JSON field extraction. All policy decisions
21
- * are pre-computed by the runner into the state file; the hook only performs
22
- * mechanical field extraction and string lookups — the policy itself is
23
- * authored once in TypeScript (approval-policy.ts / approval-state.ts).
19
+ * Identity extraction runs on the SAME Node.js binary as the runner (its
20
+ * absolute path process.execPath is baked into the script at generation
21
+ * time), because the identity token must be byte-identical to the one the
22
+ * runner computes from the parsed stream event. The original grep/cut
23
+ * extraction is kept only as a best-effort fallback if that binary cannot run:
24
+ * grep's `"command":"[^"]*"` truncates at the first JSON-escaped quote, so for
25
+ * a shell command like `printf '%s' "x" > file` the fallback token will NOT
26
+ * match the runner's — the call is still denied (the gate holds) but the
27
+ * denial cannot be overlaid onto the real streamed tool call and a grant for
28
+ * it will not match on reinvocation. All policy decisions are pre-computed by
29
+ * the runner into the state file (and into this generated script); the hook
30
+ * only performs mechanical field extraction and string lookups — the policy
31
+ * itself is authored once in TypeScript (approval-policy.ts /
32
+ * approval-state.ts).
33
+ *
34
+ * Cross-taxonomy identity (the crux):
35
+ * The preToolUse hook and the SDK event stream name the same operation
36
+ * differently — the hook receives PascalCase `tool_name` (`Write` for any file
37
+ * create/edit, `Shell`, `Delete`) while the stream emits lowercase `event.name`
38
+ * (`edit`, `shell`, `delete`). They also name the salient argument differently
39
+ * (`file_path` in the hook input vs `path` in the stream). So the hook and the
40
+ * runner cannot correlate on the raw name. Instead both reduce a tool call to a
41
+ * canonical identity — `base64(category \n salient)` — where `category` is the
42
+ * approval category (`write`/`delete`/`shell`, baked into the case statement
43
+ * below from approval-policy.ts) and `salient` is the resource VALUE (the file
44
+ * path or shell command), which is identical on both sides. The runner mirrors
45
+ * this exactly in approval-state.ts (toolIdentity + grantToken), so a denial
46
+ * recorded here correlates to the streamed tool call, and an approval grant
47
+ * matches the agent's re-attempt on reinvocation.
24
48
  *
25
49
  * Policy evaluation order (first match wins). The model is "gate the dangerous
26
50
  * set, allow the rest" — matching the native harness and avoiding denial of
27
51
  * auto-approved MCP tools (which are absent from mcpToolPolicies):
28
52
  * 1. autoApproveAll → allow
29
- * 2. Matches an approved grant token → allow (reinvocation after approval)
30
- * 3. Tool name in builtInGatedList → deny
31
- * 4. Tool name in mcpToolPolicies (require-approval) → deny
32
- * 5. Everything else (read-only built-ins, auto-approved MCP, unknown) → allow
53
+ * 2. Gated built-in (category non-empty):
54
+ * a. identity token in approvedGrantTokens → allow (reinvocation grant)
55
+ * b. otherwise → record denial, deny
56
+ * 3. MCP tool present in mcpToolPolicies (require-approval):
57
+ * a. name token in approvedGrantTokens → allow
58
+ * b. otherwise → record denial, deny
59
+ * 4. Everything else (read-only built-ins, auto-approved MCP, unknown) → allow
33
60
  */
34
- import { SALIENT_ARG_FIELDS } from "./approval-policy.js";
61
+ import { SALIENT_ARG_FIELDS, getBuiltInGatedCategories } from "./approval-policy.js";
35
62
  const APPROVAL_REQUIRED_AGENT_MESSAGE = "STIGMER_APPROVAL_REQUIRED: This tool call requires user approval before " +
36
- "execution. Do not attempt alternative approaches or workarounds. The " +
37
- "execution will resume after the user reviews and approves this tool call.";
63
+ "execution. Do not attempt alternative approaches or workarounds (including " +
64
+ "shell commands). Stop and wait — the execution will resume after the user " +
65
+ "reviews and approves this tool call.";
66
+ /**
67
+ * Build the bash `case` arms that map an incoming hook `tool_name` to its
68
+ * canonical approval category. Generated from approval-policy.ts so the hook and
69
+ * the runner never disagree on which built-ins are gated or how they categorize.
70
+ */
71
+ function buildCategoryCaseArms() {
72
+ const byCategory = new Map();
73
+ for (const [name, category] of getBuiltInGatedCategories()) {
74
+ const names = byCategory.get(category) ?? [];
75
+ names.push(name);
76
+ byCategory.set(category, names);
77
+ }
78
+ const arms = [];
79
+ for (const [category, names] of byCategory) {
80
+ const pattern = names.map((n) => `"${n}"`).join("|");
81
+ arms.push(` ${pattern}) CATEGORY="${category}" ;;`);
82
+ }
83
+ return arms.join("\n");
84
+ }
85
+ /**
86
+ * Build the inline Node.js identity extractor embedded in the hook script.
87
+ *
88
+ * Parses the hook's stdin JSON properly (the bash fallback's grep truncates
89
+ * string values at the first escaped quote) and emits four lines:
90
+ * tool_name, canonical category, identity token, and MCP name-token. The token
91
+ * encodings must stay byte-identical to grantToken() in approval-state.ts.
92
+ *
93
+ * Authored as a single-quoted bash string, so the JS must not contain single
94
+ * quotes. The category map and salient field list are baked from
95
+ * approval-policy.ts — the same source the runner uses — so the two sides can
96
+ * never disagree.
97
+ */
98
+ function buildNodeIdentityScript() {
99
+ const categoryMap = {};
100
+ for (const [name, category] of getBuiltInGatedCategories()) {
101
+ categoryMap[name] = category;
102
+ }
103
+ const categories = JSON.stringify(categoryMap);
104
+ const fields = JSON.stringify(SALIENT_ARG_FIELDS);
105
+ return [
106
+ `const t=JSON.parse(require("fs").readFileSync(0,"utf8"));`,
107
+ `const name=typeof t.tool_name==="string"?t.tool_name:"";`,
108
+ `const cat=(${categories})[name]||"";`,
109
+ `const a=(t.tool_input&&typeof t.tool_input==="object")?t.tool_input:{};`,
110
+ `let s="";`,
111
+ `for(const f of ${fields}){const v=a[f];if(typeof v==="string"&&v){s=v;break;}}`,
112
+ `const b=(x)=>Buffer.from(x,"utf8").toString("base64");`,
113
+ `process.stdout.write(name+"\\n"+cat+"\\n"+b(cat+"\\n"+s)+"\\n"+b(name+"\\n"));`,
114
+ ].join("");
115
+ }
38
116
  /**
39
117
  * Generates the bash hook script content.
40
118
  *
41
119
  * The script reads a JSON state file written by the cursor-runner before
42
120
  * each agent.send() call. The state file is the single source of truth
43
- * for all approval decisions.
121
+ * for the dynamic approval inputs (autoApproveAll, mcpToolPolicies,
122
+ * approvedGrantTokens). The static policy (which built-ins are gated and their
123
+ * categories, and which arg fields are salient) is baked into the script at
124
+ * generation time from approval-policy.ts.
44
125
  *
45
- * Approved grants are matched by a base64 token of `toolName \n salientArg`,
46
- * recomputed here from the incoming tool call. The salient-arg field list is
47
- * injected from SALIENT_ARG_FIELDS so the runner and the hook never disagree on
48
- * which argument identifies the resource. The encoding must stay byte-identical
126
+ * The identity token encoding (`base64(key \n salient)`) must stay byte-identical
49
127
  * to grantToken() in approval-state.ts.
50
128
  */
51
129
  export function generateHookScript(stateFilePath, ledgerFilePath) {
52
130
  const salientFields = SALIENT_ARG_FIELDS.join(" ");
131
+ const categoryCaseArms = buildCategoryCaseArms();
132
+ const nodeIdentityScript = buildNodeIdentityScript();
53
133
  return `#!/bin/bash
54
134
  # Stigmer HITL approval hook for Cursor preToolUse
55
135
  # Generated by cursor-runner — do not edit manually.
56
136
  #
57
- # Reads tool call from stdin (JSON), checks approval state file,
58
- # returns permission decision on stdout (JSON). On a deny, appends the call's
137
+ # Reads tool call from stdin (JSON), checks approval state file, returns a
138
+ # permission decision on stdout (JSON). On a deny, appends the call's canonical
59
139
  # identity token to the denial ledger so the runner can mark the gated tool call
60
- # as WAITING_APPROVAL.
140
+ # as WAITING_APPROVAL. See hook-script.ts for the cross-taxonomy identity design.
61
141
 
62
142
  set -euo pipefail
63
143
 
64
144
  INPUT=$(cat)
65
145
 
66
- # Extract tool_name from the hook input JSON.
67
- # Cursor sends the actual tool name (e.g. "search_services" for MCP tools).
68
- # Every extraction ends with '|| true': under 'set -e' a non-matching grep would
69
- # otherwise abort the script and emit no decision.
70
- TOOL_NAME=$(echo "$INPUT" | grep -o '"tool_name":"[^"]*"' | head -1 | cut -d'"' -f4 || true)
71
-
72
146
  STATE_FILE="${stateFilePath}"
73
147
  LEDGER_FILE="${ledgerFilePath}"
74
148
 
149
+ # --- Canonical identity: tool_name / category / identity token / MCP token ---
150
+ # Computed by the same Node.js binary that runs the cursor-runner (absolute path
151
+ # baked at generation time) so JSON string values — file paths and especially
152
+ # shell commands containing quotes, newlines, or unicode escapes — decode to the
153
+ # exact bytes the runner sees in the stream event. ELECTRON_RUN_AS_NODE makes
154
+ # the invocation safe when the runner is embedded in an Electron app (where
155
+ # process.execPath is the Electron binary).
156
+ NODE_BIN="${process.execPath}"
157
+ IDENTITY=$(printf '%s' "$INPUT" | ELECTRON_RUN_AS_NODE=1 "$NODE_BIN" -e '${nodeIdentityScript}' 2>/dev/null || true)
158
+ if [ -n "$IDENTITY" ]; then
159
+ TOOL_NAME=$(printf '%s\\n' "$IDENTITY" | sed -n 1p)
160
+ CATEGORY=$(printf '%s\\n' "$IDENTITY" | sed -n 2p)
161
+ TOKEN=$(printf '%s\\n' "$IDENTITY" | sed -n 3p)
162
+ MCP_TOKEN=$(printf '%s\\n' "$IDENTITY" | sed -n 4p)
163
+ else
164
+ # Fallback when the Node binary cannot run: grep/cut extraction. Best-effort
165
+ # only — '"field":"[^"]*"' truncates at the first JSON-escaped quote, so the
166
+ # token may not match the runner's for values containing escapes. Gating still
167
+ # holds (deny goes out); only denial correlation and grant precision degrade.
168
+ # Every extraction ends with '|| true': under 'set -e' a non-matching grep
169
+ # would otherwise abort the script and emit no decision.
170
+ TOOL_NAME=$(echo "$INPUT" | grep -o '"tool_name":"[^"]*"' | head -1 | cut -d'"' -f4 || true)
171
+ SALIENT=""
172
+ for field in ${salientFields}; do
173
+ v=$(echo "$INPUT" | grep -o "\\"$field\\":\\"[^\\"]*\\"" | head -1 | cut -d'"' -f4 || true)
174
+ if [ -n "$v" ]; then SALIENT="$v"; break; fi
175
+ done
176
+ CATEGORY=""
177
+ case "$TOOL_NAME" in
178
+ ${categoryCaseArms}
179
+ *) CATEGORY="" ;;
180
+ esac
181
+ TOKEN=$(printf '%s\\n%s' "$CATEGORY" "$SALIENT" | base64 | tr -d '\\n')
182
+ MCP_TOKEN=$(printf '%s\\n' "$TOOL_NAME" | base64 | tr -d '\\n')
183
+ fi
184
+
75
185
  # --- Failsafe: missing state file → deny (fail-closed) ---
76
186
  if [ ! -f "$STATE_FILE" ]; then
77
187
  echo '{"permission":"deny","agent_message":"${APPROVAL_REQUIRED_AGENT_MESSAGE}","user_message":"Tool requires approval: '"$TOOL_NAME"'"}'
@@ -86,66 +196,48 @@ if echo "$STATE" | grep -q '"autoApproveAll":true'; then
86
196
  exit 0
87
197
  fi
88
198
 
89
- # --- 2. Approved grants (reinvocation after SubmitApproval) ---
90
- # Build the same base64 token the runner stored for an approved tool call and
91
- # match it against approvedGrantTokens. Match by (name + salient arg); fall back
92
- # to name-only for grants with no salient arg (MCP tools). Salient-arg field
93
- # order is injected from SALIENT_ARG_FIELDS (single source of truth).
94
- TOKEN_NAME=$(printf '%s\\n' "$TOOL_NAME" | base64 | tr -d '\\n')
95
- if echo "$STATE" | grep -q "\\"$TOKEN_NAME\\""; then
96
- echo '{"permission":"allow"}'
97
- exit 0
98
- fi
99
- SALIENT=""
100
- for field in ${salientFields}; do
101
- v=$(echo "$INPUT" | grep -o "\\"$field\\":\\"[^\\"]*\\"" | head -1 | cut -d'"' -f4 || true)
102
- if [ -n "$v" ]; then SALIENT="$v"; break; fi
103
- done
104
- if [ -n "$SALIENT" ]; then
105
- TOKEN_SALIENT=$(printf '%s\\n%s' "$TOOL_NAME" "$SALIENT" | base64 | tr -d '\\n')
106
- if echo "$STATE" | grep -q "\\"$TOKEN_SALIENT\\""; then
107
- echo '{"permission":"allow"}'
108
- exit 0
109
- fi
110
- fi
111
-
112
- # Identity token recorded on a deny so the runner can correlate the gated call
113
- # back to its streamed tool call. Prefer the salient-arg token (identifies the
114
- # specific resource); fall back to name-only. Byte-identical to grantToken().
115
- if [ -n "$SALIENT" ]; then DENY_TOKEN="$TOKEN_SALIENT"; else DENY_TOKEN="$TOKEN_NAME"; fi
116
-
117
199
  # Append a denial record to the ledger. Best-effort: a ledger write failure must
118
200
  # never abort the decision (the deny still goes out on stdout). toolName is raw
119
201
  # for human-readable debugging; token drives correlation in the runner.
120
202
  record_denial() {
121
- echo '{"toolName":"'"$TOOL_NAME"'","token":"'"$DENY_TOKEN"'"}' >> "$LEDGER_FILE" 2>/dev/null || true
203
+ echo '{"toolName":"'"$TOOL_NAME"'","token":"'"$1"'"}' >> "$LEDGER_FILE" 2>/dev/null || true
122
204
  }
123
205
 
124
- # --- 3. Gated built-in tools (Write, StrReplace, Shell, ...) → deny ---
125
- GATED_LIST=$(echo "$STATE" | grep -o '"builtInGatedList":\\[[^]]*\\]' | head -1 || true)
126
- if [ -n "$GATED_LIST" ] && [ -n "$TOOL_NAME" ] && echo "$GATED_LIST" | grep -q "\\"$TOOL_NAME\\""; then
127
- record_denial
206
+ # --- 2. Gated built-in tools (category non-empty) ---
207
+ if [ -n "$CATEGORY" ]; then
208
+ # Reinvocation grant: this exact resource was approved earlier → allow.
209
+ if echo "$STATE" | grep -qF "\\"$TOKEN\\""; then
210
+ echo '{"permission":"allow"}'
211
+ exit 0
212
+ fi
213
+ record_denial "$TOKEN"
128
214
  echo '{"permission":"deny","agent_message":"${APPROVAL_REQUIRED_AGENT_MESSAGE}","user_message":"Tool requires approval: '"$TOOL_NAME"'"}'
129
215
  exit 0
130
216
  fi
131
217
 
132
- # --- 4. MCP tools that require approval → deny ---
218
+ # --- 3. MCP tools that require approval → deny ---
133
219
  # mcpToolPolicies holds only require-approval tools (auto-approved MCP tools are
134
- # absent), so presence means "deny" unless an entry is explicitly false.
220
+ # absent), so presence means "deny" unless an entry is explicitly false. MCP tool
221
+ # names are consistent across the hook and the stream, so the identity token is
222
+ # name-only: base64("$TOOL_NAME\\n").
135
223
  if echo "$STATE" | grep -q "\\"mcpToolPolicies\\"" && [ -n "$TOOL_NAME" ]; then
136
224
  TOOL_POLICY=$(echo "$STATE" | grep -o "\\"$TOOL_NAME\\":{[^}]*}" | head -1 || true)
137
225
  if [ -n "$TOOL_POLICY" ] && ! echo "$TOOL_POLICY" | grep -q '"requiresApproval":false'; then
226
+ if echo "$STATE" | grep -qF "\\"$MCP_TOKEN\\""; then
227
+ echo '{"permission":"allow"}'
228
+ exit 0
229
+ fi
138
230
  MSG=$(echo "$TOOL_POLICY" | grep -o '"message":"[^"]*"' | head -1 | cut -d'"' -f4 || true)
139
231
  if [ -z "$MSG" ]; then
140
232
  MSG="Tool requires approval: $TOOL_NAME"
141
233
  fi
142
- record_denial
234
+ record_denial "$MCP_TOKEN"
143
235
  echo '{"permission":"deny","agent_message":"${APPROVAL_REQUIRED_AGENT_MESSAGE}","user_message":"'"$MSG"'"}'
144
236
  exit 0
145
237
  fi
146
238
  fi
147
239
 
148
- # --- 5. Everything else → allow ---
240
+ # --- 4. Everything else → allow ---
149
241
  # Read-only built-ins, auto-approved MCP tools, and anything not explicitly
150
242
  # gated. Fail-open mirrors the native harness (gate the dangerous set, allow the
151
243
  # rest) and prevents denying auto-approved MCP tools the state cannot enumerate.
@@ -1 +1 @@
1
- {"version":3,"file":"hook-script.js","sourceRoot":"","sources":["../../../src/activities/execute-cursor/hook-script.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAgCG;AAEH,OAAO,EAAE,kBAAkB,EAAE,MAAM,sBAAsB,CAAC;AAE1D,MAAM,+BAA+B,GACnC,0EAA0E;IAC1E,uEAAuE;IACvE,2EAA2E,CAAC;AAE9E;;;;;;;;;;;;GAYG;AACH,MAAM,UAAU,kBAAkB,CAAC,aAAqB,EAAE,cAAsB;IAC9E,MAAM,aAAa,GAAG,kBAAkB,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;IACnD,OAAO;;;;;;;;;;;;;;;;;;;cAmBK,aAAa;eACZ,cAAc;;;;gDAImB,+BAA+B;;;;;;;;;;;;;;;;;;;;;;;eAuBhE,aAAa;;;;;;;;;;;;;;;;;;;;;;;;;;;;gDA4BoB,+BAA+B;;;;;;;;;;;;;;;kDAe7B,+BAA+B;;;;;;;;;;;CAWhF,CAAC;AACF,CAAC"}
1
+ {"version":3,"file":"hook-script.js","sourceRoot":"","sources":["../../../src/activities/execute-cursor/hook-script.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA2DG;AAEH,OAAO,EAAE,kBAAkB,EAAE,yBAAyB,EAAE,MAAM,sBAAsB,CAAC;AAErF,MAAM,+BAA+B,GACnC,0EAA0E;IAC1E,6EAA6E;IAC7E,4EAA4E;IAC5E,sCAAsC,CAAC;AAEzC;;;;GAIG;AACH,SAAS,qBAAqB;IAC5B,MAAM,UAAU,GAAG,IAAI,GAAG,EAAoB,CAAC;IAC/C,KAAK,MAAM,CAAC,IAAI,EAAE,QAAQ,CAAC,IAAI,yBAAyB,EAAE,EAAE,CAAC;QAC3D,MAAM,KAAK,GAAG,UAAU,CAAC,GAAG,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAC;QAC7C,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACjB,UAAU,CAAC,GAAG,CAAC,QAAQ,EAAE,KAAK,CAAC,CAAC;IAClC,CAAC;IACD,MAAM,IAAI,GAAa,EAAE,CAAC;IAC1B,KAAK,MAAM,CAAC,QAAQ,EAAE,KAAK,CAAC,IAAI,UAAU,EAAE,CAAC;QAC3C,MAAM,OAAO,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QACrD,IAAI,CAAC,IAAI,CAAC,SAAS,OAAO,eAAe,QAAQ,MAAM,CAAC,CAAC;IAC3D,CAAC;IACD,OAAO,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AACzB,CAAC;AAED;;;;;;;;;;;;GAYG;AACH,SAAS,uBAAuB;IAC9B,MAAM,WAAW,GAA2B,EAAE,CAAC;IAC/C,KAAK,MAAM,CAAC,IAAI,EAAE,QAAQ,CAAC,IAAI,yBAAyB,EAAE,EAAE,CAAC;QAC3D,WAAW,CAAC,IAAI,CAAC,GAAG,QAAQ,CAAC;IAC/B,CAAC;IACD,MAAM,UAAU,GAAG,IAAI,CAAC,SAAS,CAAC,WAAW,CAAC,CAAC;IAC/C,MAAM,MAAM,GAAG,IAAI,CAAC,SAAS,CAAC,kBAAkB,CAAC,CAAC;IAClD,OAAO;QACL,2DAA2D;QAC3D,0DAA0D;QAC1D,cAAc,UAAU,cAAc;QACtC,yEAAyE;QACzE,WAAW;QACX,kBAAkB,MAAM,wDAAwD;QAChF,wDAAwD;QACxD,gFAAgF;KACjF,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;AACb,CAAC;AAED;;;;;;;;;;;;GAYG;AACH,MAAM,UAAU,kBAAkB,CAAC,aAAqB,EAAE,cAAsB;IAC9E,MAAM,aAAa,GAAG,kBAAkB,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;IACnD,MAAM,gBAAgB,GAAG,qBAAqB,EAAE,CAAC;IACjD,MAAM,kBAAkB,GAAG,uBAAuB,EAAE,CAAC;IACrD,OAAO;;;;;;;;;;;;;cAaK,aAAa;eACZ,cAAc;;;;;;;;;YASjB,OAAO,CAAC,QAAQ;2EAC+C,kBAAkB;;;;;;;;;;;;;;;iBAe5E,aAAa;;;;;;EAM5B,gBAAgB;;;;;;;;;gDAS8B,+BAA+B;;;;;;;;;;;;;;;;;;;;;;;;;;;gDA2B/B,+BAA+B;;;;;;;;;;;;;;;;;;;;;kDAqB7B,+BAA+B;;;;;;;;;;;CAWhF,CAAC;AACF,CAAC"}
@@ -174,7 +174,30 @@ export declare class MessageAccumulator {
174
174
  cancelInProgressSubAgents(): void;
175
175
  processEvent(event: SDKMessage): void;
176
176
  finalize(): void;
177
+ /**
178
+ * Attach a tool call to the current AI message, upserting by `call_id` so a
179
+ * single call maps to at most ONE ToolCall across all messages.
180
+ *
181
+ * The Cursor SDK can emit the lifecycle for one `call_id` more than once —
182
+ * observed in production as two "running" events ~0.5s apart for task/edit
183
+ * tools, which previously appended a duplicate ToolCall (the same call
184
+ * rendered two or three times in the UI). We therefore index by `call_id`
185
+ * and merge subsequent events into the existing proto, mirroring how
186
+ * trackSubAgentExecution() upserts via subAgentMap. The first event for a
187
+ * `call_id` (running or terminal) creates the proto on the last AI message;
188
+ * the index keeps pointing at it even after later assistant text starts a
189
+ * new AI message, so cross-message completions still land on the original.
190
+ */
177
191
  private attachToolCallToLastAi;
192
+ /**
193
+ * Merge a repeated tool_call event into the ToolCall already tracked for this
194
+ * `call_id`. The merge is defensive because a re-emitted event may carry less
195
+ * information than an earlier one (a late "running" after "completed", or a
196
+ * completion with an empty result): status only advances toward terminal,
197
+ * timestamps are stamped once, and a populated result/args is never clobbered
198
+ * by an empty one.
199
+ */
200
+ private mergeToolCallEvent;
178
201
  private findOrCreateLastAiMessage;
179
202
  trackSubAgentExecution(event: Extract<SDKMessage, {
180
203
  type: "tool_call";
@@ -33,8 +33,8 @@ import { create } from "@bufbuild/protobuf";
33
33
  import { AgentMessageSchema, ToolCallSchema } from "@stigmer/protos/ai/stigmer/agentic/agentexecution/v1/message_pb";
34
34
  import { SubAgentExecutionSchema } from "@stigmer/protos/ai/stigmer/agentic/agentexecution/v1/subagent_pb";
35
35
  import { MessageType, ToolCallStatus, SubAgentStatus } from "@stigmer/protos/ai/stigmer/agentic/agentexecution/v1/enum_pb";
36
- import { lookupMcpToolPolicy, resolveApprovalMessage, builtInRequiresApproval, getBuiltInApprovalMessage, extractArgKey } from "./approval-policy.js";
37
- import { grantToken } from "./approval-state.js";
36
+ import { lookupMcpToolPolicy, resolveApprovalMessage, builtInRequiresApproval, getBuiltInApprovalMessage } from "./approval-policy.js";
37
+ import { grantToken, toolIdentity } from "./approval-state.js";
38
38
  import { utcTimestamp } from "../../shared/status.js";
39
39
  import { classifyTool } from "../../shared/tool-kind.js";
40
40
  export { utcTimestamp };
@@ -260,6 +260,16 @@ function safeString(obj, key) {
260
260
  }
261
261
  return "";
262
262
  }
263
+ /**
264
+ * Normalize a tool_call event result into a string for the ToolCall proto.
265
+ * Returns "" for an absent result so callers can treat "no result yet" and
266
+ * "empty result" uniformly (e.g. to avoid clobbering a captured result).
267
+ */
268
+ function toResultString(result) {
269
+ if (result == null)
270
+ return "";
271
+ return typeof result === "string" ? result : JSON.stringify(result);
272
+ }
263
273
  /**
264
274
  * Parse the task tool's completed result into AgentMessages.
265
275
  *
@@ -478,49 +488,74 @@ export class MessageAccumulator {
478
488
  this.activeAiByRunId.clear();
479
489
  this.activeThinkingByRunId.clear();
480
490
  }
491
+ /**
492
+ * Attach a tool call to the current AI message, upserting by `call_id` so a
493
+ * single call maps to at most ONE ToolCall across all messages.
494
+ *
495
+ * The Cursor SDK can emit the lifecycle for one `call_id` more than once —
496
+ * observed in production as two "running" events ~0.5s apart for task/edit
497
+ * tools, which previously appended a duplicate ToolCall (the same call
498
+ * rendered two or three times in the UI). We therefore index by `call_id`
499
+ * and merge subsequent events into the existing proto, mirroring how
500
+ * trackSubAgentExecution() upserts via subAgentMap. The first event for a
501
+ * `call_id` (running or terminal) creates the proto on the last AI message;
502
+ * the index keeps pointing at it even after later assistant text starts a
503
+ * new AI message, so cross-message completions still land on the original.
504
+ */
481
505
  attachToolCallToLastAi(event) {
482
506
  if (SUPPRESSED_TOOL_NAMES.has(event.name))
483
507
  return;
484
- const status = mapToolCallStatus(event.status);
485
- if (event.status === "running") {
486
- const aiMsg = this.findOrCreateLastAiMessage();
508
+ const existing = this.toolCallIndex.get(event.call_id);
509
+ if (!existing) {
487
510
  const tc = buildToolCallProto(event, this.mergedPolicies);
488
- aiMsg.toolCalls.push(tc);
511
+ this.findOrCreateLastAiMessage().toolCalls.push(tc);
489
512
  this.toolCallIndex.set(event.call_id, tc);
513
+ return;
490
514
  }
491
- else {
492
- const existing = this.toolCallIndex.get(event.call_id);
493
- if (existing) {
494
- existing.status = status;
495
- if (isTerminalToolStatus(status)) {
496
- existing.completedAt = utcTimestamp();
497
- }
498
- if (event.result != null) {
499
- existing.result = typeof event.result === "string"
500
- ? event.result
501
- : JSON.stringify(event.result);
502
- }
503
- if (status === ToolCallStatus.TOOL_CALL_FAILED) {
504
- existing.error = typeof event.result === "string"
505
- ? event.result
506
- : "Tool call failed";
507
- if (existing.requiresApproval) {
508
- existing.approvalRequestedAt = utcTimestamp();
509
- }
510
- }
511
- if (event.args != null && !existing.argsPreview) {
512
- existing.argsPreview = typeof event.args === "string"
513
- ? event.args
514
- : JSON.stringify(event.args);
515
- }
515
+ this.mergeToolCallEvent(existing, event);
516
+ }
517
+ /**
518
+ * Merge a repeated tool_call event into the ToolCall already tracked for this
519
+ * `call_id`. The merge is defensive because a re-emitted event may carry less
520
+ * information than an earlier one (a late "running" after "completed", or a
521
+ * completion with an empty result): status only advances toward terminal,
522
+ * timestamps are stamped once, and a populated result/args is never clobbered
523
+ * by an empty one.
524
+ */
525
+ mergeToolCallEvent(existing, event) {
526
+ const status = mapToolCallStatus(event.status);
527
+ // Status advances monotonically: once terminal (completed/failed/skipped)
528
+ // a later "running" re-emit must not regress it back to RUNNING.
529
+ if (!isTerminalToolStatus(existing.status)) {
530
+ existing.status = status;
531
+ }
532
+ if (isTerminalToolStatus(status) && !existing.completedAt) {
533
+ existing.completedAt = utcTimestamp();
534
+ }
535
+ if (!existing.startedAt && status === ToolCallStatus.TOOL_CALL_RUNNING) {
536
+ existing.startedAt = utcTimestamp();
537
+ }
538
+ // Only a non-empty incoming result overwrites; a result-less "running"
539
+ // re-emit must not wipe a result captured on completion (or vice versa).
540
+ const incomingResult = toResultString(event.result);
541
+ if (incomingResult) {
542
+ existing.result = incomingResult;
543
+ }
544
+ if (status === ToolCallStatus.TOOL_CALL_FAILED) {
545
+ if (!existing.error) {
546
+ existing.error = typeof event.result === "string"
547
+ ? event.result
548
+ : "Tool call failed";
516
549
  }
517
- else {
518
- const aiMsg = this.findOrCreateLastAiMessage();
519
- const tc = buildToolCallProto(event, this.mergedPolicies);
520
- aiMsg.toolCalls.push(tc);
521
- this.toolCallIndex.set(event.call_id, tc);
550
+ if (existing.requiresApproval && !existing.approvalRequestedAt) {
551
+ existing.approvalRequestedAt = utcTimestamp();
522
552
  }
523
553
  }
554
+ if (event.args != null && !existing.argsPreview) {
555
+ existing.argsPreview = typeof event.args === "string"
556
+ ? event.args
557
+ : JSON.stringify(event.args);
558
+ }
524
559
  }
525
560
  findOrCreateLastAiMessage() {
526
561
  for (let i = this.messages.length - 1; i >= 0; i--) {
@@ -666,13 +701,18 @@ export function reconcileDeniedToolCalls(messages, ledger, mergedPolicies) {
666
701
  }
667
702
  }
668
703
  // 2. Synthesize a tool call for any denial that never produced a stream event.
704
+ // Rare with correct correlation (Cursor emits a tool_call for every attempt),
705
+ // so this is a defensive net that still surfaces the gate rather than letting
706
+ // a denied tool render as a silent success.
669
707
  for (const entry of ledger) {
670
708
  if (matched.has(entry.token))
671
709
  continue;
672
710
  const decoded = decodeIdentityToken(entry.token);
673
- const name = decoded?.name || entry.toolName || "tool";
674
- const argKey = decoded?.argKey ?? "";
675
- const tc = synthesizeWaitingApprovalToolCall(name, argKey, mergedPolicies);
711
+ // Display the hook's raw tool name; carry the decoded salient so the grant
712
+ // rebuilt from this tool call on reinvocation keys on the same resource.
713
+ const displayName = entry.toolName || decoded?.key || "tool";
714
+ const salient = decoded?.salient ?? "";
715
+ const tc = synthesizeWaitingApprovalToolCall(displayName, salient, entry.token, mergedPolicies);
676
716
  appendToolCallToLastAiMessage(messages, tc);
677
717
  matched.add(entry.token);
678
718
  result.push(tc);
@@ -680,23 +720,24 @@ export function reconcileDeniedToolCalls(messages, ledger, mergedPolicies) {
680
720
  return result;
681
721
  }
682
722
  /**
683
- * Compute a tool call's identity token in the same space the preToolUse hook
684
- * uses (grantToken: base64 of `toolName \n salientArg`). Mirrors the hook's
685
- * choice: MCP tools are name-only (no top-level salient arg in the hook input,
686
- * matching the grant convention); built-in tools key on their salient arg.
723
+ * Compute a streamed tool call's identity token in the same canonical space the
724
+ * preToolUse hook records denials in (see {@link toolIdentity} and grantToken).
725
+ * The token keys on the cross-taxonomy category + salient resource, so a stream
726
+ * `edit` (token `base64("write\n/path")`) correlates to the hook's `Write` deny
727
+ * for the same path, even though the two layers name the tool differently.
687
728
  */
688
729
  function toolCallIdentityToken(tc) {
689
- const argKey = tc.mcpServerSlug ? "" : extractArgKey(toolCallArgs(tc));
690
- return grantToken(tc.name, argKey);
730
+ const id = toolIdentity(tc.name, tc.mcpServerSlug, toolCallArgs(tc));
731
+ return grantToken(id.key, id.salient);
691
732
  }
692
- /** Decode a `grantToken` back into its (name, argKey) for synthesis fallback. */
733
+ /** Decode a grantToken back into its (key, salient) for the synthesis fallback. */
693
734
  function decodeIdentityToken(token) {
694
735
  try {
695
736
  const decoded = Buffer.from(token, "base64").toString("utf-8");
696
737
  const nl = decoded.indexOf("\n");
697
738
  if (nl < 0)
698
739
  return undefined;
699
- return { name: decoded.slice(0, nl), argKey: decoded.slice(nl + 1) };
740
+ return { key: decoded.slice(0, nl), salient: decoded.slice(nl + 1) };
700
741
  }
701
742
  catch {
702
743
  return undefined;
@@ -735,19 +776,24 @@ function markWaitingApproval(tc, mergedPolicies) {
735
776
  tc.error = "";
736
777
  tc.result = "";
737
778
  }
738
- function synthesizeWaitingApprovalToolCall(name, argKey, mergedPolicies) {
779
+ function synthesizeWaitingApprovalToolCall(displayName, salient, token, mergedPolicies) {
739
780
  const tc = create(ToolCallSchema, {
740
- id: `approval:${grantToken(name, argKey)}`,
741
- name,
781
+ id: `approval:${token}`,
782
+ name: displayName,
742
783
  status: ToolCallStatus.TOOL_CALL_WAITING_APPROVAL,
743
784
  requiresApproval: true,
744
785
  startedAt: utcTimestamp(),
745
786
  approvalRequestedAt: utcTimestamp(),
746
- toolKind: classifyTool(name),
787
+ toolKind: classifyTool(displayName),
747
788
  });
748
- tc.approvalMessage = argKey
749
- ? `Tool requires approval: ${name} (${argKey})`
750
- : resolveDeniedApprovalMessage(name, "", {}, mergedPolicies);
789
+ // Carry the salient resource so reconstructAdjudicatedApprovals -> the grant
790
+ // builder keys on the same resource the hook will see on the re-attempt.
791
+ if (salient) {
792
+ tc.argsPreview = JSON.stringify({ path: salient });
793
+ }
794
+ tc.approvalMessage = salient
795
+ ? `Tool requires approval: ${displayName} (${salient})`
796
+ : resolveDeniedApprovalMessage(displayName, "", {}, mergedPolicies);
751
797
  return tc;
752
798
  }
753
799
  /**