@stigmer/runner 3.0.2-dev.20260609093630 → 3.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/.build-fingerprint +1 -1
- package/dist/activities/execute-cursor/approval-policy.d.ts +55 -16
- package/dist/activities/execute-cursor/approval-policy.js +93 -31
- package/dist/activities/execute-cursor/approval-policy.js.map +1 -1
- package/dist/activities/execute-cursor/approval-state.d.ts +54 -26
- package/dist/activities/execute-cursor/approval-state.js +41 -26
- package/dist/activities/execute-cursor/approval-state.js.map +1 -1
- package/dist/activities/execute-cursor/hook-script.d.ts +31 -12
- package/dist/activities/execute-cursor/hook-script.js +93 -52
- package/dist/activities/execute-cursor/hook-script.js.map +1 -1
- package/dist/activities/execute-cursor/message-translator.d.ts +23 -0
- package/dist/activities/execute-cursor/message-translator.js +100 -54
- package/dist/activities/execute-cursor/message-translator.js.map +1 -1
- package/package.json +2 -2
- package/src/activities/execute-cursor/__tests__/approval-gate.test.ts +93 -37
- package/src/activities/execute-cursor/__tests__/hitl-ledger.test.ts +33 -18
- package/src/activities/execute-cursor/__tests__/hook-script.test.ts +149 -0
- package/src/activities/execute-cursor/__tests__/message-translator.test.ts +93 -0
- package/src/activities/execute-cursor/approval-policy.ts +113 -31
- package/src/activities/execute-cursor/approval-state.ts +74 -32
- package/src/activities/execute-cursor/hook-script.ts +94 -52
- package/src/activities/execute-cursor/message-translator.ts +114 -57
|
@@ -38,8 +38,8 @@ import type { SubAgentExecution } from "@stigmer/protos/ai/stigmer/agentic/agent
|
|
|
38
38
|
import { MessageType, ToolCallStatus, SubAgentStatus } from "@stigmer/protos/ai/stigmer/agentic/agentexecution/v1/enum_pb";
|
|
39
39
|
import type { SDKMessage } from "@cursor/sdk";
|
|
40
40
|
import type { MergedToolPolicy } from "./approval-policy.js";
|
|
41
|
-
import { lookupMcpToolPolicy, resolveApprovalMessage, builtInRequiresApproval, getBuiltInApprovalMessage
|
|
42
|
-
import { grantToken, type DeniedLedgerEntry } from "./approval-state.js";
|
|
41
|
+
import { lookupMcpToolPolicy, resolveApprovalMessage, builtInRequiresApproval, getBuiltInApprovalMessage } from "./approval-policy.js";
|
|
42
|
+
import { grantToken, toolIdentity, type DeniedLedgerEntry } from "./approval-state.js";
|
|
43
43
|
import { utcTimestamp } from "../../shared/status.js";
|
|
44
44
|
import { classifyTool } from "../../shared/tool-kind.js";
|
|
45
45
|
|
|
@@ -313,6 +313,16 @@ function safeString(obj: unknown, key: string): string {
|
|
|
313
313
|
return "";
|
|
314
314
|
}
|
|
315
315
|
|
|
316
|
+
/**
 * Normalize a tool_call event result into a string for the ToolCall proto.
 *
 * - `null` / `undefined` become `""`, so callers can treat "no result yet" and
 *   "empty result" uniformly (e.g. to avoid clobbering a captured result).
 * - Strings are returned unchanged.
 * - Everything else is JSON-encoded. Values that JSON cannot represent never
 *   throw and never leak `undefined` through the `string` return type.
 *
 * @param result - Raw `result` payload carried by a tool_call event.
 * @returns A string that is always safe to assign to a proto string field.
 */
function toResultString(result: unknown): string {
  if (result == null) return "";
  if (typeof result === "string") return result;
  try {
    // JSON.stringify returns undefined (not a string) for functions and
    // symbols; treat those the same as an absent result.
    const encoded: string | undefined = JSON.stringify(result);
    return encoded ?? "";
  } catch {
    // JSON.stringify throws a TypeError on circular structures and BigInt
    // values. A malformed tool result must not abort message translation.
    return typeof result === "bigint" ? result.toString() : "[unserializable result]";
  }
}
|
|
325
|
+
|
|
316
326
|
/**
|
|
317
327
|
* Parse the task tool's completed result into AgentMessages.
|
|
318
328
|
*
|
|
@@ -552,49 +562,84 @@ export class MessageAccumulator {
|
|
|
552
562
|
this.activeThinkingByRunId.clear();
|
|
553
563
|
}
|
|
554
564
|
|
|
565
|
+
/**
 * Record a tool_call event against the conversation, keyed by `call_id`, so
 * that one call is represented by at most one ToolCall across all messages.
 *
 * The Cursor SDK may replay the lifecycle of a single `call_id` (seen in
 * production as two "running" events roughly 0.5s apart for task/edit tools),
 * which used to append duplicate ToolCalls. To prevent that:
 *
 * - The first event seen for a `call_id` (running or terminal) builds the
 *   proto, pushes it onto the last AI message and indexes it.
 * - Any later event for that `call_id` is merged into the indexed proto, even
 *   if assistant text has since started a newer AI message.
 *
 * Events for tools listed in SUPPRESSED_TOOL_NAMES are ignored entirely.
 */
private attachToolCallToLastAi(
  event: Extract<SDKMessage, { type: "tool_call" }>,
): void {
  const { name, call_id: callId } = event;
  if (SUPPRESSED_TOOL_NAMES.has(name)) return;

  const tracked = this.toolCallIndex.get(callId);
  if (tracked) {
    // Repeat event for a known call: fold it into the existing proto.
    this.mergeToolCallEvent(tracked, event);
    return;
  }

  // First sighting of this call: create, attach, then index.
  const created = buildToolCallProto(event, this.mergedPolicies);
  const hostMessage = this.findOrCreateLastAiMessage();
  hostMessage.toolCalls.push(created);
  this.toolCallIndex.set(callId, created);
}
|
|
594
|
+
|
|
595
|
+
/**
 * Fold a repeated tool_call event into the ToolCall already tracked for its
 * `call_id`.
 *
 * A replayed event can carry less information than an earlier one (a late
 * "running" after "completed", or a completion with an empty result), so the
 * merge only ever adds information:
 *
 * - status is overwritten only while the tracked status is non-terminal;
 * - `startedAt`, `completedAt` and `approvalRequestedAt` are stamped once;
 * - `result` is replaced only by a non-empty incoming result;
 * - `error` and `argsPreview` are filled only when currently empty.
 *
 * @param existing - The tracked ToolCall; mutated in place.
 * @param event - The newly received tool_call event for the same `call_id`.
 */
private mergeToolCallEvent(
  existing: ToolCall,
  event: Extract<SDKMessage, { type: "tool_call" }>,
): void {
  const incomingStatus = mapToolCallStatus(event.status);

  // Never regress a terminal status (completed/failed/skipped) back to an
  // earlier one because of a late re-emit.
  const alreadyTerminal = isTerminalToolStatus(existing.status);
  if (!alreadyTerminal) {
    existing.status = incomingStatus;
  }

  const incomingIsTerminal = isTerminalToolStatus(incomingStatus);
  if (incomingIsTerminal && !existing.completedAt) {
    existing.completedAt = utcTimestamp();
  }

  const incomingIsRunning = incomingStatus === ToolCallStatus.TOOL_CALL_RUNNING;
  if (incomingIsRunning && !existing.startedAt) {
    existing.startedAt = utcTimestamp();
  }

  // An empty incoming result (e.g. from a "running" re-emit) leaves any
  // previously captured result untouched.
  const resultText = toResultString(event.result);
  if (resultText) {
    existing.result = resultText;
  }

  if (incomingStatus === ToolCallStatus.TOOL_CALL_FAILED) {
    if (!existing.error) {
      if (typeof event.result === "string") {
        existing.error = event.result;
      } else {
        existing.error = "Tool call failed";
      }
    }
    const needsApprovalStamp = existing.requiresApproval && !existing.approvalRequestedAt;
    if (needsApprovalStamp) {
      existing.approvalRequestedAt = utcTimestamp();
    }
  }

  // Keep the first args preview; only backfill when none was captured.
  if (event.args != null && !existing.argsPreview) {
    const { args } = event;
    existing.argsPreview = typeof args === "string" ? args : JSON.stringify(args);
  }
}
|
|
600
645
|
|
|
@@ -763,12 +808,17 @@ export function reconcileDeniedToolCalls(
|
|
|
763
808
|
}
|
|
764
809
|
|
|
765
810
|
// 2. Synthesize a tool call for any denial that never produced a stream event.
|
|
811
|
+
// Rare with correct correlation (Cursor emits a tool_call for every attempt),
|
|
812
|
+
// so this is a defensive net that still surfaces the gate rather than letting
|
|
813
|
+
// a denied tool render as a silent success.
|
|
766
814
|
for (const entry of ledger) {
|
|
767
815
|
if (matched.has(entry.token)) continue;
|
|
768
816
|
const decoded = decodeIdentityToken(entry.token);
|
|
769
|
-
|
|
770
|
-
|
|
771
|
-
const
|
|
817
|
+
// Display the hook's raw tool name; carry the decoded salient so the grant
|
|
818
|
+
// rebuilt from this tool call on reinvocation keys on the same resource.
|
|
819
|
+
const displayName = entry.toolName || decoded?.key || "tool";
|
|
820
|
+
const salient = decoded?.salient ?? "";
|
|
821
|
+
const tc = synthesizeWaitingApprovalToolCall(displayName, salient, entry.token, mergedPolicies);
|
|
772
822
|
appendToolCallToLastAiMessage(messages, tc);
|
|
773
823
|
matched.add(entry.token);
|
|
774
824
|
result.push(tc);
|
|
@@ -778,23 +828,24 @@ export function reconcileDeniedToolCalls(
|
|
|
778
828
|
}
|
|
779
829
|
|
|
780
830
|
/**
 * Derive the identity token for a streamed tool call, using the same canonical
 * space in which the preToolUse hook records denials (see {@link toolIdentity}
 * and grantToken).
 *
 * The token is built from the cross-taxonomy category plus the salient
 * resource, so a stream `edit` (token `base64("write\n/path")`) lines up with
 * the hook's `Write` denial for the same path even though the two layers use
 * different tool names.
 *
 * @param tc - The streamed tool call to identify.
 * @returns The grant token for the tool call's (key, salient) identity.
 */
function toolCallIdentityToken(tc: ToolCall): string {
  const { key, salient } = toolIdentity(tc.name, tc.mcpServerSlug, toolCallArgs(tc));
  return grantToken(key, salient);
}
|
|
790
841
|
|
|
791
|
-
/** Decode a
|
|
792
|
-
function decodeIdentityToken(token: string): {
|
|
842
|
+
/** Decode a grantToken back into its (key, salient) for the synthesis fallback. */
|
|
843
|
+
function decodeIdentityToken(token: string): { key: string; salient: string } | undefined {
|
|
793
844
|
try {
|
|
794
845
|
const decoded = Buffer.from(token, "base64").toString("utf-8");
|
|
795
846
|
const nl = decoded.indexOf("\n");
|
|
796
847
|
if (nl < 0) return undefined;
|
|
797
|
-
return {
|
|
848
|
+
return { key: decoded.slice(0, nl), salient: decoded.slice(nl + 1) };
|
|
798
849
|
} catch {
|
|
799
850
|
return undefined;
|
|
800
851
|
}
|
|
@@ -838,22 +889,28 @@ function markWaitingApproval(
|
|
|
838
889
|
}
|
|
839
890
|
|
|
840
891
|
/**
 * Build a stand-in ToolCall in WAITING_APPROVAL state for a denial that has no
 * matching streamed tool call.
 *
 * @param displayName - Tool name to show; also used to classify the tool kind.
 * @param salient - Salient resource decoded from the token; may be empty.
 * @param token - Identity token of the denial; embedded in the synthetic id.
 * @param mergedPolicies - Optional policy map forwarded to the approval-message
 *   resolver when no salient resource is available.
 * @returns The synthesized ToolCall.
 */
function synthesizeWaitingApprovalToolCall(
  displayName: string,
  salient: string,
  token: string,
  mergedPolicies?: Map<string, MergedToolPolicy>,
): ToolCall {
  const synthesized = create(ToolCallSchema, {
    id: `approval:${token}`,
    name: displayName,
    status: ToolCallStatus.TOOL_CALL_WAITING_APPROVAL,
    requiresApproval: true,
    startedAt: utcTimestamp(),
    approvalRequestedAt: utcTimestamp(),
    toolKind: classifyTool(displayName),
  });

  if (salient) {
    // Carry the salient resource in argsPreview so the grant rebuilt from this
    // tool call on reinvocation keys on the same resource the hook will see.
    synthesized.argsPreview = JSON.stringify({ path: salient });
    synthesized.approvalMessage = `Tool requires approval: ${displayName} (${salient})`;
  } else {
    synthesized.approvalMessage = resolveDeniedApprovalMessage(displayName, "", {}, mergedPolicies);
  }

  return synthesized;
}
|
|
859
916
|
|