@stigmer/runner 3.0.2-dev.20260609093630 → 3.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -38,8 +38,8 @@ import type { SubAgentExecution } from "@stigmer/protos/ai/stigmer/agentic/agent
38
38
  import { MessageType, ToolCallStatus, SubAgentStatus } from "@stigmer/protos/ai/stigmer/agentic/agentexecution/v1/enum_pb";
39
39
  import type { SDKMessage } from "@cursor/sdk";
40
40
  import type { MergedToolPolicy } from "./approval-policy.js";
41
- import { lookupMcpToolPolicy, resolveApprovalMessage, builtInRequiresApproval, getBuiltInApprovalMessage, extractArgKey } from "./approval-policy.js";
42
- import { grantToken, type DeniedLedgerEntry } from "./approval-state.js";
41
+ import { lookupMcpToolPolicy, resolveApprovalMessage, builtInRequiresApproval, getBuiltInApprovalMessage } from "./approval-policy.js";
42
+ import { grantToken, toolIdentity, type DeniedLedgerEntry } from "./approval-state.js";
43
43
  import { utcTimestamp } from "../../shared/status.js";
44
44
  import { classifyTool } from "../../shared/tool-kind.js";
45
45
 
@@ -313,6 +313,16 @@ function safeString(obj: unknown, key: string): string {
313
313
  return "";
314
314
  }
315
315
 
316
+ /**
317
+ * Normalize a tool_call event result into a string for the ToolCall proto.
318
+ * Returns "" for an absent result so callers can treat "no result yet" and
319
+ * "empty result" uniformly (e.g. to avoid clobbering a captured result).
320
+ */
321
+ function toResultString(result: unknown): string {
322
+ if (result == null) return "";
323
+ return typeof result === "string" ? result : JSON.stringify(result);
324
+ }
325
+
316
326
  /**
317
327
  * Parse the task tool's completed result into AgentMessages.
318
328
  *
@@ -552,49 +562,84 @@ export class MessageAccumulator {
552
562
  this.activeThinkingByRunId.clear();
553
563
  }
554
564
 
565
+ /**
566
+ * Attach a tool call to the current AI message, upserting by `call_id` so a
567
+ * single call maps to at most ONE ToolCall across all messages.
568
+ *
569
+ * The Cursor SDK can emit the lifecycle for one `call_id` more than once —
570
+ * observed in production as two "running" events ~0.5s apart for task/edit
571
+ * tools, which previously appended a duplicate ToolCall (the same call
572
+ * rendered two or three times in the UI). We therefore index by `call_id`
573
+ * and merge subsequent events into the existing proto, mirroring how
574
+ * trackSubAgentExecution() upserts via subAgentMap. The first event for a
575
+ * `call_id` (running or terminal) creates the proto on the last AI message;
576
+ * the index keeps pointing at it even after later assistant text starts a
577
+ * new AI message, so cross-message completions still land on the original.
578
+ */
555
579
  private attachToolCallToLastAi(
556
580
  event: Extract<SDKMessage, { type: "tool_call" }>,
557
581
  ): void {
558
582
  if (SUPPRESSED_TOOL_NAMES.has(event.name)) return;
559
583
 
560
- const status = mapToolCallStatus(event.status);
561
-
562
- if (event.status === "running") {
563
- const aiMsg = this.findOrCreateLastAiMessage();
584
+ const existing = this.toolCallIndex.get(event.call_id);
585
+ if (!existing) {
564
586
  const tc = buildToolCallProto(event, this.mergedPolicies);
565
- aiMsg.toolCalls.push(tc);
587
+ this.findOrCreateLastAiMessage().toolCalls.push(tc);
566
588
  this.toolCallIndex.set(event.call_id, tc);
567
- } else {
568
- const existing = this.toolCallIndex.get(event.call_id);
569
- if (existing) {
570
- existing.status = status;
571
- if (isTerminalToolStatus(status)) {
572
- existing.completedAt = utcTimestamp();
573
- }
574
- if (event.result != null) {
575
- existing.result = typeof event.result === "string"
576
- ? event.result
577
- : JSON.stringify(event.result);
578
- }
579
- if (status === ToolCallStatus.TOOL_CALL_FAILED) {
580
- existing.error = typeof event.result === "string"
581
- ? event.result
582
- : "Tool call failed";
583
- if (existing.requiresApproval) {
584
- existing.approvalRequestedAt = utcTimestamp();
585
- }
586
- }
587
- if (event.args != null && !existing.argsPreview) {
588
- existing.argsPreview = typeof event.args === "string"
589
- ? event.args
590
- : JSON.stringify(event.args);
591
- }
592
- } else {
593
- const aiMsg = this.findOrCreateLastAiMessage();
594
- const tc = buildToolCallProto(event, this.mergedPolicies);
595
- aiMsg.toolCalls.push(tc);
596
- this.toolCallIndex.set(event.call_id, tc);
589
+ return;
590
+ }
591
+
592
+ this.mergeToolCallEvent(existing, event);
593
+ }
594
+
595
+ /**
596
+ * Merge a repeated tool_call event into the ToolCall already tracked for this
597
+ * `call_id`. The merge is defensive because a re-emitted event may carry less
598
+ * information than an earlier one (a late "running" after "completed", or a
599
+ * completion with an empty result): status only advances toward terminal,
600
+ * timestamps are stamped once, and a populated result/args is never clobbered
601
+ * by an empty one.
602
+ */
603
+ private mergeToolCallEvent(
604
+ existing: ToolCall,
605
+ event: Extract<SDKMessage, { type: "tool_call" }>,
606
+ ): void {
607
+ const status = mapToolCallStatus(event.status);
608
+
609
+ // Status advances monotonically: once terminal (completed/failed/skipped)
610
+ // a later "running" re-emit must not regress it back to RUNNING.
611
+ if (!isTerminalToolStatus(existing.status)) {
612
+ existing.status = status;
613
+ }
614
+ if (isTerminalToolStatus(status) && !existing.completedAt) {
615
+ existing.completedAt = utcTimestamp();
616
+ }
617
+ if (!existing.startedAt && status === ToolCallStatus.TOOL_CALL_RUNNING) {
618
+ existing.startedAt = utcTimestamp();
619
+ }
620
+
621
+ // Only a non-empty incoming result overwrites; a result-less "running"
622
+ // re-emit must not wipe a result captured on completion (or vice versa).
623
+ const incomingResult = toResultString(event.result);
624
+ if (incomingResult) {
625
+ existing.result = incomingResult;
626
+ }
627
+
628
+ if (status === ToolCallStatus.TOOL_CALL_FAILED) {
629
+ if (!existing.error) {
630
+ existing.error = typeof event.result === "string"
631
+ ? event.result
632
+ : "Tool call failed";
597
633
  }
634
+ if (existing.requiresApproval && !existing.approvalRequestedAt) {
635
+ existing.approvalRequestedAt = utcTimestamp();
636
+ }
637
+ }
638
+
639
+ if (event.args != null && !existing.argsPreview) {
640
+ existing.argsPreview = typeof event.args === "string"
641
+ ? event.args
642
+ : JSON.stringify(event.args);
598
643
  }
599
644
  }
600
645
 
@@ -763,12 +808,17 @@ export function reconcileDeniedToolCalls(
763
808
  }
764
809
 
765
810
  // 2. Synthesize a tool call for any denial that never produced a stream event.
811
+ // Rare with correct correlation (Cursor emits a tool_call for every attempt),
812
+ // so this is a defensive net that still surfaces the gate rather than letting
813
+ // a denied tool render as a silent success.
766
814
  for (const entry of ledger) {
767
815
  if (matched.has(entry.token)) continue;
768
816
  const decoded = decodeIdentityToken(entry.token);
769
- const name = decoded?.name || entry.toolName || "tool";
770
- const argKey = decoded?.argKey ?? "";
771
- const tc = synthesizeWaitingApprovalToolCall(name, argKey, mergedPolicies);
817
+ // Display the hook's raw tool name; carry the decoded salient so the grant
818
+ // rebuilt from this tool call on reinvocation keys on the same resource.
819
+ const displayName = entry.toolName || decoded?.key || "tool";
820
+ const salient = decoded?.salient ?? "";
821
+ const tc = synthesizeWaitingApprovalToolCall(displayName, salient, entry.token, mergedPolicies);
772
822
  appendToolCallToLastAiMessage(messages, tc);
773
823
  matched.add(entry.token);
774
824
  result.push(tc);
@@ -778,23 +828,24 @@ export function reconcileDeniedToolCalls(
778
828
  }
779
829
 
780
830
  /**
781
- * Compute a tool call's identity token in the same space the preToolUse hook
782
- * uses (grantToken: base64 of `toolName \n salientArg`). Mirrors the hook's
783
- * choice: MCP tools are name-only (no top-level salient arg in the hook input,
784
- * matching the grant convention); built-in tools key on their salient arg.
831
+ * Compute a streamed tool call's identity token in the same canonical space the
832
+ * preToolUse hook records denials in (see {@link toolIdentity} and grantToken).
833
+ * The token keys on the cross-taxonomy category + salient resource, so a stream
834
+ * `edit` (token `base64("write\n/path")`) correlates to the hook's `Write` deny
835
+ * for the same path, even though the two layers name the tool differently.
785
836
  */
786
837
  function toolCallIdentityToken(tc: ToolCall): string {
787
- const argKey = tc.mcpServerSlug ? "" : extractArgKey(toolCallArgs(tc));
788
- return grantToken(tc.name, argKey);
838
+ const id = toolIdentity(tc.name, tc.mcpServerSlug, toolCallArgs(tc));
839
+ return grantToken(id.key, id.salient);
789
840
  }
790
841
 
791
- /** Decode a `grantToken` back into its (name, argKey) for synthesis fallback. */
792
- function decodeIdentityToken(token: string): { name: string; argKey: string } | undefined {
842
+ /** Decode a grantToken back into its (key, salient) for the synthesis fallback. */
843
+ function decodeIdentityToken(token: string): { key: string; salient: string } | undefined {
793
844
  try {
794
845
  const decoded = Buffer.from(token, "base64").toString("utf-8");
795
846
  const nl = decoded.indexOf("\n");
796
847
  if (nl < 0) return undefined;
797
- return { name: decoded.slice(0, nl), argKey: decoded.slice(nl + 1) };
848
+ return { key: decoded.slice(0, nl), salient: decoded.slice(nl + 1) };
798
849
  } catch {
799
850
  return undefined;
800
851
  }
@@ -838,22 +889,28 @@ function markWaitingApproval(
838
889
  }
839
890
 
840
891
  function synthesizeWaitingApprovalToolCall(
841
- name: string,
842
- argKey: string,
892
+ displayName: string,
893
+ salient: string,
894
+ token: string,
843
895
  mergedPolicies?: Map<string, MergedToolPolicy>,
844
896
  ): ToolCall {
845
897
  const tc = create(ToolCallSchema, {
846
- id: `approval:${grantToken(name, argKey)}`,
847
- name,
898
+ id: `approval:${token}`,
899
+ name: displayName,
848
900
  status: ToolCallStatus.TOOL_CALL_WAITING_APPROVAL,
849
901
  requiresApproval: true,
850
902
  startedAt: utcTimestamp(),
851
903
  approvalRequestedAt: utcTimestamp(),
852
- toolKind: classifyTool(name),
904
+ toolKind: classifyTool(displayName),
853
905
  });
854
- tc.approvalMessage = argKey
855
- ? `Tool requires approval: ${name} (${argKey})`
856
- : resolveDeniedApprovalMessage(name, "", {}, mergedPolicies);
906
+ // Carry the salient resource so reconstructAdjudicatedApprovals -> the grant
907
+ // builder keys on the same resource the hook will see on the re-attempt.
908
+ if (salient) {
909
+ tc.argsPreview = JSON.stringify({ path: salient });
910
+ }
911
+ tc.approvalMessage = salient
912
+ ? `Tool requires approval: ${displayName} (${salient})`
913
+ : resolveDeniedApprovalMessage(displayName, "", {}, mergedPolicies);
857
914
  return tc;
858
915
  }
859
916