@prometheus-ai/agent-core 0.5.3 → 0.5.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/agent-loop.ts CHANGED
@@ -3,6 +3,7 @@
3
3
  * Transforms to Message[] only at the LLM call boundary.
4
4
  */
5
5
  import {
6
+ type ApiKeyResolveContext,
6
7
  type AssistantMessage,
7
8
  type AssistantMessageEvent,
8
9
  type Context,
@@ -14,7 +15,7 @@ import {
14
15
  validateToolArguments,
15
16
  zodToWireSchema,
16
17
  } from "@prometheus-ai/ai";
17
- import { sanitizeText } from "@prometheus-ai/utils";
18
+ import { logger, sanitizeText } from "@prometheus-ai/utils";
18
19
  import {
19
20
  createHarmonyAuditEvent,
20
21
  detectHarmonyLeakInAssistantMessage,
@@ -22,6 +23,7 @@ import {
22
23
  type HarmonyDetection,
23
24
  type HarmonyRecoveredToolCall,
24
25
  isHarmonyLeakMitigationTarget,
26
+ recoverHarmonyToolCall,
25
27
  signalListLabel,
26
28
  } from "./harmony-leak";
27
29
  import { type AgentRunCoverage, type AgentRunSummary, ToolCallBlockedError } from "./run-collector";
@@ -32,7 +34,7 @@ import {
32
34
  finishExecuteToolSpan,
33
35
  finishInvokeAgentSpan,
34
36
  fireOnRunEnd,
35
- PrometheusGenAIAttr,
37
+ PiGenAIAttr as PrometheusGenAIAttr,
36
38
  recordSkippedTool,
37
39
  resolveTelemetry,
38
40
  runInActiveSpan,
@@ -48,6 +50,7 @@ import type {
48
50
  AgentMessage,
49
51
  AgentTool,
50
52
  AgentToolResult,
53
+ AsideMessage,
51
54
  StreamFn,
52
55
  } from "./types";
53
56
  import { yieldIfDue } from "./utils/yield";
@@ -55,6 +58,14 @@ import { yieldIfDue } from "./utils/yield";
55
58
  /** Sentinel returned by the abort race in `streamAssistantResponse`. */
56
59
  const ABORTED: unique symbol = Symbol("agent-loop-aborted");
57
60
 
61
+ /**
62
+ * Cap on consecutive re-samples triggered by a non-terminal stop
63
+ * (`stopDetails.type === "pause_turn"`) without an intervening tool call. Each
64
+ * continuation is a full model request, so a backend that never stops pausing
65
+ * must not spin the loop forever. Resets whenever a turn carries tool calls.
66
+ */
67
+ const MAX_PAUSED_TURN_CONTINUATIONS = 8;
68
+
58
69
  class HarmonyLeakInterruption extends Error {
59
70
  constructor(
60
71
  readonly detection: HarmonyDetection,
@@ -66,6 +77,76 @@ class HarmonyLeakInterruption extends Error {
66
77
  }
67
78
  }
68
79
 
80
+ type AssistantContentBlock = AssistantMessage["content"][number];
81
+ type AssistantToolCallBlock = Extract<AssistantContentBlock, { type: "toolCall" }>;
82
+ type CloneableRecord = Record<string, unknown>;
83
+
84
/**
 * Recursively copy a JSON-like value so the result shares no mutable
 * containers with the input.
 *
 * - Arrays are copied element by element.
 * - Primitives, `null`/`undefined` and functions are returned as-is.
 * - Any other object becomes a fresh plain object holding copies of its own
 *   enumerable string-keyed properties (so non-plain objects are flattened).
 *
 * There is no cycle detection: a self-referencing input recurses without bound.
 *
 * @param value - Value to copy.
 * @returns A structurally equal copy of `value`.
 */
function cloneUnknown(value: unknown): unknown {
  if (Array.isArray(value)) return value.map(cloneUnknown);
  if (!value || typeof value !== "object") return value;
  const out: Record<string, unknown> = {};
  for (const [key, child] of Object.entries(value)) {
    // Define rather than assign: `out["__proto__"] = x` would invoke the
    // inherited `__proto__` setter, silently dropping the key and swapping the
    // copy's prototype. `JSON.parse` output can carry that key as an own
    // property, so it must survive the copy as plain data.
    Object.defineProperty(out, key, {
      value: cloneUnknown(child),
      enumerable: true,
      writable: true,
      configurable: true,
    });
  }
  return out;
}
94
+
95
/**
 * Produce an independent copy of a tool call's arguments by delegating to
 * `cloneUnknown`, re-asserting the argument type on the way out.
 *
 * @param args - Arguments of an assistant tool-call block.
 * @returns A copy of `args`.
 */
function cloneToolArguments(args: AssistantToolCallBlock["arguments"]): AssistantToolCallBlock["arguments"] {
  const copied = cloneUnknown(args);
  return copied as AssistantToolCallBlock["arguments"];
}
98
+
99
/**
 * Copy a single assistant content block.
 *
 * Tool-call blocks get their `arguments` copied through `cloneToolArguments`;
 * text, thinking and redacted-thinking blocks are shallow-copied.
 *
 * @param block - Content block to copy.
 * @returns A new block object of the same variant.
 */
function snapshotAssistantContentBlock(block: AssistantContentBlock): AssistantContentBlock {
  switch (block.type) {
    case "toolCall":
      return { ...block, arguments: cloneToolArguments(block.arguments) };
    case "text":
    case "thinking":
    case "redactedThinking":
      return { ...block };
  }
}
111
+
112
/**
 * Copy an assistant message so later changes to the original's content
 * blocks, usage counters or disabled-feature list do not show through.
 *
 * Copied: every content block (via `snapshotAssistantContentBlock`), `usage`
 * and `usage.cost` (one level each) and `disabledFeatures` (array copy, or
 * `undefined` when absent). All remaining fields are carried over by spread.
 *
 * @param message - Message to copy.
 * @returns The copied message.
 */
function snapshotAssistantMessage(message: AssistantMessage): AssistantMessage {
  const { usage, disabledFeatures } = message;
  const content = message.content.map(snapshotAssistantContentBlock);
  const usageCopy = { ...usage, cost: { ...usage.cost } };
  const disabledCopy = disabledFeatures ? [...disabledFeatures] : undefined;
  return {
    ...message,
    content,
    usage: usageCopy,
    disabledFeatures: disabledCopy,
  };
}
123
+
124
/**
 * Copy a streaming assistant event together with the message it carries.
 *
 * - `start` and the text/thinking/tool-call start, delta and end events
 *   (except `toolcall_end`) get a copied `partial`.
 * - `toolcall_end` additionally gets a copied `toolCall` block.
 * - `done` gets a copied `message`; `error` gets a copied `error`.
 *
 * @param event - Event to copy.
 * @returns A new event object of the same variant.
 */
function snapshotAssistantMessageEvent(event: AssistantMessageEvent): AssistantMessageEvent {
  switch (event.type) {
    case "done":
      return { ...event, message: snapshotAssistantMessage(event.message) };
    case "error":
      return { ...event, error: snapshotAssistantMessage(event.error) };
    case "toolcall_end":
      return {
        ...event,
        toolCall: snapshotAssistantContentBlock(event.toolCall) as AssistantToolCallBlock,
        partial: snapshotAssistantMessage(event.partial),
      };
    case "start":
    case "text_start":
    case "text_delta":
    case "text_end":
    case "thinking_start":
    case "thinking_delta":
    case "thinking_end":
    case "toolcall_start":
    case "toolcall_delta":
      return { ...event, partial: snapshotAssistantMessage(event.partial) };
  }
}
149
+
69
150
  /**
70
151
  * Normalize a value coming back from `tool.execute()` (or its streaming partial-update callback)
71
152
  * into a structurally valid {@link AgentToolResult}.
@@ -75,7 +156,17 @@ class HarmonyLeakInterruption extends Error {
75
156
  * (missing `content` array → crash on reload). We coerce at the single boundary where untyped
76
157
  * results enter the agent loop, so every downstream consumer can rely on the type.
77
158
  */
78
- function coerceToolResult(raw: unknown): { result: AgentToolResult<any>; malformed: boolean } {
159
+ const EMPTY_ERROR_TOOL_RESULT_TEXT = "Tool failed with no output.";
160
+
161
/**
 * Report whether a tool result carries anything beyond blank text.
 *
 * @param content - Content blocks of a tool result.
 * @returns `true` when at least one block is an image, or a text block whose
 *   text is non-empty after trimming; `false` otherwise (including for an
 *   empty list).
 */
function hasSubstantiveToolResultContent(content: AgentToolResult["content"]): boolean {
  return content.some(
    block => block.type === "image" || (block.type === "text" && block.text.trim().length > 0),
  );
}
168
+
169
+ function coerceToolResult(raw: unknown): { result: AgentToolResult<unknown>; malformed: boolean } {
79
170
  const rawObj = raw && typeof raw === "object" ? (raw as Record<string, unknown>) : null;
80
171
  const rawContent = rawObj?.content;
81
172
  const details = rawObj && "details" in rawObj ? rawObj.details : {};
@@ -83,6 +174,9 @@ function coerceToolResult(raw: unknown): { result: AgentToolResult<any>; malform
83
174
  // aggregator that catches per-entry errors and synthesizes a combined
84
175
  // result). Preserve the flag so agent-loop can surface it on the wire.
85
176
  const explicitError = Boolean(rawObj && "isError" in rawObj && rawObj.isError);
177
+ // Tools may flag the result contextually useless (zero matches, elapsed
178
+ // wait) so compaction can elide it once consumed. Errors are never useless.
179
+ const useless = Boolean(rawObj && "useless" in rawObj && rawObj.useless);
86
180
 
87
181
  if (!Array.isArray(rawContent)) {
88
182
  return {
@@ -96,8 +190,12 @@ function coerceToolResult(raw: unknown): { result: AgentToolResult<any>; malform
96
190
  }
97
191
 
98
192
  const content: AgentToolResult["content"] = [];
193
+ let invalidBlocks = 0;
99
194
  for (const block of rawContent) {
100
- if (!block || typeof block !== "object" || !("type" in block)) continue;
195
+ if (!block || typeof block !== "object" || !("type" in block)) {
196
+ invalidBlocks++;
197
+ continue;
198
+ }
101
199
  if (block.type === "text" && typeof (block as { text?: unknown }).text === "string") {
102
200
  content.push({ type: "text", text: sanitizeText((block as { text: string }).text) });
103
201
  } else if (
@@ -106,9 +204,31 @@ function coerceToolResult(raw: unknown): { result: AgentToolResult<any>; malform
106
204
  typeof (block as { mimeType?: unknown }).mimeType === "string"
107
205
  ) {
108
206
  content.push(block as { type: "image"; data: string; mimeType: string });
207
+ } else {
208
+ invalidBlocks++;
109
209
  }
110
210
  }
111
- return { result: { content, details, ...(explicitError ? { isError: true } : {}) }, malformed: false };
211
+ if (invalidBlocks > 0) {
212
+ content.push({
213
+ type: "text",
214
+ text: `Tool returned an invalid result: ${invalidBlocks} content block${invalidBlocks === 1 ? "" : "s"} had an unsupported shape.`,
215
+ });
216
+ }
217
+ const isError = explicitError || invalidBlocks > 0;
218
+ // Anthropic rejects tool_result blocks with is_error: true and empty content.
219
+ if (isError && !hasSubstantiveToolResultContent(content)) {
220
+ content.length = 0;
221
+ content.push({ type: "text", text: EMPTY_ERROR_TOOL_RESULT_TEXT });
222
+ }
223
+ return {
224
+ result: {
225
+ content,
226
+ details,
227
+ ...(isError ? { isError: true } : {}),
228
+ ...(useless && !isError ? { useless: true } : {}),
229
+ },
230
+ malformed: invalidBlocks > 0,
231
+ };
112
232
  }
113
233
 
114
234
  /**
@@ -174,7 +294,7 @@ export function agentLoopContinue(
174
294
 
175
295
  (async () => {
176
296
  const newMessages: AgentMessage[] = [];
177
- const currentContext: AgentContext = { ...context };
297
+ const currentContext: AgentContext = { ...context, messages: [...context.messages] };
178
298
 
179
299
  stream.push({ type: "agent_start" });
180
300
  stream.push({ type: "turn_start" });
@@ -311,22 +431,26 @@ function normalizeMessagesForProvider(
311
431
  return messages;
312
432
  }
313
433
 
314
- let changed = false;
315
- const normalized = messages.map(message => {
316
- if (message.role !== "assistant" || !Array.isArray(message.content)) {
317
- return message;
434
+ let hasThinking = false;
435
+ for (const message of messages) {
436
+ if (message.role !== "assistant" || !Array.isArray(message.content)) continue;
437
+ for (const block of message.content) {
438
+ if (block.type === "thinking") {
439
+ hasThinking = true;
440
+ break;
441
+ }
318
442
  }
443
+ if (hasThinking) break;
444
+ }
445
+ if (!hasThinking) return messages;
319
446
 
320
- const filtered = message.content.filter(block => block.type !== "thinking");
321
- if (filtered.length === message.content.length) {
447
+ return messages.map(message => {
448
+ if (message.role !== "assistant" || !Array.isArray(message.content)) {
322
449
  return message;
323
450
  }
324
-
325
- changed = true;
326
- return { ...message, content: filtered };
451
+ const filtered = message.content.filter(block => block.type !== "thinking");
452
+ return filtered.length === message.content.length ? message : { ...message, content: filtered };
327
453
  });
328
-
329
- return changed ? normalized : messages;
330
454
  }
331
455
 
332
456
  export const INTENT_FIELD = "_i";
@@ -443,25 +567,21 @@ interface StepCounter {
443
567
  count: number;
444
568
  }
445
569
 
446
- function normalizeMaxToolCallsPerTurn(value: number | undefined): number | undefined {
447
- if (value === undefined || !Number.isFinite(value)) return undefined;
448
- const normalized = Math.trunc(value);
449
- return normalized > 0 ? normalized : undefined;
450
- }
451
-
452
- function cloneAssistantMessageForToolCallCap(message: AssistantMessage): AssistantMessage {
453
- return {
454
- ...message,
455
- content: message.content.map(block => {
456
- if (block.type === "toolCall") {
457
- return { ...block, arguments: structuredClone(block.arguments) };
458
- }
459
- return { ...block };
460
- }),
461
- stopReason: "toolUse",
462
- errorMessage: undefined,
463
- errorStatus: undefined,
464
- };
570
/**
 * Turn queued aside entries into concrete messages right before injection.
 *
 * An entry is either a ready {@link AgentMessage} or a synchronous thunk. Thunks
 * are invoked here, so the producer decides at the last moment whether to
 * inject or drop (by returning a falsy value) — e.g. dropping late diagnostics
 * that a newer edit superseded. Resolution is synchronous so it cannot stall
 * the loop.
 *
 * @param entries - Queued asides; `undefined` or an empty list yields `[]`.
 * @returns The resolved messages in queue order, with dropped entries omitted.
 */
function resolveAsides(entries: AsideMessage[] | undefined): AgentMessage[] {
  const resolved: AgentMessage[] = [];
  for (const entry of entries ?? []) {
    const message = typeof entry === "function" ? entry() : entry;
    if (message) resolved.push(message);
  }
  return resolved;
}
466
586
 
467
587
  async function runLoopBody(
@@ -476,10 +596,13 @@ async function runLoopBody(
476
596
  streamFn?: StreamFn,
477
597
  ): Promise<void> {
478
598
  let firstTurn = true;
479
- // Check for steering messages at start (user may have typed while waiting)
480
- let pendingMessages: AgentMessage[] = (await config.getSteeringMessages?.()) || [];
599
+ // Check for steering messages at start (user may have typed while waiting).
600
+ // Skip when the run is already externally aborted — dequeuing would strand
601
+ // the messages in a run that is about to die.
602
+ let pendingMessages: AgentMessage[] = signal?.aborted ? [] : (await config.getSteeringMessages?.()) || [];
481
603
  let harmonyRetryAttempt = 0;
482
604
  let harmonyTruncateResumeCount = 0;
605
+ let pausedTurnContinuations = 0;
483
606
 
484
607
  // Outer loop: continues when queued follow-up messages arrive after agent would stop
485
608
  while (true) {
@@ -554,8 +677,13 @@ async function runLoopBody(
554
677
  continue;
555
678
  }
556
679
  }
680
+ if (recovered) {
681
+ message = snapshotAssistantMessage(message);
682
+ currentContext.messages.push(message);
683
+ stream.push({ type: "message_start", message: snapshotAssistantMessage(message) });
684
+ stream.push({ type: "message_end", message: snapshotAssistantMessage(message) });
685
+ }
557
686
  newMessages.push(message);
558
- let steeringMessagesFromExecution: AgentMessage[] | undefined;
559
687
 
560
688
  if (message.stopReason === "error" || message.stopReason === "aborted") {
561
689
  // Create placeholder tool results for any tool calls in the aborted message
@@ -580,6 +708,7 @@ async function runLoopBody(
580
708
  });
581
709
  }
582
710
  stream.push({ type: "turn_end", message, toolResults });
711
+
583
712
  stream.push(buildAgentEndEvent(newMessages, telemetry, stepCounter.count));
584
713
  stream.end(newMessages);
585
714
  return;
@@ -616,7 +745,6 @@ async function runLoopBody(
616
745
  );
617
746
 
618
747
  toolResults.push(...executionResult.toolResults);
619
- steeringMessagesFromExecution = executionResult.steeringMessages;
620
748
 
621
749
  for (const result of toolResults) {
622
750
  currentContext.messages.push(result);
@@ -642,19 +770,61 @@ async function runLoopBody(
642
770
  status: "skipped",
643
771
  });
644
772
  }
773
+ if (message.stopReason === "length" && toolResults.length > 0) {
774
+ hasMoreToolCalls = true;
775
+ }
776
+ }
777
+
778
+ if (toolCalls.length > 0) {
779
+ pausedTurnContinuations = 0;
780
+ } else if (
781
+ !hasMoreToolCalls &&
782
+ message.stopReason === "stop" &&
783
+ message.stopDetails?.type === "pause_turn" &&
784
+ pausedTurnContinuations < MAX_PAUSED_TURN_CONTINUATIONS
785
+ ) {
786
+ // Non-terminal stop: the provider ended the response but not the turn
787
+ // (e.g. Codex `end_turn: false` on a commentary-only progress update).
788
+ // Re-sample with the assistant message replayed so the model keeps
789
+ // working; the next round folds steering/asides in like any other
790
+ // mid-work turn.
791
+ pausedTurnContinuations++;
792
+ hasMoreToolCalls = true;
645
793
  }
646
794
 
647
795
  stream.push({ type: "turn_end", message, toolResults });
648
796
 
649
- pendingMessages = steeringMessagesFromExecution ?? ((await config.getSteeringMessages?.()) || []);
797
+ // On external abort (user interrupt), leave the steering queue intact: the
798
+ // session aborts then continues, delivering the queue into a fresh run.
799
+ // Draining it here would inject the messages right before a model call that
800
+ // instantly aborts — message lands in history, agent never responds. The
801
+ // mid-batch interrupt poll only peeks (hasSteeringMessages), so the queue
802
+ // still owns every message until this dequeue.
803
+ const steering = signal?.aborted ? [] : (await config.getSteeringMessages?.()) || [];
804
+ if (hasMoreToolCalls) {
805
+ // Mid-work: fold any non-interrupting asides into the next turn alongside steering.
806
+ const asides = resolveAsides(await config.getAsideMessages?.());
807
+ pendingMessages = asides.length > 0 ? [...steering, ...asides] : steering;
808
+ } else {
809
+ // Stop boundary: only steering (live user input) forces another turn here. Leave
810
+ // asides for the outer drain below so a passive aside can't trigger an extra model
811
+ // turn ahead of a queued follow-up — the outer drain batches asides + follow-ups together.
812
+ pendingMessages = steering;
813
+ }
650
814
  }
651
815
 
652
- // Agent would stop here. Check for follow-up messages.
816
+ // Agent would stop here. Drain non-interrupting asides + follow-up messages.
653
817
  await config.onBeforeYield?.();
654
- const followUpMessages = (await config.getFollowUpMessages?.()) || [];
655
- if (followUpMessages.length > 0) {
656
- // Set as pending so inner loop processes them
657
- pendingMessages = followUpMessages;
818
+ // Skip queue drains when externally aborted (same stranding hazard as above).
819
+ // Re-poll steering too: a steer can land between the stop-boundary dequeue
820
+ // above and this yield point (e.g. queued while onBeforeYield ran). Without
821
+ // this poll it would strand in the queue until the next manual prompt.
822
+ const lateSteering = signal?.aborted ? [] : (await config.getSteeringMessages?.()) || [];
823
+ const asideMessages = signal?.aborted ? [] : resolveAsides(await config.getAsideMessages?.());
824
+ const followUpMessages = signal?.aborted ? [] : (await config.getFollowUpMessages?.()) || [];
825
+ if (lateSteering.length > 0 || asideMessages.length > 0 || followUpMessages.length > 0) {
826
+ // Set as pending so the inner loop processes them before stopping.
827
+ pendingMessages = [...lateSteering, ...asideMessages, ...followUpMessages];
658
828
  continue;
659
829
  }
660
830
 
@@ -721,14 +891,18 @@ async function streamAssistantResponse(
721
891
  tools: normalizeTools(context.tools, !!config.intentTracing),
722
892
  };
723
893
  }
894
+ if (config.transformProviderContext) {
895
+ llmContext = config.transformProviderContext(llmContext, config.model);
896
+ }
724
897
 
725
898
  const streamFunction = streamFn || streamSimple;
726
899
 
727
900
  // Resolve API key (important for expiring tokens) — do this before resolving
728
901
  // metadata so that the session-sticky credential recorded by getApiKey is
729
902
  // visible to metadataResolver (e.g. for the correct account_uuid in metadata.user_id).
903
+ const staticApiKey = typeof config.apiKey === "string" ? config.apiKey : undefined;
730
904
  const resolvedApiKey =
731
- (config.getApiKey ? await config.getApiKey(config.model.provider) : undefined) || config.apiKey;
905
+ (config.getApiKey ? await config.getApiKey(config.model.provider) : undefined) || staticApiKey;
732
906
 
733
907
  // Re-resolve metadata after credential selection so the per-request value
734
908
  // reflects the credential actually used, not the snapshot from AgentLoopConfig construction.
@@ -736,24 +910,23 @@ async function streamAssistantResponse(
736
910
 
737
911
  const dynamicToolChoice = config.getToolChoice?.();
738
912
  const dynamicReasoning = config.getReasoning?.();
913
+ const dynamicDisableReasoning = config.getDisableReasoning?.();
739
914
  const harmonyMitigationEnabled = isHarmonyLeakMitigationTarget(config.model);
740
915
  const harmonyAbortController = harmonyMitigationEnabled ? new AbortController() : undefined;
741
- const maxToolCallsPerTurn = normalizeMaxToolCallsPerTurn(config.maxToolCallsPerTurn);
742
- const toolCallCapAbortController = maxToolCallsPerTurn === undefined ? undefined : new AbortController();
743
- const requestSignals: AbortSignal[] = [];
744
- if (signal) requestSignals.push(signal);
745
- if (harmonyAbortController) requestSignals.push(harmonyAbortController.signal);
746
- if (toolCallCapAbortController) requestSignals.push(toolCallCapAbortController.signal);
747
- const requestSignal =
748
- requestSignals.length === 0
749
- ? undefined
750
- : requestSignals.length === 1
751
- ? requestSignals[0]
752
- : AbortSignal.any(requestSignals);
916
+ const requestSignal = harmonyAbortController
917
+ ? signal
918
+ ? AbortSignal.any([signal, harmonyAbortController.signal])
919
+ : harmonyAbortController.signal
920
+ : signal;
921
+ const repetitionAbortController = new AbortController();
922
+ const finalRequestSignal = requestSignal
923
+ ? AbortSignal.any([requestSignal, repetitionAbortController.signal])
924
+ : repetitionAbortController.signal;
753
925
  const effectiveTemperature =
754
926
  harmonyRetryAttempt > 0 && config.temperature !== undefined ? config.temperature + 0.05 : config.temperature;
755
927
  const effectiveToolChoice = dynamicToolChoice ?? config.toolChoice;
756
928
  const effectiveReasoning = dynamicReasoning ?? config.reasoning;
929
+ const effectiveDisableReasoning = dynamicDisableReasoning ?? config.disableReasoning;
757
930
 
758
931
  const chatStepNumber = stepCounter.count;
759
932
  stepCounter.count += 1;
@@ -798,38 +971,100 @@ async function streamAssistantResponse(
798
971
  return await runInActiveSpan(chatSpan, async () => {
799
972
  const response = await streamFunction(config.model, llmContext, {
800
973
  ...config,
801
- apiKey: resolvedApiKey,
974
+ // Hand streamSimple a resolver so its central auth-retry policy can
975
+ // re-resolve on 401 / usage-limit: the initial step reuses the key
976
+ // already resolved above (which set the session-sticky credential
977
+ // feeding metadataResolver), and retry steps forward the a/b/c ctx
978
+ // to config.getApiKey (force-refresh, then rotate). With no
979
+ // getApiKey hook the caller's own apiKey (string or resolver) flows
980
+ // through unchanged.
981
+ apiKey: config.getApiKey
982
+ ? (ctx: ApiKeyResolveContext) =>
983
+ ctx.error === undefined
984
+ ? resolvedApiKey
985
+ : Promise.resolve(config.getApiKey!(config.model.provider, ctx))
986
+ : config.apiKey,
802
987
  metadata: resolvedMetadata,
803
988
  toolChoice: effectiveToolChoice,
804
989
  reasoning: effectiveReasoning,
990
+ disableReasoning: effectiveDisableReasoning,
805
991
  temperature: effectiveTemperature,
806
- signal: requestSignal,
992
+ signal: finalRequestSignal,
807
993
  onResponse: captureOnResponse,
808
994
  });
809
995
 
810
996
  let partialMessage: AssistantMessage | null = null;
811
997
  let addedPartial = false;
998
+ const completedToolCallIds = new Set<string>();
812
999
 
813
1000
  const responseIterator = response[Symbol.asyncIterator]();
814
- let completedToolCalls = 0;
815
- let cappedMessage: AssistantMessage | undefined;
816
- let capFinalized = false;
817
-
818
- const finishCappedAssistantMessage = async (): Promise<AssistantMessage | undefined> => {
819
- if (!cappedMessage) return undefined;
820
- responseIterator.return?.()?.catch(() => {});
821
- if (!capFinalized) {
822
- if (addedPartial) {
823
- context.messages[context.messages.length - 1] = cappedMessage;
824
- } else {
825
- context.messages.push(cappedMessage);
826
- stream.push({ type: "message_start", message: { ...cappedMessage } });
827
- }
828
- stream.push({ type: "message_end", message: cappedMessage });
829
- await finishChat(cappedMessage);
830
- capFinalized = true;
1001
+ const finishAbortedStream = async (): Promise<AssistantMessage> => {
1002
+ try {
1003
+ const cleanup = responseIterator.return?.();
1004
+ if (cleanup) void cleanup.catch(() => {});
1005
+ } catch {
1006
+ // Provider cancellation failures cannot change the committed aborted message.
831
1007
  }
832
- return cappedMessage;
1008
+ const aborted = emitAbortedAssistantMessage(
1009
+ partialMessage,
1010
+ addedPartial,
1011
+ completedToolCallIds,
1012
+ context,
1013
+ config,
1014
+ stream,
1015
+ requestSignal,
1016
+ );
1017
+ await finishChat(aborted);
1018
+ return aborted;
1019
+ };
1020
+ const finishRepetitionStream = async (
1021
+ kind: "text" | "thinking",
1022
+ pattern: string,
1023
+ count: number,
1024
+ ): Promise<AssistantMessage> => {
1025
+ repetitionAbortController.abort();
1026
+ try {
1027
+ const cleanup = responseIterator.return?.();
1028
+ if (cleanup) void cleanup.catch(() => {});
1029
+ } catch {
1030
+ // Provider cancellation failures cannot change the committed repetition message.
1031
+ }
1032
+ if (partialMessage) {
1033
+ truncateRepetition(partialMessage, kind, pattern);
1034
+ partialMessage.stopReason = "error";
1035
+ partialMessage.errorMessage = `Repetition loop detected: assistant repeated "${pattern.trim()}" ${count} times consecutively.`;
1036
+ }
1037
+ const finalMessage = snapshotAssistantMessage(
1038
+ partialMessage ?? {
1039
+ role: "assistant",
1040
+ content: [],
1041
+ api: config.model.api,
1042
+ provider: config.model.provider,
1043
+ model: config.model.id,
1044
+ usage: {
1045
+ input: 0,
1046
+ output: 0,
1047
+ cacheRead: 0,
1048
+ cacheWrite: 0,
1049
+ totalTokens: 0,
1050
+ cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
1051
+ },
1052
+ stopReason: "error",
1053
+ errorMessage: "Repetition loop detected.",
1054
+ timestamp: Date.now(),
1055
+ },
1056
+ );
1057
+ if (addedPartial) {
1058
+ context.messages[context.messages.length - 1] = finalMessage;
1059
+ } else {
1060
+ context.messages.push(finalMessage);
1061
+ }
1062
+ if (!addedPartial) {
1063
+ stream.push({ type: "message_start", message: snapshotAssistantMessage(finalMessage) });
1064
+ }
1065
+ stream.push({ type: "message_end", message: snapshotAssistantMessage(finalMessage) });
1066
+ await finishChat(finalMessage);
1067
+ return finalMessage;
833
1068
  };
834
1069
 
835
1070
  // Set up a single abort race: register the abort listener once for the whole
@@ -839,9 +1074,7 @@ async function streamAssistantResponse(
839
1074
  let detachAbortListener: (() => void) | undefined;
840
1075
  if (requestSignal) {
841
1076
  if (requestSignal.aborted) {
842
- const aborted = emitAbortedAssistantMessage(partialMessage, addedPartial, context, config, stream);
843
- await finishChat(aborted);
844
- return aborted;
1077
+ return await finishAbortedStream();
845
1078
  }
846
1079
  const { promise, resolve } = Promise.withResolvers<typeof ABORTED>();
847
1080
  const onAbort = () => resolve(ABORTED);
@@ -850,37 +1083,65 @@ async function streamAssistantResponse(
850
1083
  detachAbortListener = () => requestSignal.removeEventListener("abort", onAbort);
851
1084
  }
852
1085
 
1086
+ // Rolling tail of streamed text/thinking used for repetition-loop detection.
1087
+ // Bounded to REPETITION_WINDOW chars and reset when the active block kind
1088
+ // switches (text <-> thinking) so detection stays O(1) per delta and never
1089
+ // miscounts a repeated unit across a thinking/answer boundary.
1090
+ let repetitionTail = "";
1091
+ let repetitionKind: "text" | "thinking" | undefined;
1092
+ const isGeminiModel = config.model.provider.includes("google") || config.model.provider.includes("gemini");
1093
+
853
1094
  try {
854
1095
  while (true) {
855
1096
  let next: IteratorResult<AssistantMessageEvent>;
856
1097
  if (abortRacePromise) {
857
1098
  const result = await Promise.race([responseIterator.next(), abortRacePromise]);
858
1099
  if (result === ABORTED) {
859
- if (toolCallCapAbortController?.signal.aborted) {
860
- const capped = await finishCappedAssistantMessage();
861
- if (capped) return capped;
862
- }
863
- responseIterator.return?.()?.catch(() => {});
864
- const aborted = emitAbortedAssistantMessage(partialMessage, addedPartial, context, config, stream);
865
- await finishChat(aborted);
866
- return aborted;
1100
+ return await finishAbortedStream();
867
1101
  }
868
1102
  next = result;
869
1103
  } else {
870
1104
  next = await responseIterator.next();
871
1105
  }
872
- if (requestSignal?.aborted) {
873
- if (toolCallCapAbortController?.signal.aborted) {
874
- const capped = await finishCappedAssistantMessage();
875
- if (capped) return capped;
876
- }
877
- const aborted = emitAbortedAssistantMessage(partialMessage, addedPartial, context, config, stream);
878
- await finishChat(aborted);
879
- return aborted;
880
- }
881
1106
  if (next.done) break;
882
1107
 
883
1108
  const event = next.value;
1109
+ if (event.type === "done" || event.type === "error") {
1110
+ let finalMessage = retainCompletedToolCalls(await response.result(), completedToolCallIds);
1111
+ if (harmonyMitigationEnabled) {
1112
+ const detection = detectHarmonyLeakInAssistantMessage(finalMessage);
1113
+ if (detection) {
1114
+ const recovered = recoverHarmonyToolCall(finalMessage, detection);
1115
+ const removed = recovered?.removed ?? extractHarmonyRemoved(finalMessage, detection);
1116
+ if (addedPartial) {
1117
+ emitDiscardedHarmonyPartial(
1118
+ partialMessage,
1119
+ stream,
1120
+ `Discarded after GPT-5 Harmony protocol leakage (${signalListLabel(detection.signals)})`,
1121
+ );
1122
+ context.messages.pop();
1123
+ addedPartial = false;
1124
+ }
1125
+ throw new HarmonyLeakInterruption(detection, removed, recovered);
1126
+ }
1127
+ }
1128
+ finalMessage = snapshotAssistantMessage(finalMessage);
1129
+ if (addedPartial) {
1130
+ context.messages[context.messages.length - 1] = finalMessage;
1131
+ } else {
1132
+ context.messages.push(finalMessage);
1133
+ }
1134
+ if (!addedPartial) {
1135
+ stream.push({ type: "message_start", message: snapshotAssistantMessage(finalMessage) });
1136
+ }
1137
+ stream.push({ type: "message_end", message: snapshotAssistantMessage(finalMessage) });
1138
+ await finishChat(finalMessage);
1139
+ return finalMessage;
1140
+ }
1141
+ if (requestSignal?.aborted) {
1142
+ return await finishAbortedStream();
1143
+ }
1144
+
884
1145
  // Yield to the event loop periodically to prevent busy-wait
885
1146
  // when the LLM is streaming chunks faster than the loop can rest.
886
1147
  await yieldIfDue();
@@ -890,7 +1151,7 @@ async function streamAssistantResponse(
890
1151
  partialMessage = event.partial;
891
1152
  context.messages.push(partialMessage);
892
1153
  addedPartial = true;
893
- stream.push({ type: "message_start", message: { ...partialMessage } });
1154
+ stream.push({ type: "message_start", message: snapshotAssistantMessage(partialMessage) });
894
1155
  break;
895
1156
 
896
1157
  case "text_start":
@@ -903,72 +1164,68 @@ async function streamAssistantResponse(
903
1164
  case "toolcall_delta":
904
1165
  case "toolcall_end":
905
1166
  if (partialMessage) {
1167
+ if (event.type === "toolcall_end") {
1168
+ completedToolCallIds.add(event.toolCall.id);
1169
+ }
906
1170
  partialMessage = event.partial;
907
1171
  context.messages[context.messages.length - 1] = partialMessage;
908
1172
  config.onAssistantMessageEvent?.(partialMessage, event);
909
- if (signal?.aborted) {
910
- continue;
911
- }
912
1173
  stream.push({
913
1174
  type: "message_update",
914
- assistantMessageEvent: event,
915
- message: { ...partialMessage },
1175
+ assistantMessageEvent: snapshotAssistantMessageEvent(event),
1176
+ message: snapshotAssistantMessage(partialMessage),
916
1177
  });
917
- if (event.type === "toolcall_end" && maxToolCallsPerTurn !== undefined) {
918
- completedToolCalls++;
919
- if (completedToolCalls >= maxToolCallsPerTurn) {
920
- cappedMessage = cloneAssistantMessageForToolCallCap(partialMessage);
921
- toolCallCapAbortController?.abort();
922
- const capped = await finishCappedAssistantMessage();
923
- if (capped) return capped;
1178
+ if (isGeminiModel && (event.type === "text_delta" || event.type === "thinking_delta")) {
1179
+ const kind = event.type === "text_delta" ? "text" : "thinking";
1180
+ if (repetitionKind !== kind) {
1181
+ repetitionKind = kind;
1182
+ repetitionTail = "";
924
1183
  }
925
- }
926
- }
927
- break;
928
-
929
- case "done":
930
- case "error": {
931
- const finalMessage = await response.result();
932
- if (harmonyMitigationEnabled) {
933
- const detection = detectHarmonyLeakInAssistantMessage(finalMessage);
934
- if (detection) {
935
- const removed = extractHarmonyRemoved(finalMessage, detection);
936
- if (addedPartial) {
937
- context.messages.pop();
938
- addedPartial = false;
1184
+ repetitionTail += event.delta;
1185
+ if (repetitionTail.length > REPETITION_WINDOW) {
1186
+ repetitionTail = repetitionTail.slice(-REPETITION_WINDOW);
1187
+ }
1188
+ const repetition = detectRepetition(repetitionTail);
1189
+ if (repetition) {
1190
+ const [pattern, count] = repetition;
1191
+ logger.warn("Repetition loop detected during assistant stream, aborting.", {
1192
+ pattern,
1193
+ count,
1194
+ });
1195
+ return await finishRepetitionStream(kind, pattern, count);
939
1196
  }
940
- throw new HarmonyLeakInterruption(detection, removed);
941
1197
  }
942
1198
  }
943
- if (addedPartial) {
944
- context.messages[context.messages.length - 1] = finalMessage;
945
- } else {
946
- context.messages.push(finalMessage);
947
- }
948
- if (!addedPartial) {
949
- stream.push({ type: "message_start", message: { ...finalMessage } });
950
- }
951
- stream.push({ type: "message_end", message: finalMessage });
952
- await finishChat(finalMessage);
953
- return finalMessage;
954
- }
1199
+ break;
955
1200
  }
956
1201
  }
957
1202
  } finally {
958
1203
  detachAbortListener?.();
959
1204
  }
960
1205
 
961
- const trailing = await response.result();
1206
+ let trailing = await response.result();
962
1207
  if (harmonyMitigationEnabled) {
963
1208
  const detection = detectHarmonyLeakInAssistantMessage(trailing);
964
1209
  if (detection) {
1210
+ const recovered = recoverHarmonyToolCall(trailing, detection);
1211
+ const removed = recovered?.removed ?? extractHarmonyRemoved(trailing, detection);
965
1212
  if (addedPartial) {
1213
+ emitDiscardedHarmonyPartial(
1214
+ partialMessage,
1215
+ stream,
1216
+ `Discarded after GPT-5 Harmony protocol leakage (${signalListLabel(detection.signals)})`,
1217
+ );
966
1218
  context.messages.pop();
967
1219
  addedPartial = false;
968
1220
  }
969
- throw new HarmonyLeakInterruption(detection, extractHarmonyRemoved(trailing, detection));
1221
+ throw new HarmonyLeakInterruption(detection, removed, recovered);
970
1222
  }
971
1223
  }
1224
+ trailing = snapshotAssistantMessage(trailing);
1225
+ if (addedPartial) {
1226
+ context.messages[context.messages.length - 1] = trailing;
1227
+ stream.push({ type: "message_end", message: snapshotAssistantMessage(trailing) });
1228
+ }
972
1229
  await finishChat(trailing);
973
1230
  return trailing;
974
1231
  });
@@ -982,15 +1239,73 @@ async function streamAssistantResponse(
982
1239
  }
983
1240
  }
984
1241
 
1242
/**
 * Drop tool-call blocks that never completed from a message that ended in
 * `"error"` or `"aborted"`.
 *
 * Messages with any other `stopReason` are returned untouched. For an
 * errored/aborted message, every `toolCall` block whose `id` is missing from
 * `completedToolCallIds` is removed; all other blocks keep their order.
 *
 * @param message - The assistant message to inspect; it is never mutated.
 * @param completedToolCallIds - Ids of the tool calls considered complete.
 * @returns The same `message` object when nothing had to be removed, otherwise
 *   a shallow copy carrying the filtered `content`.
 */
function retainCompletedToolCalls(
	message: AssistantMessage,
	completedToolCallIds: ReadonlySet<string>,
): AssistantMessage {
	const endedAbnormally = message.stopReason === "error" || message.stopReason === "aborted";
	if (!endedAbnormally) return message;

	const isIncompleteCall = (block: AssistantMessage["content"][number]): boolean =>
		block.type === "toolCall" && !completedToolCallIds.has(block.id);

	// Preserve object identity when there is nothing to strip.
	if (!message.content.some(isIncompleteCall)) return message;
	return { ...message, content: message.content.filter(block => !isIncompleteCall(block)) };
}
1256
+
1257
/**
 * Emit a closing `message_end` event for a partial assistant message that is
 * being thrown away.
 *
 * The event carries a copy of the partial with `stopReason` forced to
 * `"error"` and `errorMessage` set to the supplied text, passed through
 * `snapshotAssistantMessage` before being pushed. Nothing is emitted when no
 * partial exists.
 *
 * @param partialMessage - The partial message to close out, or `null`.
 * @param stream - Event stream that receives the `message_end` event.
 * @param errorMessage - Text recorded on the discarded message.
 */
function emitDiscardedHarmonyPartial(
	partialMessage: AssistantMessage | null,
	stream: EventStream<AgentEvent, AgentMessage[]>,
	errorMessage: string,
): void {
	if (partialMessage) {
		const discarded = snapshotAssistantMessage({ ...partialMessage, stopReason: "error", errorMessage });
		stream.push({ type: "message_end", message: discarded });
	}
}
1268
+
1269
/**
 * Turn the reason carried by an abort signal into display text.
 *
 * - A non-blank string reason is returned as-is (not trimmed).
 * - An `Error` reason whose `name` is not `"AbortError"` and whose message is
 *   non-blank yields that message.
 * - Everything else — no signal, no reason, a blank reason, or an
 *   `AbortError`-named error such as the one a bare `abort()` produces —
 *   falls back to the generic `"Request was aborted"` text.
 *
 * @param signal - The signal whose `reason` should be described; may be absent.
 * @returns The human-readable abort reason, never empty.
 */
export function abortReasonText(signal: AbortSignal | undefined): string {
	const fallback = "Request was aborted";
	const reason = signal?.reason;

	if (typeof reason === "string") {
		return reason.trim().length > 0 ? reason : fallback;
	}
	if (reason instanceof Error) {
		const labeled = reason.name !== "AbortError" && reason.message.trim().length > 0;
		return labeled ? reason.message : fallback;
	}
	return fallback;
}
1283
+
1284
/**
 * Report whether an abort carried a deliberate, human-meaningful reason.
 *
 * Returns true for a non-blank string reason, or for an `Error` reason whose
 * `name` is not `"AbortError"` and whose message is non-blank. A missing
 * signal, a missing reason, a blank reason, or an `AbortError`-named error
 * (what a bare `abort()` produces) all count as anonymous and return false.
 *
 * The caller uses this to decide whether tool calls seen mid-stream are kept
 * after an abort (explicit reason) or filtered down to completed ones
 * (anonymous abort).
 *
 * @param signal - The signal whose `reason` is examined; may be absent.
 */
function isExplicitAbortReason(signal: AbortSignal | undefined): boolean {
	const reason = signal?.reason;
	if (reason instanceof Error) {
		return reason.name !== "AbortError" && reason.message.trim().length > 0;
	}
	return typeof reason === "string" && reason.trim().length > 0;
}
1297
+
985
1298
  function emitAbortedAssistantMessage(
986
1299
  partialMessage: AssistantMessage | null,
987
1300
  addedPartial: boolean,
1301
+ completedToolCallIds: ReadonlySet<string>,
988
1302
  context: AgentContext,
989
1303
  config: AgentLoopConfig,
990
1304
  stream: EventStream<AgentEvent, AgentMessage[]>,
1305
+ requestSignal: AbortSignal | undefined,
991
1306
  ): AssistantMessage {
992
- const errorMessage = "Request was aborted";
993
- const abortedMessage: AssistantMessage = partialMessage
1307
+ const errorMessage = abortReasonText(requestSignal);
1308
+ const base: AssistantMessage = partialMessage
994
1309
  ? { ...partialMessage, stopReason: "aborted", errorMessage }
995
1310
  : {
996
1311
  role: "assistant",
@@ -1010,13 +1325,19 @@ function emitAbortedAssistantMessage(
1010
1325
  errorMessage,
1011
1326
  timestamp: Date.now(),
1012
1327
  };
1328
+ // A deliberate, labeled abort (TTSR rule match, user interrupt) keeps every
1329
+ // committed tool-call block so the loop pairs it with a placeholder labeled by
1330
+ // `errorMessage`; an anonymous abort still drops calls that never completed
1331
+ // (no `toolcall_end`), whose partial args are unsafe to replay.
1332
+ const retained = isExplicitAbortReason(requestSignal) ? base : retainCompletedToolCalls(base, completedToolCallIds);
1333
+ const abortedMessage = snapshotAssistantMessage(retained);
1013
1334
  if (addedPartial) {
1014
1335
  context.messages[context.messages.length - 1] = abortedMessage;
1015
1336
  } else {
1016
1337
  context.messages.push(abortedMessage);
1017
- stream.push({ type: "message_start", message: { ...abortedMessage } });
1338
+ stream.push({ type: "message_start", message: snapshotAssistantMessage(abortedMessage) });
1018
1339
  }
1019
- stream.push({ type: "message_end", message: abortedMessage });
1340
+ stream.push({ type: "message_end", message: snapshotAssistantMessage(abortedMessage) });
1020
1341
  return abortedMessage;
1021
1342
  }
1022
1343
 
@@ -1031,9 +1352,10 @@ async function executeToolCalls(
1031
1352
  config: AgentLoopConfig,
1032
1353
  telemetry: AgentTelemetry | undefined,
1033
1354
  invokeAgentSpan: Span | undefined,
1034
- ): Promise<{ toolResults: ToolResultMessage[]; steeringMessages?: AgentMessage[] }> {
1355
+ ): Promise<{ toolResults: ToolResultMessage[] }> {
1035
1356
  const tools = currentContext.tools;
1036
1357
  const {
1358
+ hasSteeringMessages,
1037
1359
  getSteeringMessages,
1038
1360
  interruptMode = "immediate",
1039
1361
  getToolContext,
@@ -1053,8 +1375,6 @@ async function executeToolCalls(
1053
1375
  ? AbortSignal.any([signal, steeringAbortController.signal])
1054
1376
  : steeringAbortController.signal;
1055
1377
  const interruptState = { triggered: false };
1056
- let steeringMessages: AgentMessage[] | undefined;
1057
- let steeringCheck: Promise<void> | null = null;
1058
1378
 
1059
1379
  const records = toolCalls.map(toolCall => ({
1060
1380
  toolCall,
@@ -1075,24 +1395,31 @@ async function executeToolCalls(
1075
1395
  }));
1076
1396
 
1077
1397
  const checkSteering = async (): Promise<void> => {
1078
- if (!shouldInterruptImmediately || !getSteeringMessages || interruptState.triggered) {
1398
+ // `signal` (external/user abort) is checked separately from the internal
1399
+ // steeringAbortController: once the run is externally aborted it is
1400
+ // unwinding and the interrupt would be redundant.
1401
+ if (!shouldInterruptImmediately || interruptState.triggered || signal?.aborted) {
1079
1402
  return;
1080
1403
  }
1081
- if (steeringCheck) {
1082
- await steeringCheck;
1404
+ // Prefer the non-consuming peek (`hasSteeringMessages`) when available.
1405
+ // Fall back to calling `getSteeringMessages` directly when only it is
1406
+ // provided (e.g. in tests or minimal integrations without a separate
1407
+ // peek function). In that case the message is consumed here rather than
1408
+ // at the outer injection boundary, but the interrupt still fires.
1409
+ let hasMessages: boolean;
1410
+ if (hasSteeringMessages) {
1411
+ hasMessages = await hasSteeringMessages();
1412
+ } else if (getSteeringMessages) {
1413
+ const msgs = await getSteeringMessages();
1414
+ hasMessages = (msgs?.length ?? 0) > 0;
1415
+ } else {
1083
1416
  return;
1084
1417
  }
1085
- steeringCheck = (async () => {
1086
- const steering = await getSteeringMessages();
1087
- if (steering.length > 0) {
1088
- steeringMessages = steering;
1089
- interruptState.triggered = true;
1090
- steeringAbortController.abort();
1091
- }
1092
- })().finally(() => {
1093
- steeringCheck = null;
1094
- });
1095
- await steeringCheck;
1418
+ if (hasMessages) {
1419
+ if (interruptState.triggered || signal?.aborted) return;
1420
+ interruptState.triggered = true;
1421
+ steeringAbortController.abort();
1422
+ }
1096
1423
  };
1097
1424
 
1098
1425
  const emitToolResult = (record: (typeof records)[number], result: AgentToolResult<any>, isError: boolean): void => {
@@ -1122,6 +1449,7 @@ async function executeToolCalls(
1122
1449
  content: result.content,
1123
1450
  details: result.details,
1124
1451
  isError,
1452
+ ...(result.useless && !isError ? { useless: true } : {}),
1125
1453
  timestamp: Date.now(),
1126
1454
  };
1127
1455
  record.result = result;
@@ -1164,6 +1492,16 @@ async function executeToolCalls(
1164
1492
  }
1165
1493
  }
1166
1494
  record.args = argsForExecution;
1495
+ if (toolSignal.aborted) {
1496
+ record.skipped = true;
1497
+ recordSkippedTool(telemetry, {
1498
+ toolCallId: toolCall.id,
1499
+ toolName: toolCall.name,
1500
+ status: "aborted",
1501
+ });
1502
+ emitToolResult(record, createToolSignalAbortedResult(toolSignal), true);
1503
+ return;
1504
+ }
1167
1505
  record.started = true;
1168
1506
  stream.push({
1169
1507
  type: "tool_execution_start",
@@ -1187,10 +1525,16 @@ async function executeToolCalls(
1187
1525
  let result: AgentToolResult<any> = { content: [], details: {} };
1188
1526
  let isError = false;
1189
1527
  let caughtError: unknown;
1528
+ let completedToolExecution = false;
1190
1529
 
1191
1530
  await runInActiveSpan(toolSpan, async () => {
1192
1531
  try {
1193
1532
  if (!tool) throw new Error(`Tool ${toolCall.name} not found`);
1533
+ if (toolSignal.aborted) {
1534
+ result = createToolSignalAbortedResult(toolSignal);
1535
+ isError = true;
1536
+ return;
1537
+ }
1194
1538
 
1195
1539
  let effectiveArgs: Record<string, unknown>;
1196
1540
  try {
@@ -1217,8 +1561,15 @@ async function executeToolCalls(
1217
1561
  throw new ToolCallBlockedError(beforeResult.reason);
1218
1562
  }
1219
1563
  }
1220
- // Reflect post-hook args so emitted tool results / afterToolCall see what actually executed.
1221
- record.args = effectiveArgs;
1564
+ if (toolSignal.aborted) {
1565
+ result = createToolSignalAbortedResult(toolSignal);
1566
+ isError = true;
1567
+ return;
1568
+ }
1569
+ const executionArgs = transformToolCallArguments
1570
+ ? transformToolCallArguments(effectiveArgs, toolCall.name)
1571
+ : effectiveArgs;
1572
+ record.args = executionArgs;
1222
1573
 
1223
1574
  const toolContext = getToolContext
1224
1575
  ? getToolContext({
@@ -1230,19 +1581,20 @@ async function executeToolCalls(
1230
1581
  : undefined;
1231
1582
  const rawResult = await tool.execute(
1232
1583
  toolCall.id,
1233
- transformToolCallArguments ? transformToolCallArguments(effectiveArgs, toolCall.name) : effectiveArgs,
1234
- tool.nonAbortable ? undefined : toolSignal,
1584
+ executionArgs,
1585
+ toolSignal,
1235
1586
  partialResult => {
1236
1587
  stream.push({
1237
1588
  type: "tool_execution_update",
1238
1589
  toolCallId: toolCall.id,
1239
1590
  toolName: toolCall.name,
1240
- args: effectiveArgs,
1591
+ args: executionArgs,
1241
1592
  partialResult: coerceToolResult(partialResult).result,
1242
1593
  });
1243
1594
  },
1244
1595
  toolContext,
1245
1596
  );
1597
+ completedToolExecution = true;
1246
1598
  const coerced = coerceToolResult(rawResult);
1247
1599
  result = coerced.result;
1248
1600
  if (coerced.malformed || result.isError) isError = true;
@@ -1255,7 +1607,7 @@ async function executeToolCalls(
1255
1607
  isError = true;
1256
1608
  }
1257
1609
 
1258
- if (afterToolCall) {
1610
+ if (afterToolCall && (!toolSignal.aborted || completedToolExecution)) {
1259
1611
  try {
1260
1612
  const after = await afterToolCall(
1261
1613
  {
@@ -1269,12 +1621,18 @@ async function executeToolCalls(
1269
1621
  toolSignal,
1270
1622
  );
1271
1623
  if (after) {
1272
- result = {
1624
+ // Re-normalize the post-hook result: `afterToolCall` is untyped user/extension
1625
+ // code and may return malformed `content` (non-array / invalid blocks), which
1626
+ // would otherwise be persisted verbatim and corrupt the session — the same
1627
+ // hazard `coerceToolResult` guards on the execute path.
1628
+ const coerced = coerceToolResult({
1273
1629
  content: after.content ?? result.content,
1274
1630
  details: after.details ?? result.details,
1275
1631
  isError: after.isError ?? result.isError,
1276
- };
1277
- isError = after.isError ?? isError;
1632
+ useless: after.useless ?? result.useless,
1633
+ });
1634
+ result = coerced.result;
1635
+ isError = coerced.malformed || (after.isError ?? isError);
1278
1636
  }
1279
1637
  } catch (e) {
1280
1638
  caughtError = e;
@@ -1288,23 +1646,30 @@ async function executeToolCalls(
1288
1646
  });
1289
1647
 
1290
1648
  const interrupted = interruptState.triggered;
1291
- if (interrupted) {
1649
+ const abortedDuringExecution = toolSignal.aborted && isError;
1650
+ if (interrupted && isError) {
1651
+ // Steering/abort fired AND this tool failed — it was cut off before producing a
1652
+ // usable result, so report it as skipped.
1292
1653
  record.skipped = true;
1293
1654
  emitToolResult(record, createSkippedToolResult(), true);
1294
1655
  } else {
1656
+ // No interrupt, or the tool finished (successfully or with a genuine error) before
1657
+ // the interrupt landed. Keep its real result: a completed tool already ran its side
1658
+ // effects, so the model must see what actually happened rather than a false "skipped".
1295
1659
  emitToolResult(record, result, isError);
1296
1660
  }
1297
1661
 
1298
1662
  const firstTextBlock = result.content?.[0];
1299
1663
  const errorMessageForSpan =
1300
1664
  caughtError === undefined && isError && firstTextBlock?.type === "text" ? firstTextBlock.text : undefined;
1301
- const status = interrupted
1302
- ? "aborted"
1303
- : caughtError instanceof ToolCallBlockedError
1304
- ? "blocked"
1305
- : isError
1306
- ? "error"
1307
- : "ok";
1665
+ const status =
1666
+ (interrupted && isError) || abortedDuringExecution
1667
+ ? "aborted"
1668
+ : caughtError instanceof ToolCallBlockedError
1669
+ ? "blocked"
1670
+ : isError
1671
+ ? "error"
1672
+ : "ok";
1308
1673
  finishExecuteToolSpan(telemetry, toolSpan, {
1309
1674
  result,
1310
1675
  isError,
@@ -1324,7 +1689,19 @@ async function executeToolCalls(
1324
1689
 
1325
1690
  for (let index = 0; index < records.length; index++) {
1326
1691
  const record = records[index];
1327
- const concurrency = record.tool?.concurrency ?? "shared";
1692
+ const concurrencyMode = record.tool?.concurrency;
1693
+ let concurrency: "shared" | "exclusive";
1694
+ if (typeof concurrencyMode === "function") {
1695
+ // Resolved from raw pre-validation args; a throwing resolver must not
1696
+ // take down the whole batch, so fall back to the safe (serial) mode.
1697
+ try {
1698
+ concurrency = concurrencyMode(record.args);
1699
+ } catch {
1700
+ concurrency = "exclusive";
1701
+ }
1702
+ } else {
1703
+ concurrency = concurrencyMode ?? "shared";
1704
+ }
1328
1705
  const start = concurrency === "exclusive" ? Promise.all([lastExclusive, ...sharedTasks]) : lastExclusive;
1329
1706
  const task = start.then(() => runTool(record, index));
1330
1707
  tasks.push(task);
@@ -1353,7 +1730,7 @@ async function executeToolCalls(
1353
1730
  }
1354
1731
  }
1355
1732
 
1356
- return { toolResults: emittedToolResults, steeringMessages };
1733
+ return { toolResults: emittedToolResults };
1357
1734
  }
1358
1735
 
1359
1736
  /**
@@ -1410,9 +1787,111 @@ function createAbortedToolResult(
1410
1787
  return toolResultMessage;
1411
1788
  }
1412
1789
 
1790
/**
 * Build the placeholder result reported for a tool that was not run because
 * its signal had already been aborted.
 *
 * The single text block embeds the abort reason as resolved by
 * `abortReasonText`; `details` is an empty object.
 *
 * @param signal - The aborted tool signal supplying the reason.
 */
function createToolSignalAbortedResult(signal: AbortSignal): AgentToolResult<unknown> {
	const text = `Tool was not executed because the run was aborted: ${abortReasonText(signal)}.`;
	return { content: [{ type: "text", text }], details: {} };
}
1797
+
1413
1798
  function createSkippedToolResult(): AgentToolResult<any> {
1414
1799
  return {
1415
1800
  content: [{ type: "text", text: "Skipped due to queued user message." }],
1416
1801
  details: {},
1417
1802
  };
1418
1803
  }
1804
+
1805
/** Number of trailing characters inspected when looking for a repetition loop. */
const REPETITION_WINDOW = 250;
/** Minimum number of characters a repeated run must span to count as a loop. */
const REPETITION_MIN_REPEATED_CHARS = 180;

/**
 * Detect a short unit repeated back-to-back at the very end of `text`.
 *
 * Only the last `REPETITION_WINDOW` characters are examined. Candidate unit
 * lengths run from 2 to 60 characters, shortest first, and a candidate is
 * accepted when it repeats at least 4 times and the repeats together cover at
 * least `REPETITION_MIN_REPEATED_CHARS` characters.
 *
 * @param text - The accumulated tail of streamed output.
 * @returns `[pattern, count]` for the first (shortest) qualifying unit, or
 *   `null` when the text is too short or no qualifying run ends the text.
 */
function detectRepetition(text: string): [pattern: string, count: number] | null {
	if (text.length < REPETITION_MIN_REPEATED_CHARS) return null;

	const tail = text.slice(-Math.min(text.length, REPETITION_WINDOW));
	// A unit only counts as a pathological loop when it contains a letter or a
	// pictographic emoji. Runs of digits, whitespace or punctuation are normal in
	// tabular / hex / numeric output ("00 00 00", "0, 0, 0", "| -- | -- |").
	const carriesContent = /[\p{L}\p{Extended_Pictographic}]/u;

	// Four copies must fit in the tail; once they no longer do, no longer unit can either.
	for (let unitLength = 2; unitLength <= 60 && tail.length >= unitLength * 4; unitLength++) {
		const unit = tail.slice(-unitLength);
		if (!carriesContent.test(unit)) continue;

		// Walk backwards from the end, counting consecutive copies of the unit.
		let repeats = 0;
		for (
			let end = tail.length;
			end >= unitLength && tail.startsWith(unit, end - unitLength);
			end -= unitLength
		) {
			repeats++;
		}

		if (repeats >= 4 && unitLength * repeats >= REPETITION_MIN_REPEATED_CHARS) {
			return [unit, repeats];
		}
	}
	return null;
}
1842
+
1843
/**
 * Collapse a runaway repetition at the tail of `message` down to a single copy
 * of `pattern`, mutating the affected content blocks in place.
 *
 * The repetition is expected in the trailing contiguous run of blocks of the
 * given `kind` — one growing block, or several same-kind blocks in a row. That
 * run's text is treated as one string: while it ends in two back-to-back
 * copies of `pattern`, one copy is dropped. The removed characters are then
 * trimmed from the blocks, last block first, which may leave a block empty.
 *
 * For `kind === "thinking"`, every block in the trailing run has its
 * `thinkingSignature` and `itemId` reset to `undefined`, whether or not any of
 * its text was removed.
 *
 * An empty `pattern` leaves all text untouched. Without that guard the
 * collapse condition is always true and never shrinks the text, so the loop
 * would not terminate.
 *
 * @param message - Assistant message whose trailing blocks are edited in place.
 * @param kind - Which block type the repetition was detected in.
 * @param pattern - The repeated unit to collapse to a single occurrence.
 */
function truncateRepetition(message: AssistantMessage, kind: "text" | "thinking", pattern: string): void {
	const matches = (block: AssistantContentBlock): boolean =>
		kind === "text" ? block.type === "text" : block.type === "thinking";
	const readBlock = (block: AssistantContentBlock): string =>
		block.type === "text" ? block.text : block.type === "thinking" ? block.thinking : "";
	const clearThinkingReplayAnchors = (block: AssistantContentBlock): void => {
		if (block.type !== "thinking") return;
		block.thinkingSignature = undefined;
		block.itemId = undefined;
	};
	const writeBlock = (block: AssistantContentBlock, value: string): void => {
		if (block.type === "text") {
			block.text = value;
		} else if (block.type === "thinking") {
			block.thinking = value;
			clearThinkingReplayAnchors(block);
		}
	};

	// Gather the trailing contiguous run of same-kind blocks, in document order.
	const trailing: AssistantContentBlock[] = [];
	for (let i = message.content.length - 1; i >= 0; i--) {
		const block = message.content[i];
		if (!matches(block)) break;
		trailing.unshift(block);
	}
	if (trailing.length === 0) return;
	if (kind === "thinking") {
		for (const block of trailing) clearThinkingReplayAnchors(block);
	}

	// Degenerate pattern: there is nothing to collapse, and the loop below
	// would never make progress with a zero-length unit.
	const unit = pattern.length;
	if (unit === 0) return;

	const joined = trailing.map(readBlock).join("");

	// Shrink the kept length while the kept text still ends in pattern + pattern.
	// Comparing in place avoids re-slicing the whole string on every step.
	let keptLength = joined.length;
	while (
		keptLength >= unit * 2 &&
		joined.startsWith(pattern, keptLength - unit) &&
		joined.startsWith(pattern, keptLength - unit * 2)
	) {
		keptLength -= unit;
	}

	// Trim the surplus characters off the blocks, working from the last block back.
	let remainingToRemove = joined.length - keptLength;
	for (let i = trailing.length - 1; i >= 0 && remainingToRemove > 0; i--) {
		const block = trailing[i];
		const value = readBlock(block);
		if (value.length <= remainingToRemove) {
			remainingToRemove -= value.length;
			writeBlock(block, "");
		} else {
			writeBlock(block, value.slice(0, value.length - remainingToRemove));
			remainingToRemove = 0;
		}
	}
}