@prometheus-ai/agent-core 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61)
  1. package/CHANGELOG.md +7 -0
  2. package/README.md +473 -0
  3. package/dist/types/agent-loop.d.ts +55 -0
  4. package/dist/types/agent.d.ts +331 -0
  5. package/dist/types/append-only-context.d.ts +113 -0
  6. package/dist/types/compaction/branch-summarization.d.ts +94 -0
  7. package/dist/types/compaction/compaction.d.ts +183 -0
  8. package/dist/types/compaction/entries.d.ts +103 -0
  9. package/dist/types/compaction/errors.d.ts +26 -0
  10. package/dist/types/compaction/index.d.ts +12 -0
  11. package/dist/types/compaction/messages.d.ts +61 -0
  12. package/dist/types/compaction/openai.d.ts +58 -0
  13. package/dist/types/compaction/pruning.d.ts +19 -0
  14. package/dist/types/compaction/shake.d.ts +82 -0
  15. package/dist/types/compaction/tool-protection.d.ts +17 -0
  16. package/dist/types/compaction/utils.d.ts +32 -0
  17. package/dist/types/compaction.d.ts +1 -0
  18. package/dist/types/harmony-leak.d.ts +118 -0
  19. package/dist/types/index.d.ts +11 -0
  20. package/dist/types/proxy.d.ts +84 -0
  21. package/dist/types/run-collector.d.ts +196 -0
  22. package/dist/types/telemetry.d.ts +588 -0
  23. package/dist/types/thinking.d.ts +17 -0
  24. package/dist/types/types.d.ts +443 -0
  25. package/dist/types/utils/yield.d.ts +52 -0
  26. package/package.json +75 -0
  27. package/src/agent-loop.ts +1418 -0
  28. package/src/agent.ts +1236 -0
  29. package/src/append-only-context.ts +297 -0
  30. package/src/compaction/branch-summarization.ts +339 -0
  31. package/src/compaction/compaction.ts +1155 -0
  32. package/src/compaction/entries.ts +133 -0
  33. package/src/compaction/errors.ts +31 -0
  34. package/src/compaction/index.ts +13 -0
  35. package/src/compaction/messages.ts +212 -0
  36. package/src/compaction/openai.ts +552 -0
  37. package/src/compaction/prompts/auto-handoff-threshold-focus.md +1 -0
  38. package/src/compaction/prompts/branch-summary-context.md +5 -0
  39. package/src/compaction/prompts/branch-summary-preamble.md +2 -0
  40. package/src/compaction/prompts/branch-summary.md +30 -0
  41. package/src/compaction/prompts/compaction-short-summary.md +9 -0
  42. package/src/compaction/prompts/compaction-summary-context.md +5 -0
  43. package/src/compaction/prompts/compaction-summary.md +38 -0
  44. package/src/compaction/prompts/compaction-turn-prefix.md +17 -0
  45. package/src/compaction/prompts/compaction-update-summary.md +45 -0
  46. package/src/compaction/prompts/file-operations.md +10 -0
  47. package/src/compaction/prompts/handoff-document.md +49 -0
  48. package/src/compaction/prompts/summarization-system.md +3 -0
  49. package/src/compaction/pruning.ts +99 -0
  50. package/src/compaction/shake.ts +406 -0
  51. package/src/compaction/tool-protection.ts +55 -0
  52. package/src/compaction/utils.ts +185 -0
  53. package/src/compaction.ts +1 -0
  54. package/src/harmony-leak.ts +456 -0
  55. package/src/index.ts +21 -0
  56. package/src/proxy.ts +326 -0
  57. package/src/run-collector.ts +631 -0
  58. package/src/telemetry.ts +2020 -0
  59. package/src/thinking.ts +19 -0
  60. package/src/types.ts +505 -0
  61. package/src/utils/yield.ts +146 -0
@@ -0,0 +1,1418 @@
1
+ /**
2
+ * Agent loop that works with AgentMessage throughout.
3
+ * Transforms to Message[] only at the LLM call boundary.
4
+ */
5
+ import {
6
+ type AssistantMessage,
7
+ type AssistantMessageEvent,
8
+ type Context,
9
+ EventStream,
10
+ isZodSchema,
11
+ streamSimple,
12
+ type ToolResultMessage,
13
+ type TSchema,
14
+ validateToolArguments,
15
+ zodToWireSchema,
16
+ } from "@prometheus-ai/ai";
17
+ import { sanitizeText } from "@prometheus-ai/utils";
18
+ import {
19
+ createHarmonyAuditEvent,
20
+ detectHarmonyLeakInAssistantMessage,
21
+ extractHarmonyRemoved,
22
+ type HarmonyDetection,
23
+ type HarmonyRecoveredToolCall,
24
+ isHarmonyLeakMitigationTarget,
25
+ signalListLabel,
26
+ } from "./harmony-leak";
27
+ import { type AgentRunCoverage, type AgentRunSummary, ToolCallBlockedError } from "./run-collector";
28
+ import {
29
+ type AgentTelemetry,
30
+ failChatSpan,
31
+ finishChatSpan,
32
+ finishExecuteToolSpan,
33
+ finishInvokeAgentSpan,
34
+ fireOnRunEnd,
35
+ PrometheusGenAIAttr,
36
+ recordSkippedTool,
37
+ resolveTelemetry,
38
+ runInActiveSpan,
39
+ type Span,
40
+ startChatSpan,
41
+ startExecuteToolSpan,
42
+ startInvokeAgentSpan,
43
+ } from "./telemetry";
44
+ import type {
45
+ AgentContext,
46
+ AgentEvent,
47
+ AgentLoopConfig,
48
+ AgentMessage,
49
+ AgentTool,
50
+ AgentToolResult,
51
+ StreamFn,
52
+ } from "./types";
53
+ import { yieldIfDue } from "./utils/yield";
54
+
/**
 * Unique marker value that `streamAssistantResponse` uses as the outcome of its
 * abort race. Being a `unique symbol`, it cannot collide with any real result.
 */
const ABORTED: unique symbol = Symbol("agent-loop-aborted");
57
+
/**
 * Error raised when Harmony protocol leakage is detected in an assistant
 * response. `runLoopBody` catches it to decide between truncate-and-resume
 * (when `recovered` is present) and abort-and-retry (when it is not).
 */
class HarmonyLeakInterruption extends Error {
	/** The detection record describing which leak signals fired. */
	readonly detection: HarmonyDetection;
	/** The leaked text that was removed from the response. */
	readonly removed: string;
	/** Salvaged tool call, when one could be recovered from the leaked output. */
	readonly recovered?: HarmonyRecoveredToolCall;

	constructor(detection: HarmonyDetection, removed: string, recovered?: HarmonyRecoveredToolCall) {
		const signals = signalListLabel(detection.signals);
		super(`Detected GPT-5 Harmony protocol leakage (${signals})`);
		this.detection = detection;
		this.removed = removed;
		this.recovered = recovered;
		this.name = "HarmonyLeakInterruption";
	}
}
68
+
/**
 * Turn whatever `tool.execute()` (or its streaming partial-update callback)
 * handed back into a structurally valid {@link AgentToolResult}.
 *
 * Tools are typed, but third-party implementations (MCP, extensions,
 * user-authored AgentTools) can break the contract at runtime, and persisting a
 * malformed result corrupts the session file (missing `content` array → crash on
 * reload). Coercing at this single entry point lets every downstream consumer
 * trust the type.
 *
 * @param raw - The untrusted value returned by the tool.
 * @returns The coerced result plus `malformed: true` when `raw` had no `content`
 *   array at all (in which case a synthetic error result is substituted).
 *   Individual content blocks that are not well-formed `text` / `image` blocks
 *   are dropped silently and do NOT set `malformed`.
 */
function coerceToolResult(raw: unknown): { result: AgentToolResult<any>; malformed: boolean } {
	const source = typeof raw === "object" && raw !== null ? (raw as Record<string, unknown>) : null;
	const candidateBlocks = source?.content;
	const details = source !== null && "details" in source ? source.details : {};
	// A tool may report a non-throwing failure on the result itself (e.g. an
	// aggregator that catches per-entry errors). Keep that flag on the output.
	const flaggedAsError = source !== null && "isError" in source && Boolean(source.isError);

	if (!Array.isArray(candidateBlocks)) {
		const fallback: AgentToolResult<any> = {
			content: [{ type: "text", text: "Tool returned an invalid result: missing content array." }],
			details,
			isError: true,
		};
		return { result: fallback, malformed: true };
	}

	const content: AgentToolResult["content"] = [];
	for (const entry of candidateBlocks) {
		if (typeof entry !== "object" || entry === null || !("type" in entry)) continue;
		const candidate = entry as { type: unknown; text?: unknown; data?: unknown; mimeType?: unknown };
		switch (candidate.type) {
			case "text":
				// Text is rebuilt (and sanitized) rather than passed through.
				if (typeof candidate.text === "string") {
					content.push({ type: "text", text: sanitizeText(candidate.text) });
				}
				break;
			case "image":
				// Image blocks are forwarded as the original object once shape-checked.
				if (typeof candidate.data === "string" && typeof candidate.mimeType === "string") {
					content.push(entry as { type: "image"; data: string; mimeType: string });
				}
				break;
			default:
				break;
		}
	}

	const result: AgentToolResult<any> = flaggedAsError ? { content, details, isError: true } : { content, details };
	return { result, malformed: false };
}
113
+
/**
 * Kick off an agent loop seeded with one or more new prompt messages.
 *
 * The prompts are appended to a copy of the context's message list (the caller's
 * `context.messages` array is not mutated by this function) and a
 * `message_start` / `message_end` pair is emitted for each of them before the
 * loop proper begins.
 *
 * @param prompts - Messages to add to the conversation before the first model call.
 * @param context - Starting agent context.
 * @param config - Loop configuration.
 * @param signal - Optional abort signal forwarded to the loop.
 * @param streamFn - Optional streaming function forwarded to the loop.
 * @returns The event stream. Any error thrown by the loop is reported through
 *   `stream.fail(err)` rather than thrown to the caller.
 */
export function agentLoop(
	prompts: AgentMessage[],
	context: AgentContext,
	config: AgentLoopConfig,
	signal?: AbortSignal,
	streamFn?: StreamFn,
): EventStream<AgentEvent, AgentMessage[]> {
	const stream = createAgentStream();

	const drive = async (): Promise<void> => {
		const newMessages: AgentMessage[] = [...prompts];
		const currentContext: AgentContext = {
			...context,
			messages: [...context.messages, ...prompts],
		};

		stream.push({ type: "agent_start" });
		stream.push({ type: "turn_start" });
		for (const promptMessage of prompts) {
			stream.push({ type: "message_start", message: promptMessage });
			stream.push({ type: "message_end", message: promptMessage });
		}

		try {
			await runLoop(currentContext, newMessages, config, signal, stream, streamFn);
		} catch (failure) {
			stream.fail(failure);
		}
	};

	// Fire-and-forget: the caller observes progress and failures via the stream.
	void drive();
	return stream;
}
150
+
/**
 * Resume an agent loop from the existing context without adding a message.
 * Intended for retries, where the context already ends in a user message or
 * tool results.
 *
 * **Important:** the last context message must convert to a `user` or
 * `toolResult` message via `convertToLlm`; otherwise the LLM provider will
 * reject the request. Only the cheap check (last role is not `assistant`) is
 * done here because `convertToLlm` is only called once per turn.
 *
 * Note that the context is shallow-copied, so the loop appends to the very same
 * `context.messages` array instance the caller passed in.
 *
 * @throws Error synchronously when the context has no messages, or when its
 *   last message has role `assistant`.
 * @returns The event stream. Errors raised by the loop itself are reported via
 *   `stream.fail(err)`.
 */
export function agentLoopContinue(
	context: AgentContext,
	config: AgentLoopConfig,
	signal?: AbortSignal,
	streamFn?: StreamFn,
): EventStream<AgentEvent, AgentMessage[]> {
	const messageCount = context.messages.length;
	if (messageCount === 0) {
		throw new Error("Cannot continue: no messages in context");
	}

	const lastMessage = context.messages[messageCount - 1];
	if (lastMessage.role === "assistant") {
		throw new Error("Cannot continue from message role: assistant");
	}

	const stream = createAgentStream();

	const drive = async (): Promise<void> => {
		const newMessages: AgentMessage[] = [];
		const currentContext: AgentContext = { ...context };

		stream.push({ type: "agent_start" });
		stream.push({ type: "turn_start" });

		try {
			await runLoop(currentContext, newMessages, config, signal, stream, streamFn);
		} catch (failure) {
			stream.fail(failure);
		}
	};

	// Fire-and-forget: the caller observes progress and failures via the stream.
	void drive();
	return stream;
}
191
+
/**
 * Create the event stream shared by every agent-loop entry point.
 *
 * The stream is constructed with two callbacks: one that is true only for
 * `agent_end` events, and one that maps an `agent_end` event to its `messages`
 * (and any other event to an empty list). Presumably `EventStream` uses them as
 * its completion predicate and result extractor — confirm against `EventStream`.
 */
function createAgentStream(): EventStream<AgentEvent, AgentMessage[]> {
	const isAgentEnd = (event: AgentEvent): boolean => event.type === "agent_end";
	const messagesOf = (event: AgentEvent): AgentMessage[] => {
		if (event.type !== "agent_end") return [];
		return event.messages;
	};
	return new EventStream<AgentEvent, AgentMessage[]>(isAgentEnd, messagesOf);
}
198
+
/**
 * Assemble the payload for the `agent_end` event.
 *
 * Without telemetry the event carries only the messages (the
 * backwards-compatible shape). With telemetry, the run collector is snapshotted
 * so consumers get an {@link AgentRunSummary} and {@link AgentRunCoverage}
 * alongside the messages without having to parse OTEL spans. The `onRunEnd`
 * hook is fired only when `markRunEnded()` returns true.
 *
 * @param messages - Messages produced during this run.
 * @param telemetry - Resolved telemetry handle, or `undefined` when disabled.
 * @param stepCount - Number of model calls made so far, passed to the snapshot.
 */
function buildAgentEndEvent(
	messages: AgentMessage[],
	telemetry: AgentTelemetry | undefined,
	stepCount: number,
): Extract<AgentEvent, { type: "agent_end" }> {
	if (!telemetry) {
		return { type: "agent_end", messages };
	}

	const { collector } = telemetry;
	const { summary, coverage } = collector.snapshot({ stepCount });
	const firstEnd = collector.markRunEnded();
	if (firstEnd) {
		fireOnRunEnd(telemetry, summary, coverage);
	}
	return { type: "agent_end", messages, telemetry: summary, coverage };
}
217
+
/**
 * Value resolved by the `detailed()` handle of {@link agentLoopDetailed} and
 * {@link agentLoopContinueDetailed}. It extends the plain `AgentMessage[]`
 * payload with the run-level rollup while leaving the resolved type of
 * `stream.result()` untouched.
 */
export interface AgentLoopDetailedResult {
	/** Messages produced by the run, exactly as `stream.result()` resolves them. */
	readonly messages: AgentMessage[];
	/** Run summary captured from the `onRunEnd` hook; `undefined` if it never fired. */
	readonly telemetry: AgentRunSummary | undefined;
	/** Run coverage captured from the `onRunEnd` hook; `undefined` if it never fired. */
	readonly coverage: AgentRunCoverage | undefined;
}
228
+
/**
 * Variant of {@link agentLoop} that also exposes the run-level summary and
 * coverage.
 *
 * The returned `stream` is the ordinary `EventStream`, and its `result()` still
 * resolves to `AgentMessage[]`. `detailed()` awaits `stream.result()` and
 * returns the messages together with whatever the `onRunEnd` hook captured.
 * Use this when the rollup is needed; use {@link agentLoop} otherwise.
 */
export function agentLoopDetailed(
	prompts: AgentMessage[],
	context: AgentContext,
	config: AgentLoopConfig,
	signal?: AbortSignal,
	streamFn?: StreamFn,
): {
	readonly stream: EventStream<AgentEvent, AgentMessage[]>;
	readonly detailed: () => Promise<AgentLoopDetailedResult>;
} {
	const { config: wiredConfig, detailed: collect } = createDetailedCapture(config);
	const stream = agentLoop(prompts, context, wiredConfig, signal, streamFn);
	const detailed = (): Promise<AgentLoopDetailedResult> => collect(stream);
	return { stream, detailed };
}
253
+
/**
 * Counterpart of {@link agentLoopDetailed} built on {@link agentLoopContinue}:
 * resumes from the existing context and additionally exposes a `detailed()`
 * handle that resolves to the messages plus the captured run summary/coverage.
 */
export function agentLoopContinueDetailed(
	context: AgentContext,
	config: AgentLoopConfig,
	signal?: AbortSignal,
	streamFn?: StreamFn,
): {
	readonly stream: EventStream<AgentEvent, AgentMessage[]>;
	readonly detailed: () => Promise<AgentLoopDetailedResult>;
} {
	const { config: wiredConfig, detailed: collect } = createDetailedCapture(config);
	const stream = agentLoopContinue(context, wiredConfig, signal, streamFn);
	const detailed = (): Promise<AgentLoopDetailedResult> => collect(stream);
	return { stream, detailed };
}
271
+
/**
 * Produce a copy of `config` whose `telemetry.onRunEnd` hook records the run
 * summary and coverage, so the detailed helpers can read them without consuming
 * the event stream themselves.
 *
 * Any `onRunEnd` hook already present on the caller's config is still invoked,
 * after the values have been recorded. The returned config always carries a
 * `telemetry` object, even when the caller supplied none.
 *
 * @returns The wired config, and a `detailed(stream)` function that awaits
 *   `stream.result()` and pairs the messages with the recorded values (both
 *   `undefined` if the hook never fired).
 */
function createDetailedCapture(config: AgentLoopConfig): {
	readonly config: AgentLoopConfig;
	readonly detailed: (stream: EventStream<AgentEvent, AgentMessage[]>) => Promise<AgentLoopDetailedResult>;
} {
	let recordedSummary: AgentRunSummary | undefined;
	let recordedCoverage: AgentRunCoverage | undefined;
	// Read once up front so later mutation of the caller's config has no effect.
	const callerHook = config.telemetry?.onRunEnd;

	const wiredConfig: AgentLoopConfig = {
		...config,
		telemetry: {
			...config.telemetry,
			onRunEnd: (summary, coverage) => {
				recordedSummary = summary;
				recordedCoverage = coverage;
				userHookPassthrough(summary, coverage);
			},
		},
	};

	function userHookPassthrough(summary: AgentRunSummary, coverage: AgentRunCoverage): void {
		callerHook?.(summary, coverage);
	}

	async function detailed(stream: EventStream<AgentEvent, AgentMessage[]>): Promise<AgentLoopDetailedResult> {
		const messages = await stream.result();
		return { messages, telemetry: recordedSummary, coverage: recordedCoverage };
	}

	return { config: wiredConfig, detailed };
}
305
+
/**
 * Apply provider-specific clean-up to the LLM message list.
 *
 * Only the `cerebras` provider is affected: `thinking` blocks are removed from
 * assistant messages whose content is an array. For every other provider — and
 * when no assistant message actually contained a `thinking` block — the input
 * array itself is returned, so callers can rely on reference equality to detect
 * "nothing changed". Messages that are not modified are reused by reference.
 */
function normalizeMessagesForProvider(
	messages: Context["messages"],
	model: AgentLoopConfig["model"],
): Context["messages"] {
	if (model.provider !== "cerebras") {
		return messages;
	}

	let strippedAny = false;
	const rebuilt: Context["messages"] = [];
	for (const message of messages) {
		if (message.role === "assistant" && Array.isArray(message.content)) {
			const withoutThinking = message.content.filter(block => block.type !== "thinking");
			if (withoutThinking.length !== message.content.length) {
				strippedAny = true;
				rebuilt.push({ ...message, content: withoutThinking });
				continue;
			}
		}
		rebuilt.push(message);
	}

	return strippedAny ? rebuilt : messages;
}
331
+
/** Name of the synthetic string property injected into tool parameter schemas. */
export const INTENT_FIELD = "_i";

/**
 * Return a copy of a JSON-schema-like object with the {@link INTENT_FIELD}
 * string property placed first in `properties`.
 *
 * - Non-object (or array) schemas are returned untouched.
 * - If the schema already declares the field, it is only moved to the front
 *   and/or added to `required` as needed; when nothing needs to change the
 *   original object is returned by reference.
 * - In `"require"` mode the field is listed in `required`; in `"optional"` mode
 *   `required` is left as the schema had it.
 *
 * The field is never added to `required` twice: JSON Schema requires the
 * entries of `required` to be unique, and a schema that already lists the field
 * there (even without declaring the property) must not end up with a duplicate.
 *
 * @param schema - Wire-format parameter schema (treated as opaque if not an object).
 * @param mode - Whether the injected field is mandatory. Defaults to `"require"`.
 * @returns The augmented schema, or the input itself when no change is needed.
 */
function injectIntentIntoSchema(schema: unknown, mode: "require" | "optional" = "require"): unknown {
	if (!schema || typeof schema !== "object" || Array.isArray(schema)) return schema;
	const schemaRecord = schema as Record<string, unknown>;
	const propertiesValue = schemaRecord.properties;
	const properties =
		propertiesValue && typeof propertiesValue === "object" && !Array.isArray(propertiesValue)
			? (propertiesValue as Record<string, unknown>)
			: {};
	const requiredValue = schemaRecord.required;
	// Non-string entries are dropped whenever `required` has to be rebuilt.
	const required = Array.isArray(requiredValue)
		? requiredValue.filter((item): item is string => typeof item === "string")
		: [];
	const alreadyRequired = required.includes(INTENT_FIELD);
	const requiredWithIntent = alreadyRequired ? required : [...required, INTENT_FIELD];

	if (INTENT_FIELD in properties) {
		const { [INTENT_FIELD]: intentProp, ...rest } = properties;
		const needsReorder = Object.keys(properties)[0] !== INTENT_FIELD;
		const needsRequired = mode === "require" && !alreadyRequired;
		if (!needsReorder && !needsRequired) return schema;
		return {
			...schemaRecord,
			...(needsReorder ? { properties: { [INTENT_FIELD]: intentProp, ...rest } } : {}),
			...(needsRequired ? { required: requiredWithIntent } : {}),
		};
	}

	return {
		...schemaRecord,
		properties: {
			[INTENT_FIELD]: {
				type: "string",
			},
			...properties,
		},
		...(mode === "require" ? { required: requiredWithIntent } : {}),
	};
}
368
+
/**
 * Convert agent tools into the tool list sent to the LLM.
 *
 * Every tool gets a string `description` (empty when the tool has none). When
 * intent injection is active, the tool's parameter schema is additionally
 * extended with the {@link INTENT_FIELD} property; Zod schemas are converted to
 * their wire form first. Tools whose resolved intent mode is `"omit"` keep
 * their parameters untouched.
 *
 * Injection is active only when `injectIntent` is true AND the
 * `PROMETHEUS_NO_INTENT` environment variable (read via `Bun.env`) is not `"1"`.
 *
 * @param tools - Tools from the agent context; may be absent.
 * @param injectIntent - Whether the caller wants the intent field injected.
 * @returns The normalized tool list, or `undefined` when `tools` is absent.
 */
export function normalizeTools(tools: AgentContext["tools"], injectIntent: boolean): Context["tools"] {
	// Short-circuits, so the environment is consulted only when injection was requested.
	const injectionActive = injectIntent && Bun.env.PROMETHEUS_NO_INTENT !== "1";

	return tools?.map(tool => {
		const intentMode = resolveIntentMode(tool.intent);
		let parameters: TSchema = tool.parameters;
		if (injectionActive && intentMode !== "omit") {
			const wireSchema = isZodSchema(parameters) ? zodToWireSchema(parameters) : parameters;
			parameters = injectIntentIntoSchema(wireSchema, intentMode) as TSchema;
		}
		return { ...tool, parameters, description: tool.description ?? "" };
	});
}
386
+
/**
 * Map a tool's `intent` setting onto the schema-injection mode.
 *
 * `"optional"` and `"omit"` are passed through, a function-valued intent means
 * the field is omitted from the schema, and anything else (including an unset
 * value) defaults to `"require"`.
 */
function resolveIntentMode(intent: AgentTool["intent"]): "require" | "optional" | "omit" {
	switch (intent) {
		case "optional":
		case "omit":
			return intent;
		default:
			return typeof intent === "function" ? "omit" : "require";
	}
}
392
+
/**
 * Split tool-call arguments into the intent text and the remaining arguments.
 *
 * @param args - Raw arguments as produced by the model.
 * @returns `strippedArgs` — a copy of `args` without the {@link INTENT_FIELD}
 *   key — and, when that field held a string, `intent` set to its trimmed value
 *   (`undefined` if it was blank). A non-string field yields no `intent` key.
 */
function extractIntent(args: Record<string, unknown>): { intent?: string; strippedArgs: Record<string, unknown> } {
	const rawIntent = args[INTENT_FIELD];
	const strippedArgs: Record<string, unknown> = { ...args };
	delete strippedArgs[INTENT_FIELD];

	if (typeof rawIntent !== "string") {
		return { strippedArgs };
	}

	const cleaned = rawIntent.trim();
	const intent = cleaned.length > 0 ? cleaned : undefined;
	return { intent, strippedArgs };
}
401
+
/**
 * Telemetry wrapper around the loop body, shared by `agentLoop` and
 * `agentLoopContinue`.
 *
 * Resolves telemetry from the config, opens the invoke-agent span, runs
 * {@link runLoopBody} inside it, and always closes the span afterwards with the
 * final step count and — on failure — the error that was thrown. The error is
 * rethrown unchanged.
 */
async function runLoop(
	currentContext: AgentContext,
	newMessages: AgentMessage[],
	config: AgentLoopConfig,
	signal: AbortSignal | undefined,
	stream: EventStream<AgentEvent, AgentMessage[]>,
	streamFn?: StreamFn,
): Promise<void> {
	const telemetry = resolveTelemetry(config.telemetry, config.sessionId);
	const invokeAgentSpan = startInvokeAgentSpan(telemetry, config.model);
	// Shared mutable counter: the body increments it, the span close reads it.
	const stepCounter = { count: 0 };

	const closeSpan = (errorObject: unknown): void => {
		finishInvokeAgentSpan(telemetry, invokeAgentSpan, {
			stepCount: stepCounter.count,
			errorObject,
		});
	};
	const body = (): Promise<void> =>
		runLoopBody(
			currentContext,
			newMessages,
			config,
			signal,
			stream,
			telemetry,
			invokeAgentSpan,
			stepCounter,
			streamFn,
		);

	try {
		await runInActiveSpan(invokeAgentSpan, body);
	} catch (failure) {
		closeSpan(failure);
		throw failure;
	}
	closeSpan(undefined);
}
441
+
/**
 * Mutable box holding the number of model calls started in the current run.
 * It is passed by reference so the loop body can increment it while the
 * surrounding telemetry code reads the latest value.
 */
interface StepCounter {
	/** Number of model calls started so far. */
	count: number;
}
445
+
/**
 * Sanitize the configured per-turn tool-call cap.
 *
 * @param value - Raw configuration value.
 * @returns The value truncated toward zero when that yields a positive integer;
 *   `undefined` (meaning "no cap") for a missing, non-finite, zero, negative, or
 *   fractional-below-one value.
 */
function normalizeMaxToolCallsPerTurn(value: number | undefined): number | undefined {
	if (value === undefined) return undefined;
	if (!Number.isFinite(value)) return undefined;

	const wholeCalls = Math.trunc(value);
	if (wholeCalls > 0) {
		return wholeCalls;
	}
	return undefined;
}
451
+
/**
 * Copy an assistant message and mark the copy as a clean tool-use turn.
 *
 * Each content block is shallow-copied; tool-call blocks additionally get a
 * deep copy of their `arguments`, so later mutation of the original cannot leak
 * into the copy. The copy's `stopReason` is forced to `"toolUse"` and its
 * `errorMessage` / `errorStatus` are reset to `undefined`. The input message is
 * not modified.
 */
function cloneAssistantMessageForToolCallCap(message: AssistantMessage): AssistantMessage {
	const content: AssistantMessage["content"] = message.content.map(block =>
		block.type === "toolCall" ? { ...block, arguments: structuredClone(block.arguments) } : { ...block },
	);

	return {
		...message,
		content,
		stopReason: "toolUse",
		errorMessage: undefined,
		errorStatus: undefined,
	};
}
466
+
/**
 * Core turn loop. Each inner iteration injects pending (steering / follow-up)
 * messages, asks the model for an assistant message, and then either executes
 * the tool calls it contains or pairs them with placeholder results.
 *
 * Termination:
 * - An assistant message with stop reason `error` or `aborted` ends the run
 *   immediately (after placeholder results for its tool calls are recorded).
 * - Otherwise the inner loop runs while there are tool calls to answer or
 *   pending messages to deliver; the outer loop restarts it when
 *   `getFollowUpMessages` yields more messages after the agent would stop.
 * - On both exits an `agent_end` event is pushed and the stream is ended with
 *   `newMessages`.
 *
 * Harmony leak handling: a {@link HarmonyLeakInterruption} thrown by
 * `streamAssistantResponse` is either resumed from its recovered message
 * (at most 2 consecutive times) or retried from scratch (at most 2 consecutive
 * retries); beyond those limits an `escalated` audit is emitted and an `Error`
 * is thrown. Both counters reset after any successful model call.
 *
 * @param currentContext - Live context; its `messages` array is appended to in place.
 * @param newMessages - Accumulator of messages produced by this run; appended to in place.
 * @param stepCounter - Shared counter read when building the `agent_end` event.
 * @throws Any non-Harmony error from `streamAssistantResponse`, tool execution,
 *   or config hooks, plus the escalation errors described above.
 */
async function runLoopBody(
	currentContext: AgentContext,
	newMessages: AgentMessage[],
	config: AgentLoopConfig,
	signal: AbortSignal | undefined,
	stream: EventStream<AgentEvent, AgentMessage[]>,
	telemetry: AgentTelemetry | undefined,
	invokeAgentSpan: Span | undefined,
	stepCounter: StepCounter,
	streamFn?: StreamFn,
): Promise<void> {
	type ToolCallContent = Extract<AssistantMessage["content"][number], { type: "toolCall" }>;

	let firstTurn = true;
	// Check for steering messages at start (user may have typed while waiting)
	let pendingMessages: AgentMessage[] = (await config.getSteeringMessages?.()) || [];
	let harmonyRetryAttempt = 0;
	let harmonyTruncateResumeCount = 0;

	// Outer loop: continues when queued follow-up messages arrive after agent would stop
	while (true) {
		let hasMoreToolCalls = true;

		// Inner loop: process tool calls and steering messages
		while (hasMoreToolCalls || pendingMessages.length > 0) {
			// Yield at the top of each iteration to prevent busy-wait when
			// the agent loop is executing tool calls back-to-back.
			await yieldIfDue();
			// The entry points already emitted the first `turn_start`.
			if (!firstTurn) {
				stream.push({ type: "turn_start" });
			} else {
				firstTurn = false;
			}

			// Process pending messages (inject before next assistant response)
			if (pendingMessages.length > 0) {
				for (const message of pendingMessages) {
					stream.push({ type: "message_start", message });
					stream.push({ type: "message_end", message });
					currentContext.messages.push(message);
					newMessages.push(message);
				}
				pendingMessages = [];
			}

			// Refresh prompt/tool context from live state before each model call
			if (config.syncContextBeforeModelCall) {
				await config.syncContextBeforeModelCall(currentContext);
			}

			// Stream assistant response
			let message: AssistantMessage;
			try {
				message = await streamAssistantResponse(
					currentContext,
					config,
					signal,
					stream,
					telemetry,
					invokeAgentSpan,
					stepCounter,
					streamFn,
					harmonyRetryAttempt,
				);
				harmonyRetryAttempt = 0;
				harmonyTruncateResumeCount = 0;
			} catch (err) {
				if (!(err instanceof HarmonyLeakInterruption)) throw err;
				if (err.recovered) {
					// Truncate-and-resume: continue with the salvaged message.
					if (harmonyTruncateResumeCount >= 2) {
						await emitHarmonyAudit(config, err, "escalated", harmonyRetryAttempt);
						throw new Error(
							`GPT-5 Harmony leak recurred after truncate-and-resume recovery (${signalListLabel(err.detection.signals)}).`,
						);
					}
					harmonyTruncateResumeCount++;
					message = err.recovered.message;
					await emitHarmonyAudit(config, err, "truncate_resume", harmonyRetryAttempt);
				} else {
					// Abort-and-retry: nothing salvageable, ask the model again.
					if (harmonyRetryAttempt >= 2) {
						await emitHarmonyAudit(config, err, "escalated", harmonyRetryAttempt);
						throw new Error(
							`GPT-5 Harmony leak persisted after ${harmonyRetryAttempt} retries (${signalListLabel(err.detection.signals)}).`,
						);
					}
					await emitHarmonyAudit(config, err, "abort_retry", harmonyRetryAttempt);
					harmonyRetryAttempt++;
					// Force the loop condition to hold for the retry. If this turn was
					// entered only because messages were pending, they have already been
					// drained above and `hasMoreToolCalls` may still be false from the
					// previous turn — without this the retry would be silently dropped.
					hasMoreToolCalls = true;
					continue;
				}
			}
			newMessages.push(message);
			let steeringMessagesFromExecution: AgentMessage[] | undefined;

			if (message.stopReason === "error" || message.stopReason === "aborted") {
				// Create placeholder tool results for any tool calls in the aborted message
				// This maintains the tool_use/tool_result pairing that the API requires
				const toolCalls = message.content.filter((c): c is ToolCallContent => c.type === "toolCall");
				const toolResults: ToolResultMessage[] = [];
				for (const toolCall of toolCalls) {
					const result = createAbortedToolResult(toolCall, stream, message.stopReason, message.errorMessage);
					currentContext.messages.push(result);
					newMessages.push(result);
					toolResults.push(result);
					// The placeholder result above keeps the API's tool_use/tool_result
					// pairing intact, but no execute_tool span is started for these
					// calls. Mirror the run-collector entry directly so the run
					// summary's tool counters and `coverage.toolsInvoked` reflect
					// what the user actually saw on the wire.
					recordSkippedTool(telemetry, {
						toolCallId: toolCall.id,
						toolName: toolCall.name,
						status: message.stopReason === "aborted" ? "aborted" : "error",
					});
				}
				stream.push({ type: "turn_end", message, toolResults });
				stream.push(buildAgentEndEvent(newMessages, telemetry, stepCounter.count));
				stream.end(newMessages);
				return;
			}

			// Run tools whenever the turn carries tool_use blocks AND was not truncated.
			// `stop_reason` is provider metadata that never goes back on the wire, so it
			// does not gate continuation validity: replaying a tool_use turn with the
			// tool_results appended is accepted whether the turn ended on `tool_use` or
			// `end_turn` (adaptive/interleaved-thinking Opus routinely emits tool calls
			// under `end_turn`; verified against the live Anthropic API). The only
			// continuation hazard is a thinking block carrying a stale/invalid signature,
			// which `transformMessages` already neutralizes — it strips the signature on
			// non-`toolUse` turns and the encoder downgrades the unsigned block to text,
			// which the API accepts. So treat `stop` (end_turn/pause_turn) the same as
			// `toolUse`. `length` (max_tokens) is the one reason we must NOT run: the
			// trailing tool_use may be truncated with incomplete arguments — those calls
			// are abandoned below. (`error`/`aborted` already returned above.)
			const toolCalls = message.content.filter((c): c is ToolCallContent => c.type === "toolCall");
			const runnableStop = message.stopReason === "toolUse" || message.stopReason === "stop";
			hasMoreToolCalls = runnableStop && toolCalls.length > 0;

			const toolResults: ToolResultMessage[] = [];
			if (hasMoreToolCalls) {
				const executionResult = await executeToolCalls(
					currentContext,
					message,
					signal,
					stream,
					config,
					telemetry,
					invokeAgentSpan,
				);

				toolResults.push(...executionResult.toolResults);
				steeringMessagesFromExecution = executionResult.steeringMessages;

				for (const result of toolResults) {
					currentContext.messages.push(result);
					newMessages.push(result);
				}
			} else if (toolCalls.length > 0) {
				// Turn ended on a non-runnable reason (`length` truncation) but left
				// toolCall blocks behind. The trailing call's arguments may be incomplete,
				// so don't execute or continue — pair each with a placeholder result to keep
				// the tool_use/tool_result contract valid for any later request that
				// replays this turn. When the truncation was `length`, surface an actionable
				// hint so the model doesn't loop by re-emitting the same oversized payload
				// (e.g. 1000+ line `write` content blowing past the model's output cap).
				const skipReason = message.stopReason === "length" ? "length" : "skipped";
				for (const toolCall of toolCalls) {
					const result = createAbortedToolResult(toolCall, stream, skipReason);
					currentContext.messages.push(result);
					newMessages.push(result);
					toolResults.push(result);
					recordSkippedTool(telemetry, {
						toolCallId: toolCall.id,
						toolName: toolCall.name,
						status: "skipped",
					});
				}
			}

			stream.push({ type: "turn_end", message, toolResults });

			// Steering messages gathered during tool execution take precedence;
			// otherwise poll the config for new ones.
			pendingMessages = steeringMessagesFromExecution ?? ((await config.getSteeringMessages?.()) || []);
		}

		// Agent would stop here. Check for follow-up messages.
		await config.onBeforeYield?.();
		const followUpMessages = (await config.getFollowUpMessages?.()) || [];
		if (followUpMessages.length > 0) {
			// Set as pending so inner loop processes them
			pendingMessages = followUpMessages;
			continue;
		}

		// No more messages, exit
		break;
	}

	stream.push(buildAgentEndEvent(newMessages, telemetry, stepCounter.count));
	stream.end(newMessages);
}
668
+
/**
 * Report a Harmony leak decision to the optional `config.onHarmonyLeak` hook.
 *
 * The audit event is built only when the hook is configured; without a hook
 * this is a no-op.
 *
 * @param config - Loop configuration supplying the hook and the model.
 * @param interruption - The caught interruption (source of `detection` and `removed`).
 * @param action - What the loop decided to do about the leak.
 * @param retryN - Retry attempt number at the time of the decision.
 */
async function emitHarmonyAudit(
	config: AgentLoopConfig,
	interruption: HarmonyLeakInterruption,
	action: "truncate_resume" | "abort_retry" | "escalated",
	retryN: number,
): Promise<void> {
	const { detection, removed } = interruption;
	const { model } = config;
	// Optional call: arguments are not evaluated when the hook is absent.
	await config.onHarmonyLeak?.(createHarmonyAuditEvent({ action, detection, model, retryN, removed }));
}
685
+
686
+ /**
687
+ * Stream an assistant response from the LLM.
688
+ * This is where AgentMessage[] gets transformed to Message[] for the LLM.
689
+ */
690
+ async function streamAssistantResponse(
691
+ context: AgentContext,
692
+ config: AgentLoopConfig,
693
+ signal: AbortSignal | undefined,
694
+ stream: EventStream<AgentEvent, AgentMessage[]>,
695
+ telemetry: AgentTelemetry | undefined,
696
+ invokeAgentSpan: Span | undefined,
697
+ stepCounter: StepCounter,
698
+ streamFn?: StreamFn,
699
+ harmonyRetryAttempt = 0,
700
+ ): Promise<AssistantMessage> {
701
+ // Apply context transform if configured (AgentMessage[] → AgentMessage[])
702
+ let messages = context.messages;
703
+ if (config.transformContext) {
704
+ messages = await config.transformContext(messages, signal);
705
+ }
706
+
707
+ // Convert to LLM-compatible messages (AgentMessage[] → Message[])
708
+ const llmMessages = await config.convertToLlm(messages);
709
+ const normalizedMessages = normalizeMessagesForProvider(llmMessages, config.model);
710
+
711
+ // Build LLM context — append-only mode caches system prompt + tools
712
+ // AND keeps an append-only message log so prior-turn bytes are stable.
713
+ let llmContext: Context;
714
+ if (config.appendOnlyContext) {
715
+ config.appendOnlyContext.syncMessages(normalizedMessages);
716
+ llmContext = config.appendOnlyContext.build(context, { intentTracing: !!config.intentTracing });
717
+ } else {
718
+ llmContext = {
719
+ systemPrompt: context.systemPrompt,
720
+ messages: normalizedMessages,
721
+ tools: normalizeTools(context.tools, !!config.intentTracing),
722
+ };
723
+ }
724
+
725
+ const streamFunction = streamFn || streamSimple;
726
+
727
+ // Resolve API key (important for expiring tokens) — do this before resolving
728
+ // metadata so that the session-sticky credential recorded by getApiKey is
729
+ // visible to metadataResolver (e.g. for the correct account_uuid in metadata.user_id).
730
+ const resolvedApiKey =
731
+ (config.getApiKey ? await config.getApiKey(config.model.provider) : undefined) || config.apiKey;
732
+
733
+ // Re-resolve metadata after credential selection so the per-request value
734
+ // reflects the credential actually used, not the snapshot from AgentLoopConfig construction.
735
+ const resolvedMetadata = config.metadataResolver ? config.metadataResolver(config.model.provider) : config.metadata;
736
+
737
+ const dynamicToolChoice = config.getToolChoice?.();
738
+ const dynamicReasoning = config.getReasoning?.();
739
+ const harmonyMitigationEnabled = isHarmonyLeakMitigationTarget(config.model);
740
+ const harmonyAbortController = harmonyMitigationEnabled ? new AbortController() : undefined;
741
+ const maxToolCallsPerTurn = normalizeMaxToolCallsPerTurn(config.maxToolCallsPerTurn);
742
+ const toolCallCapAbortController = maxToolCallsPerTurn === undefined ? undefined : new AbortController();
743
+ const requestSignals: AbortSignal[] = [];
744
+ if (signal) requestSignals.push(signal);
745
+ if (harmonyAbortController) requestSignals.push(harmonyAbortController.signal);
746
+ if (toolCallCapAbortController) requestSignals.push(toolCallCapAbortController.signal);
747
+ const requestSignal =
748
+ requestSignals.length === 0
749
+ ? undefined
750
+ : requestSignals.length === 1
751
+ ? requestSignals[0]
752
+ : AbortSignal.any(requestSignals);
753
+ const effectiveTemperature =
754
+ harmonyRetryAttempt > 0 && config.temperature !== undefined ? config.temperature + 0.05 : config.temperature;
755
+ const effectiveToolChoice = dynamicToolChoice ?? config.toolChoice;
756
+ const effectiveReasoning = dynamicReasoning ?? config.reasoning;
757
+
758
+ const chatStepNumber = stepCounter.count;
759
+ stepCounter.count += 1;
760
+ const chatSpan = startChatSpan(telemetry, config.model, {
761
+ parent: invokeAgentSpan,
762
+ stepNumber: chatStepNumber,
763
+ request: {
764
+ maxTokens: config.maxTokens,
765
+ temperature: effectiveTemperature,
766
+ topP: config.topP,
767
+ topK: config.topK,
768
+ presencePenalty: config.presencePenalty,
769
+ serviceTier: config.serviceTier,
770
+ reasoningEffort: typeof effectiveReasoning === "string" ? effectiveReasoning : undefined,
771
+ toolChoice: effectiveToolChoice,
772
+ tools: llmContext.tools,
773
+ systemPrompt: llmContext.systemPrompt,
774
+ messages: llmContext.messages,
775
+ },
776
+ });
777
+
778
+ // Wrap the user-supplied onResponse so we always observe response headers
779
+ // for telemetry (`ChatUsageEvent.headers`, gateway auto-detection) without
780
+ // stealing them from the configured hook.
781
+ let capturedHeaders: Readonly<Record<string, string>> | undefined;
782
+ const userOnResponse = config.onResponse;
783
+ const captureOnResponse: AgentLoopConfig["onResponse"] = (response, modelInfo) => {
784
+ capturedHeaders = response.headers;
785
+ return userOnResponse?.(response, modelInfo);
786
+ };
787
+
788
+ const finishChat = async (message: AssistantMessage): Promise<void> => {
789
+ await finishChatSpan(telemetry, chatSpan, message, {
790
+ stepNumber: chatStepNumber,
791
+ serviceTier: config.serviceTier,
792
+ responseHeaders: capturedHeaders,
793
+ baseUrl: config.model.baseUrl,
794
+ });
795
+ };
796
+
797
+ try {
798
+ return await runInActiveSpan(chatSpan, async () => {
799
+ const response = await streamFunction(config.model, llmContext, {
800
+ ...config,
801
+ apiKey: resolvedApiKey,
802
+ metadata: resolvedMetadata,
803
+ toolChoice: effectiveToolChoice,
804
+ reasoning: effectiveReasoning,
805
+ temperature: effectiveTemperature,
806
+ signal: requestSignal,
807
+ onResponse: captureOnResponse,
808
+ });
809
+
810
+ let partialMessage: AssistantMessage | null = null;
811
+ let addedPartial = false;
812
+
813
+ const responseIterator = response[Symbol.asyncIterator]();
814
+ let completedToolCalls = 0;
815
+ let cappedMessage: AssistantMessage | undefined;
816
+ let capFinalized = false;
817
+
818
+ const finishCappedAssistantMessage = async (): Promise<AssistantMessage | undefined> => {
819
+ if (!cappedMessage) return undefined;
820
+ responseIterator.return?.()?.catch(() => {});
821
+ if (!capFinalized) {
822
+ if (addedPartial) {
823
+ context.messages[context.messages.length - 1] = cappedMessage;
824
+ } else {
825
+ context.messages.push(cappedMessage);
826
+ stream.push({ type: "message_start", message: { ...cappedMessage } });
827
+ }
828
+ stream.push({ type: "message_end", message: cappedMessage });
829
+ await finishChat(cappedMessage);
830
+ capFinalized = true;
831
+ }
832
+ return cappedMessage;
833
+ };
834
+
835
+ // Set up a single abort race: register the abort listener once for the whole
836
+ // stream and reuse the same race promise for every iterator.next() instead of
837
+ // allocating Promise.withResolvers and add/removeEventListener per event.
838
+ let abortRacePromise: Promise<typeof ABORTED> | undefined;
839
+ let detachAbortListener: (() => void) | undefined;
840
+ if (requestSignal) {
841
+ if (requestSignal.aborted) {
842
+ const aborted = emitAbortedAssistantMessage(partialMessage, addedPartial, context, config, stream);
843
+ await finishChat(aborted);
844
+ return aborted;
845
+ }
846
+ const { promise, resolve } = Promise.withResolvers<typeof ABORTED>();
847
+ const onAbort = () => resolve(ABORTED);
848
+ requestSignal.addEventListener("abort", onAbort, { once: true });
849
+ abortRacePromise = promise;
850
+ detachAbortListener = () => requestSignal.removeEventListener("abort", onAbort);
851
+ }
852
+
853
+ try {
854
+ while (true) {
855
+ let next: IteratorResult<AssistantMessageEvent>;
856
+ if (abortRacePromise) {
857
+ const result = await Promise.race([responseIterator.next(), abortRacePromise]);
858
+ if (result === ABORTED) {
859
+ if (toolCallCapAbortController?.signal.aborted) {
860
+ const capped = await finishCappedAssistantMessage();
861
+ if (capped) return capped;
862
+ }
863
+ responseIterator.return?.()?.catch(() => {});
864
+ const aborted = emitAbortedAssistantMessage(partialMessage, addedPartial, context, config, stream);
865
+ await finishChat(aborted);
866
+ return aborted;
867
+ }
868
+ next = result;
869
+ } else {
870
+ next = await responseIterator.next();
871
+ }
872
+ if (requestSignal?.aborted) {
873
+ if (toolCallCapAbortController?.signal.aborted) {
874
+ const capped = await finishCappedAssistantMessage();
875
+ if (capped) return capped;
876
+ }
877
+ const aborted = emitAbortedAssistantMessage(partialMessage, addedPartial, context, config, stream);
878
+ await finishChat(aborted);
879
+ return aborted;
880
+ }
881
+ if (next.done) break;
882
+
883
+ const event = next.value;
884
+ // Yield to the event loop periodically to prevent busy-wait
885
+ // when the LLM is streaming chunks faster than the loop can rest.
886
+ await yieldIfDue();
887
+
888
+ switch (event.type) {
889
+ case "start":
890
+ partialMessage = event.partial;
891
+ context.messages.push(partialMessage);
892
+ addedPartial = true;
893
+ stream.push({ type: "message_start", message: { ...partialMessage } });
894
+ break;
895
+
896
+ case "text_start":
897
+ case "text_delta":
898
+ case "text_end":
899
+ case "thinking_start":
900
+ case "thinking_delta":
901
+ case "thinking_end":
902
+ case "toolcall_start":
903
+ case "toolcall_delta":
904
+ case "toolcall_end":
905
+ if (partialMessage) {
906
+ partialMessage = event.partial;
907
+ context.messages[context.messages.length - 1] = partialMessage;
908
+ config.onAssistantMessageEvent?.(partialMessage, event);
909
+ if (signal?.aborted) {
910
+ continue;
911
+ }
912
+ stream.push({
913
+ type: "message_update",
914
+ assistantMessageEvent: event,
915
+ message: { ...partialMessage },
916
+ });
917
+ if (event.type === "toolcall_end" && maxToolCallsPerTurn !== undefined) {
918
+ completedToolCalls++;
919
+ if (completedToolCalls >= maxToolCallsPerTurn) {
920
+ cappedMessage = cloneAssistantMessageForToolCallCap(partialMessage);
921
+ toolCallCapAbortController?.abort();
922
+ const capped = await finishCappedAssistantMessage();
923
+ if (capped) return capped;
924
+ }
925
+ }
926
+ }
927
+ break;
928
+
929
+ case "done":
930
+ case "error": {
931
+ const finalMessage = await response.result();
932
+ if (harmonyMitigationEnabled) {
933
+ const detection = detectHarmonyLeakInAssistantMessage(finalMessage);
934
+ if (detection) {
935
+ const removed = extractHarmonyRemoved(finalMessage, detection);
936
+ if (addedPartial) {
937
+ context.messages.pop();
938
+ addedPartial = false;
939
+ }
940
+ throw new HarmonyLeakInterruption(detection, removed);
941
+ }
942
+ }
943
+ if (addedPartial) {
944
+ context.messages[context.messages.length - 1] = finalMessage;
945
+ } else {
946
+ context.messages.push(finalMessage);
947
+ }
948
+ if (!addedPartial) {
949
+ stream.push({ type: "message_start", message: { ...finalMessage } });
950
+ }
951
+ stream.push({ type: "message_end", message: finalMessage });
952
+ await finishChat(finalMessage);
953
+ return finalMessage;
954
+ }
955
+ }
956
+ }
957
+ } finally {
958
+ detachAbortListener?.();
959
+ }
960
+
961
+ const trailing = await response.result();
962
+ if (harmonyMitigationEnabled) {
963
+ const detection = detectHarmonyLeakInAssistantMessage(trailing);
964
+ if (detection) {
965
+ if (addedPartial) {
966
+ context.messages.pop();
967
+ addedPartial = false;
968
+ }
969
+ throw new HarmonyLeakInterruption(detection, extractHarmonyRemoved(trailing, detection));
970
+ }
971
+ }
972
+ await finishChat(trailing);
973
+ return trailing;
974
+ });
975
+ } catch (err) {
976
+ failChatSpan(telemetry, chatSpan, {
977
+ errorObject: err,
978
+ responseHeaders: capturedHeaders,
979
+ baseUrl: config.model.baseUrl,
980
+ });
981
+ throw err;
982
+ }
983
+ }
984
+
985
/**
 * Finalize an assistant turn that ended because the request was aborted.
 *
 * Builds an assistant message with `stopReason: "aborted"`: either a shallow
 * copy of the streamed partial (when one exists) or an empty placeholder with
 * zeroed usage, tagged with the configured model's api/provider/id.
 *
 * The message is then written into `context.messages` — overwriting the last
 * slot when the partial was already appended, otherwise appended together with
 * a `message_start` event — and a `message_end` event is always emitted.
 *
 * @param partialMessage Latest streamed partial, or `null` if nothing arrived.
 * @param addedPartial Whether the partial already occupies the last slot of `context.messages`.
 * @param context Agent context whose `messages` array is mutated in place.
 * @param config Loop configuration; only `config.model` is read, and only when there is no partial.
 * @param stream Event stream that receives `message_start` / `message_end`.
 * @returns The aborted assistant message that was stored in the context.
 */
function emitAbortedAssistantMessage(
  partialMessage: AssistantMessage | null,
  addedPartial: boolean,
  context: AgentContext,
  config: AgentLoopConfig,
  stream: EventStream<AgentEvent, AgentMessage[]>,
): AssistantMessage {
  const errorMessage = "Request was aborted";

  let abortedMessage: AssistantMessage;
  if (partialMessage) {
    // Keep whatever content streamed so far; only the terminal fields change.
    abortedMessage = { ...partialMessage, stopReason: "aborted", errorMessage };
  } else {
    // Nothing streamed: synthesize an empty message with zero usage.
    abortedMessage = {
      role: "assistant",
      content: [],
      api: config.model.api,
      provider: config.model.provider,
      model: config.model.id,
      usage: {
        input: 0,
        output: 0,
        cacheRead: 0,
        cacheWrite: 0,
        totalTokens: 0,
        cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
      },
      stopReason: "aborted",
      errorMessage,
      timestamp: Date.now(),
    };
  }

  const history = context.messages;
  if (!addedPartial) {
    // No message_start has been emitted for this turn yet, so emit one now
    // (with a shallow copy, as the streaming path does).
    history.push(abortedMessage);
    stream.push({ type: "message_start", message: { ...abortedMessage } });
  } else {
    // The partial is already the last entry; swap it for the aborted version.
    history[history.length - 1] = abortedMessage;
  }

  stream.push({ type: "message_end", message: abortedMessage });
  return abortedMessage;
}
1022
+
1023
/**
 * Execute tool calls from an assistant message.
 *
 * Every `toolCall` content block of `assistantMessage` becomes a record that is
 * scheduled according to its tool's `concurrency` setting:
 * - `"exclusive"` tools wait for the previous exclusive tool and for every
 *   shared tool started since then;
 * - all other tools (default `"shared"`) only wait for the previous exclusive
 *   tool.
 *
 * For each executed call the function emits `tool_execution_start`, optional
 * `tool_execution_update`, `tool_execution_end`, and a `message_start` /
 * `message_end` pair carrying the resulting `ToolResultMessage`.
 *
 * Unless `config.interruptMode` is `"wait"`, `config.getSteeringMessages` is
 * polled after each tool finishes. When it returns a non-empty list the shared
 * tool signal is aborted, tools that have not started are skipped, and tools
 * that finish afterwards have their result replaced by the skipped placeholder.
 * After all tasks settle, every record that still has no result message gets
 * the skipped placeholder so each tool call is paired with a tool result.
 *
 * @param currentContext Agent context; `tools` is searched for each call's implementation.
 * @param assistantMessage Assistant message whose `toolCall` blocks are executed.
 * @param signal Optional caller abort signal, combined with the internal steering signal.
 * @param stream Event stream receiving tool execution and tool result events.
 * @param config Loop configuration (hooks, interrupt mode, intent tracing, steering source).
 * @param telemetry Optional telemetry handle passed through to the span helpers.
 * @param invokeAgentSpan Optional parent span for per-tool spans.
 * @returns Tool result messages in emission order, plus the steering messages
 *   that triggered an interruption (if any).
 */
async function executeToolCalls(
  currentContext: AgentContext,
  assistantMessage: AssistantMessage,
  signal: AbortSignal | undefined,
  stream: EventStream<AgentEvent, AgentMessage[]>,
  config: AgentLoopConfig,
  telemetry: AgentTelemetry | undefined,
  invokeAgentSpan: Span | undefined,
): Promise<{ toolResults: ToolResultMessage[]; steeringMessages?: AgentMessage[] }> {
  type ToolCallContent = Extract<AssistantMessage["content"][number], { type: "toolCall" }>;

  const registeredTools = currentContext.tools;
  const {
    getSteeringMessages,
    interruptMode = "immediate",
    getToolContext,
    transformToolCallArguments,
    intentTracing,
    beforeToolCall,
    afterToolCall,
  } = config;

  const pendingCalls = assistantMessage.content.filter(
    (block): block is ToolCallContent => block.type === "toolCall",
  );
  const collectedResults: ToolResultMessage[] = [];
  const callSummaries = pendingCalls.map(({ id, name }) => ({ id, name }));
  const batchId = `${assistantMessage.timestamp ?? Date.now()}_${pendingCalls[0]?.id ?? "batch"}`;

  // Any interrupt mode other than "wait" polls for steering after each tool.
  const interruptsImmediately = interruptMode !== "wait";
  const steeringAbort = new AbortController();
  // Tools observe both the caller's signal (when given) and the steering abort.
  const toolSignal = !signal ? steeringAbort.signal : AbortSignal.any([signal, steeringAbort.signal]);

  const interrupt = { triggered: false };
  let steeringMessages: AgentMessage[] | undefined;
  let inFlightSteeringPoll: Promise<void> | null = null;

  // Tools emitted via OpenAI's custom-tool path (e.g. `apply_patch` on GPT-5)
  // come back under their wire-level name, which may differ from the
  // harness-internal `name`. Match on either, preferring `name` for
  // determinism if both somehow collide.
  const findTool = (callName: string) =>
    registeredTools?.find(candidate => candidate.name === callName) ??
    registeredTools?.find(
      candidate => candidate.customWireName !== undefined && candidate.customWireName === callName,
    );

  const records = pendingCalls.map(toolCall => ({
    toolCall,
    tool: findTool(toolCall.name),
    args: toolCall.arguments as Record<string, unknown>,
    started: false,
    result: undefined as AgentToolResult<any> | undefined,
    isError: false,
    skipped: false,
    toolResultMessage: undefined as ToolResultMessage | undefined,
    resultEmitted: false,
  }));
  type ToolRecord = (typeof records)[number];

  /** Wrap a thrown value as an error-style tool result carrying its message text. */
  const toErrorResult = (error: unknown): AgentToolResult<any> => ({
    content: [{ type: "text", text: error instanceof Error ? error.message : String(error) }],
    details: {},
  });

  /**
   * Poll for queued steering messages; concurrent callers share one in-flight
   * poll. A non-empty answer latches the interrupt flag and aborts the tools.
   */
  const checkSteering = async (): Promise<void> => {
    if (!interruptsImmediately || !getSteeringMessages || interrupt.triggered) {
      return;
    }
    if (!inFlightSteeringPoll) {
      inFlightSteeringPoll = (async () => {
        const queued = await getSteeringMessages();
        if (queued.length > 0) {
          steeringMessages = queued;
          interrupt.triggered = true;
          steeringAbort.abort();
        }
      })().finally(() => {
        inFlightSteeringPoll = null;
      });
    }
    await inFlightSteeringPoll;
  };

  /**
   * Publish the final outcome of one record exactly once: execution events,
   * the tool result message, and the bookkeeping fields on the record.
   */
  const emitToolResult = (record: ToolRecord, result: AgentToolResult<any>, isError: boolean): void => {
    if (record.resultEmitted) return;
    const call = record.toolCall;

    // Records that never ran still need a start event before their end event.
    if (!record.started) {
      stream.push({
        type: "tool_execution_start",
        toolCallId: call.id,
        toolName: call.name,
        args: record.args,
        intent: call.intent,
      });
    }
    stream.push({
      type: "tool_execution_end",
      toolCallId: call.id,
      toolName: call.name,
      result,
      isError,
    });

    const toolResultMessage: ToolResultMessage = {
      role: "toolResult",
      toolCallId: call.id,
      toolName: call.name,
      content: result.content,
      details: result.details,
      isError,
      timestamp: Date.now(),
    };
    Object.assign(record, { result, isError, toolResultMessage, resultEmitted: true });
    collectedResults.push(toolResultMessage);

    stream.push({ type: "message_start", message: toolResultMessage });
    stream.push({ type: "message_end", message: toolResultMessage });
  };

  /**
   * Compute the arguments a tool will run with. With intent tracing enabled
   * the intent is stripped from the arguments and stored on the tool call,
   * falling back to the tool's own `intent` function when none was supplied.
   */
  const resolveExecutionArgs = (record: ToolRecord): Record<string, unknown> => {
    const { toolCall, tool } = record;
    if (!intentTracing) {
      return toolCall.arguments as Record<string, unknown>;
    }
    const { intent, strippedArgs } = extractIntent(toolCall.arguments);
    if (intent) {
      toolCall.intent = intent;
    } else if (typeof tool?.intent === "function") {
      try {
        const derived = tool.intent(strippedArgs as never)?.trim();
        if (derived) {
          toolCall.intent = derived;
        }
      } catch {
        // intent function must never break tool execution
      }
    }
    return strippedArgs;
  };

  /** Run a single tool call end to end: hooks, execution, telemetry, result emission. */
  const runTool = async (record: ToolRecord, index: number): Promise<void> => {
    if (interrupt.triggered) {
      // Skip both span emission and the collector orphan record here. The
      // tail sweep below (after `Promise.allSettled`) is the single path
      // that handles "no result message was produced" — it calls
      // `recordSkippedTool` and `emitToolResult` once per record, so any
      // work we did here would double-count.
      record.skipped = true;
      return;
    }

    const { toolCall, tool } = record;
    const argsForExecution = resolveExecutionArgs(record);
    record.args = argsForExecution;
    record.started = true;
    stream.push({
      type: "tool_execution_start",
      toolCallId: toolCall.id,
      toolName: toolCall.name,
      args: argsForExecution,
      intent: toolCall.intent,
    });

    const toolSpan = startExecuteToolSpan(telemetry, {
      tool,
      toolName: toolCall.name,
      toolCallId: toolCall.id,
      args: argsForExecution,
      parent: invokeAgentSpan,
    });
    if (toolSpan && toolCall.intent) {
      toolSpan.setAttribute(PrometheusGenAIAttr.ToolCallIntent, toolCall.intent);
    }

    let result: AgentToolResult<any> = { content: [], details: {} };
    let isError = false;
    let caughtError: unknown;

    await runInActiveSpan(toolSpan, async () => {
      try {
        if (!tool) throw new Error(`Tool ${toolCall.name} not found`);

        let effectiveArgs: Record<string, unknown>;
        try {
          effectiveArgs = validateToolArguments(tool, { ...toolCall, arguments: argsForExecution });
        } catch (validationError) {
          // Lenient tools run with the unvalidated arguments instead of failing.
          if (!tool.lenientArgValidation) throw validationError;
          effectiveArgs = argsForExecution;
        }

        if (beforeToolCall) {
          const verdict = await beforeToolCall(
            {
              assistantMessage,
              toolCall,
              args: effectiveArgs,
              context: currentContext,
            },
            toolSignal,
          );
          if (verdict?.block) {
            throw new ToolCallBlockedError(verdict.reason);
          }
        }
        // Reflect post-hook args so emitted tool results / afterToolCall see what actually executed.
        record.args = effectiveArgs;

        const toolContext = getToolContext?.({
          batchId,
          index,
          total: pendingCalls.length,
          toolCalls: callSummaries,
        });
        const executionInput = transformToolCallArguments
          ? transformToolCallArguments(effectiveArgs, toolCall.name)
          : effectiveArgs;
        const rawResult = await tool.execute(
          toolCall.id,
          executionInput,
          // Non-abortable tools never receive the abort signal.
          tool.nonAbortable ? undefined : toolSignal,
          partialResult => {
            stream.push({
              type: "tool_execution_update",
              toolCallId: toolCall.id,
              toolName: toolCall.name,
              args: effectiveArgs,
              partialResult: coerceToolResult(partialResult).result,
            });
          },
          toolContext,
        );
        const coerced = coerceToolResult(rawResult);
        result = coerced.result;
        if (coerced.malformed || result.isError) isError = true;
      } catch (e) {
        caughtError = e;
        result = toErrorResult(e);
        isError = true;
      }

      // The after-hook runs for successes and failures alike and may override
      // any of content / details / isError.
      if (afterToolCall) {
        try {
          const override = await afterToolCall(
            {
              assistantMessage,
              toolCall,
              args: record.args,
              result,
              isError,
              context: currentContext,
            },
            toolSignal,
          );
          if (override) {
            result = {
              content: override.content ?? result.content,
              details: override.details ?? result.details,
              isError: override.isError ?? result.isError,
            };
            isError = override.isError ?? isError;
          }
        } catch (e) {
          caughtError = e;
          result = toErrorResult(e);
          isError = true;
        }
      }
    });

    // A steering interrupt that landed while the tool ran discards its real
    // result in favour of the skipped placeholder.
    const interrupted = interrupt.triggered;
    if (interrupted) {
      record.skipped = true;
    }
    emitToolResult(record, interrupted ? createSkippedToolResult() : result, interrupted || isError);

    // Telemetry still reports the real result, not the placeholder.
    const firstTextBlock = result.content?.[0];
    const errorMessageForSpan =
      caughtError === undefined && isError && firstTextBlock?.type === "text" ? firstTextBlock.text : undefined;
    let status: "aborted" | "blocked" | "error" | "ok" = "ok";
    if (interrupted) {
      status = "aborted";
    } else if (caughtError instanceof ToolCallBlockedError) {
      status = "blocked";
    } else if (isError) {
      status = "error";
    }
    finishExecuteToolSpan(telemetry, toolSpan, {
      result,
      isError,
      status,
      errorMessage: errorMessageForSpan,
      errorObject: caughtError,
      toolCallId: toolCall.id,
      toolName: toolCall.name,
    });

    await checkSteering();
  };

  // Scheduling: shared tools chain only behind the last exclusive tool, while
  // an exclusive tool additionally waits for all shared tools queued since.
  // NOTE(review): if a task rejects (e.g. `getSteeringMessages` rejecting),
  // tasks chained behind it never call `runTool` and end up in the skipped
  // sweep below — confirm this is the intended behaviour.
  let lastExclusive: Promise<void> = Promise.resolve();
  let sharedSinceExclusive: Promise<void>[] = [];
  const tasks: Promise<void>[] = [];

  records.forEach((record, index) => {
    const isExclusive = (record.tool?.concurrency ?? "shared") === "exclusive";
    const gate: Promise<unknown> = isExclusive
      ? Promise.all([lastExclusive, ...sharedSinceExclusive])
      : lastExclusive;
    const task = gate.then(() => runTool(record, index));
    tasks.push(task);
    if (isExclusive) {
      lastExclusive = task;
      sharedSinceExclusive = [];
    } else {
      sharedSinceExclusive.push(task);
    }
  });

  await Promise.allSettled(tasks);
  // Yield after batch tool execution to let GC and I/O catch up,
  // especially when tool results are large (e.g. bash output).
  await yieldIfDue();

  // Tail sweep: every call must end up with a result message.
  for (const record of records) {
    if (record.toolResultMessage) continue;
    record.skipped = true;
    recordSkippedTool(telemetry, {
      toolCallId: record.toolCall.id,
      toolName: record.toolCall.name,
      status: "skipped",
    });
    emitToolResult(record, createSkippedToolResult(), true);
  }

  return { toolResults: collectedResults, steeringMessages };
}
1358
+
1359
/**
 * Create an error tool result for a tool call that was never executed.
 * Maintains the tool_use/tool_result pairing required by the API.
 *
 * Covers four reasons: the run was aborted, the assistant hit its output
 * token limit mid-call, the assistant ended its turn, or a generic error.
 * Emits `tool_execution_start`, `tool_execution_end`, `message_start` and
 * `message_end` on `stream`, in that order.
 *
 * @param toolCall The tool call that did not run.
 * @param stream Event stream that receives the four events above.
 * @param reason Why the call did not run; selects the base message text.
 * @param errorMessage Optional detail appended after `": "`; when omitted or
 *   empty the base message is terminated with a period instead.
 * @returns The tool result message (always `isError: true`) that was emitted.
 */
function createAbortedToolResult(
  toolCall: Extract<AssistantMessage["content"][number], { type: "toolCall" }>,
  stream: EventStream<AgentEvent, AgentMessage[]>,
  reason: "aborted" | "error" | "skipped" | "length",
  errorMessage?: string,
): ToolResultMessage {
  let message: string;
  switch (reason) {
    case "aborted":
      message = "Tool execution was aborted";
      break;
    case "length":
      message =
        "Tool call was not executed because the assistant hit its output token limit (stop_reason: length) before the arguments could complete; the recorded arguments are truncated and unsafe to run. Do NOT retry by re-emitting the same large payload — split the work into several smaller tool calls (e.g. for `write`/`edit`, write the first chunk then append the rest with subsequent `edit` insert ops, or break the file into multiple `write` targets)";
      break;
    case "skipped":
      message = "Tool call was not executed because the assistant ended its turn";
      break;
    default:
      message = "Tool execution failed due to an error";
      break;
  }

  const text = errorMessage ? `${message}: ${errorMessage}` : `${message}.`;
  const result: AgentToolResult<any> = {
    content: [{ type: "text", text }],
    details: {},
  };

  const { id: toolCallId, name: toolName } = toolCall;

  // Emit a start/end pair even though nothing ran, so consumers always see a
  // complete execution lifecycle for the call.
  stream.push({
    type: "tool_execution_start",
    toolCallId,
    toolName,
    args: toolCall.arguments,
    intent: toolCall.intent,
  });
  stream.push({
    type: "tool_execution_end",
    toolCallId,
    toolName,
    result,
    isError: true,
  });

  const toolResultMessage: ToolResultMessage = {
    role: "toolResult",
    toolCallId,
    toolName,
    content: result.content,
    details: {},
    isError: true,
    timestamp: Date.now(),
  };

  stream.push({ type: "message_start", message: toolResultMessage });
  stream.push({ type: "message_end", message: toolResultMessage });

  return toolResultMessage;
}
1412
+
1413
/**
 * Build the placeholder result used for a tool call that did not produce a
 * result of its own because a queued user message took priority.
 *
 * @returns A fresh result object with a single text block and empty details.
 */
function createSkippedToolResult(): AgentToolResult<any> {
  const skipped: AgentToolResult<any> = {
    content: [{ type: "text", text: "Skipped due to queued user message." }],
    details: {},
  };
  return skipped;
}