@gajae-code/agent-core 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. package/CHANGELOG.md +482 -0
  2. package/README.md +473 -0
  3. package/dist/types/agent-loop.d.ts +55 -0
  4. package/dist/types/agent.d.ts +334 -0
  5. package/dist/types/append-only-context.d.ts +113 -0
  6. package/dist/types/compaction/branch-summarization.d.ts +94 -0
  7. package/dist/types/compaction/compaction.d.ts +166 -0
  8. package/dist/types/compaction/entries.d.ts +103 -0
  9. package/dist/types/compaction/errors.d.ts +26 -0
  10. package/dist/types/compaction/index.d.ts +11 -0
  11. package/dist/types/compaction/messages.d.ts +61 -0
  12. package/dist/types/compaction/openai.d.ts +58 -0
  13. package/dist/types/compaction/pruning.d.ts +18 -0
  14. package/dist/types/compaction/utils.d.ts +32 -0
  15. package/dist/types/compaction.d.ts +1 -0
  16. package/dist/types/harmony-leak.d.ts +99 -0
  17. package/dist/types/index.d.ts +10 -0
  18. package/dist/types/proxy.d.ts +84 -0
  19. package/dist/types/run-collector.d.ts +196 -0
  20. package/dist/types/telemetry.d.ts +588 -0
  21. package/dist/types/thinking.d.ts +17 -0
  22. package/dist/types/types.d.ts +407 -0
  23. package/package.json +75 -0
  24. package/src/agent-loop.ts +1279 -0
  25. package/src/agent.ts +1399 -0
  26. package/src/append-only-context.ts +297 -0
  27. package/src/compaction/branch-summarization.ts +339 -0
  28. package/src/compaction/compaction.ts +1065 -0
  29. package/src/compaction/entries.ts +133 -0
  30. package/src/compaction/errors.ts +31 -0
  31. package/src/compaction/index.ts +12 -0
  32. package/src/compaction/messages.ts +212 -0
  33. package/src/compaction/openai.ts +552 -0
  34. package/src/compaction/prompts/auto-handoff-threshold-focus.md +1 -0
  35. package/src/compaction/prompts/branch-summary-context.md +5 -0
  36. package/src/compaction/prompts/branch-summary-preamble.md +2 -0
  37. package/src/compaction/prompts/branch-summary.md +30 -0
  38. package/src/compaction/prompts/compaction-short-summary.md +9 -0
  39. package/src/compaction/prompts/compaction-summary-context.md +5 -0
  40. package/src/compaction/prompts/compaction-summary.md +38 -0
  41. package/src/compaction/prompts/compaction-turn-prefix.md +17 -0
  42. package/src/compaction/prompts/compaction-update-summary.md +45 -0
  43. package/src/compaction/prompts/file-operations.md +10 -0
  44. package/src/compaction/prompts/handoff-document.md +49 -0
  45. package/src/compaction/prompts/summarization-system.md +3 -0
  46. package/src/compaction/pruning.ts +92 -0
  47. package/src/compaction/utils.ts +185 -0
  48. package/src/compaction.ts +1 -0
  49. package/src/harmony-leak.ts +427 -0
  50. package/src/index.ts +19 -0
  51. package/src/proxy.ts +326 -0
  52. package/src/run-collector.ts +631 -0
  53. package/src/telemetry.ts +2018 -0
  54. package/src/thinking.ts +19 -0
  55. package/src/types.ts +467 -0
@@ -0,0 +1,1279 @@
1
+ /**
2
+ * Agent loop that works with AgentMessage throughout.
3
+ * Transforms to Message[] only at the LLM call boundary.
4
+ */
5
+ import {
6
+ type AssistantMessage,
7
+ type AssistantMessageEvent,
8
+ type Context,
9
+ EventStream,
10
+ isZodSchema,
11
+ streamSimple,
12
+ type ToolResultMessage,
13
+ type TSchema,
14
+ validateToolArguments,
15
+ zodToWireSchema,
16
+ } from "@gajae-code/ai";
17
+ import { sanitizeText } from "@gajae-code/utils";
18
+ import {
19
+ createHarmonyAuditEvent,
20
+ type HarmonyDetection,
21
+ type HarmonyRecoveredToolCall,
22
+ isHarmonyLeakMitigationTarget,
23
+ signalListLabel,
24
+ } from "./harmony-leak";
25
+ import { type AgentRunCoverage, type AgentRunSummary, ToolCallBlockedError } from "./run-collector";
26
+ import {
27
+ type AgentTelemetry,
28
+ failChatSpan,
29
+ finishChatSpan,
30
+ finishExecuteToolSpan,
31
+ finishInvokeAgentSpan,
32
+ fireOnRunEnd,
33
+ PiGenAIAttr,
34
+ recordSkippedTool,
35
+ resolveTelemetry,
36
+ runInActiveSpan,
37
+ type Span,
38
+ startChatSpan,
39
+ startExecuteToolSpan,
40
+ startInvokeAgentSpan,
41
+ } from "./telemetry";
42
+ import type {
43
+ AgentContext,
44
+ AgentEvent,
45
+ AgentLoopConfig,
46
+ AgentMessage,
47
+ AgentTool,
48
+ AgentToolResult,
49
+ StreamFn,
50
+ } from "./types";
51
+
52
/**
 * Sentinel returned by the abort race in `streamAssistantResponse`.
 *
 * The race promise resolves with this symbol when the request signal fires `abort`, which
 * lets the streaming loop tell "aborted" apart from a regular iterator result.
 */
const ABORTED: unique symbol = Symbol("agent-loop-aborted");
54
+
55
/**
 * Error thrown to interrupt a streamed response in which GPT-5 Harmony protocol leakage
 * was detected.
 *
 * The main loop catches it and inspects the payload: when `recovered` is present, the
 * recovered message is used in place of the streamed one; otherwise the model call is retried.
 */
class HarmonyLeakInterruption extends Error {
	/** Detection that triggered the interruption; its `signals` feed the error message. */
	readonly detection: HarmonyDetection;
	/** The `removed` value supplied by the thrower; forwarded verbatim to the audit event. */
	readonly removed: string;
	/** Recovered tool call, if one was supplied by the thrower. */
	readonly recovered?: HarmonyRecoveredToolCall;

	constructor(detection: HarmonyDetection, removed: string, recovered?: HarmonyRecoveredToolCall) {
		super(`Detected GPT-5 Harmony protocol leakage (${signalListLabel(detection.signals)})`);
		this.detection = detection;
		this.removed = removed;
		this.recovered = recovered;
		this.name = "HarmonyLeakInterruption";
	}
}
65
+
66
/**
 * Coerce whatever `tool.execute()` (or its streaming partial-update callback) handed back
 * into a structurally valid {@link AgentToolResult}.
 *
 * Tools are typed, but third-party ones (MCP, extensions, user-authored AgentTools) can break
 * the contract at runtime, and persisting a malformed result corrupts the session file
 * (missing `content` array → crash on reload). Coercing at this single entry point lets
 * every downstream consumer rely on the declared type.
 *
 * Rules applied:
 * - No `content` array: a synthetic error result is returned and `malformed` is `true`.
 * - Text blocks with a string `text` are kept, with the text passed through `sanitizeText`.
 * - Image blocks with string `data` and `mimeType` are kept as-is.
 * - Every other block is dropped silently.
 * - `details` defaults to `{}` when the raw value has no `details` key.
 *
 * @param raw - Untrusted value produced by a tool.
 * @returns The coerced result and whether the raw value was structurally malformed.
 */
function coerceToolResult(raw: unknown): { result: AgentToolResult<any>; malformed: boolean } {
	const source = raw && typeof raw === "object" ? (raw as Record<string, unknown>) : null;
	const candidateBlocks = source?.content;
	const details = source && "details" in source ? source.details : {};
	// A tool may flag a non-throwing failure on the result itself (e.g. an aggregator that
	// catches per-entry errors and synthesizes a combined result). Keep the flag so the
	// agent loop can surface it on the wire.
	const flaggedAsError = Boolean(source && "isError" in source && source.isError);

	if (!Array.isArray(candidateBlocks)) {
		const fallback: AgentToolResult<any> = {
			content: [{ type: "text", text: "Tool returned an invalid result: missing content array." }],
			details,
			isError: true,
		};
		return { result: fallback, malformed: true };
	}

	const content: AgentToolResult["content"] = [];
	for (const entry of candidateBlocks) {
		// Only objects carrying a `type` tag are candidates at all.
		if (!(entry && typeof entry === "object" && "type" in entry)) continue;

		switch (entry.type) {
			case "text":
				if (typeof (entry as { text?: unknown }).text === "string") {
					content.push({ type: "text", text: sanitizeText((entry as { text: string }).text) });
				}
				break;
			case "image":
				if (
					typeof (entry as { data?: unknown }).data === "string" &&
					typeof (entry as { mimeType?: unknown }).mimeType === "string"
				) {
					content.push(entry as { type: "image"; data: string; mimeType: string });
				}
				break;
			default:
				break;
		}
	}

	const result: AgentToolResult<any> = flaggedAsError ? { content, details, isError: true } : { content, details };
	return { result, malformed: false };
}
110
+
111
/**
 * Start an agent loop with one or more new prompt messages.
 *
 * The prompts are appended to a copy of the context's message list (the caller's array is
 * not touched by this copy), and `message_start` / `message_end` events are emitted for each
 * prompt before the loop runs. Any error thrown by the loop is reported via `stream.fail`.
 *
 * @param prompts - Messages to add to the conversation before the first model call.
 * @param context - Agent context to start from.
 * @param config - Loop configuration.
 * @param signal - Optional abort signal forwarded to the loop.
 * @param streamFn - Optional streaming function forwarded to the loop.
 * @returns The event stream; its result is the list of messages produced by this run.
 */
export function agentLoop(
	prompts: AgentMessage[],
	context: AgentContext,
	config: AgentLoopConfig,
	signal?: AbortSignal,
	streamFn?: StreamFn,
): EventStream<AgentEvent, AgentMessage[]> {
	const stream = createAgentStream();

	const drive = async (): Promise<void> => {
		const producedMessages: AgentMessage[] = [...prompts];
		const workingContext: AgentContext = {
			...context,
			messages: [...context.messages, ...prompts],
		};

		stream.push({ type: "agent_start" });
		stream.push({ type: "turn_start" });
		for (const promptMessage of prompts) {
			stream.push({ type: "message_start", message: promptMessage });
			stream.push({ type: "message_end", message: promptMessage });
		}

		try {
			await runLoop(workingContext, producedMessages, config, signal, stream, streamFn);
		} catch (loopError) {
			stream.fail(loopError);
		}
	};

	// Runs in the background; the caller consumes progress through the returned stream.
	drive();

	return stream;
}
147
+
148
/**
 * Continue an agent loop from the current context without adding a new message.
 * Used for retries, where the context already ends with a user message or tool results.
 *
 * **Important:** the last message in the context must convert to a `user` or `toolResult`
 * message via `convertToLlm`; otherwise the LLM provider will reject the request. That cannot
 * be validated here because `convertToLlm` is only called once per turn.
 *
 * Note that the context is shallow-copied, so the loop shares the caller's `messages` array.
 *
 * @throws Error synchronously if the context has no messages or ends with an assistant message.
 * @returns The event stream; its result is the list of messages produced by this run.
 */
export function agentLoopContinue(
	context: AgentContext,
	config: AgentLoopConfig,
	signal?: AbortSignal,
	streamFn?: StreamFn,
): EventStream<AgentEvent, AgentMessage[]> {
	const history = context.messages;
	if (history.length === 0) {
		throw new Error("Cannot continue: no messages in context");
	}

	const lastMessage = history[history.length - 1];
	if (lastMessage.role === "assistant") {
		throw new Error("Cannot continue from message role: assistant");
	}

	const stream = createAgentStream();

	const drive = async (): Promise<void> => {
		const producedMessages: AgentMessage[] = [];
		const workingContext: AgentContext = { ...context };

		stream.push({ type: "agent_start" });
		stream.push({ type: "turn_start" });

		try {
			await runLoop(workingContext, producedMessages, config, signal, stream, streamFn);
		} catch (loopError) {
			stream.fail(loopError);
		}
	};

	// Runs in the background; the caller consumes progress through the returned stream.
	drive();

	return stream;
}
188
+
189
/**
 * Create the event stream used by the agent loop entry points.
 *
 * The two callbacks passed to `EventStream` both key off the `agent_end` event: the first
 * reports whether an event is `agent_end`, the second extracts that event's `messages`
 * (or an empty list for any other event type).
 */
function createAgentStream(): EventStream<AgentEvent, AgentMessage[]> {
	const isAgentEnd = (event: AgentEvent): boolean => event.type === "agent_end";
	const messagesOf = (event: AgentEvent): AgentMessage[] => {
		if (event.type !== "agent_end") return [];
		return event.messages;
	};
	return new EventStream<AgentEvent, AgentMessage[]>(isAgentEnd, messagesOf);
}
195
+
196
/**
 * Build the `agent_end` event payload.
 *
 * Without telemetry the bare `{ type, messages }` event is returned for backwards
 * compatibility. With telemetry, the run collector is snapshotted so consumers receive
 * {@link AgentRunSummary} and {@link AgentRunCoverage} alongside the messages without parsing
 * OTEL spans. The `onRunEnd` hook is fired only when `collector.markRunEnded()` returns a
 * truthy value.
 *
 * @param messages - Messages produced by the run.
 * @param telemetry - Resolved telemetry, or `undefined` when telemetry is not configured.
 * @param stepCount - Step count passed to the collector snapshot.
 */
function buildAgentEndEvent(
	messages: AgentMessage[],
	telemetry: AgentTelemetry | undefined,
	stepCount: number,
): Extract<AgentEvent, { type: "agent_end" }> {
	if (!telemetry) return { type: "agent_end", messages };

	const { collector } = telemetry;
	// Snapshot first, then mark the run as ended, so the hook receives the same rollup
	// that is attached to the event.
	const { summary, coverage } = collector.snapshot({ stepCount });
	const shouldNotify = collector.markRunEnded();
	if (shouldNotify) {
		fireOnRunEnd(telemetry, summary, coverage);
	}
	return { type: "agent_end", messages, telemetry: summary, coverage };
}
214
+
215
/**
 * Detailed-result handle returned by {@link agentLoopDetailed}. Adds the
 * run-level telemetry/coverage rollup to the existing `AgentMessage[]`
 * payload without changing the resolved type of `stream.result()`.
 */
export interface AgentLoopDetailedResult {
	/** Messages the stream resolved with (the value of `stream.result()`). */
	readonly messages: AgentMessage[];
	/** Run summary captured from the `onRunEnd` hook; `undefined` if the hook never fired. */
	readonly telemetry: AgentRunSummary | undefined;
	/** Run coverage captured from the `onRunEnd` hook; `undefined` if the hook never fired. */
	readonly coverage: AgentRunCoverage | undefined;
}
225
+
226
/**
 * Variant of {@link agentLoop} that also exposes the run-level summary and coverage.
 *
 * The returned `stream` is the ordinary `EventStream`; `stream.result()` still resolves to
 * `AgentMessage[]`. Calling `detailed()` waits for the stream's result and returns it together
 * with the telemetry rollup captured through the `onRunEnd` hook. Prefer {@link agentLoop}
 * when the rollup is not needed.
 */
export function agentLoopDetailed(
	prompts: AgentMessage[],
	context: AgentContext,
	config: AgentLoopConfig,
	signal?: AbortSignal,
	streamFn?: StreamFn,
): {
	readonly stream: EventStream<AgentEvent, AgentMessage[]>;
	readonly detailed: () => Promise<AgentLoopDetailedResult>;
} {
	const { config: wiredConfig, detailed: readDetailed } = createDetailedCapture(config);
	const stream = agentLoop(prompts, context, wiredConfig, signal, streamFn);
	const detailed = (): Promise<AgentLoopDetailedResult> => readDetailed(stream);
	return { stream, detailed };
}
250
+
251
/**
 * Variant of {@link agentLoopContinue} that also exposes the run-level summary and coverage,
 * in the same way {@link agentLoopDetailed} does for {@link agentLoop}.
 *
 * Synchronous errors thrown by {@link agentLoopContinue} (empty context, trailing assistant
 * message) propagate unchanged.
 */
export function agentLoopContinueDetailed(
	context: AgentContext,
	config: AgentLoopConfig,
	signal?: AbortSignal,
	streamFn?: StreamFn,
): {
	readonly stream: EventStream<AgentEvent, AgentMessage[]>;
	readonly detailed: () => Promise<AgentLoopDetailedResult>;
} {
	const { config: wiredConfig, detailed: readDetailed } = createDetailedCapture(config);
	const stream = agentLoopContinue(context, wiredConfig, signal, streamFn);
	const detailed = (): Promise<AgentLoopDetailedResult> => readDetailed(stream);
	return { stream, detailed };
}
268
+
269
/**
 * Produce a copy of `config` whose telemetry `onRunEnd` hook records the run summary and
 * coverage, so the detailed helpers can report them without consuming the event stream.
 *
 * Any `onRunEnd` hook the caller already configured is still invoked, after the values have
 * been recorded. The input `config` object is not mutated.
 *
 * @param config - Caller-supplied loop configuration.
 * @returns The wired configuration plus a `detailed` reader that awaits a stream's result
 * and pairs it with whatever the hook captured (`undefined` fields if it never fired).
 */
function createDetailedCapture(config: AgentLoopConfig): {
	readonly config: AgentLoopConfig;
	readonly detailed: (stream: EventStream<AgentEvent, AgentMessage[]>) => Promise<AgentLoopDetailedResult>;
} {
	let runEnd: { summary: AgentRunSummary; coverage: AgentRunCoverage } | undefined;
	const callerHook = config.telemetry?.onRunEnd;

	const wired: AgentLoopConfig = {
		...config,
		telemetry: {
			// Spreading `undefined` is a no-op, so this also covers configs without telemetry.
			...config.telemetry,
			onRunEnd: (summary, coverage) => {
				runEnd = { summary, coverage };
				callerHook?.(summary, coverage);
			},
		},
	};

	const detailed = async (stream: EventStream<AgentEvent, AgentMessage[]>): Promise<AgentLoopDetailedResult> => {
		const messages = await stream.result();
		return { messages, telemetry: runEnd?.summary, coverage: runEnd?.coverage };
	};

	return { config: wired, detailed };
}
302
+
303
/**
 * Apply provider-specific message normalization before a model call.
 *
 * Only the `cerebras` provider is affected: `thinking` blocks are removed from assistant
 * messages whose content is an array. For every other provider, and when no message needed
 * changing, the original `messages` array is returned by reference.
 *
 * @param messages - LLM-ready messages.
 * @param model - Model whose `provider` decides whether normalization applies.
 * @returns The original array, or a new array with rewritten copies of the affected messages.
 */
function normalizeMessagesForProvider(
	messages: Context["messages"],
	model: AgentLoopConfig["model"],
): Context["messages"] {
	if (model.provider !== "cerebras") return messages;

	let strippedAny = false;
	const withoutThinking = messages.map(message => {
		if (!(message.role === "assistant" && Array.isArray(message.content))) {
			return message;
		}

		const keptBlocks = message.content.filter(block => block.type !== "thinking");
		const unchanged = keptBlocks.length === message.content.length;
		if (unchanged) return message;

		strippedAny = true;
		return { ...message, content: keptBlocks };
	});

	// Preserve referential identity when nothing was stripped.
	if (!strippedAny) return messages;
	return withoutThinking;
}
328
+
329
/** Name of the synthetic tool-argument property used to carry the model's stated intent. */
export const INTENT_FIELD = "_i";

/**
 * Return a copy of a JSON-schema-like object with the {@link INTENT_FIELD} string property
 * placed first in `properties`, and (in `"require"` mode) listed in `required`.
 *
 * Behavior:
 * - Non-object and array inputs are returned untouched.
 * - If the property already exists, is already first, and no `required` change is needed,
 *   the original `schema` reference is returned.
 * - If the property already exists elsewhere, it is moved to the front, keeping its definition.
 * - Otherwise a `{ type: "string" }` property is added in front of the existing properties.
 * - `required` never gains a duplicate entry: if it already lists the field (even when the
 *   property itself is missing), it is left as-is. JSON Schema requires `required` entries
 *   to be unique, so a duplicate could make the emitted schema invalid.
 *
 * The input is never mutated.
 *
 * @param schema - Wire schema for a tool's parameters.
 * @param mode - `"require"` also marks the field as required; `"optional"` only adds it.
 * @returns The original schema or a shallow copy with the intent field injected.
 */
function injectIntentIntoSchema(schema: unknown, mode: "require" | "optional" = "require"): unknown {
	if (!schema || typeof schema !== "object" || Array.isArray(schema)) return schema;
	const schemaRecord = schema as Record<string, unknown>;

	const propertiesValue = schemaRecord.properties;
	const properties =
		propertiesValue && typeof propertiesValue === "object" && !Array.isArray(propertiesValue)
			? (propertiesValue as Record<string, unknown>)
			: {};

	const requiredValue = schemaRecord.required;
	const required = Array.isArray(requiredValue)
		? requiredValue.filter((item): item is string => typeof item === "string")
		: [];
	// Shared by both branches below so neither can append a second copy of the field.
	const needsRequired = mode === "require" && !required.includes(INTENT_FIELD);

	if (INTENT_FIELD in properties) {
		const { [INTENT_FIELD]: intentProp, ...rest } = properties;
		const needsReorder = Object.keys(properties)[0] !== INTENT_FIELD;
		if (!needsReorder && !needsRequired) return schema;
		return {
			...schemaRecord,
			...(needsReorder ? { properties: { [INTENT_FIELD]: intentProp, ...rest } } : {}),
			...(needsRequired ? { required: [...required, INTENT_FIELD] } : {}),
		};
	}

	return {
		...schemaRecord,
		properties: {
			[INTENT_FIELD]: {
				type: "string",
			},
			...properties,
		},
		// In "require" mode always emit the normalized list; append only when missing.
		...(mode === "require" ? { required: needsRequired ? [...required, INTENT_FIELD] : required } : {}),
	};
}
365
+
366
/**
 * Convert agent tools into the tool list sent to the LLM.
 *
 * Every tool gets a defined `description` (empty string when missing). When intent injection
 * is active, the tool's parameter schema additionally receives the intent field; Zod schemas
 * are first converted to a wire schema. Injection is skipped for tools whose resolved intent
 * mode is `"omit"`, and globally when the `PI_NO_INTENT` environment variable equals `"1"`.
 *
 * @param tools - Agent tools, possibly `undefined`.
 * @param injectIntent - Whether the caller wants the intent field injected.
 * @returns The normalized tools, or `undefined` when `tools` is `undefined`.
 */
export function normalizeTools(tools: AgentContext["tools"], injectIntent: boolean): Context["tools"] {
	// The environment switch is only consulted when injection was requested.
	const intentEnabled = injectIntent && Bun.env.PI_NO_INTENT !== "1";

	return tools?.map(tool => {
		const mode = resolveIntentMode(tool.intent);
		const declared: TSchema = tool.parameters;
		const parameters: TSchema =
			intentEnabled && mode !== "omit"
				? (injectIntentIntoSchema(isZodSchema(declared) ? zodToWireSchema(declared) : declared, mode) as TSchema)
				: declared;
		const description = tool.description ?? "";
		return { ...tool, parameters, description };
	});
}
383
+
384
/**
 * Map a tool's `intent` setting to the schema-injection mode.
 *
 * A function-valued `intent` resolves to `"omit"`; the literals `"optional"` and `"omit"`
 * resolve to themselves; anything else (including `undefined`) resolves to `"require"`.
 */
function resolveIntentMode(intent: AgentTool["intent"]): "require" | "optional" | "omit" {
	const setting = typeof intent === "function" ? "omit" : intent;
	switch (setting) {
		case "omit":
			return "omit";
		case "optional":
			return "optional";
		default:
			return "require";
	}
}
389
+
390
/**
 * Split the intent field off a tool-call argument object.
 *
 * @param args - Raw tool-call arguments; not mutated.
 * @returns `strippedArgs` (a copy of `args` without the intent field) and, when the field was
 * a string, `intent` — its trimmed text, or `undefined` if it was blank. When the field was
 * missing or not a string, the `intent` key is absent from the result.
 */
function extractIntent(args: Record<string, unknown>): { intent?: string; strippedArgs: Record<string, unknown> } {
	const strippedArgs: Record<string, unknown> = { ...args };
	const rawIntent = strippedArgs[INTENT_FIELD];
	delete strippedArgs[INTENT_FIELD];

	if (typeof rawIntent === "string") {
		const normalized = rawIntent.trim();
		return { intent: normalized || undefined, strippedArgs };
	}
	return { strippedArgs };
}
398
+
399
/**
 * Main loop logic shared by agentLoop and agentLoopContinue.
 *
 * Resolves telemetry, opens the invoke-agent span, runs {@link runLoopBody} inside that span,
 * and always closes the span afterwards — passing along the final step count and, if the body
 * threw, the error (which is then rethrown to the caller).
 */
async function runLoop(
	currentContext: AgentContext,
	newMessages: AgentMessage[],
	config: AgentLoopConfig,
	signal: AbortSignal | undefined,
	stream: EventStream<AgentEvent, AgentMessage[]>,
	streamFn?: StreamFn,
): Promise<void> {
	const telemetry = resolveTelemetry(config.telemetry, config.sessionId);
	const invokeAgentSpan = startInvokeAgentSpan(telemetry, config.model);
	const stepCounter: StepCounter = { count: 0 };

	// Reads the step count at close time, so it reflects every step the body performed.
	const closeSpan = (errorObject: unknown): void => {
		finishInvokeAgentSpan(telemetry, invokeAgentSpan, {
			stepCount: stepCounter.count,
			errorObject,
		});
	};

	const body = (): Promise<void> =>
		runLoopBody(
			currentContext,
			newMessages,
			config,
			signal,
			stream,
			telemetry,
			invokeAgentSpan,
			stepCounter,
			streamFn,
		);

	try {
		await runInActiveSpan(invokeAgentSpan, body);
	} catch (loopError) {
		closeSpan(loopError);
		throw loopError;
	}
	closeSpan(undefined);
}
438
+
439
/**
 * Mutable step counter shared between the loop and the streaming helper.
 * It starts at 0 and is incremented once per model call; the final value is
 * reported on the invoke-agent span and in the `agent_end` snapshot.
 */
interface StepCounter {
	count: number;
}
442
+
443
/**
 * Body of the agent loop: alternates model calls and tool execution until the model stops
 * requesting tools and no steering or follow-up messages remain.
 *
 * Structure:
 * - The inner loop runs one turn per iteration: inject pending messages, stream an assistant
 *   response, execute any tool calls, emit `turn_end`, then poll for steering messages.
 * - The outer loop restarts the inner loop when follow-up messages arrive after the agent
 *   would otherwise stop.
 *
 * Termination:
 * - Normal completion emits `agent_end` and ends the stream with `newMessages`.
 * - An assistant message with stop reason `error` or `aborted` emits placeholder tool results
 *   for its tool calls, then `turn_end` and `agent_end`, and returns immediately.
 *
 * Harmony leak handling (`HarmonyLeakInterruption` thrown while streaming):
 * - With a recovered tool call, the recovered message is used for this turn; a third
 *   consecutive recovery escalates with an error.
 * - Without one, the model call is retried; a third consecutive failure escalates.
 * - Both counters reset after any successfully streamed response.
 *
 * @param currentContext - Working context; its `messages` array is appended to in place.
 * @param newMessages - Accumulator of messages produced by this run; appended to in place.
 * @param config - Loop configuration and hooks.
 * @param signal - Optional abort signal forwarded to streaming and tool execution.
 * @param stream - Event stream that receives loop events.
 * @param telemetry - Resolved telemetry, if any.
 * @param invokeAgentSpan - Parent span for chat and tool spans, if any.
 * @param stepCounter - Shared counter of model calls.
 * @param streamFn - Optional streaming function override.
 * @throws Error when Harmony leakage persists past the retry/recovery limits; any other
 * error from streaming or tool execution propagates unchanged.
 */
async function runLoopBody(
	currentContext: AgentContext,
	newMessages: AgentMessage[],
	config: AgentLoopConfig,
	signal: AbortSignal | undefined,
	stream: EventStream<AgentEvent, AgentMessage[]>,
	telemetry: AgentTelemetry | undefined,
	invokeAgentSpan: Span | undefined,
	stepCounter: StepCounter,
	streamFn?: StreamFn,
): Promise<void> {
	let firstTurn = true;
	// Check for steering messages at start (user may have typed while waiting)
	let pendingMessages: AgentMessage[] = (await config.getSteeringMessages?.()) || [];
	let harmonyRetryAttempt = 0;
	let harmonyTruncateResumeCount = 0;

	// Outer loop: continues when queued follow-up messages arrive after agent would stop
	while (true) {
		let hasMoreToolCalls = true;

		// Inner loop: process tool calls and steering messages
		while (hasMoreToolCalls || pendingMessages.length > 0) {
			// The entry points already emitted the first `turn_start`.
			if (!firstTurn) {
				stream.push({ type: "turn_start" });
			} else {
				firstTurn = false;
			}

			// Process pending messages (inject before next assistant response)
			if (pendingMessages.length > 0) {
				for (const message of pendingMessages) {
					stream.push({ type: "message_start", message });
					stream.push({ type: "message_end", message });
					currentContext.messages.push(message);
					newMessages.push(message);
				}
				pendingMessages = [];
			}

			// Refresh prompt/tool context from live state before each model call
			if (config.syncContextBeforeModelCall) {
				await config.syncContextBeforeModelCall(currentContext);
			}

			// Stream assistant response
			let message: AssistantMessage;
			try {
				message = await streamAssistantResponse(
					currentContext,
					config,
					signal,
					stream,
					telemetry,
					invokeAgentSpan,
					stepCounter,
					streamFn,
					harmonyRetryAttempt,
				);
				// A clean response ends any ongoing Harmony mitigation streak.
				harmonyRetryAttempt = 0;
				harmonyTruncateResumeCount = 0;
			} catch (err) {
				if (!(err instanceof HarmonyLeakInterruption)) throw err;
				if (err.recovered) {
					if (harmonyTruncateResumeCount >= 2) {
						await emitHarmonyAudit(config, err, "escalated", harmonyRetryAttempt);
						throw new Error(
							`GPT-5 Harmony leak recurred after truncate-and-resume recovery (${signalListLabel(err.detection.signals)}).`,
						);
					}
					harmonyTruncateResumeCount++;
					// Continue the turn with the recovered message in place of the streamed one.
					message = err.recovered.message;
					await emitHarmonyAudit(config, err, "truncate_resume", harmonyRetryAttempt);
				} else {
					if (harmonyRetryAttempt >= 2) {
						await emitHarmonyAudit(config, err, "escalated", harmonyRetryAttempt);
						throw new Error(
							`GPT-5 Harmony leak persisted after ${harmonyRetryAttempt} retries (${signalListLabel(err.detection.signals)}).`,
						);
					}
					await emitHarmonyAudit(config, err, "abort_retry", harmonyRetryAttempt);
					harmonyRetryAttempt++;
					// Force another iteration. Without this, a turn that was entered only because
					// of pending steering messages (already consumed above, with no tool calls
					// outstanding) would fail the loop condition and the retry would be skipped,
					// ending the run without an assistant response.
					hasMoreToolCalls = true;
					continue;
				}
			}
			newMessages.push(message);
			let steeringMessagesFromExecution: AgentMessage[] | undefined;

			if (message.stopReason === "error" || message.stopReason === "aborted") {
				// Create placeholder tool results for any tool calls in the aborted message
				// This maintains the tool_use/tool_result pairing that the API requires
				type ToolCallContent = Extract<AssistantMessage["content"][number], { type: "toolCall" }>;
				const toolCalls = message.content.filter((c): c is ToolCallContent => c.type === "toolCall");
				const toolResults: ToolResultMessage[] = [];
				for (const toolCall of toolCalls) {
					const result = createAbortedToolResult(toolCall, stream, message.stopReason, message.errorMessage);
					currentContext.messages.push(result);
					newMessages.push(result);
					toolResults.push(result);
					// The placeholder result above keeps the API's tool_use/tool_result
					// pairing intact, but no execute_tool span is started for these
					// calls. Mirror the run-collector entry directly so the run
					// summary's tool counters and `coverage.toolsInvoked` reflect
					// what the user actually saw on the wire.
					recordSkippedTool(telemetry, {
						toolCallId: toolCall.id,
						toolName: toolCall.name,
						status: message.stopReason === "aborted" ? "aborted" : "error",
					});
				}
				stream.push({ type: "turn_end", message, toolResults });
				stream.push(buildAgentEndEvent(newMessages, telemetry, stepCounter.count));
				stream.end(newMessages);
				return;
			}

			// Check for tool calls
			const toolCalls = message.content.filter(c => c.type === "toolCall");
			hasMoreToolCalls = toolCalls.length > 0;

			const toolResults: ToolResultMessage[] = [];
			if (hasMoreToolCalls) {
				const executionResult = await executeToolCalls(
					currentContext,
					message,
					signal,
					stream,
					config,
					telemetry,
					invokeAgentSpan,
				);

				toolResults.push(...executionResult.toolResults);
				steeringMessagesFromExecution = executionResult.steeringMessages;

				for (const result of toolResults) {
					currentContext.messages.push(result);
					newMessages.push(result);
				}
			}

			stream.push({ type: "turn_end", message, toolResults });

			// Steering messages reported by tool execution take precedence; otherwise poll.
			pendingMessages = steeringMessagesFromExecution ?? ((await config.getSteeringMessages?.()) || []);
		}

		// Agent would stop here. Check for follow-up messages.
		await config.onBeforeYield?.();
		const followUpMessages = (await config.getFollowUpMessages?.()) || [];
		if (followUpMessages.length > 0) {
			// Set as pending so inner loop processes them
			pendingMessages = followUpMessages;
			continue;
		}

		// No more messages, exit
		break;
	}

	stream.push(buildAgentEndEvent(newMessages, telemetry, stepCounter.count));
	stream.end(newMessages);
}
607
+
608
/**
 * Report a Harmony leak mitigation step to the optional `config.onHarmonyLeak` hook.
 *
 * Nothing happens (and no audit event is built) when the hook is not configured.
 *
 * @param config - Loop configuration providing the hook and the model.
 * @param interruption - The interruption being handled; supplies `detection` and `removed`.
 * @param action - Which mitigation was taken.
 * @param retryN - Retry attempt number recorded in the audit event.
 */
async function emitHarmonyAudit(
	config: AgentLoopConfig,
	interruption: HarmonyLeakInterruption,
	action: "truncate_resume" | "abort_retry" | "escalated",
	retryN: number,
): Promise<void> {
	if (config.onHarmonyLeak == null) return;

	const { detection, removed } = interruption;
	const auditEvent = createHarmonyAuditEvent({
		action,
		detection,
		model: config.model,
		retryN,
		removed,
	});
	// Invoked as a member call so the hook keeps `config` as its receiver.
	await config.onHarmonyLeak(auditEvent);
}
624
+
625
+ /**
626
+ * Stream an assistant response from the LLM.
627
+ * This is where AgentMessage[] gets transformed to Message[] for the LLM.
628
+ */
629
+ async function streamAssistantResponse(
630
+ context: AgentContext,
631
+ config: AgentLoopConfig,
632
+ signal: AbortSignal | undefined,
633
+ stream: EventStream<AgentEvent, AgentMessage[]>,
634
+ telemetry: AgentTelemetry | undefined,
635
+ invokeAgentSpan: Span | undefined,
636
+ stepCounter: StepCounter,
637
+ streamFn?: StreamFn,
638
+ harmonyRetryAttempt = 0,
639
+ ): Promise<AssistantMessage> {
640
+ // Apply context transform if configured (AgentMessage[] → AgentMessage[])
641
+ let messages = context.messages;
642
+ if (config.transformContext) {
643
+ messages = await config.transformContext(messages, signal);
644
+ }
645
+
646
+ // Convert to LLM-compatible messages (AgentMessage[] → Message[])
647
+ const llmMessages = await config.convertToLlm(messages);
648
+ const normalizedMessages = normalizeMessagesForProvider(llmMessages, config.model);
649
+
650
+ // Build LLM context — append-only mode caches system prompt + tools
651
+ // AND keeps an append-only message log so prior-turn bytes are stable.
652
+ let llmContext: Context;
653
+ if (config.appendOnlyContext) {
654
+ config.appendOnlyContext.syncMessages(normalizedMessages);
655
+ llmContext = config.appendOnlyContext.build(context, { intentTracing: !!config.intentTracing });
656
+ } else {
657
+ llmContext = {
658
+ systemPrompt: context.systemPrompt,
659
+ messages: normalizedMessages,
660
+ tools: normalizeTools(context.tools, !!config.intentTracing),
661
+ };
662
+ }
663
+
664
+ const streamFunction = streamFn || streamSimple;
665
+
666
+ // Resolve API key (important for expiring tokens) — do this before resolving
667
+ // metadata so that the session-sticky credential recorded by getApiKey is
668
+ // visible to metadataResolver (e.g. for the correct account_uuid in metadata.user_id).
669
+ const resolvedApiKey =
670
+ (config.getApiKey ? await config.getApiKey(config.model.provider) : undefined) || config.apiKey;
671
+
672
+ // Re-resolve metadata after credential selection so the per-request value
673
+ // reflects the credential actually used, not the snapshot from AgentLoopConfig construction.
674
+ const resolvedMetadata = config.metadataResolver ? config.metadataResolver(config.model.provider) : config.metadata;
675
+
676
+ const dynamicToolChoice = config.getToolChoice?.();
677
+ const dynamicReasoning = config.getReasoning?.();
678
+ const harmonyMitigationEnabled = isHarmonyLeakMitigationTarget(config.model);
679
+ const harmonyAbortController = harmonyMitigationEnabled ? new AbortController() : undefined;
680
+ const requestSignal = harmonyAbortController
681
+ ? signal
682
+ ? AbortSignal.any([signal, harmonyAbortController.signal])
683
+ : harmonyAbortController.signal
684
+ : signal;
685
+ const effectiveTemperature =
686
+ harmonyRetryAttempt > 0 && config.temperature !== undefined ? config.temperature + 0.05 : config.temperature;
687
+ const effectiveToolChoice = dynamicToolChoice ?? config.toolChoice;
688
+ const effectiveReasoning = dynamicReasoning ?? config.reasoning;
689
+
690
+ const chatStepNumber = stepCounter.count;
691
+ stepCounter.count += 1;
692
+ const chatSpan = startChatSpan(telemetry, config.model, {
693
+ parent: invokeAgentSpan,
694
+ stepNumber: chatStepNumber,
695
+ request: {
696
+ maxTokens: config.maxTokens,
697
+ temperature: effectiveTemperature,
698
+ topP: config.topP,
699
+ topK: config.topK,
700
+ presencePenalty: config.presencePenalty,
701
+ serviceTier: config.serviceTier,
702
+ reasoningEffort: typeof effectiveReasoning === "string" ? effectiveReasoning : undefined,
703
+ toolChoice: effectiveToolChoice,
704
+ tools: llmContext.tools,
705
+ systemPrompt: llmContext.systemPrompt,
706
+ messages: llmContext.messages,
707
+ },
708
+ });
709
+
710
+ // Wrap the user-supplied onResponse so we always observe response headers
711
+ // for telemetry (`ChatUsageEvent.headers`, gateway auto-detection) without
712
+ // stealing them from the configured hook.
713
+ let capturedHeaders: Readonly<Record<string, string>> | undefined;
714
+ const userOnResponse = config.onResponse;
715
+ const captureOnResponse: AgentLoopConfig["onResponse"] = (response, modelInfo) => {
716
+ capturedHeaders = response.headers;
717
+ return userOnResponse?.(response, modelInfo);
718
+ };
719
+
720
+ const finishChat = async (message: AssistantMessage): Promise<void> => {
721
+ await finishChatSpan(telemetry, chatSpan, message, {
722
+ stepNumber: chatStepNumber,
723
+ serviceTier: config.serviceTier,
724
+ responseHeaders: capturedHeaders,
725
+ baseUrl: config.model.baseUrl,
726
+ });
727
+ };
728
+
729
+ try {
730
+ return await runInActiveSpan(chatSpan, async () => {
731
+ const response = await streamFunction(config.model, llmContext, {
732
+ ...config,
733
+ apiKey: resolvedApiKey,
734
+ metadata: resolvedMetadata,
735
+ toolChoice: effectiveToolChoice,
736
+ reasoning: effectiveReasoning,
737
+ temperature: effectiveTemperature,
738
+ signal: requestSignal,
739
+ onResponse: captureOnResponse,
740
+ });
741
+
742
+ let partialMessage: AssistantMessage | null = null;
743
+ let addedPartial = false;
744
+
745
+ const responseIterator = response[Symbol.asyncIterator]();
746
+
747
+ // Set up a single abort race: register the abort listener once for the whole
748
+ // stream and reuse the same race promise for every iterator.next() instead of
749
+ // allocating Promise.withResolvers and add/removeEventListener per event.
750
+ let abortRacePromise: Promise<typeof ABORTED> | undefined;
751
+ let detachAbortListener: (() => void) | undefined;
752
+ if (requestSignal) {
753
+ if (requestSignal.aborted) {
754
+ const aborted = emitAbortedAssistantMessage(partialMessage, addedPartial, context, config, stream);
755
+ await finishChat(aborted);
756
+ return aborted;
757
+ }
758
+ const { promise, resolve } = Promise.withResolvers<typeof ABORTED>();
759
+ const onAbort = () => resolve(ABORTED);
760
+ requestSignal.addEventListener("abort", onAbort, { once: true });
761
+ abortRacePromise = promise;
762
+ detachAbortListener = () => requestSignal.removeEventListener("abort", onAbort);
763
+ }
764
+
765
+ try {
766
+ while (true) {
767
+ let next: IteratorResult<AssistantMessageEvent>;
768
+ if (abortRacePromise) {
769
+ const result = await Promise.race([responseIterator.next(), abortRacePromise]);
770
+ if (result === ABORTED) {
771
+ responseIterator.return?.()?.catch(() => {});
772
+ const aborted = emitAbortedAssistantMessage(partialMessage, addedPartial, context, config, stream);
773
+ await finishChat(aborted);
774
+ return aborted;
775
+ }
776
+ next = result;
777
+ } else {
778
+ next = await responseIterator.next();
779
+ }
780
+ if (requestSignal?.aborted) {
781
+ const aborted = emitAbortedAssistantMessage(partialMessage, addedPartial, context, config, stream);
782
+ await finishChat(aborted);
783
+ return aborted;
784
+ }
785
+ if (next.done) break;
786
+
787
+ const event = next.value;
788
+
789
+ switch (event.type) {
790
+ case "start":
791
+ partialMessage = event.partial;
792
+ context.messages.push(partialMessage);
793
+ addedPartial = true;
794
+ stream.push({ type: "message_start", message: { ...partialMessage } });
795
+ break;
796
+
797
+ case "text_start":
798
+ case "text_delta":
799
+ case "text_end":
800
+ case "thinking_start":
801
+ case "thinking_delta":
802
+ case "thinking_end":
803
+ case "toolcall_start":
804
+ case "toolcall_delta":
805
+ case "toolcall_end":
806
+ if (partialMessage) {
807
+ partialMessage = event.partial;
808
+ context.messages[context.messages.length - 1] = partialMessage;
809
+ config.onAssistantMessageEvent?.(partialMessage, event);
810
+ if (signal?.aborted) {
811
+ continue;
812
+ }
813
+ stream.push({
814
+ type: "message_update",
815
+ assistantMessageEvent: event,
816
+ message: { ...partialMessage },
817
+ });
818
+ }
819
+ break;
820
+
821
+ case "done":
822
+ case "error": {
823
+ const finalMessage = await response.result();
824
+ if (addedPartial) {
825
+ context.messages[context.messages.length - 1] = finalMessage;
826
+ } else {
827
+ context.messages.push(finalMessage);
828
+ }
829
+ if (!addedPartial) {
830
+ stream.push({ type: "message_start", message: { ...finalMessage } });
831
+ }
832
+ stream.push({ type: "message_end", message: finalMessage });
833
+ await finishChat(finalMessage);
834
+ return finalMessage;
835
+ }
836
+ }
837
+ }
838
+ } finally {
839
+ detachAbortListener?.();
840
+ }
841
+
842
+ const trailing = await response.result();
843
+ await finishChat(trailing);
844
+ return trailing;
845
+ });
846
+ } catch (err) {
847
+ failChatSpan(telemetry, chatSpan, {
848
+ errorObject: err,
849
+ responseHeaders: capturedHeaders,
850
+ baseUrl: config.model.baseUrl,
851
+ });
852
+ throw err;
853
+ }
854
+ }
855
+
856
/**
 * Finalize an assistant turn that ended because the request was aborted.
 *
 * Builds the aborted message either from the partial message streamed so far or,
 * when nothing was streamed, from a zero-usage placeholder describing the
 * configured model. The message is then stored in `context.messages` and the
 * closing events are pushed onto `stream`.
 *
 * @param partialMessage - Latest partial assistant message, or `null` if none arrived.
 * @param addedPartial - Whether a partial message already occupies the last slot of `context.messages`.
 * @param context - Agent context whose `messages` array is updated in place.
 * @param config - Loop configuration; only `config.model` is read.
 * @param stream - Event stream that receives `message_start` (if not yet sent) and `message_end`.
 * @returns The aborted assistant message that was stored and emitted.
 */
function emitAbortedAssistantMessage(
	partialMessage: AssistantMessage | null,
	addedPartial: boolean,
	context: AgentContext,
	config: AgentLoopConfig,
	stream: EventStream<AgentEvent, AgentMessage[]>,
): AssistantMessage {
	// Fields that mark a message as aborted, shared by both construction paths.
	const abortFields = { stopReason: "aborted", errorMessage: "Request was aborted" } as const;

	let abortedMessage: AssistantMessage;
	if (partialMessage) {
		// Keep whatever content was streamed and only override the terminal fields.
		abortedMessage = { ...partialMessage, ...abortFields };
	} else {
		const { api, provider, id } = config.model;
		abortedMessage = {
			role: "assistant",
			content: [],
			api,
			provider,
			model: id,
			usage: {
				input: 0,
				output: 0,
				cacheRead: 0,
				cacheWrite: 0,
				totalTokens: 0,
				cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
			},
			...abortFields,
			timestamp: Date.now(),
		};
	}

	if (!addedPartial) {
		// Nothing was announced yet: append and emit the start event ourselves.
		context.messages.push(abortedMessage);
		stream.push({ type: "message_start", message: { ...abortedMessage } });
	} else {
		// The partial already sits at the end of the history; swap it for the final form.
		const lastIndex = context.messages.length - 1;
		context.messages[lastIndex] = abortedMessage;
	}

	stream.push({ type: "message_end", message: abortedMessage });
	return abortedMessage;
}
893
+
894
+ /**
895
+ * Execute tool calls from an assistant message.
+ *
+ * Every `toolCall` content block in `assistantMessage` becomes one record. Each
+ * record is resolved against `currentContext.tools` (by `name`, then by
+ * `customWireName`) and run through `runTool`, which validates arguments, calls
+ * the `beforeToolCall` / `afterToolCall` hooks, executes the tool, and pushes
+ * `tool_execution_*` and tool-result message events onto `stream`. Errors thrown
+ * while validating, running hooks, or executing are caught and turned into error
+ * results rather than propagated.
+ *
+ * Scheduling: a tool whose `concurrency` is "exclusive" waits for the previous
+ * exclusive task and for every shared task queued since; any other tool waits
+ * only for the previous exclusive task.
+ *
+ * Steering: unless `interruptMode` is "wait" or `getSteeringMessages` is absent,
+ * steering is polled after each tool finishes. A non-empty answer aborts
+ * `toolSignal`, and tools that have not produced a result are answered with the
+ * skipped placeholder so every tool call still gets a tool result.
+ *
+ * @param currentContext - Agent context; only `tools` is read here, the whole object is passed to the hooks.
+ * @param assistantMessage - Message whose `toolCall` content blocks are executed.
+ * @param signal - Optional caller abort signal, combined with the internal steering abort signal.
+ * @param stream - Event stream that receives tool execution and tool-result message events.
+ * @param config - Loop configuration supplying the hooks and options destructured below.
+ * @param telemetry - Optional telemetry handle passed to the span helpers.
+ * @param invokeAgentSpan - Optional span used as the parent of each tool span.
+ * @returns The tool result messages in emission order, plus `steeringMessages` when a steering poll returned messages.
896
+ */
897
+ async function executeToolCalls(
898
+ currentContext: AgentContext,
899
+ assistantMessage: AssistantMessage,
900
+ signal: AbortSignal | undefined,
901
+ stream: EventStream<AgentEvent, AgentMessage[]>,
902
+ config: AgentLoopConfig,
903
+ telemetry: AgentTelemetry | undefined,
904
+ invokeAgentSpan: Span | undefined,
905
+ ): Promise<{ toolResults: ToolResultMessage[]; steeringMessages?: AgentMessage[] }> {
906
+ const tools = currentContext.tools;
907
+ const {
908
+ getSteeringMessages,
909
+ interruptMode = "immediate",
910
+ getToolContext,
911
+ transformToolCallArguments,
912
+ intentTracing,
913
+ beforeToolCall,
914
+ afterToolCall,
915
+ } = config;
916
+ type ToolCallContent = Extract<AssistantMessage["content"][number], { type: "toolCall" }>;
917
+ const toolCalls = assistantMessage.content.filter((c): c is ToolCallContent => c.type === "toolCall");
918
+ const emittedToolResults: ToolResultMessage[] = [];
919
+ const toolCallInfos = toolCalls.map(call => ({ id: call.id, name: call.name }));
920
+ const batchId = `${assistantMessage.timestamp ?? Date.now()}_${toolCalls[0]?.id ?? "batch"}`; // handed to getToolContext for every tool in this batch
921
+ const shouldInterruptImmediately = interruptMode !== "wait"; // any mode other than "wait" polls steering between tools
922
+ const steeringAbortController = new AbortController();
923
+ const toolSignal = signal
924
+ ? AbortSignal.any([signal, steeringAbortController.signal])
925
+ : steeringAbortController.signal; // aborts on caller abort or on a steering interrupt
926
+ const interruptState = { triggered: false };
927
+ let steeringMessages: AgentMessage[] | undefined;
928
+ let steeringCheck: Promise<void> | null = null;
929
+ // One mutable record per tool call; the flags below track its lifecycle.
930
+ const records = toolCalls.map(toolCall => ({
931
+ toolCall,
932
+ // Tools emitted via OpenAI's custom-tool path (e.g. `apply_patch` on GPT-5)
933
+ // come back under their wire-level name, which may differ from the
934
+ // harness-internal `name`. Match on either, preferring `name` for
935
+ // determinism if both somehow collide.
936
+ tool:
937
+ tools?.find(t => t.name === toolCall.name) ??
938
+ tools?.find(t => t.customWireName !== undefined && t.customWireName === toolCall.name),
939
+ args: toolCall.arguments as Record<string, unknown>,
940
+ started: false,
941
+ result: undefined as AgentToolResult<any> | undefined,
942
+ isError: false,
943
+ skipped: false,
944
+ toolResultMessage: undefined as ToolResultMessage | undefined,
945
+ resultEmitted: false,
946
+ }));
947
+ // Poll getSteeringMessages; callers arriving during an in-flight poll await that same poll.
948
+ const checkSteering = async (): Promise<void> => {
949
+ if (!shouldInterruptImmediately || !getSteeringMessages || interruptState.triggered) {
950
+ return;
951
+ }
952
+ if (steeringCheck) {
953
+ await steeringCheck;
954
+ return;
955
+ }
956
+ steeringCheck = (async () => {
957
+ const steering = await getSteeringMessages();
958
+ if (steering.length > 0) {
959
+ steeringMessages = steering;
960
+ interruptState.triggered = true;
961
+ steeringAbortController.abort();
962
+ }
963
+ })().finally(() => {
964
+ steeringCheck = null; // allow the next caller to start a fresh poll
965
+ });
966
+ await steeringCheck;
967
+ };
968
+ // Emit tool_execution_end and the toolResult message for a record; no-op if already emitted.
969
+ const emitToolResult = (record: (typeof records)[number], result: AgentToolResult<any>, isError: boolean): void => {
970
+ if (record.resultEmitted) return;
971
+ const { toolCall } = record;
972
+ if (!record.started) { // tool never ran: push a start event first so the end event has a matching start
973
+ stream.push({
974
+ type: "tool_execution_start",
975
+ toolCallId: toolCall.id,
976
+ toolName: toolCall.name,
977
+ args: record.args,
978
+ intent: toolCall.intent,
979
+ });
980
+ }
981
+ stream.push({
982
+ type: "tool_execution_end",
983
+ toolCallId: toolCall.id,
984
+ toolName: toolCall.name,
985
+ result,
986
+ isError,
987
+ });
988
+
989
+ const toolResultMessage: ToolResultMessage = {
990
+ role: "toolResult",
991
+ toolCallId: toolCall.id,
992
+ toolName: toolCall.name,
993
+ content: result.content,
994
+ details: result.details,
995
+ isError,
996
+ timestamp: Date.now(),
997
+ };
998
+ record.result = result;
999
+ record.isError = isError;
1000
+ record.toolResultMessage = toolResultMessage;
1001
+ record.resultEmitted = true;
1002
+ emittedToolResults.push(toolResultMessage);
1003
+
1004
+ stream.push({ type: "message_start", message: toolResultMessage });
1005
+ stream.push({ type: "message_end", message: toolResultMessage });
1006
+ };
1007
+ // Run one tool call: intent extraction, span start, validation, hooks, execution, result emission, span finish.
1008
+ const runTool = async (record: (typeof records)[number], index: number): Promise<void> => {
1009
+ if (interruptState.triggered) {
1010
+ // Skip both span emission and the collector orphan record here. The
1011
+ // tail sweep below (after `Promise.allSettled`) is the single path
1012
+ // that handles "no result message was produced" — it calls
1013
+ // `recordSkippedTool` and `emitToolResult` once per record, so any
1014
+ // work we did here would double-count.
1015
+ record.skipped = true;
1016
+ return;
1017
+ }
1018
+
1019
+ const { toolCall, tool } = record;
1020
+ let argsForExecution = toolCall.arguments as Record<string, unknown>;
1021
+ if (intentTracing) {
1022
+ const { intent, strippedArgs } = extractIntent(toolCall.arguments);
1023
+ argsForExecution = strippedArgs;
1024
+ if (intent) {
1025
+ toolCall.intent = intent;
1026
+ } else if (typeof tool?.intent === "function") { // no explicit intent: let the tool derive one from its args
1027
+ try {
1028
+ const derived = tool.intent(strippedArgs as never)?.trim();
1029
+ if (derived) {
1030
+ toolCall.intent = derived;
1031
+ }
1032
+ } catch {
1033
+ // intent function must never break tool execution
1034
+ }
1035
+ }
1036
+ }
1037
+ record.args = argsForExecution;
1038
+ record.started = true;
1039
+ stream.push({
1040
+ type: "tool_execution_start",
1041
+ toolCallId: toolCall.id,
1042
+ toolName: toolCall.name,
1043
+ args: argsForExecution,
1044
+ intent: toolCall.intent,
1045
+ });
1046
+
1047
+ const toolSpan = startExecuteToolSpan(telemetry, {
1048
+ tool,
1049
+ toolName: toolCall.name,
1050
+ toolCallId: toolCall.id,
1051
+ args: argsForExecution,
1052
+ parent: invokeAgentSpan,
1053
+ });
1054
+ if (toolSpan && toolCall.intent) {
1055
+ toolSpan.setAttribute(PiGenAIAttr.ToolCallIntent, toolCall.intent);
1056
+ }
1057
+
1058
+ let result: AgentToolResult<any> = { content: [], details: {} };
1059
+ let isError = false;
1060
+ let caughtError: unknown;
1061
+
1062
+ await runInActiveSpan(toolSpan, async () => {
1063
+ try {
1064
+ if (!tool) throw new Error(`Tool ${toolCall.name} not found`);
1065
+
1066
+ let effectiveArgs: Record<string, unknown>;
1067
+ try {
1068
+ effectiveArgs = validateToolArguments(tool, { ...toolCall, arguments: argsForExecution });
1069
+ } catch (validationError) {
1070
+ if (tool.lenientArgValidation) { // lenient tools run with the unvalidated args instead of failing
1071
+ effectiveArgs = argsForExecution;
1072
+ } else {
1073
+ throw validationError;
1074
+ }
1075
+ }
1076
+
1077
+ if (beforeToolCall) {
1078
+ const beforeResult = await beforeToolCall(
1079
+ {
1080
+ assistantMessage,
1081
+ toolCall,
1082
+ args: effectiveArgs,
1083
+ context: currentContext,
1084
+ },
1085
+ toolSignal,
1086
+ );
1087
+ if (beforeResult?.block) {
1088
+ throw new ToolCallBlockedError(beforeResult.reason);
1089
+ }
1090
+ }
1091
+ // Reflect post-hook args so emitted tool results / afterToolCall see what actually executed.
1092
+ record.args = effectiveArgs;
1093
+
1094
+ const toolContext = getToolContext
1095
+ ? getToolContext({
1096
+ batchId,
1097
+ index,
1098
+ total: toolCalls.length,
1099
+ toolCalls: toolCallInfos,
1100
+ })
1101
+ : undefined;
1102
+ const rawResult = await tool.execute(
1103
+ toolCall.id,
1104
+ transformToolCallArguments ? transformToolCallArguments(effectiveArgs, toolCall.name) : effectiveArgs,
1105
+ tool.nonAbortable ? undefined : toolSignal, // non-abortable tools receive no signal
1106
+ partialResult => {
1107
+ stream.push({
1108
+ type: "tool_execution_update",
1109
+ toolCallId: toolCall.id,
1110
+ toolName: toolCall.name,
1111
+ args: effectiveArgs,
1112
+ partialResult: coerceToolResult(partialResult).result,
1113
+ });
1114
+ },
1115
+ toolContext,
1116
+ );
1117
+ const coerced = coerceToolResult(rawResult);
1118
+ result = coerced.result;
1119
+ if (coerced.malformed || result.isError) isError = true;
1120
+ } catch (e) {
1121
+ caughtError = e; // kept so the span can report the original error object
1122
+ result = {
1123
+ content: [{ type: "text", text: e instanceof Error ? e.message : String(e) }],
1124
+ details: {},
1125
+ };
1126
+ isError = true;
1127
+ }
1128
+
1129
+ if (afterToolCall) { // runs for failed calls too; it may override content, details and isError
1130
+ try {
1131
+ const after = await afterToolCall(
1132
+ {
1133
+ assistantMessage,
1134
+ toolCall,
1135
+ args: record.args,
1136
+ result,
1137
+ isError,
1138
+ context: currentContext,
1139
+ },
1140
+ toolSignal,
1141
+ );
1142
+ if (after) {
1143
+ result = {
1144
+ content: after.content ?? result.content,
1145
+ details: after.details ?? result.details,
1146
+ isError: after.isError ?? result.isError,
1147
+ };
1148
+ isError = after.isError ?? isError;
1149
+ }
1150
+ } catch (e) {
1151
+ caughtError = e;
1152
+ result = {
1153
+ content: [{ type: "text", text: e instanceof Error ? e.message : String(e) }],
1154
+ details: {},
1155
+ };
1156
+ isError = true;
1157
+ }
1158
+ }
1159
+ });
1160
+ // If a steering interrupt fired while this tool ran, its result is replaced by the skipped placeholder.
1161
+ const interrupted = interruptState.triggered;
1162
+ if (interrupted) {
1163
+ record.skipped = true;
1164
+ emitToolResult(record, createSkippedToolResult(), true);
1165
+ } else {
1166
+ emitToolResult(record, result, isError);
1167
+ }
1168
+ // The first text block is used as the span error message only when no exception object was caught.
1169
+ const firstTextBlock = result.content?.[0];
1170
+ const errorMessageForSpan =
1171
+ caughtError === undefined && isError && firstTextBlock?.type === "text" ? firstTextBlock.text : undefined;
1172
+ const status = interrupted
1173
+ ? "aborted"
1174
+ : caughtError instanceof ToolCallBlockedError
1175
+ ? "blocked"
1176
+ : isError
1177
+ ? "error"
1178
+ : "ok";
1179
+ finishExecuteToolSpan(telemetry, toolSpan, {
1180
+ result,
1181
+ isError,
1182
+ status,
1183
+ errorMessage: errorMessageForSpan,
1184
+ errorObject: caughtError,
1185
+ toolCallId: toolCall.id,
1186
+ toolName: toolCall.name,
1187
+ });
1188
+
1189
+ await checkSteering();
1190
+ };
1191
+ // Scheduling state for the exclusive/shared chaining below.
1192
+ let lastExclusive: Promise<void> = Promise.resolve(); // most recent exclusive task; later tasks chain on it
1193
+ let sharedTasks: Promise<void>[] = []; // shared tasks queued since the last exclusive task
1194
+ const tasks: Promise<void>[] = [];
1195
+
1196
+ for (let index = 0; index < records.length; index++) {
1197
+ const record = records[index];
1198
+ const concurrency = record.tool?.concurrency ?? "shared"; // unresolved tools are scheduled as "shared"
1199
+ const start = concurrency === "exclusive" ? Promise.all([lastExclusive, ...sharedTasks]) : lastExclusive;
1200
+ const task = start.then(() => runTool(record, index));
1201
+ tasks.push(task);
1202
+ if (concurrency === "exclusive") {
1203
+ lastExclusive = task;
1204
+ sharedTasks = [];
1205
+ } else {
1206
+ sharedTasks.push(task);
1207
+ }
1208
+ }
1209
+ // NOTE(review): allSettled absorbs rejections from runTool (e.g. a throwing getSteeringMessages); tasks chained on a rejected task never run and are reported as skipped below — confirm this is intended.
1210
+ await Promise.allSettled(tasks);
1211
+ // Tail sweep: any record still lacking a result message is answered with the skipped placeholder.
1212
+ for (const record of records) {
1213
+ if (!record.toolResultMessage) {
1214
+ record.skipped = true;
1215
+ recordSkippedTool(telemetry, {
1216
+ toolCallId: record.toolCall.id,
1217
+ toolName: record.toolCall.name,
1218
+ status: "skipped",
1219
+ });
1220
+ emitToolResult(record, createSkippedToolResult(), true);
1221
+ }
1222
+ }
1223
+
1224
+ return { toolResults: emittedToolResults, steeringMessages };
1225
+ }
1226
+
1227
/**
 * Produce and announce an error tool result for a tool call that was aborted or
 * failed before it could run, so the tool_use/tool_result pairing required by
 * the API stays intact.
 *
 * Pushes a `tool_execution_start` / `tool_execution_end` pair followed by the
 * `message_start` / `message_end` pair for the resulting message.
 *
 * @param toolCall - The tool call content block being answered.
 * @param stream - Event stream that receives the execution and message events.
 * @param reason - Selects the wording: "aborted" or "error".
 * @param errorMessage - Optional detail appended to the wording after a colon.
 * @returns The error tool result message that was emitted.
 */
function createAbortedToolResult(
	toolCall: Extract<AssistantMessage["content"][number], { type: "toolCall" }>,
	stream: EventStream<AgentEvent, AgentMessage[]>,
	reason: "aborted" | "error",
	errorMessage?: string,
): ToolResultMessage {
	const { id: toolCallId, name: toolName } = toolCall;

	let headline: string;
	if (reason === "aborted") {
		headline = "Tool execution was aborted";
	} else {
		headline = "Tool execution failed due to an error";
	}
	// Without a detail the sentence is closed with a period; with one it is joined by a colon.
	const text = errorMessage ? `${headline}: ${errorMessage}` : `${headline}.`;
	const result: AgentToolResult<any> = {
		content: [{ type: "text", text }],
		details: {},
	};

	// The tool never ran, so the start and end events are emitted back to back.
	stream.push({
		type: "tool_execution_start",
		toolCallId,
		toolName,
		args: toolCall.arguments,
		intent: toolCall.intent,
	});
	stream.push({
		type: "tool_execution_end",
		toolCallId,
		toolName,
		result,
		isError: true,
	});

	const toolResultMessage: ToolResultMessage = {
		role: "toolResult",
		toolCallId,
		toolName,
		content: result.content,
		details: {},
		isError: true,
		timestamp: Date.now(),
	};

	for (const type of ["message_start", "message_end"] as const) {
		stream.push({ type, message: toolResultMessage });
	}

	return toolResultMessage;
}
1273
+
1274
/**
 * Build the placeholder result used for a tool call that was skipped because a
 * user message was queued.
 *
 * @returns A fresh result holding a single text block and empty `details`.
 */
function createSkippedToolResult(): AgentToolResult<any> {
	const skipped: AgentToolResult<any> = {
		content: [{ type: "text", text: "Skipped due to queued user message." }],
		details: {},
	};
	return skipped;
}