npm - @prometheus-ai/agent-core - Versions diffs - 0.5.0 - Mend

@prometheus-ai/agent-core 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (61) hide show

package/CHANGELOG.md +7 -0
package/README.md +473 -0
package/dist/types/agent-loop.d.ts +55 -0
package/dist/types/agent.d.ts +331 -0
package/dist/types/append-only-context.d.ts +113 -0
package/dist/types/compaction/branch-summarization.d.ts +94 -0
package/dist/types/compaction/compaction.d.ts +183 -0
package/dist/types/compaction/entries.d.ts +103 -0
package/dist/types/compaction/errors.d.ts +26 -0
package/dist/types/compaction/index.d.ts +12 -0
package/dist/types/compaction/messages.d.ts +61 -0
package/dist/types/compaction/openai.d.ts +58 -0
package/dist/types/compaction/pruning.d.ts +19 -0
package/dist/types/compaction/shake.d.ts +82 -0
package/dist/types/compaction/tool-protection.d.ts +17 -0
package/dist/types/compaction/utils.d.ts +32 -0
package/dist/types/compaction.d.ts +1 -0
package/dist/types/harmony-leak.d.ts +118 -0
package/dist/types/index.d.ts +11 -0
package/dist/types/proxy.d.ts +84 -0
package/dist/types/run-collector.d.ts +196 -0
package/dist/types/telemetry.d.ts +588 -0
package/dist/types/thinking.d.ts +17 -0
package/dist/types/types.d.ts +443 -0
package/dist/types/utils/yield.d.ts +52 -0
package/package.json +75 -0
package/src/agent-loop.ts +1418 -0
package/src/agent.ts +1236 -0
package/src/append-only-context.ts +297 -0
package/src/compaction/branch-summarization.ts +339 -0
package/src/compaction/compaction.ts +1155 -0
package/src/compaction/entries.ts +133 -0
package/src/compaction/errors.ts +31 -0
package/src/compaction/index.ts +13 -0
package/src/compaction/messages.ts +212 -0
package/src/compaction/openai.ts +552 -0
package/src/compaction/prompts/auto-handoff-threshold-focus.md +1 -0
package/src/compaction/prompts/branch-summary-context.md +5 -0
package/src/compaction/prompts/branch-summary-preamble.md +2 -0
package/src/compaction/prompts/branch-summary.md +30 -0
package/src/compaction/prompts/compaction-short-summary.md +9 -0
package/src/compaction/prompts/compaction-summary-context.md +5 -0
package/src/compaction/prompts/compaction-summary.md +38 -0
package/src/compaction/prompts/compaction-turn-prefix.md +17 -0
package/src/compaction/prompts/compaction-update-summary.md +45 -0
package/src/compaction/prompts/file-operations.md +10 -0
package/src/compaction/prompts/handoff-document.md +49 -0
package/src/compaction/prompts/summarization-system.md +3 -0
package/src/compaction/pruning.ts +99 -0
package/src/compaction/shake.ts +406 -0
package/src/compaction/tool-protection.ts +55 -0
package/src/compaction/utils.ts +185 -0
package/src/compaction.ts +1 -0
package/src/harmony-leak.ts +456 -0
package/src/index.ts +21 -0
package/src/proxy.ts +326 -0
package/src/run-collector.ts +631 -0
package/src/telemetry.ts +2020 -0
package/src/thinking.ts +19 -0
package/src/types.ts +505 -0
package/src/utils/yield.ts +146 -0

package/src/agent-loop.ts ADDED Viewed

@@ -0,0 +1,1418 @@
+/**
+ * Agent loop that works with AgentMessage throughout.
+ * Transforms to Message[] only at the LLM call boundary.
+ */
+import {
+	type AssistantMessage,
+	type AssistantMessageEvent,
+	type Context,
+	EventStream,
+	isZodSchema,
+	streamSimple,
+	type ToolResultMessage,
+	type TSchema,
+	validateToolArguments,
+	zodToWireSchema,
+} from "@prometheus-ai/ai";
+import { sanitizeText } from "@prometheus-ai/utils";
+import {
+	createHarmonyAuditEvent,
+	detectHarmonyLeakInAssistantMessage,
+	extractHarmonyRemoved,
+	type HarmonyDetection,
+	type HarmonyRecoveredToolCall,
+	isHarmonyLeakMitigationTarget,
+	signalListLabel,
+} from "./harmony-leak";
+import { type AgentRunCoverage, type AgentRunSummary, ToolCallBlockedError } from "./run-collector";
+import {
+	type AgentTelemetry,
+	failChatSpan,
+	finishChatSpan,
+	finishExecuteToolSpan,
+	finishInvokeAgentSpan,
+	fireOnRunEnd,
+	PrometheusGenAIAttr,
+	recordSkippedTool,
+	resolveTelemetry,
+	runInActiveSpan,
+	type Span,
+	startChatSpan,
+	startExecuteToolSpan,
+	startInvokeAgentSpan,
+} from "./telemetry";
+import type {
+	AgentContext,
+	AgentEvent,
+	AgentLoopConfig,
+	AgentMessage,
+	AgentTool,
+	AgentToolResult,
+	StreamFn,
+} from "./types";
+import { yieldIfDue } from "./utils/yield";
+/** Sentinel returned by the abort race in `streamAssistantResponse`. */
+const ABORTED: unique symbol = Symbol("agent-loop-aborted");
+class HarmonyLeakInterruption extends Error {
+	constructor(
+		readonly detection: HarmonyDetection,
+		readonly removed: string,
+		readonly recovered?: HarmonyRecoveredToolCall,
+	) {
+		super(`Detected GPT-5 Harmony protocol leakage (${signalListLabel(detection.signals)})`);
+		this.name = "HarmonyLeakInterruption";
+	}
+}
+/**
+ * Normalize a value coming back from `tool.execute()` (or its streaming partial-update callback)
+ * into a structurally valid {@link AgentToolResult}.
+ *
+ * The tool interface is typed, but third-party tools (MCP, extensions, user-authored AgentTools)
+ * can violate the contract at runtime. Persisting a malformed result corrupts the session file
+ * (missing `content` array → crash on reload). We coerce at the single boundary where untyped
+ * results enter the agent loop, so every downstream consumer can rely on the type.
+ */
+function coerceToolResult(raw: unknown): { result: AgentToolResult<any>; malformed: boolean } {
+	const rawObj = raw && typeof raw === "object" ? (raw as Record<string, unknown>) : null;
+	const rawContent = rawObj?.content;
+	const details = rawObj && "details" in rawObj ? rawObj.details : {};
+	// Tools may flag a non-throwing failure on the result itself (e.g. an
+	// aggregator that catches per-entry errors and synthesizes a combined
+	// result). Preserve the flag so agent-loop can surface it on the wire.
+	const explicitError = Boolean(rawObj && "isError" in rawObj && rawObj.isError);
+	if (!Array.isArray(rawContent)) {
+		return {
+			result: {
+				content: [{ type: "text", text: "Tool returned an invalid result: missing content array." }],
+				details,
+				isError: true,
+			},
+			malformed: true,
+		};
+	}
+	const content: AgentToolResult["content"] = [];
+	for (const block of rawContent) {
+		if (!block || typeof block !== "object" || !("type" in block)) continue;
+		if (block.type === "text" && typeof (block as { text?: unknown }).text === "string") {
+			content.push({ type: "text", text: sanitizeText((block as { text: string }).text) });
+		} else if (
+			block.type === "image" &&
+			typeof (block as { data?: unknown }).data === "string" &&
+			typeof (block as { mimeType?: unknown }).mimeType === "string"
+		) {
+			content.push(block as { type: "image"; data: string; mimeType: string });
+		}
+	}
+	return { result: { content, details, ...(explicitError ? { isError: true } : {}) }, malformed: false };
+}
+/**
+ * Start an agent loop with a new prompt message.
+ * The prompt is added to the context and events are emitted for it.
+ */
+export function agentLoop(
+	prompts: AgentMessage[],
+	context: AgentContext,
+	config: AgentLoopConfig,
+	signal?: AbortSignal,
+	streamFn?: StreamFn,
+): EventStream<AgentEvent, AgentMessage[]> {
+	const stream = createAgentStream();
+	(async () => {
+		const newMessages: AgentMessage[] = [...prompts];
+		const currentContext: AgentContext = {
+			...context,
+			messages: [...context.messages, ...prompts],
+		};
+		stream.push({ type: "agent_start" });
+		stream.push({ type: "turn_start" });
+		for (const prompt of prompts) {
+			stream.push({ type: "message_start", message: prompt });
+			stream.push({ type: "message_end", message: prompt });
+		}
+		try {
+			await runLoop(currentContext, newMessages, config, signal, stream, streamFn);
+		} catch (err) {
+			stream.fail(err);
+		}
+	})();
+	return stream;
+}
+/**
+ * Continue an agent loop from the current context without adding a new message.
+ * Used for retries - context already has user message or tool results.
+ *
+ * **Important:** The last message in context must convert to a `user` or `toolResult` message
+ * via `convertToLlm`. If it doesn't, the LLM provider will reject the request.
+ * This cannot be validated here since `convertToLlm` is only called once per turn.
+ */
+export function agentLoopContinue(
+	context: AgentContext,
+	config: AgentLoopConfig,
+	signal?: AbortSignal,
+	streamFn?: StreamFn,
+): EventStream<AgentEvent, AgentMessage[]> {
+	if (context.messages.length === 0) {
+		throw new Error("Cannot continue: no messages in context");
+	}
+	if (context.messages[context.messages.length - 1].role === "assistant") {
+		throw new Error("Cannot continue from message role: assistant");
+	}
+	const stream = createAgentStream();
+	(async () => {
+		const newMessages: AgentMessage[] = [];
+		const currentContext: AgentContext = { ...context };
+		stream.push({ type: "agent_start" });
+		stream.push({ type: "turn_start" });
+		try {
+			await runLoop(currentContext, newMessages, config, signal, stream, streamFn);
+		} catch (err) {
+			stream.fail(err);
+		}
+	})();
+	return stream;
+}
+function createAgentStream(): EventStream<AgentEvent, AgentMessage[]> {
+	return new EventStream<AgentEvent, AgentMessage[]>(
+		(event: AgentEvent) => event.type === "agent_end",
+		(event: AgentEvent) => (event.type === "agent_end" ? event.messages : []),
+	);
+}
+/**
+ * Build the `agent_end` event payload. When telemetry is enabled, snapshots
+ * the run collector so consumers receive {@link AgentRunSummary} +
+ * {@link AgentRunCoverage} alongside the messages without parsing OTEL spans.
+ * When telemetry is unset, returns the bare event for backwards compatibility.
+ */
+function buildAgentEndEvent(
+	messages: AgentMessage[],
+	telemetry: AgentTelemetry | undefined,
+	stepCount: number,
+): Extract<AgentEvent, { type: "agent_end" }> {
+	if (!telemetry) return { type: "agent_end", messages };
+	const snapshot = telemetry.collector.snapshot({ stepCount });
+	if (telemetry.collector.markRunEnded()) {
+		fireOnRunEnd(telemetry, snapshot.summary, snapshot.coverage);
+	}
+	return { type: "agent_end", messages, telemetry: snapshot.summary, coverage: snapshot.coverage };
+}
+/**
+ * Detailed-result handle returned by {@link agentLoopDetailed}. Adds the
+ * run-level telemetry/coverage rollup to the existing `AgentMessage[]`
+ * payload without changing the resolved type of `stream.result()`.
+ */
+export interface AgentLoopDetailedResult {
+	readonly messages: AgentMessage[];
+	readonly telemetry: AgentRunSummary | undefined;
+	readonly coverage: AgentRunCoverage | undefined;
+}
+/**
+ * Convenience wrapper over {@link agentLoop} that exposes the run-level
+ * summary + coverage alongside the messages. The returned `stream` is the
+ * same `EventStream` callers already consume; `detailed()` awaits the
+ * stream's `agent_end` event and returns the additive fields.
+ *
+ * Existing `stream.result()` semantics are preserved — it still resolves to
+ * `AgentMessage[]`. Use {@link agentLoopDetailed} when you need the rollup;
+ * use {@link agentLoop} when you do not.
+ */
+export function agentLoopDetailed(
+	prompts: AgentMessage[],
+	context: AgentContext,
+	config: AgentLoopConfig,
+	signal?: AbortSignal,
+	streamFn?: StreamFn,
+): {
+	readonly stream: EventStream<AgentEvent, AgentMessage[]>;
+	readonly detailed: () => Promise<AgentLoopDetailedResult>;
+} {
+	const capture = createDetailedCapture(config);
+	const stream = agentLoop(prompts, context, capture.config, signal, streamFn);
+	return { stream, detailed: () => capture.detailed(stream) };
+}
+/**
+ * Like {@link agentLoopDetailed} but built on top of
+ * {@link agentLoopContinue}.
+ */
+export function agentLoopContinueDetailed(
+	context: AgentContext,
+	config: AgentLoopConfig,
+	signal?: AbortSignal,
+	streamFn?: StreamFn,
+): {
+	readonly stream: EventStream<AgentEvent, AgentMessage[]>;
+	readonly detailed: () => Promise<AgentLoopDetailedResult>;
+} {
+	const capture = createDetailedCapture(config);
+	const stream = agentLoopContinue(context, capture.config, signal, streamFn);
+	return { stream, detailed: () => capture.detailed(stream) };
+}
+/**
+ * Wire an `onRunEnd` telemetry hook onto `config` so the detailed helper can
+ * capture the run summary without consuming the event stream. Preserves any
+ * existing `onRunEnd` the caller had set.
+ */
+function createDetailedCapture(config: AgentLoopConfig): {
+	readonly config: AgentLoopConfig;
+	readonly detailed: (stream: EventStream<AgentEvent, AgentMessage[]>) => Promise<AgentLoopDetailedResult>;
+} {
+	let captured: { summary: AgentRunSummary; coverage: AgentRunCoverage } | undefined;
+	const userHook = config.telemetry?.onRunEnd;
+	const wired: AgentLoopConfig = {
+		...config,
+		telemetry: {
+			...(config.telemetry ?? {}),
+			onRunEnd: (summary, coverage) => {
+				captured = { summary, coverage };
+				userHook?.(summary, coverage);
+			},
+		},
+	};
+	return {
+		config: wired,
+		detailed: async stream => {
+			const messages = await stream.result();
+			return {
+				messages,
+				telemetry: captured?.summary,
+				coverage: captured?.coverage,
+			};
+		},
+	};
+}
+function normalizeMessagesForProvider(
+	messages: Context["messages"],
+	model: AgentLoopConfig["model"],
+): Context["messages"] {
+	if (model.provider !== "cerebras") {
+		return messages;
+	}
+	let changed = false;
+	const normalized = messages.map(message => {
+		if (message.role !== "assistant" || !Array.isArray(message.content)) {
+			return message;
+		}
+		const filtered = message.content.filter(block => block.type !== "thinking");
+		if (filtered.length === message.content.length) {
+			return message;
+		}
+		changed = true;
+		return { ...message, content: filtered };
+	});
+	return changed ? normalized : messages;
+}
+export const INTENT_FIELD = "_i";
+function injectIntentIntoSchema(schema: unknown, mode: "require" | "optional" = "require"): unknown {
+	if (!schema || typeof schema !== "object" || Array.isArray(schema)) return schema;
+	const schemaRecord = schema as Record<string, unknown>;
+	const propertiesValue = schemaRecord.properties;
+	const properties =
+		propertiesValue && typeof propertiesValue === "object" && !Array.isArray(propertiesValue)
+			? (propertiesValue as Record<string, unknown>)
+			: {};
+	const requiredValue = schemaRecord.required;
+	const required = Array.isArray(requiredValue)
+		? requiredValue.filter((item): item is string => typeof item === "string")
+		: [];
+	if (INTENT_FIELD in properties) {
+		const { [INTENT_FIELD]: intentProp, ...rest } = properties;
+		const needsReorder = Object.keys(properties)[0] !== INTENT_FIELD;
+		const needsRequired = mode === "require" && !required.includes(INTENT_FIELD);
+		if (!needsReorder && !needsRequired) return schema;
+		return {
+			...schemaRecord,
+			...(needsReorder ? { properties: { [INTENT_FIELD]: intentProp, ...rest } } : {}),
+			...(needsRequired ? { required: [...required, INTENT_FIELD] } : {}),
+		};
+	}
+	return {
+		...schemaRecord,
+		properties: {
+			[INTENT_FIELD]: {
+				type: "string",
+			},
+			...properties,
+		},
+		...(mode === "require" ? { required: [...required, INTENT_FIELD] } : {}),
+	};
+}
+export function normalizeTools(tools: AgentContext["tools"], injectIntent: boolean): Context["tools"] {
+	injectIntent = injectIntent && Bun.env.PROMETHEUS_NO_INTENT !== "1";
+	return tools?.map(t => {
+		const intentMode = resolveIntentMode(t.intent);
+		let parameters: TSchema = t.parameters;
+		if (injectIntent && intentMode !== "omit") {
+			if (isZodSchema(parameters)) {
+				const wired = zodToWireSchema(parameters);
+				parameters = injectIntentIntoSchema(wired, intentMode) as TSchema;
+			} else {
+				parameters = injectIntentIntoSchema(parameters, intentMode) as TSchema;
+			}
+		}
+		const description = t.description ?? "";
+		return { ...t, parameters, description };
+	});
+}
+function resolveIntentMode(intent: AgentTool["intent"]): "require" | "optional" | "omit" {
+	if (typeof intent === "function") return "omit";
+	if (intent === "optional" || intent === "omit") return intent;
+	return "require";
+}
+function extractIntent(args: Record<string, unknown>): { intent?: string; strippedArgs: Record<string, unknown> } {
+	const { [INTENT_FIELD]: intent, ...strippedArgs } = args;
+	if (typeof intent !== "string") {
+		return { strippedArgs };
+	}
+	const trimmed = intent.trim();
+	return { intent: trimmed.length > 0 ? trimmed : undefined, strippedArgs };
+}
+/**
+ * Main loop logic shared by agentLoop and agentLoopContinue.
+ */
+async function runLoop(
+	currentContext: AgentContext,
+	newMessages: AgentMessage[],
+	config: AgentLoopConfig,
+	signal: AbortSignal | undefined,
+	stream: EventStream<AgentEvent, AgentMessage[]>,
+	streamFn?: StreamFn,
+): Promise<void> {
+	const telemetry = resolveTelemetry(config.telemetry, config.sessionId);
+	const invokeAgentSpan = startInvokeAgentSpan(telemetry, config.model);
+	const stepCounter = { count: 0 };
+	let caughtError: unknown;
+	try {
+		await runInActiveSpan(invokeAgentSpan, () =>
+			runLoopBody(
+				currentContext,
+				newMessages,
+				config,
+				signal,
+				stream,
+				telemetry,
+				invokeAgentSpan,
+				stepCounter,
+				streamFn,
+			),
+		);
+	} catch (err) {
+		caughtError = err;
+		throw err;
+	} finally {
+		finishInvokeAgentSpan(telemetry, invokeAgentSpan, {
+			stepCount: stepCounter.count,
+			errorObject: caughtError,
+		});
+	}
+}
+interface StepCounter {
+	count: number;
+}
+function normalizeMaxToolCallsPerTurn(value: number | undefined): number | undefined {
+	if (value === undefined || !Number.isFinite(value)) return undefined;
+	const normalized = Math.trunc(value);
+	return normalized > 0 ? normalized : undefined;
+}
+function cloneAssistantMessageForToolCallCap(message: AssistantMessage): AssistantMessage {
+	return {
+		...message,
+		content: message.content.map(block => {
+			if (block.type === "toolCall") {
+				return { ...block, arguments: structuredClone(block.arguments) };
+			}
+			return { ...block };
+		}),
+		stopReason: "toolUse",
+		errorMessage: undefined,
+		errorStatus: undefined,
+	};
+}
+async function runLoopBody(
+	currentContext: AgentContext,
+	newMessages: AgentMessage[],
+	config: AgentLoopConfig,
+	signal: AbortSignal | undefined,
+	stream: EventStream<AgentEvent, AgentMessage[]>,
+	telemetry: AgentTelemetry | undefined,
+	invokeAgentSpan: Span | undefined,
+	stepCounter: StepCounter,
+	streamFn?: StreamFn,
+): Promise<void> {
+	let firstTurn = true;
+	// Check for steering messages at start (user may have typed while waiting)
+	let pendingMessages: AgentMessage[] = (await config.getSteeringMessages?.()) || [];
+	let harmonyRetryAttempt = 0;
+	let harmonyTruncateResumeCount = 0;
+	// Outer loop: continues when queued follow-up messages arrive after agent would stop
+	while (true) {
+		let hasMoreToolCalls = true;
+		// Inner loop: process tool calls and steering messages
+		while (hasMoreToolCalls || pendingMessages.length > 0) {
+			// Yield at the top of each iteration to prevent busy-wait when
+			// the agent loop is executing tool calls back-to-back.
+			await yieldIfDue();
+			if (!firstTurn) {
+				stream.push({ type: "turn_start" });
+			} else {
+				firstTurn = false;
+			}
+			// Process pending messages (inject before next assistant response)
+			if (pendingMessages.length > 0) {
+				for (const message of pendingMessages) {
+					stream.push({ type: "message_start", message });
+					stream.push({ type: "message_end", message });
+					currentContext.messages.push(message);
+					newMessages.push(message);
+				}
+				pendingMessages = [];
+			}
+			// Refresh prompt/tool context from live state before each model call
+			if (config.syncContextBeforeModelCall) {
+				await config.syncContextBeforeModelCall(currentContext);
+			}
+			// Stream assistant response
+			let recovered: HarmonyRecoveredToolCall | undefined;
+			let message: AssistantMessage;
+			try {
+				message = await streamAssistantResponse(
+					currentContext,
+					config,
+					signal,
+					stream,
+					telemetry,
+					invokeAgentSpan,
+					stepCounter,
+					streamFn,
+					harmonyRetryAttempt,
+				);
+				harmonyRetryAttempt = 0;
+				harmonyTruncateResumeCount = 0;
+			} catch (err) {
+				if (!(err instanceof HarmonyLeakInterruption)) throw err;
+				if (err.recovered) {
+					if (harmonyTruncateResumeCount >= 2) {
+						await emitHarmonyAudit(config, err, "escalated", harmonyRetryAttempt);
+						throw new Error(
+							`GPT-5 Harmony leak recurred after truncate-and-resume recovery (${signalListLabel(err.detection.signals)}).`,
+						);
+					}
+					harmonyTruncateResumeCount++;
+					recovered = err.recovered;
+					message = recovered.message;
+					await emitHarmonyAudit(config, err, "truncate_resume", harmonyRetryAttempt);
+				} else {
+					if (harmonyRetryAttempt >= 2) {
+						await emitHarmonyAudit(config, err, "escalated", harmonyRetryAttempt);
+						throw new Error(
+							`GPT-5 Harmony leak persisted after ${harmonyRetryAttempt} retries (${signalListLabel(err.detection.signals)}).`,
+						);
+					}
+					await emitHarmonyAudit(config, err, "abort_retry", harmonyRetryAttempt);
+					harmonyRetryAttempt++;
+					continue;
+				}
+			}
+			newMessages.push(message);
+			let steeringMessagesFromExecution: AgentMessage[] | undefined;
+			if (message.stopReason === "error" || message.stopReason === "aborted") {
+				// Create placeholder tool results for any tool calls in the aborted message
+				// This maintains the tool_use/tool_result pairing that the API requires
+				type ToolCallContent = Extract<AssistantMessage["content"][number], { type: "toolCall" }>;
+				const toolCalls = message.content.filter((c): c is ToolCallContent => c.type === "toolCall");
+				const toolResults: ToolResultMessage[] = [];
+				for (const toolCall of toolCalls) {
+					const result = createAbortedToolResult(toolCall, stream, message.stopReason, message.errorMessage);
+					currentContext.messages.push(result);
+					newMessages.push(result);
+					toolResults.push(result);
+					// The placeholder result above keeps the API's tool_use/tool_result
+					// pairing intact, but no execute_tool span is started for these
+					// calls. Mirror the run-collector entry directly so the run
+					// summary's tool counters and `coverage.toolsInvoked` reflect
+					// what the user actually saw on the wire.
+					recordSkippedTool(telemetry, {
+						toolCallId: toolCall.id,
+						toolName: toolCall.name,
+						status: message.stopReason === "aborted" ? "aborted" : "error",
+					});
+				}
+				stream.push({ type: "turn_end", message, toolResults });
+				stream.push(buildAgentEndEvent(newMessages, telemetry, stepCounter.count));
+				stream.end(newMessages);
+				return;
+			}
+			// Run tools whenever the turn carries tool_use blocks AND was not truncated.
+			// `stop_reason` is provider metadata that never goes back on the wire, so it
+			// does not gate continuation validity: replaying a tool_use turn with the
+			// tool_results appended is accepted whether the turn ended on `tool_use` or
+			// `end_turn` (adaptive/interleaved-thinking Opus routinely emits tool calls
+			// under `end_turn`; verified against the live Anthropic API). The only
+			// continuation hazard is a thinking block carrying a stale/invalid signature,
+			// which `transformMessages` already neutralizes — it strips the signature on
+			// non-`toolUse` turns and the encoder downgrades the unsigned block to text,
+			// which the API accepts. So treat `stop` (end_turn/pause_turn) the same as
+			// `toolUse`. `length` (max_tokens) is the one reason we must NOT run: the
+			// trailing tool_use may be truncated with incomplete arguments — those calls
+			// are abandoned below. (`error`/`aborted` already returned above.)
+			type ToolCallContent = Extract<AssistantMessage["content"][number], { type: "toolCall" }>;
+			const toolCalls = message.content.filter((c): c is ToolCallContent => c.type === "toolCall");
+			const runnableStop = message.stopReason === "toolUse" || message.stopReason === "stop";
+			hasMoreToolCalls = runnableStop && toolCalls.length > 0;
+			const toolResults: ToolResultMessage[] = [];
+			if (hasMoreToolCalls) {
+				const executionResult = await executeToolCalls(
+					currentContext,
+					message,
+					signal,
+					stream,
+					config,
+					telemetry,
+					invokeAgentSpan,
+				);
+				toolResults.push(...executionResult.toolResults);
+				steeringMessagesFromExecution = executionResult.steeringMessages;
+				for (const result of toolResults) {
+					currentContext.messages.push(result);
+					newMessages.push(result);
+				}
+			} else if (toolCalls.length > 0) {
+				// Turn ended on a non-runnable reason (`length` truncation) but left
+				// toolCall blocks behind. The trailing call's arguments may be incomplete,
+				// so don't execute or continue — pair each with a placeholder result to keep
+				// the tool_use/tool_result contract valid for any later request that
+				// replays this turn. When the truncation was `length`, surface an actionable
+				// hint so the model doesn't loop by re-emitting the same oversized payload
+				// (e.g. 1000+ line `write` content blowing past the model's output cap).
+				const skipReason = message.stopReason === "length" ? "length" : "skipped";
+				for (const toolCall of toolCalls) {
+					const result = createAbortedToolResult(toolCall, stream, skipReason);
+					currentContext.messages.push(result);
+					newMessages.push(result);
+					toolResults.push(result);
+					recordSkippedTool(telemetry, {
+						toolCallId: toolCall.id,
+						toolName: toolCall.name,
+						status: "skipped",
+					});
+				}
+			}
+			stream.push({ type: "turn_end", message, toolResults });
+			pendingMessages = steeringMessagesFromExecution ?? ((await config.getSteeringMessages?.()) || []);
+		}
+		// Agent would stop here. Check for follow-up messages.
+		await config.onBeforeYield?.();
+		const followUpMessages = (await config.getFollowUpMessages?.()) || [];
+		if (followUpMessages.length > 0) {
+			// Set as pending so inner loop processes them
+			pendingMessages = followUpMessages;
+			continue;
+		}
+		// No more messages, exit
+		break;
+	}
+	stream.push(buildAgentEndEvent(newMessages, telemetry, stepCounter.count));
+	stream.end(newMessages);
+}
+async function emitHarmonyAudit(
+	config: AgentLoopConfig,
+	interruption: HarmonyLeakInterruption,
+	action: "truncate_resume" | "abort_retry" | "escalated",
+	retryN: number,
+): Promise<void> {
+	await config.onHarmonyLeak?.(
+		createHarmonyAuditEvent({
+			action,
+			detection: interruption.detection,
+			model: config.model,
+			retryN,
+			removed: interruption.removed,
+		}),
+	);
+}
+/**
+ * Stream an assistant response from the LLM.
+ * This is where AgentMessage[] gets transformed to Message[] for the LLM.
+ */
+async function streamAssistantResponse(
+	context: AgentContext,
+	config: AgentLoopConfig,
+	signal: AbortSignal | undefined,
+	stream: EventStream<AgentEvent, AgentMessage[]>,
+	telemetry: AgentTelemetry | undefined,
+	invokeAgentSpan: Span | undefined,
+	stepCounter: StepCounter,
+	streamFn?: StreamFn,
+	harmonyRetryAttempt = 0,
+): Promise<AssistantMessage> {
+	// Apply context transform if configured (AgentMessage[] → AgentMessage[])
+	let messages = context.messages;
+	if (config.transformContext) {
+		messages = await config.transformContext(messages, signal);
+	}
+	// Convert to LLM-compatible messages (AgentMessage[] → Message[])
+	const llmMessages = await config.convertToLlm(messages);
+	const normalizedMessages = normalizeMessagesForProvider(llmMessages, config.model);
+	// Build LLM context — append-only mode caches system prompt + tools
+	// AND keeps an append-only message log so prior-turn bytes are stable.
+	let llmContext: Context;
+	if (config.appendOnlyContext) {
+		config.appendOnlyContext.syncMessages(normalizedMessages);
+		llmContext = config.appendOnlyContext.build(context, { intentTracing: !!config.intentTracing });
+	} else {
+		llmContext = {
+			systemPrompt: context.systemPrompt,
+			messages: normalizedMessages,
+			tools: normalizeTools(context.tools, !!config.intentTracing),
+		};
+	}
+	const streamFunction = streamFn || streamSimple;
+	// Resolve API key (important for expiring tokens) — do this before resolving
+	// metadata so that the session-sticky credential recorded by getApiKey is
+	// visible to metadataResolver (e.g. for the correct account_uuid in metadata.user_id).
+	const resolvedApiKey =
+		(config.getApiKey ? await config.getApiKey(config.model.provider) : undefined) || config.apiKey;
+	// Re-resolve metadata after credential selection so the per-request value
+	// reflects the credential actually used, not the snapshot from AgentLoopConfig construction.
+	const resolvedMetadata = config.metadataResolver ? config.metadataResolver(config.model.provider) : config.metadata;
+	const dynamicToolChoice = config.getToolChoice?.();
+	const dynamicReasoning = config.getReasoning?.();
+	const harmonyMitigationEnabled = isHarmonyLeakMitigationTarget(config.model);
+	const harmonyAbortController = harmonyMitigationEnabled ? new AbortController() : undefined;
+	const maxToolCallsPerTurn = normalizeMaxToolCallsPerTurn(config.maxToolCallsPerTurn);
+	const toolCallCapAbortController = maxToolCallsPerTurn === undefined ? undefined : new AbortController();
+	const requestSignals: AbortSignal[] = [];
+	if (signal) requestSignals.push(signal);
+	if (harmonyAbortController) requestSignals.push(harmonyAbortController.signal);
+	if (toolCallCapAbortController) requestSignals.push(toolCallCapAbortController.signal);
+	const requestSignal =
+		requestSignals.length === 0
+			? undefined
+			: requestSignals.length === 1
+				? requestSignals[0]
+				: AbortSignal.any(requestSignals);
+	const effectiveTemperature =
+		harmonyRetryAttempt > 0 && config.temperature !== undefined ? config.temperature + 0.05 : config.temperature;
+	const effectiveToolChoice = dynamicToolChoice ?? config.toolChoice;
+	const effectiveReasoning = dynamicReasoning ?? config.reasoning;
+	const chatStepNumber = stepCounter.count;
+	stepCounter.count += 1;
+	const chatSpan = startChatSpan(telemetry, config.model, {
+		parent: invokeAgentSpan,
+		stepNumber: chatStepNumber,
+		request: {
+			maxTokens: config.maxTokens,
+			temperature: effectiveTemperature,
+			topP: config.topP,
+			topK: config.topK,
+			presencePenalty: config.presencePenalty,
+			serviceTier: config.serviceTier,
+			reasoningEffort: typeof effectiveReasoning === "string" ? effectiveReasoning : undefined,
+			toolChoice: effectiveToolChoice,
+			tools: llmContext.tools,
+			systemPrompt: llmContext.systemPrompt,
+			messages: llmContext.messages,
+		},
+	});
+	// Wrap the user-supplied onResponse so we always observe response headers
+	// for telemetry (`ChatUsageEvent.headers`, gateway auto-detection) without
+	// stealing them from the configured hook.
+	let capturedHeaders: Readonly<Record<string, string>> | undefined;
+	const userOnResponse = config.onResponse;
+	const captureOnResponse: AgentLoopConfig["onResponse"] = (response, modelInfo) => {
+		capturedHeaders = response.headers;
+		return userOnResponse?.(response, modelInfo);
+	};
+	const finishChat = async (message: AssistantMessage): Promise<void> => {
+		await finishChatSpan(telemetry, chatSpan, message, {
+			stepNumber: chatStepNumber,
+			serviceTier: config.serviceTier,
+			responseHeaders: capturedHeaders,
+			baseUrl: config.model.baseUrl,
+		});
+	};
+	try {
+		return await runInActiveSpan(chatSpan, async () => {
+			const response = await streamFunction(config.model, llmContext, {
+				...config,
+				apiKey: resolvedApiKey,
+				metadata: resolvedMetadata,
+				toolChoice: effectiveToolChoice,
+				reasoning: effectiveReasoning,
+				temperature: effectiveTemperature,
+				signal: requestSignal,
+				onResponse: captureOnResponse,
+			});
+			let partialMessage: AssistantMessage | null = null;
+			let addedPartial = false;
+			const responseIterator = response[Symbol.asyncIterator]();
+			let completedToolCalls = 0;
+			let cappedMessage: AssistantMessage | undefined;
+			let capFinalized = false;
+			const finishCappedAssistantMessage = async (): Promise<AssistantMessage | undefined> => {
+				if (!cappedMessage) return undefined;
+				responseIterator.return?.()?.catch(() => {});
+				if (!capFinalized) {
+					if (addedPartial) {
+						context.messages[context.messages.length - 1] = cappedMessage;
+					} else {
+						context.messages.push(cappedMessage);
+						stream.push({ type: "message_start", message: { ...cappedMessage } });
+					}
+					stream.push({ type: "message_end", message: cappedMessage });
+					await finishChat(cappedMessage);
+					capFinalized = true;
+				}
+				return cappedMessage;
+			};
+			// Set up a single abort race: register the abort listener once for the whole
+			// stream and reuse the same race promise for every iterator.next() instead of
+			// allocating Promise.withResolvers and add/removeEventListener per event.
+			let abortRacePromise: Promise<typeof ABORTED> | undefined;
+			let detachAbortListener: (() => void) | undefined;
+			if (requestSignal) {
+				if (requestSignal.aborted) {
+					const aborted = emitAbortedAssistantMessage(partialMessage, addedPartial, context, config, stream);
+					await finishChat(aborted);
+					return aborted;
+				}
+				const { promise, resolve } = Promise.withResolvers<typeof ABORTED>();
+				const onAbort = () => resolve(ABORTED);
+				requestSignal.addEventListener("abort", onAbort, { once: true });
+				abortRacePromise = promise;
+				detachAbortListener = () => requestSignal.removeEventListener("abort", onAbort);
+			}
+			try {
+				while (true) {
+					let next: IteratorResult<AssistantMessageEvent>;
+					if (abortRacePromise) {
+						const result = await Promise.race([responseIterator.next(), abortRacePromise]);
+						if (result === ABORTED) {
+							if (toolCallCapAbortController?.signal.aborted) {
+								const capped = await finishCappedAssistantMessage();
+								if (capped) return capped;
+							}
+							responseIterator.return?.()?.catch(() => {});
+							const aborted = emitAbortedAssistantMessage(partialMessage, addedPartial, context, config, stream);
+							await finishChat(aborted);
+							return aborted;
+						}
+						next = result;
+					} else {
+						next = await responseIterator.next();
+					}
+					if (requestSignal?.aborted) {
+						if (toolCallCapAbortController?.signal.aborted) {
+							const capped = await finishCappedAssistantMessage();
+							if (capped) return capped;
+						}
+						const aborted = emitAbortedAssistantMessage(partialMessage, addedPartial, context, config, stream);
+						await finishChat(aborted);
+						return aborted;
+					}
+					if (next.done) break;
+					const event = next.value;
+					// Yield to the event loop periodically to prevent busy-wait
+					// when the LLM is streaming chunks faster than the loop can rest.
+					await yieldIfDue();
+					switch (event.type) {
+						case "start":
+							partialMessage = event.partial;
+							context.messages.push(partialMessage);
+							addedPartial = true;
+							stream.push({ type: "message_start", message: { ...partialMessage } });
+							break;
+						case "text_start":
+						case "text_delta":
+						case "text_end":
+						case "thinking_start":
+						case "thinking_delta":
+						case "thinking_end":
+						case "toolcall_start":
+						case "toolcall_delta":
+						case "toolcall_end":
+							if (partialMessage) {
+								partialMessage = event.partial;
+								context.messages[context.messages.length - 1] = partialMessage;
+								config.onAssistantMessageEvent?.(partialMessage, event);
+								if (signal?.aborted) {
+									continue;
+								}
+								stream.push({
+									type: "message_update",
+									assistantMessageEvent: event,
+									message: { ...partialMessage },
+								});
+								if (event.type === "toolcall_end" && maxToolCallsPerTurn !== undefined) {
+									completedToolCalls++;
+									if (completedToolCalls >= maxToolCallsPerTurn) {
+										cappedMessage = cloneAssistantMessageForToolCallCap(partialMessage);
+										toolCallCapAbortController?.abort();
+										const capped = await finishCappedAssistantMessage();
+										if (capped) return capped;
+									}
+								}
+							}
+							break;
+						case "done":
+						case "error": {
+							const finalMessage = await response.result();
+							if (harmonyMitigationEnabled) {
+								const detection = detectHarmonyLeakInAssistantMessage(finalMessage);
+								if (detection) {
+									const removed = extractHarmonyRemoved(finalMessage, detection);
+									if (addedPartial) {
+										context.messages.pop();
+										addedPartial = false;
+									}
+									throw new HarmonyLeakInterruption(detection, removed);
+								}
+							}
+							if (addedPartial) {
+								context.messages[context.messages.length - 1] = finalMessage;
+							} else {
+								context.messages.push(finalMessage);
+							}
+							if (!addedPartial) {
+								stream.push({ type: "message_start", message: { ...finalMessage } });
+							}
+							stream.push({ type: "message_end", message: finalMessage });
+							await finishChat(finalMessage);
+							return finalMessage;
+						}
+					}
+				}
+			} finally {
+				detachAbortListener?.();
+			}
+			const trailing = await response.result();
+			if (harmonyMitigationEnabled) {
+				const detection = detectHarmonyLeakInAssistantMessage(trailing);
+				if (detection) {
+					if (addedPartial) {
+						context.messages.pop();
+						addedPartial = false;
+					}
+					throw new HarmonyLeakInterruption(detection, extractHarmonyRemoved(trailing, detection));
+				}
+			}
+			await finishChat(trailing);
+			return trailing;
+		});
+	} catch (err) {
+		failChatSpan(telemetry, chatSpan, {
+			errorObject: err,
+			responseHeaders: capturedHeaders,
+			baseUrl: config.model.baseUrl,
+		});
+		throw err;
+	}
+}
+function emitAbortedAssistantMessage(
+	partialMessage: AssistantMessage | null,
+	addedPartial: boolean,
+	context: AgentContext,
+	config: AgentLoopConfig,
+	stream: EventStream<AgentEvent, AgentMessage[]>,
+): AssistantMessage {
+	const errorMessage = "Request was aborted";
+	const abortedMessage: AssistantMessage = partialMessage
+		? { ...partialMessage, stopReason: "aborted", errorMessage }
+		: {
+				role: "assistant",
+				content: [],
+				api: config.model.api,
+				provider: config.model.provider,
+				model: config.model.id,
+				usage: {
+					input: 0,
+					output: 0,
+					cacheRead: 0,
+					cacheWrite: 0,
+					totalTokens: 0,
+					cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
+				},
+				stopReason: "aborted",
+				errorMessage,
+				timestamp: Date.now(),
+			};
+	if (addedPartial) {
+		context.messages[context.messages.length - 1] = abortedMessage;
+	} else {
+		context.messages.push(abortedMessage);
+		stream.push({ type: "message_start", message: { ...abortedMessage } });
+	}
+	stream.push({ type: "message_end", message: abortedMessage });
+	return abortedMessage;
+}
+/**
+ * Execute tool calls from an assistant message.
+ */
+async function executeToolCalls(
+	currentContext: AgentContext,
+	assistantMessage: AssistantMessage,
+	signal: AbortSignal | undefined,
+	stream: EventStream<AgentEvent, AgentMessage[]>,
+	config: AgentLoopConfig,
+	telemetry: AgentTelemetry | undefined,
+	invokeAgentSpan: Span | undefined,
+): Promise<{ toolResults: ToolResultMessage[]; steeringMessages?: AgentMessage[] }> {
+	const tools = currentContext.tools;
+	const {
+		getSteeringMessages,
+		interruptMode = "immediate",
+		getToolContext,
+		transformToolCallArguments,
+		intentTracing,
+		beforeToolCall,
+		afterToolCall,
+	} = config;
+	type ToolCallContent = Extract<AssistantMessage["content"][number], { type: "toolCall" }>;
+	const toolCalls = assistantMessage.content.filter((c): c is ToolCallContent => c.type === "toolCall");
+	const emittedToolResults: ToolResultMessage[] = [];
+	const toolCallInfos = toolCalls.map(call => ({ id: call.id, name: call.name }));
+	const batchId = `${assistantMessage.timestamp ?? Date.now()}_${toolCalls[0]?.id ?? "batch"}`;
+	const shouldInterruptImmediately = interruptMode !== "wait";
+	const steeringAbortController = new AbortController();
+	const toolSignal = signal
+		? AbortSignal.any([signal, steeringAbortController.signal])
+		: steeringAbortController.signal;
+	const interruptState = { triggered: false };
+	let steeringMessages: AgentMessage[] | undefined;
+	let steeringCheck: Promise<void> | null = null;
+	const records = toolCalls.map(toolCall => ({
+		toolCall,
+		// Tools emitted via OpenAI's custom-tool path (e.g. `apply_patch` on GPT-5)
+		// come back under their wire-level name, which may differ from the
+		// harness-internal `name`. Match on either, preferring `name` for
+		// determinism if both somehow collide.
+		tool:
+			tools?.find(t => t.name === toolCall.name) ??
+			tools?.find(t => t.customWireName !== undefined && t.customWireName === toolCall.name),
+		args: toolCall.arguments as Record<string, unknown>,
+		started: false,
+		result: undefined as AgentToolResult<any> | undefined,
+		isError: false,
+		skipped: false,
+		toolResultMessage: undefined as ToolResultMessage | undefined,
+		resultEmitted: false,
+	}));
+	const checkSteering = async (): Promise<void> => {
+		if (!shouldInterruptImmediately || !getSteeringMessages || interruptState.triggered) {
+			return;
+		}
+		if (steeringCheck) {
+			await steeringCheck;
+			return;
+		}
+		steeringCheck = (async () => {
+			const steering = await getSteeringMessages();
+			if (steering.length > 0) {
+				steeringMessages = steering;
+				interruptState.triggered = true;
+				steeringAbortController.abort();
+			}
+		})().finally(() => {
+			steeringCheck = null;
+		});
+		await steeringCheck;
+	};
+	const emitToolResult = (record: (typeof records)[number], result: AgentToolResult<any>, isError: boolean): void => {
+		if (record.resultEmitted) return;
+		const { toolCall } = record;
+		if (!record.started) {
+			stream.push({
+				type: "tool_execution_start",
+				toolCallId: toolCall.id,
+				toolName: toolCall.name,
+				args: record.args,
+				intent: toolCall.intent,
+			});
+		}
+		stream.push({
+			type: "tool_execution_end",
+			toolCallId: toolCall.id,
+			toolName: toolCall.name,
+			result,
+			isError,
+		});
+		const toolResultMessage: ToolResultMessage = {
+			role: "toolResult",
+			toolCallId: toolCall.id,
+			toolName: toolCall.name,
+			content: result.content,
+			details: result.details,
+			isError,
+			timestamp: Date.now(),
+		};
+		record.result = result;
+		record.isError = isError;
+		record.toolResultMessage = toolResultMessage;
+		record.resultEmitted = true;
+		emittedToolResults.push(toolResultMessage);
+		stream.push({ type: "message_start", message: toolResultMessage });
+		stream.push({ type: "message_end", message: toolResultMessage });
+	};
+	const runTool = async (record: (typeof records)[number], index: number): Promise<void> => {
+		if (interruptState.triggered) {
+			// Skip both span emission and the collector orphan record here. The
+			// tail sweep below (after `Promise.allSettled`) is the single path
+			// that handles "no result message was produced" — it calls
+			// `recordSkippedTool` and `emitToolResult` once per record, so any
+			// work we did here would double-count.
+			record.skipped = true;
+			return;
+		}
+		const { toolCall, tool } = record;
+		let argsForExecution = toolCall.arguments as Record<string, unknown>;
+		if (intentTracing) {
+			const { intent, strippedArgs } = extractIntent(toolCall.arguments);
+			argsForExecution = strippedArgs;
+			if (intent) {
+				toolCall.intent = intent;
+			} else if (typeof tool?.intent === "function") {
+				try {
+					const derived = tool.intent(strippedArgs as never)?.trim();
+					if (derived) {
+						toolCall.intent = derived;
+					}
+				} catch {
+					// intent function must never break tool execution
+				}
+			}
+		}
+		record.args = argsForExecution;
+		record.started = true;
+		stream.push({
+			type: "tool_execution_start",
+			toolCallId: toolCall.id,
+			toolName: toolCall.name,
+			args: argsForExecution,
+			intent: toolCall.intent,
+		});
+		const toolSpan = startExecuteToolSpan(telemetry, {
+			tool,
+			toolName: toolCall.name,
+			toolCallId: toolCall.id,
+			args: argsForExecution,
+			parent: invokeAgentSpan,
+		});
+		if (toolSpan && toolCall.intent) {
+			toolSpan.setAttribute(PrometheusGenAIAttr.ToolCallIntent, toolCall.intent);
+		}
+		let result: AgentToolResult<any> = { content: [], details: {} };
+		let isError = false;
+		let caughtError: unknown;
+		await runInActiveSpan(toolSpan, async () => {
+			try {
+				if (!tool) throw new Error(`Tool ${toolCall.name} not found`);
+				let effectiveArgs: Record<string, unknown>;
+				try {
+					effectiveArgs = validateToolArguments(tool, { ...toolCall, arguments: argsForExecution });
+				} catch (validationError) {
+					if (tool.lenientArgValidation) {
+						effectiveArgs = argsForExecution;
+					} else {
+						throw validationError;
+					}
+				}
+				if (beforeToolCall) {
+					const beforeResult = await beforeToolCall(
+						{
+							assistantMessage,
+							toolCall,
+							args: effectiveArgs,
+							context: currentContext,
+						},
+						toolSignal,
+					);
+					if (beforeResult?.block) {
+						throw new ToolCallBlockedError(beforeResult.reason);
+					}
+				}
+				// Reflect post-hook args so emitted tool results / afterToolCall see what actually executed.
+				record.args = effectiveArgs;
+				const toolContext = getToolContext
+					? getToolContext({
+							batchId,
+							index,
+							total: toolCalls.length,
+							toolCalls: toolCallInfos,
+						})
+					: undefined;
+				const rawResult = await tool.execute(
+					toolCall.id,
+					transformToolCallArguments ? transformToolCallArguments(effectiveArgs, toolCall.name) : effectiveArgs,
+					tool.nonAbortable ? undefined : toolSignal,
+					partialResult => {
+						stream.push({
+							type: "tool_execution_update",
+							toolCallId: toolCall.id,
+							toolName: toolCall.name,
+							args: effectiveArgs,
+							partialResult: coerceToolResult(partialResult).result,
+						});
+					},
+					toolContext,
+				);
+				const coerced = coerceToolResult(rawResult);
+				result = coerced.result;
+				if (coerced.malformed || result.isError) isError = true;
+			} catch (e) {
+				caughtError = e;
+				result = {
+					content: [{ type: "text", text: e instanceof Error ? e.message : String(e) }],
+					details: {},
+				};
+				isError = true;
+			}
+			if (afterToolCall) {
+				try {
+					const after = await afterToolCall(
+						{
+							assistantMessage,
+							toolCall,
+							args: record.args,
+							result,
+							isError,
+							context: currentContext,
+						},
+						toolSignal,
+					);
+					if (after) {
+						result = {
+							content: after.content ?? result.content,
+							details: after.details ?? result.details,
+							isError: after.isError ?? result.isError,
+						};
+						isError = after.isError ?? isError;
+					}
+				} catch (e) {
+					caughtError = e;
+					result = {
+						content: [{ type: "text", text: e instanceof Error ? e.message : String(e) }],
+						details: {},
+					};
+					isError = true;
+				}
+			}
+		});
+		const interrupted = interruptState.triggered;
+		if (interrupted) {
+			record.skipped = true;
+			emitToolResult(record, createSkippedToolResult(), true);
+		} else {
+			emitToolResult(record, result, isError);
+		}
+		const firstTextBlock = result.content?.[0];
+		const errorMessageForSpan =
+			caughtError === undefined && isError && firstTextBlock?.type === "text" ? firstTextBlock.text : undefined;
+		const status = interrupted
+			? "aborted"
+			: caughtError instanceof ToolCallBlockedError
+				? "blocked"
+				: isError
+					? "error"
+					: "ok";
+		finishExecuteToolSpan(telemetry, toolSpan, {
+			result,
+			isError,
+			status,
+			errorMessage: errorMessageForSpan,
+			errorObject: caughtError,
+			toolCallId: toolCall.id,
+			toolName: toolCall.name,
+		});
+		await checkSteering();
+	};
+	let lastExclusive: Promise<void> = Promise.resolve();
+	let sharedTasks: Promise<void>[] = [];
+	const tasks: Promise<void>[] = [];
+	for (let index = 0; index < records.length; index++) {
+		const record = records[index];
+		const concurrency = record.tool?.concurrency ?? "shared";
+		const start = concurrency === "exclusive" ? Promise.all([lastExclusive, ...sharedTasks]) : lastExclusive;
+		const task = start.then(() => runTool(record, index));
+		tasks.push(task);
+		if (concurrency === "exclusive") {
+			lastExclusive = task;
+			sharedTasks = [];
+		} else {
+			sharedTasks.push(task);
+		}
+	}
+	await Promise.allSettled(tasks);
+	// Yield after batch tool execution to let GC and I/O catch up,
+	// especially when tool results are large (e.g. bash output).
+	await yieldIfDue();
+	for (const record of records) {
+		if (!record.toolResultMessage) {
+			record.skipped = true;
+			recordSkippedTool(telemetry, {
+				toolCallId: record.toolCall.id,
+				toolName: record.toolCall.name,
+				status: "skipped",
+			});
+			emitToolResult(record, createSkippedToolResult(), true);
+		}
+	}
+	return { toolResults: emittedToolResults, steeringMessages };
+}
+/**
+ * Create a tool result for a tool call that was aborted or errored before execution.
+ * Maintains the tool_use/tool_result pairing required by the API.
+ */
+function createAbortedToolResult(
+	toolCall: Extract<AssistantMessage["content"][number], { type: "toolCall" }>,
+	stream: EventStream<AgentEvent, AgentMessage[]>,
+	reason: "aborted" | "error" | "skipped" | "length",
+	errorMessage?: string,
+): ToolResultMessage {
+	const message =
+		reason === "aborted"
+			? "Tool execution was aborted"
+			: reason === "length"
+				? "Tool call was not executed because the assistant hit its output token limit (stop_reason: length) before the arguments could complete; the recorded arguments are truncated and unsafe to run. Do NOT retry by re-emitting the same large payload — split the work into several smaller tool calls (e.g. for `write`/`edit`, write the first chunk then append the rest with subsequent `edit` insert ops, or break the file into multiple `write` targets)"
+				: reason === "skipped"
+					? "Tool call was not executed because the assistant ended its turn"
+					: "Tool execution failed due to an error";
+	const result: AgentToolResult<any> = {
+		content: [{ type: "text", text: errorMessage ? `${message}: ${errorMessage}` : `${message}.` }],
+		details: {},
+	};
+	stream.push({
+		type: "tool_execution_start",
+		toolCallId: toolCall.id,
+		toolName: toolCall.name,
+		args: toolCall.arguments,
+		intent: toolCall.intent,
+	});
+	stream.push({
+		type: "tool_execution_end",
+		toolCallId: toolCall.id,
+		toolName: toolCall.name,
+		result,
+		isError: true,
+	});
+	const toolResultMessage: ToolResultMessage = {
+		role: "toolResult",
+		toolCallId: toolCall.id,
+		toolName: toolCall.name,
+		content: result.content,
+		details: {},
+		isError: true,
+		timestamp: Date.now(),
+	};
+	stream.push({ type: "message_start", message: toolResultMessage });
+	stream.push({ type: "message_end", message: toolResultMessage });
+	return toolResultMessage;
+}
+function createSkippedToolResult(): AgentToolResult<any> {
+	return {
+		content: [{ type: "text", text: "Skipped due to queued user message." }],
+		details: {},
+	};
+}