npm - @rudderjs/ai - Versions diffs - 1.0.1 → 1.1.0 - Mend

@rudderjs/ai 1.0.1 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (24) hide show

package/boost/guidelines.md +1 -0
package/dist/agent.d.ts +21 -0
package/dist/agent.d.ts.map +1 -1
package/dist/agent.js +770 -530
package/dist/agent.js.map +1 -1
package/dist/fake.d.ts +60 -2
package/dist/fake.d.ts.map +1 -1
package/dist/fake.js +80 -1
package/dist/fake.js.map +1 -1
package/dist/index.d.ts +1 -0
package/dist/index.d.ts.map +1 -1
package/dist/index.js.map +1 -1
package/dist/observers.d.ts +24 -0
package/dist/observers.d.ts.map +1 -1
package/dist/observers.js.map +1 -1
package/dist/providers/anthropic.js +2 -2
package/dist/providers/anthropic.js.map +1 -1
package/dist/providers/google.js +5 -0
package/dist/providers/google.js.map +1 -1
package/dist/providers/openai.js +2 -2
package/dist/providers/openai.js.map +1 -1
package/dist/types.d.ts +43 -0
package/dist/types.d.ts.map +1 -1
package/package.json +3 -3

package/dist/agent.js CHANGED Viewed

@@ -13,14 +13,17 @@ function _buildObserverSteps(steps, modelString) {
         model: modelString,
         tokens: { prompt: step.usage.promptTokens, completion: step.usage.completionTokens, total: step.usage.totalTokens },
         finishReason: step.finishReason,
-        toolCalls: step.toolCalls.map(tc => ({
-            id: tc.id,
-            name: tc.name,
-            args: tc.arguments,
-            result: step.toolResults.find(r => r.toolCallId === tc.id)?.result,
-            duration: 0,
-            needsApproval: false,
-        })),
+        toolCalls: step.toolCalls.map(tc => {
+            const tr = step.toolResults.find(r => r.toolCallId === tc.id);
+            return {
+                id: tc.id,
+                name: tc.name,
+                args: tc.arguments,
+                result: tr?.result,
+                duration: tr?.duration ?? 0,
+                needsApproval: false,
+            };
+        }),
     }));
 }
 // ─── Stop Condition Combinators ──────────────────────────
@@ -51,6 +54,13 @@ export class Agent {
     temperature() { return undefined; }
     /** Max tokens for response. This base implementation returns `undefined`. */
     maxTokens() { return undefined; }
+    /**
+     * Default for `AgentPromptOptions.parallelTools`. When `true` (default),
+     * multiple tool calls within a single step run their `execute()` functions
+     * concurrently. Override on a subclass to flip the default for an agent
+     * whose tools share non-idempotent state. Per-call options still win.
+     *
+     * @returns `true` in this base implementation.
+     */
+    parallelTools() { return true; }
     /** Run the agent with a prompt (non-streaming) */
     async prompt(input, options) {
         return runAgentLoop(this, input, options);
@@ -284,8 +294,587 @@ function buildMiddlewareConfig(messages, a) {
         config.maxTokens = maxTok;
     return config;
 }
-// ─── Agent Loop (non-streaming) ──────────────────────────
-async function runAgentLoop(a, input, options) {
+/**
+ * Iterate the failover model list and invoke `call` against each provider
+ * adapter until one succeeds. Mutates `loopCtx.failoverAttempts` so the
+ * observer event reflects the real number of attempts. A caller-supplied
+ * `AbortSignal` short-circuits — abort errors propagate immediately rather
+ * than triggering the next failover model.
+ *
+ * Candidate order is `currentModel` first, then every entry of
+ * `loopCtx.agent.failover()` that is not equal to `currentModel`.
+ *
+ * @param loopCtx      Shared loop state. Reads `agent`, `messages`,
+ *                     `toolSchemas` and `options`; increments
+ *                     `failoverAttempts` once per failed candidate.
+ * @param currentModel Model string to try first.
+ * @param call         Invoked as `call(adapter, modelId, reqOptions)`; its
+ *                     resolved value is returned as-is.
+ * @returns The value produced by `call` for the first candidate that succeeds.
+ * @throws The signal's `reason` when `options.signal` is aborted at the time a
+ *         candidate fails; otherwise the error of the last candidate once
+ *         every candidate has failed.
+ */
+async function runFailover(loopCtx, currentModel, call) {
+    const failoverModels = [currentModel, ...loopCtx.agent.failover().filter(m => m !== currentModel)];
+    let lastError;
+    for (const tryModel of failoverModels) {
+        try {
+            const adapter = AiRegistry.resolve(tryModel);
+            const [, modelId] = AiRegistry.parseModelString(tryModel);
+            const reqOptions = {
+                model: modelId,
+                messages: loopCtx.messages,
+                tools: loopCtx.toolSchemas.length > 0 ? loopCtx.toolSchemas : undefined,
+                temperature: loopCtx.agent.temperature(),
+                maxTokens: loopCtx.agent.maxTokens(),
+                signal: loopCtx.options?.signal,
+            };
+            return await call(adapter, modelId, reqOptions);
+        }
+        catch (err) {
+            // If the abort came from the caller, don't try the next failover
+            // model — re-throw so `prompt()` / the stream rejects immediately.
+            if (loopCtx.options?.signal?.aborted)
+                throw loopCtx.options.signal.reason;
+            lastError = err instanceof Error ? err : new Error(String(err));
+            loopCtx.failoverAttempts++;
+            // Deliberately no "was this the last model?" test here. Comparing
+            // `tryModel` by value against the final entry fired too early when
+            // `failover()` listed the same model twice, skipping every
+            // candidate in between. Exhaustion is handled after the loop.
+        }
+    }
+    // Every candidate failed: surface the most recent provider error.
+    throw lastError ?? new Error('No provider available');
+}
+/**
+ * Publish an `agent.failed` event built from the shared loop state.
+ * Does nothing when `_getAiObservers()` returns a falsy value.
+ *
+ * @param loopCtx   Shared loop state (agent, model, steps, usage, timing).
+ * @param err       The failure. An `Error` contributes its `message`; any
+ *                  other value is stringified.
+ * @param streaming Forwarded unchanged on the event.
+ */
+function emitObserverFailed(loopCtx, err, streaming) {
+    const observers = _getAiObservers();
+    if (observers) {
+        const { agent, modelString, providerName, totalUsage } = loopCtx;
+        // A caller-supplied message history means there is no single prompt string.
+        const promptText = loopCtx.options?.messages ? '' : loopCtx.input;
+        const event = {
+            kind: 'agent.failed',
+            agentName: agent.constructor.name,
+            model: modelString,
+            provider: providerName,
+            input: promptText,
+            output: '',
+            steps: _buildObserverSteps(loopCtx.steps, modelString),
+            tokens: {
+                prompt: totalUsage.promptTokens,
+                completion: totalUsage.completionTokens,
+                total: totalUsage.totalTokens,
+            },
+            duration: Math.round(performance.now() - loopCtx.loopStart),
+            finishReason: 'error',
+            streaming,
+            conversationId: null,
+            failoverAttempts: loopCtx.failoverAttempts,
+            error: err instanceof Error ? err.message : String(err),
+        };
+        observers.emit(event);
+    }
+}
+/**
+ * Publish an `agent.step.completed` event for the step most recently pushed
+ * onto `loopCtx.steps`. The step payload is produced by the same
+ * `_buildObserverSteps` mapping the terminal events use, so its shape matches
+ * the entries of their `steps[]` arrays.
+ *
+ * Does nothing when there is no observer registry, no step yet, or the
+ * mapping yields no entry.
+ *
+ * @param loopCtx   Shared loop state.
+ * @param iteration Zero-based loop counter; reported as `iteration + 1`.
+ * @param streaming Forwarded unchanged on the event.
+ */
+function emitObserverStepCompleted(loopCtx, iteration, streaming) {
+    const observers = _getAiObservers();
+    if (!observers) {
+        return;
+    }
+    const { steps, modelString, totalUsage } = loopCtx;
+    const latest = steps[steps.length - 1];
+    if (!latest) {
+        return;
+    }
+    // Map only the newest step; a one-element array is all the mapper needs.
+    const [stepEvent] = _buildObserverSteps([latest], modelString);
+    if (!stepEvent) {
+        return;
+    }
+    // The mapper numbers steps relative to the array it was handed, so stamp
+    // the run-wide step number onto the payload instead.
+    const stepNumber = iteration + 1;
+    stepEvent.iteration = stepNumber;
+    const event = {
+        kind: 'agent.step.completed',
+        agentName: loopCtx.agent.constructor.name,
+        model: modelString,
+        provider: loopCtx.providerName,
+        iteration: stepNumber,
+        step: stepEvent,
+        tokens: {
+            prompt: totalUsage.promptTokens,
+            completion: totalUsage.completionTokens,
+            total: totalUsage.totalTokens,
+        },
+        duration: Math.round(performance.now() - loopCtx.loopStart),
+        streaming,
+        conversationId: null,
+    };
+    observers.emit(event);
+}
+/**
+ * Publish an `agent.completed` event built from the shared loop state.
+ * Does nothing when `_getAiObservers()` returns a falsy value.
+ *
+ * @param loopCtx   Shared loop state (agent, model, steps, usage, timing).
+ * @param result    The final response; supplies `text` and, when present,
+ *                  `finishReason`.
+ * @param streaming Forwarded unchanged on the event.
+ */
+function emitObserverCompleted(loopCtx, result, streaming) {
+    const observers = _getAiObservers();
+    if (observers) {
+        const { agent, modelString, providerName, totalUsage, steps } = loopCtx;
+        // A caller-supplied message history means there is no single prompt string.
+        const promptText = loopCtx.options?.messages ? '' : loopCtx.input;
+        const finalStep = steps[steps.length - 1];
+        const event = {
+            kind: 'agent.completed',
+            agentName: agent.constructor.name,
+            model: modelString,
+            provider: providerName,
+            input: promptText,
+            output: result.text,
+            steps: _buildObserverSteps(steps, modelString),
+            tokens: {
+                prompt: totalUsage.promptTokens,
+                completion: totalUsage.completionTokens,
+                total: totalUsage.totalTokens,
+            },
+            duration: Math.round(performance.now() - loopCtx.loopStart),
+            // Prefer the response's own reason, then the last step's, then 'stop'.
+            finishReason: result.finishReason ?? finalStep?.finishReason ?? 'stop',
+            streaming,
+            conversationId: null,
+            failoverAttempts: loopCtx.failoverAttempts,
+        };
+        observers.emit(event);
+    }
+}
+/**
+ * Assemble the final `AgentResponse` from the accumulated loop state.
+ *
+ * `text` comes from the last step's message (empty string when no step was
+ * recorded). `finishReason`, `pendingClientToolCalls`,
+ * `pendingApprovalToolCall` and `resumedToolMessages` are attached only when
+ * the corresponding loop state is set / non-empty.
+ *
+ * @param loopCtx Shared loop state.
+ * @returns The response object.
+ */
+function buildAgentResponse(loopCtx) {
+    const { steps, totalUsage, loopFinishReason, pendingClientToolCalls, pendingApprovalToolCall, resumedToolMessages, } = loopCtx;
+    const finalStep = steps[steps.length - 1];
+    return {
+        text: finalStep ? getMessageText(finalStep.message.content) : '',
+        steps,
+        usage: totalUsage,
+        // Optional fields: spread in only when they carry information.
+        ...(loopFinishReason ? { finishReason: loopFinishReason } : {}),
+        ...(pendingClientToolCalls.length > 0 ? { pendingClientToolCalls } : {}),
+        ...(pendingApprovalToolCall ? { pendingApprovalToolCall } : {}),
+        ...(resumedToolMessages.length > 0 ? { resumedToolMessages } : {}),
+    };
+}
+/**
+ * Run the tool phase of one agent step.
+ *
+ * Pushes `assistantMessage` onto the shared message list, delegates to the
+ * parallel or serial runner, then fires the `onToolPhaseComplete` middleware
+ * hook when any middleware is registered. Chunks yielded by the chosen
+ * runner (`tool-call` → `tool-update*` → `tool-result`) are passed straight
+ * through; non-streaming callers can drive the generator and ignore them,
+ * because the side effects on `loopCtx` happen either way.
+ *
+ * @param loopCtx          Shared loop state.
+ * @param toolCalls        Tool calls requested in this step.
+ * @param assistantMessage Assistant message to record before any tool runs.
+ * @returns The step's `ToolResult[]` (the generator's return value).
+ */
+async function* executeToolPhase(loopCtx, toolCalls, assistantMessage) {
+    const { messages: history, middlewares: chain, options: callOptions, ctx: mwCtx } = loopCtx;
+    const collected = [];
+    history.push(assistantMessage);
+    // The per-call option wins; otherwise fall back to the agent-level default.
+    const wantsParallel = callOptions?.parallelTools ?? loopCtx.agent.parallelTools();
+    // A single call gains nothing from parallelism, and the serial runner
+    // streams its `tool-update` chunks live, so it always goes serial.
+    const runPhase = wantsParallel && toolCalls.length > 1
+        ? runToolPhaseParallel
+        : runToolPhaseSerial;
+    yield* runPhase(loopCtx, toolCalls, collected);
+    // onToolPhaseComplete
+    if (chain.length > 0) {
+        await runSequential(chain, 'onToolPhaseComplete', mwCtx);
+    }
+    return collected;
+}
+/**
+ * Serial tool execution — the original behavior. Runs each tool call's
+ * prelude (approval, before-middleware, validation) and `execute()`
+ * one-after-another, streaming `tool-update` chunks live as the tool
+ * emits them.
+ *
+ * Per-call handling, in order:
+ *  - unknown tool name: records an error string, yields only `tool-result`.
+ *  - tool without `execute` (client tool): either parks the call on
+ *    `loopCtx.pendingClientToolCalls` (mode `stop-on-client-tool`) or records
+ *    a placeholder result.
+ *  - approval `rejected`: records a rejection object, yields only `tool-result`.
+ *  - approval `pending`: stores the call on `loopCtx.pendingApprovalToolCall`
+ *    and stops the whole loop; later calls are not processed.
+ *  - `onBeforeToolCall` may skip (result supplied by middleware), abort
+ *    (stops the loop) or transform the arguments.
+ *  - validation failure: yields `tool-call` then `tool-result` with the error.
+ *  - otherwise the tool runs; thrown errors become an `Error: …` string result.
+ *
+ * @param loopCtx     Shared loop state. Reads `messages`, `middlewares`,
+ *                    `toolMap`, `options`, `ctx`; may set
+ *                    `pendingClientToolCalls`, `pendingApprovalToolCall`,
+ *                    `loopFinishReason`, `stopForClientTools`, `stopForApproval`.
+ * @param toolCalls   Tool calls to process, in order.
+ * @param toolResults Output array; results are appended in place.
+ *
+ * NOTE(review): the `try` below also covers `applyToModelOutput`, the
+ * `tool-result` yield and `runOnAfterToolCall`. If any of those can throw
+ * after the result was already pushed, the `catch` would record a second
+ * result and tool message for the same `toolCallId` — confirm whether that
+ * is reachable.
+ * NOTE(review): `runOnChunk` is used without `await`; presumably it is
+ * synchronous — verify.
+ */
+async function* runToolPhaseSerial(loopCtx, toolCalls, toolResults) {
+    const { messages, middlewares, toolMap, options, ctx } = loopCtx;
+    for (const tc of toolCalls) {
+        const tool = toolMap.get(tc.name);
+        if (!tool) {
+            const unknownResult = `Error: Unknown tool "${tc.name}"`;
+            toolResults.push({ toolCallId: tc.id, result: unknownResult });
+            messages.push({ role: 'tool', content: unknownResult, toolCallId: tc.id });
+            yield { type: 'tool-result', toolCall: tc, result: unknownResult };
+            continue;
+        }
+        if (!tool.execute) {
+            // Client tool — no server-side handler.
+            if (options?.toolCallStreamingMode === 'stop-on-client-tool') {
+                loopCtx.pendingClientToolCalls.push(tc);
+                loopCtx.loopFinishReason = 'client_tool_calls';
+                loopCtx.stopForClientTools = true;
+                yield { type: 'tool-call', toolCall: tc };
+                continue;
+            }
+            const placeholder = '[client tool — execute on client]';
+            toolResults.push({ toolCallId: tc.id, result: placeholder });
+            messages.push({ role: 'tool', content: placeholder, toolCallId: tc.id });
+            yield { type: 'tool-call', toolCall: tc };
+            yield { type: 'tool-result', toolCall: tc, result: placeholder };
+            continue;
+        }
+        // needsApproval enforcement
+        const approvalDecision = await evaluateApproval(tool, tc, options);
+        if (approvalDecision === 'rejected') {
+            const rejectionResult = { rejected: true, reason: 'User rejected this tool call' };
+            toolResults.push({ toolCallId: tc.id, result: rejectionResult });
+            messages.push({ role: 'tool', content: JSON.stringify(rejectionResult), toolCallId: tc.id });
+            yield { type: 'tool-result', toolCall: tc, result: rejectionResult };
+            continue;
+        }
+        if (approvalDecision === 'pending') {
+            loopCtx.pendingApprovalToolCall = { toolCall: tc, isClientTool: false };
+            loopCtx.loopFinishReason = 'tool_approval_required';
+            loopCtx.stopForApproval = true;
+            yield { type: 'tool-call', toolCall: tc };
+            break; // approval pending: remaining calls in this step are not processed
+        }
+        // onBeforeToolCall
+        let toolArgs = tc.arguments;
+        if (middlewares.length > 0) {
+            const beforeResult = await runOnBeforeToolCall(middlewares, ctx, tc.name, toolArgs);
+            if (beforeResult) {
+                if (beforeResult.type === 'skip') {
+                    const resultStr = typeof beforeResult.result === 'string' ? beforeResult.result : JSON.stringify(beforeResult.result);
+                    toolResults.push({ toolCallId: tc.id, result: beforeResult.result });
+                    messages.push({ role: 'tool', content: resultStr, toolCallId: tc.id });
+                    yield { type: 'tool-result', toolCall: tc, result: beforeResult.result };
+                    await runOnAfterToolCall(middlewares, ctx, tc.name, toolArgs, beforeResult.result);
+                    continue;
+                }
+                if (beforeResult.type === 'abort') {
+                    await runOnAbort(middlewares, ctx, beforeResult.reason);
+                    break; // middleware abort: no result is recorded for this call or any later one
+                }
+                if (beforeResult.type === 'transformArgs') {
+                    toolArgs = beforeResult.args;
+                }
+            }
+        }
+        // Validate args against the tool's inputSchema. Runs after middleware
+        // transforms so transforms can reshape malformed model output before
+        // it is judged. The tool-call chunk is emitted even on validation
+        // failure so streaming UIs see a paired tool-call → tool-result(error)
+        // sequence; non-streaming callers discard the chunk.
+        const validation = validateToolArgs(tool, toolArgs);
+        if (!validation.ok) {
+            yield { type: 'tool-call', toolCall: tc };
+            toolResults.push({ toolCallId: tc.id, result: validation.error });
+            messages.push({ role: 'tool', content: JSON.stringify(validation.error), toolCallId: tc.id });
+            yield { type: 'tool-result', toolCall: tc, result: validation.error };
+            if (middlewares.length > 0)
+                await runOnAfterToolCall(middlewares, ctx, tc.name, toolArgs, validation.error);
+            continue;
+        }
+        const validatedArgs = validation.value;
+        const toolStart = performance.now(); // start of the per-tool duration measurement
+        try {
+            // Emit the tool-call marker before execution so streaming UIs see
+            // tool-call → tool-update* → tool-result in order. Async-generator
+            // executes stream their yields as tool-update chunks live; plain
+            // executes yield nothing here.
+            //
+            // Pause detection: a yielded `pause_for_client_tools` control chunk
+            // halts iteration, propagates the nested calls to the parent's
+            // pending list, and SKIPS the tool_result emission — the yielding
+            // tool's own call stays orphaned in the parent message history
+            // until the caller resolves it on resume.
+            yield { type: 'tool-call', toolCall: tc };
+            const execGen = executeMaybeStreaming(tool, validatedArgs, { toolCallId: tc.id });
+            let result;
+            let paused = false;
+            while (true) {
+                const step = await execGen.next();
+                if (step.done) {
+                    result = step.value;
+                    break;
+                }
+                if (isPauseForClientToolsChunk(step.value)) {
+                    for (const pending of step.value.toolCalls) {
+                        loopCtx.pendingClientToolCalls.push(pending);
+                    }
+                    loopCtx.loopFinishReason = 'client_tool_calls';
+                    loopCtx.stopForClientTools = true;
+                    paused = true;
+                    break;
+                }
+                const updateChunk = { type: 'tool-update', toolCall: tc, update: step.value };
+                if (middlewares.length > 0) {
+                    const transformed = runOnChunk(middlewares, ctx, updateChunk);
+                    if (transformed)
+                        yield transformed; // a falsy middleware result drops the update chunk
+                }
+                else {
+                    yield updateChunk;
+                }
+            }
+            if (paused)
+                continue; // skip tool_result emission + message push for this tc
+            const duration = performance.now() - toolStart;
+            // toolResults preserves the ORIGINAL value; only the message content
+            // pushed onto `messages` (next-step model input) is narrowed by
+            // toModelOutput. The streamed `tool-result` chunk also carries the
+            // ORIGINAL value.
+            toolResults.push({ toolCallId: tc.id, result, duration });
+            const resultStr = await applyToModelOutput(tool, result, middlewares.length > 0 ? (e) => runOnError(middlewares, ctx, e) : undefined);
+            messages.push({ role: 'tool', content: resultStr, toolCallId: tc.id });
+            yield { type: 'tool-result', toolCall: tc, result };
+            // onAfterToolCall
+            if (middlewares.length > 0)
+                await runOnAfterToolCall(middlewares, ctx, tc.name, toolArgs, result);
+        }
+        catch (err) {
+            const duration = performance.now() - toolStart;
+            const msg = err instanceof Error ? err.message : String(err);
+            const errResult = `Error: ${msg}`;
+            toolResults.push({ toolCallId: tc.id, result: errResult, duration });
+            messages.push({ role: 'tool', content: errResult, toolCallId: tc.id });
+            yield { type: 'tool-result', toolCall: tc, result: errResult };
+            // onAfterToolCall (error case)
+            if (middlewares.length > 0)
+                await runOnAfterToolCall(middlewares, ctx, tc.name, toolArgs, errResult);
+        }
+    }
+}
+/**
+ * Parallel tool execution — three phases:
+ *
+ * 1. **Prelude (serial, in tool-call order):** classify each call. Approval
+ *    decisions, `onBeforeToolCall` middleware, and arg validation all
+ *    resolve here; the next phase only sees calls that cleared every
+ *    gate. `pending-approval` and `mw-abort` short-circuit the prelude
+ *    exactly as they do in serial mode — later calls are never dispatched.
+ *
+ * 2. **Execution (parallel):** for every `ready` outcome, drive
+ *    `executeMaybeStreaming` to completion concurrently. `tool-update`
+ *    chunks (and any pause-for-client-tools mutations to `loopCtx`) are
+ *    captured per-call into a buffer.
+ *
+ * 3. **Replay (serial, in tool-call order):** for each outcome, emit its
+ *    chunks (including buffered `tool-update`s for ready calls), push
+ *    tool messages, and run `onAfterToolCall`. This is the only phase
+ *    that yields chunks to consumers, so streamed output stays
+ *    deterministic regardless of which `execute()` finished first.
+ *
+ * @param loopCtx     Shared loop state. Reads `messages`, `middlewares`,
+ *                    `ctx`; the helpers it calls mutate the pending / stop
+ *                    fields.
+ * @param toolCalls   Tool calls to process, in order.
+ * @param toolResults Output array; results are appended in place during replay.
+ *
+ * Because nothing is yielded until Phase 3, no chunk reaches the consumer
+ * before every `ready` execution has settled.
+ *
+ * NOTE(review): executions are matched back to outcomes by `tc.id`, which
+ * assumes ids are unique within one step — confirm with the providers.
+ * NOTE(review): unlike the serial runner there is no `try` around
+ * `applyToModelOutput` / `runOnAfterToolCall` in the replay loop, so an
+ * exception from either would propagate to the caller — confirm this
+ * difference is intended.
+ */
+async function* runToolPhaseParallel(loopCtx, toolCalls, toolResults) {
+    const { messages, middlewares, ctx } = loopCtx;
+    // ─── Phase 1: prelude ──────────────────────────────────
+    const outcomes = await classifyToolCalls(loopCtx, toolCalls);
+    // ─── Phase 2: dispatch ready executions concurrently ──
+    const ready = outcomes.filter((o) => o.kind === 'ready');
+    const executions = await Promise.all(ready.map(o => runToolExecution(loopCtx, o))); // runToolExecution reports tool errors as `{ kind: 'error' }` values
+    const executionByCallId = new Map();
+    for (let i = 0; i < ready.length; i++) {
+        executionByCallId.set(ready[i].tc.id, executions[i]);
+    }
+    // ─── Phase 3: replay chunks + side-effects in order ───
+    for (const outcome of outcomes) {
+        if (outcome.kind === 'unknown-tool') {
+            toolResults.push({ toolCallId: outcome.tc.id, result: outcome.result });
+            messages.push({ role: 'tool', content: outcome.result, toolCallId: outcome.tc.id });
+            yield { type: 'tool-result', toolCall: outcome.tc, result: outcome.result };
+            continue;
+        }
+        if (outcome.kind === 'client-tool-stop') {
+            // loopCtx mutations already applied during the prelude.
+            yield { type: 'tool-call', toolCall: outcome.tc };
+            continue;
+        }
+        if (outcome.kind === 'client-tool-placeholder') {
+            toolResults.push({ toolCallId: outcome.tc.id, result: outcome.result });
+            messages.push({ role: 'tool', content: outcome.result, toolCallId: outcome.tc.id });
+            yield { type: 'tool-call', toolCall: outcome.tc };
+            yield { type: 'tool-result', toolCall: outcome.tc, result: outcome.result };
+            continue;
+        }
+        if (outcome.kind === 'rejected') {
+            toolResults.push({ toolCallId: outcome.tc.id, result: outcome.result });
+            messages.push({ role: 'tool', content: JSON.stringify(outcome.result), toolCallId: outcome.tc.id });
+            yield { type: 'tool-result', toolCall: outcome.tc, result: outcome.result };
+            continue;
+        }
+        if (outcome.kind === 'pending-approval') {
+            // loopCtx mutations already applied during the prelude.
+            yield { type: 'tool-call', toolCall: outcome.tc };
+            // Phase 1 stops classifying after pending-approval, so this is the
+            // last outcome — but `break` keeps the intent explicit.
+            break;
+        }
+        if (outcome.kind === 'mw-skip') {
+            const resultStr = typeof outcome.result === 'string' ? outcome.result : JSON.stringify(outcome.result);
+            toolResults.push({ toolCallId: outcome.tc.id, result: outcome.result });
+            messages.push({ role: 'tool', content: resultStr, toolCallId: outcome.tc.id });
+            yield { type: 'tool-result', toolCall: outcome.tc, result: outcome.result };
+            if (middlewares.length > 0)
+                await runOnAfterToolCall(middlewares, ctx, outcome.tc.name, outcome.toolArgs, outcome.result);
+            continue;
+        }
+        if (outcome.kind === 'validation-error') {
+            yield { type: 'tool-call', toolCall: outcome.tc };
+            toolResults.push({ toolCallId: outcome.tc.id, result: outcome.error });
+            messages.push({ role: 'tool', content: JSON.stringify(outcome.error), toolCallId: outcome.tc.id });
+            yield { type: 'tool-result', toolCall: outcome.tc, result: outcome.error };
+            if (middlewares.length > 0)
+                await runOnAfterToolCall(middlewares, ctx, outcome.tc.name, outcome.toolArgs, outcome.error);
+            continue;
+        }
+        // outcome.kind === 'ready'
+        const exec = executionByCallId.get(outcome.tc.id);
+        yield { type: 'tool-call', toolCall: outcome.tc };
+        for (const chunk of exec.updates)
+            yield chunk; // buffered `tool-update` chunks, already passed through `runOnChunk`
+        if (exec.kind === 'paused') {
+            // Pause-for-client-tools propagated its calls onto `loopCtx` during
+            // execution. Skip tool_result emission + message push — the call
+            // stays orphaned until resume.
+            continue;
+        }
+        if (exec.kind === 'error') {
+            const errResult = `Error: ${exec.error.message}`;
+            toolResults.push({ toolCallId: outcome.tc.id, result: errResult, duration: exec.duration });
+            messages.push({ role: 'tool', content: errResult, toolCallId: outcome.tc.id });
+            yield { type: 'tool-result', toolCall: outcome.tc, result: errResult };
+            if (middlewares.length > 0)
+                await runOnAfterToolCall(middlewares, ctx, outcome.tc.name, outcome.toolArgs, errResult);
+            continue;
+        }
+        // exec.kind === 'ok'
+        toolResults.push({ toolCallId: outcome.tc.id, result: exec.result, duration: exec.duration });
+        const resultStr = await applyToModelOutput(outcome.tool, exec.result, middlewares.length > 0 ? (e) => runOnError(middlewares, ctx, e) : undefined);
+        messages.push({ role: 'tool', content: resultStr, toolCallId: outcome.tc.id });
+        yield { type: 'tool-result', toolCall: outcome.tc, result: exec.result };
+        if (middlewares.length > 0)
+            await runOnAfterToolCall(middlewares, ctx, outcome.tc.name, outcome.toolArgs, exec.result);
+    }
+}
+/**
+ * Walk `toolCalls` in order and decide each call's fate. Mutations to
+ * `loopCtx` for client-tool-stop, pending-approval, and middleware-abort
+ * happen here so the rest of the parallel flow sees the same state the
+ * serial path would. `pending-approval` and `mw-abort` stop the walk —
+ * later calls are not classified and are silently dropped.
+ *
+ * Outcome shapes pushed onto the returned array (every one carries `tc`):
+ *  - `{ kind: 'unknown-tool', result }`            — name not in `toolMap`.
+ *  - `{ kind: 'client-tool-stop' }`                — no `execute`, mode is
+ *    `stop-on-client-tool`; the call was added to `pendingClientToolCalls`.
+ *  - `{ kind: 'client-tool-placeholder', result }` — no `execute`, other modes.
+ *  - `{ kind: 'rejected', result }`                — approval was rejected.
+ *  - `{ kind: 'pending-approval' }`                — approval outstanding;
+ *    always the last entry.
+ *  - `{ kind: 'mw-skip', toolArgs, result }`       — middleware supplied the result.
+ *  - `{ kind: 'validation-error', toolArgs, error }`
+ *  - `{ kind: 'ready', tool, toolArgs, validatedArgs }`
+ *
+ * A middleware abort pushes no outcome for the aborting call; it only runs
+ * `runOnAbort` and ends the walk.
+ *
+ * @param loopCtx   Shared loop state. Reads `middlewares`, `toolMap`,
+ *                  `options`, `ctx`; may set the pending / stop fields.
+ * @param toolCalls Tool calls to classify, in order.
+ * @returns The outcomes, in tool-call order.
+ */
+async function classifyToolCalls(loopCtx, toolCalls) {
+    const { middlewares, toolMap, options, ctx } = loopCtx;
+    const outcomes = [];
+    for (const tc of toolCalls) {
+        const tool = toolMap.get(tc.name);
+        if (!tool) {
+            outcomes.push({ kind: 'unknown-tool', tc, result: `Error: Unknown tool "${tc.name}"` });
+            continue;
+        }
+        if (!tool.execute) {
+            if (options?.toolCallStreamingMode === 'stop-on-client-tool') {
+                loopCtx.pendingClientToolCalls.push(tc);
+                loopCtx.loopFinishReason = 'client_tool_calls';
+                loopCtx.stopForClientTools = true;
+                outcomes.push({ kind: 'client-tool-stop', tc });
+                continue;
+            }
+            outcomes.push({ kind: 'client-tool-placeholder', tc, result: '[client tool — execute on client]' });
+            continue;
+        }
+        const approvalDecision = await evaluateApproval(tool, tc, options);
+        if (approvalDecision === 'rejected') {
+            outcomes.push({ kind: 'rejected', tc, result: { rejected: true, reason: 'User rejected this tool call' } });
+            continue;
+        }
+        if (approvalDecision === 'pending') {
+            loopCtx.pendingApprovalToolCall = { toolCall: tc, isClientTool: false };
+            loopCtx.loopFinishReason = 'tool_approval_required';
+            loopCtx.stopForApproval = true;
+            outcomes.push({ kind: 'pending-approval', tc });
+            break;
+        }
+        let toolArgs = tc.arguments;
+        if (middlewares.length > 0) {
+            const beforeResult = await runOnBeforeToolCall(middlewares, ctx, tc.name, toolArgs);
+            if (beforeResult) {
+                if (beforeResult.type === 'skip') {
+                    outcomes.push({ kind: 'mw-skip', tc, toolArgs, result: beforeResult.result });
+                    continue;
+                }
+                if (beforeResult.type === 'abort') {
+                    await runOnAbort(middlewares, ctx, beforeResult.reason);
+                    // Drop any prior outcomes too? No — serial mode emits prior
+                    // outcomes' chunks before hitting abort, so we keep them in the
+                    // outcomes list and Phase 3 emits them up to (but not including)
+                    // this call. Stop classifying further.
+                    break;
+                }
+                if (beforeResult.type === 'transformArgs') {
+                    toolArgs = beforeResult.args;
+                }
+            }
+        }
+        const validation = validateToolArgs(tool, toolArgs); // validates the possibly-transformed args
+        if (!validation.ok) {
+            outcomes.push({ kind: 'validation-error', tc, toolArgs, error: validation.error });
+            continue;
+        }
+        outcomes.push({ kind: 'ready', tc, tool, toolArgs, validatedArgs: validation.value });
+    }
+    return outcomes;
+}
+/**
+ * Drive a single tool's `executeMaybeStreaming` to completion. Buffers
+ * `tool-update` chunks for replay in tool-call order; pause-for-client-tools
+ * mutations to `loopCtx` apply immediately and the call returns `paused`.
+ *
+ * `ctx` is shared across concurrent invocations. Middleware that writes
+ * through `ctx` during `runOnChunk` (uncommon — most use it read-only for
+ * telemetry) may observe interleaved updates from sibling tool calls;
+ * apps with such middleware should opt out via `parallelTools: false`.
+ *
+ * @param loopCtx Shared loop state. Reads `middlewares` and `ctx`; on a pause
+ *                chunk it appends to `pendingClientToolCalls` and sets
+ *                `loopFinishReason` / `stopForClientTools`.
+ * @param outcome A `ready` outcome; `tool`, `validatedArgs` and `tc` are read.
+ * @returns One of
+ *          `{ kind: 'ok', result, updates, duration }`,
+ *          `{ kind: 'paused', updates, duration }`, or
+ *          `{ kind: 'error', error, updates, duration }`.
+ *          `duration` is the `performance.now()` delta since entry. Anything
+ *          thrown inside the `try` is returned as the `error` variant
+ *          (wrapped in `Error` when it is not one already) rather than
+ *          rethrown.
+ *
+ * NOTE(review): after a pause chunk the generator is abandoned without an
+ * explicit `return()` — confirm `executeMaybeStreaming` needs no cleanup.
+ * NOTE(review): `runOnChunk` is used without `await`; presumably it is
+ * synchronous — verify.
+ */
+async function runToolExecution(loopCtx, outcome) {
+    const { middlewares, ctx } = loopCtx;
+    const updates = [];
+    const toolStart = performance.now();
+    try {
+        const execGen = executeMaybeStreaming(outcome.tool, outcome.validatedArgs, { toolCallId: outcome.tc.id });
+        let result;
+        let paused = false;
+        while (true) {
+            const step = await execGen.next();
+            if (step.done) {
+                result = step.value;
+                break;
+            }
+            if (isPauseForClientToolsChunk(step.value)) {
+                for (const pending of step.value.toolCalls) {
+                    loopCtx.pendingClientToolCalls.push(pending);
+                }
+                loopCtx.loopFinishReason = 'client_tool_calls';
+                loopCtx.stopForClientTools = true;
+                paused = true;
+                break;
+            }
+            const updateChunk = { type: 'tool-update', toolCall: outcome.tc, update: step.value };
+            if (middlewares.length > 0) {
+                const transformed = runOnChunk(middlewares, ctx, updateChunk);
+                if (transformed)
+                    updates.push(transformed); // a falsy middleware result drops the update chunk
+            }
+            else {
+                updates.push(updateChunk);
+            }
+        }
+        const duration = performance.now() - toolStart;
+        if (paused)
+            return { kind: 'paused', updates, duration };
+        return { kind: 'ok', result, updates, duration };
+    }
+    catch (err) {
+        const duration = performance.now() - toolStart;
+        return { kind: 'error', error: err instanceof Error ? err : new Error(String(err)), updates, duration };
+    }
+}
+/**
+ * Build the shared `LoopContext` for a `prompt()` / `stream()` call, run
+ * approval-resume, and fire `onConfig(init)` + `onStart`. After this returns,
+ * the iteration loop can run with the same setup regardless of streaming
+ * mode.
+ */
+async function initializeLoop(a, input, options) {
+    // Honor caller-supplied AbortSignal as early as possible — if the signal
+    // is already aborted on entry, do no work at all.
+    options?.signal?.throwIfAborted();
     const loopStart = performance.now();
     const modelString = a.model() ?? AiRegistry.getDefault();
     const [providerName] = AiRegistry.parseModelString(modelString);
@@ -293,7 +882,6 @@ async function runAgentLoop(a, input, options) {
     const middlewares = getMiddleware(a);
     const toolSchemas = buildToolSchemas(tools);
     const toolMap = buildToolMap(tools);
-    let failoverAttempts = 0;
     const messages = options?.messages
         ? [{ role: 'system', content: a.instructions() }, ...options.messages]
         : [
@@ -304,26 +892,42 @@ async function runAgentLoop(a, input, options) {
     const steps = [];
     const stopConditions = normalizeStopConditions(a.stopWhen());
     const totalUsage = { promptTokens: 0, completionTokens: 0, totalTokens: 0 };
-    // State for client-tool-stopping and approval-stopping
-    const pendingClientToolCalls = [];
-    let pendingApprovalToolCall;
-    let loopFinishReason;
-    let stopForClientTools = false;
-    let stopForApproval = false;
-    let resumedToolMessages = []; // eslint-disable-line no-useless-assignment
+    // Create middleware context (resume below mutates `messages`, captured by
+    // reference here, so order is safe).
+    const ctx = createMiddlewareContext(messages, modelString, tools, 0);
+    const loopCtx = {
+        agent: a,
+        input,
+        options,
+        modelString,
+        providerName,
+        tools,
+        toolMap,
+        toolSchemas,
+        middlewares,
+        loopStart,
+        ctx,
+        messages,
+        steps,
+        totalUsage,
+        pendingClientToolCalls: [],
+        pendingApprovalToolCall: undefined,
+        loopFinishReason: undefined,
+        stopForClientTools: false,
+        stopForApproval: false,
+        resumedToolMessages: [],
+        failoverAttempts: 0,
+    };
     // Resume server tools left pending by a previous approval round-trip.
-    // (Must run before middleware context creation since `messages` may grow.)
     {
         const resume = await resumePendingToolCalls({ messages, toolMap, options });
-        resumedToolMessages = resume.resumed;
+        loopCtx.resumedToolMessages = resume.resumed;
         if (resume.approvalStillRequired) {
-            pendingApprovalToolCall = resume.approvalStillRequired;
-            loopFinishReason = 'tool_approval_required';
-            stopForApproval = true;
+            loopCtx.pendingApprovalToolCall = resume.approvalStillRequired;
+            loopCtx.loopFinishReason = 'tool_approval_required';
+            loopCtx.stopForApproval = true;
         }
     }
-    // Create middleware context
-    const ctx = createMiddlewareContext(messages, modelString, tools, 0);
     // onConfig — init phase
     if (middlewares.length > 0) {
         const configResult = runOnConfig(middlewares, ctx, buildMiddlewareConfig(messages, a), 'init');
@@ -333,179 +937,78 @@ async function runAgentLoop(a, input, options) {
     // onStart
     if (middlewares.length > 0)
         await runSequential(middlewares, 'onStart', ctx);
+    return { loopCtx, stopConditions };
+}
+/**
+ * Run the per-iteration prelude used by both the non-streaming and the
+ * streaming agent loop. In order: stamp `ctx.iteration` and reset
+ * `ctx.chunkIndex`, check the caller's AbortSignal, check for a middleware
+ * abort, run `onIteration`, apply `agent.prepareStep` (when the agent defines
+ * it), then run `onConfig(beforeModel)`.
+ *
+ * `messages` is only ever modified in place here (`splice` / index
+ * assignment) and never rebound, so the array object taken from `loopCtx`
+ * stays the one in use.
+ *
+ * @param loopCtx   Loop state; `agent`, `options`, `ctx`, `middlewares`,
+ *                  `messages`, `modelString` and `steps` are read from it.
+ * @param iteration Step index supplied by the caller's `for` loop; stored on
+ *                  `ctx.iteration` and passed to `prepareStep` as `stepNumber`.
+ * @returns `{ currentModel }` — `modelString`, or `prep.model` when
+ *          `prepareStep` supplied a truthy one — or `{ aborted: true }` when
+ *          `ctx._aborted` was set (`runOnAbort` has already been awaited; the
+ *          caller should `break`).
+ * @throws The abort reason if a caller-supplied AbortSignal fired between
+ *         iterations.
+ */
+async function runIterationPrelude(loopCtx, iteration) {
+    const { agent, options, ctx, middlewares, messages, modelString, steps } = loopCtx;
+    ctx.iteration = iteration;
+    // Reset the streaming chunk index for middlewares that key off it. Harmless
+    // in non-streaming mode where no chunks flow through `onChunk`.
+    ctx.chunkIndex = 0;
+    // Honor caller-supplied AbortSignal between iterations (no-op when no signal was passed).
+    options?.signal?.throwIfAborted();
+    if (ctx._aborted) { // middleware-requested cancellation: notify via onAbort, then tell the caller to stop
+        await runOnAbort(middlewares, ctx, ctx._abortReason);
+        return { aborted: true };
+    }
+    if (middlewares.length > 0)
+        await runSequential(middlewares, 'onIteration', ctx);
+    let currentModel = modelString; // default for this step; prepareStep may override it below
+    if (agent.prepareStep) {
+        const prep = await agent.prepareStep({ stepNumber: iteration, steps, messages });
+        if (prep.model)
+            currentModel = prep.model;
+        if (prep.messages)
+            messages.splice(0, messages.length, ...prep.messages); // swap the whole conversation in place
+        if (prep.system)
+            messages[0] = { role: 'system', content: prep.system }; // runs after prep.messages, so it overwrites slot 0; assumes slot 0 is the system message — TODO confirm
+    }
+    if (middlewares.length > 0) {
+        const configResult = runOnConfig(middlewares, ctx, buildMiddlewareConfig(messages, agent), 'beforeModel');
+        if (configResult.messages)
+            messages.splice(0, messages.length, ...configResult.messages); // middleware-supplied messages replace the contents in place
+    }
+    return { currentModel };
+}
+// ─── Agent Loop (non-streaming) ──────────────────────────
+async function runAgentLoop(a, input, options) {
+    const { loopCtx, stopConditions } = await initializeLoop(a, input, options);
+    const { ctx, middlewares, messages, steps, totalUsage } = loopCtx;
     try {
-        if (stopForApproval) {
+        if (loopCtx.stopForApproval) {
             // Approval is still required from the resume — skip the model loop.
         }
         else {
             for (let iteration = 0; iteration < a.maxSteps(); iteration++) {
-                ctx.iteration = iteration;
-                // Check if middleware aborted
-                if (ctx._aborted) {
-                    await runOnAbort(middlewares, ctx, ctx._abortReason);
+                const prelude = await runIterationPrelude(loopCtx, iteration);
+                if ('aborted' in prelude)
                     break;
-                }
-                // onIteration
-                if (middlewares.length > 0)
-                    await runSequential(middlewares, 'onIteration', ctx);
-                let currentModel = modelString;
-                const currentToolSchemas = toolSchemas;
-                // prepareStep hook
-                if (a.prepareStep) {
-                    const prep = await a.prepareStep({ stepNumber: iteration, steps, messages });
-                    if (prep.model)
-                        currentModel = prep.model;
-                    if (prep.messages)
-                        messages.splice(0, messages.length, ...prep.messages);
-                    if (prep.system)
-                        messages[0] = { role: 'system', content: prep.system };
-                }
-                // onConfig — beforeModel phase
-                if (middlewares.length > 0) {
-                    const configResult = runOnConfig(middlewares, ctx, buildMiddlewareConfig(messages, a), 'beforeModel');
-                    if (configResult.messages)
-                        messages.splice(0, messages.length, ...configResult.messages);
-                }
-                const failoverModels = [currentModel, ...a.failover().filter(m => m !== currentModel)];
-                let response;
-                let lastError;
-                for (const tryModel of failoverModels) {
-                    try {
-                        const adapter = AiRegistry.resolve(tryModel);
-                        const [, modelId] = AiRegistry.parseModelString(tryModel);
-                        const reqOptions = {
-                            model: modelId,
-                            messages,
-                            tools: currentToolSchemas.length > 0 ? currentToolSchemas : undefined,
-                            temperature: a.temperature(),
-                            maxTokens: a.maxTokens(),
-                        };
-                        response = await adapter.generate(reqOptions);
-                        break;
-                    }
-                    catch (err) {
-                        lastError = err instanceof Error ? err : new Error(String(err));
-                        failoverAttempts++;
-                        if (tryModel === failoverModels[failoverModels.length - 1])
-                            throw lastError;
-                    }
-                }
-                if (!response)
-                    throw lastError ?? new Error('No provider available');
+                const { currentModel } = prelude;
+                const response = await runFailover(loopCtx, currentModel, (adapter, _, opts) => adapter.generate(opts));
                 addUsage(totalUsage, response.usage);
                 // onUsage
                 if (middlewares.length > 0)
                     await runOnUsage(middlewares, ctx, response.usage);
                 const toolCalls = response.message.toolCalls ?? [];
-                const toolResults = [];
+                let toolResults = [];
                 if (toolCalls.length > 0) {
-                    messages.push(response.message);
-                    for (const tc of toolCalls) {
-                        const tool = toolMap.get(tc.name);
-                        if (!tool) {
-                            toolResults.push({ toolCallId: tc.id, result: `Error: Unknown tool "${tc.name}"` });
-                            messages.push({ role: 'tool', content: `Error: Unknown tool "${tc.name}"`, toolCallId: tc.id });
-                            continue;
-                        }
-                        if (!tool.execute) {
-                            // Client tool — no server-side handler.
-                            if (options?.toolCallStreamingMode === 'stop-on-client-tool') {
-                                pendingClientToolCalls.push(tc);
-                                loopFinishReason = 'client_tool_calls';
-                                stopForClientTools = true;
-                                continue;
-                            }
-                            toolResults.push({ toolCallId: tc.id, result: '[client tool — execute on client]' });
-                            messages.push({ role: 'tool', content: '[client tool — execute on client]', toolCallId: tc.id });
-                            continue;
-                        }
-                        // needsApproval enforcement
-                        const approvalDecision = await evaluateApproval(tool, tc, options);
-                        if (approvalDecision === 'rejected') {
-                            const rejectionResult = { rejected: true, reason: 'User rejected this tool call' };
-                            toolResults.push({ toolCallId: tc.id, result: rejectionResult });
-                            messages.push({ role: 'tool', content: JSON.stringify(rejectionResult), toolCallId: tc.id });
-                            continue;
-                        }
-                        if (approvalDecision === 'pending') {
-                            pendingApprovalToolCall = { toolCall: tc, isClientTool: false };
-                            loopFinishReason = 'tool_approval_required';
-                            stopForApproval = true;
+                    // Drain `executeToolPhase` to completion, discarding the streamed
+                    // chunks — non-streaming callers don't surface them.
+                    const phaseGen = executeToolPhase(loopCtx, toolCalls, response.message);
+                    while (true) {
+                        const next = await phaseGen.next();
+                        if (next.done) {
+                            toolResults = next.value;
                             break;
                         }
-                        // onBeforeToolCall
-                        let toolArgs = tc.arguments;
-                        if (middlewares.length > 0) {
-                            const beforeResult = await runOnBeforeToolCall(middlewares, ctx, tc.name, toolArgs);
-                            if (beforeResult) {
-                                if (beforeResult.type === 'skip') {
-                                    const resultStr = typeof beforeResult.result === 'string' ? beforeResult.result : JSON.stringify(beforeResult.result);
-                                    toolResults.push({ toolCallId: tc.id, result: beforeResult.result });
-                                    messages.push({ role: 'tool', content: resultStr, toolCallId: tc.id });
-                                    await runOnAfterToolCall(middlewares, ctx, tc.name, toolArgs, beforeResult.result);
-                                    continue;
-                                }
-                                if (beforeResult.type === 'abort') {
-                                    await runOnAbort(middlewares, ctx, beforeResult.reason);
-                                    break;
-                                }
-                                if (beforeResult.type === 'transformArgs') {
-                                    toolArgs = beforeResult.args;
-                                }
-                            }
-                        }
-                        try {
-                            // Drain generator yields silently in the non-streaming loop —
-                            // the same tool definition must work in both prompt() and stream().
-                            // Exception: a `pause_for_client_tools` control chunk yield
-                            // halts iteration, propagates the nested calls to the parent's
-                            // pending list, and skips tool_result recording (see tool.ts
-                            // `pauseForClientTools` for rationale).
-                            const execGen = executeMaybeStreaming(tool, toolArgs, { toolCallId: tc.id });
-                            let result;
-                            let paused = false;
-                            while (true) {
-                                const step = await execGen.next();
-                                if (step.done) {
-                                    result = step.value;
-                                    break;
-                                }
-                                if (isPauseForClientToolsChunk(step.value)) {
-                                    for (const pending of step.value.toolCalls) {
-                                        pendingClientToolCalls.push(pending);
-                                    }
-                                    loopFinishReason = 'client_tool_calls';
-                                    stopForClientTools = true;
-                                    paused = true;
-                                    break;
-                                }
-                                // Plain tool-update yields are silently dropped in the
-                                // non-streaming loop — only the final return value matters.
-                            }
-                            if (paused)
-                                continue; // skip toolResults + message push for this tc
-                            // toolResults preserves the ORIGINAL value; only the tool message
-                            // pushed onto `messages` (what the next model step sees) is
-                            // narrowed by toModelOutput.
-                            toolResults.push({ toolCallId: tc.id, result });
-                            const resultStr = await applyToModelOutput(tool, result, middlewares.length > 0 ? (e) => runOnError(middlewares, ctx, e) : undefined);
-                            messages.push({ role: 'tool', content: resultStr, toolCallId: tc.id });
-                            // onAfterToolCall
-                            if (middlewares.length > 0)
-                                await runOnAfterToolCall(middlewares, ctx, tc.name, toolArgs, result);
-                        }
-                        catch (err) {
-                            const msg = err instanceof Error ? err.message : String(err);
-                            toolResults.push({ toolCallId: tc.id, result: `Error: ${msg}` });
-                            messages.push({ role: 'tool', content: `Error: ${msg}`, toolCallId: tc.id });
-                            // onAfterToolCall (error case)
-                            if (middlewares.length > 0)
-                                await runOnAfterToolCall(middlewares, ctx, tc.name, toolArgs, `Error: ${msg}`);
-                        }
                     }
-                    // onToolPhaseComplete
-                    if (middlewares.length > 0)
-                        await runSequential(middlewares, 'onToolPhaseComplete', ctx);
                 }
                 else {
                     messages.push(response.message);
@@ -518,7 +1021,8 @@ async function runAgentLoop(a, input, options) {
                     finishReason: response.finishReason,
                 };
                 steps.push(step);
-                if (stopForClientTools || stopForApproval)
+                emitObserverStepCompleted(loopCtx, iteration, false);
+                if (loopCtx.stopForClientTools || loopCtx.stopForApproval)
                     break;
                 const shouldStop = stopConditions.some(cond => cond({ steps, iteration, lastMessage: response.message }));
                 if (shouldStop || response.finishReason !== 'tool_calls') {
@@ -531,177 +1035,38 @@ async function runAgentLoop(a, input, options) {
         // onError
         if (middlewares.length > 0)
             await runOnError(middlewares, ctx, err);
-        // Emit observer event on failure
-        const obs = _getAiObservers();
-        if (obs) {
-            const inputText = options?.messages ? '' : input;
-            obs.emit({
-                kind: 'agent.failed',
-                agentName: a.constructor.name,
-                model: modelString,
-                provider: providerName,
-                input: inputText,
-                output: '',
-                steps: _buildObserverSteps(steps, modelString),
-                tokens: { prompt: totalUsage.promptTokens, completion: totalUsage.completionTokens, total: totalUsage.totalTokens },
-                duration: Math.round(performance.now() - loopStart),
-                finishReason: 'error',
-                streaming: false,
-                conversationId: null,
-                failoverAttempts,
-                error: err instanceof Error ? err.message : String(err),
-            });
-        }
+        emitObserverFailed(loopCtx, err, false);
         throw err;
     }
     // onFinish
     if (middlewares.length > 0)
         await runSequential(middlewares, 'onFinish', ctx);
-    const lastStep = steps[steps.length - 1];
-    const result = {
-        text: lastStep ? getMessageText(lastStep.message.content) : '',
-        steps,
-        usage: totalUsage,
-    };
-    if (loopFinishReason)
-        result.finishReason = loopFinishReason;
-    if (pendingClientToolCalls.length > 0)
-        result.pendingClientToolCalls = pendingClientToolCalls;
-    if (pendingApprovalToolCall)
-        result.pendingApprovalToolCall = pendingApprovalToolCall;
-    if (resumedToolMessages.length > 0)
-        result.resumedToolMessages = resumedToolMessages;
-    // Emit observer event on success
-    const obs = _getAiObservers();
-    if (obs) {
-        const inputText = options?.messages ? '' : input;
-        obs.emit({
-            kind: 'agent.completed',
-            agentName: a.constructor.name,
-            model: modelString,
-            provider: providerName,
-            input: inputText,
-            output: result.text,
-            steps: _buildObserverSteps(steps, modelString),
-            tokens: { prompt: totalUsage.promptTokens, completion: totalUsage.completionTokens, total: totalUsage.totalTokens },
-            duration: Math.round(performance.now() - loopStart),
-            finishReason: result.finishReason ?? lastStep?.finishReason ?? 'stop',
-            streaming: false,
-            conversationId: null,
-            failoverAttempts,
-        });
-    }
+    const result = buildAgentResponse(loopCtx);
+    emitObserverCompleted(loopCtx, result, false);
     return result;
 }
 // ─── Agent Loop (streaming) ──────────────────────────────
 function runAgentLoopStreaming(a, input, options) {
     let resolveResponse;
-    const responsePromise = new Promise((resolve) => { resolveResponse = resolve; });
+    let rejectResponse;
+    const responsePromise = new Promise((resolve, reject) => {
+        resolveResponse = resolve;
+        rejectResponse = reject;
+    });
     async function* generateStream() {
-        const loopStart = performance.now();
-        const modelString = a.model() ?? AiRegistry.getDefault();
-        const [providerName] = AiRegistry.parseModelString(modelString);
-        const tools = getTools(a);
-        const middlewares = getMiddleware(a);
-        const toolSchemas = buildToolSchemas(tools);
-        const toolMap = buildToolMap(tools);
-        let failoverAttempts = 0;
-        const messages = options?.messages
-            ? [{ role: 'system', content: a.instructions() }, ...options.messages]
-            : [
-                { role: 'system', content: a.instructions() },
-                ...(options?.history ?? []),
-                buildUserMessage(input, options?.attachments),
-            ];
-        const steps = [];
-        const stopConditions = normalizeStopConditions(a.stopWhen());
-        const totalUsage = { promptTokens: 0, completionTokens: 0, totalTokens: 0 };
-        // State for client-tool-stopping and approval-stopping
-        const pendingClientToolCalls = [];
-        let pendingApprovalToolCall;
-        let loopFinishReason;
-        let stopForClientTools = false;
-        let stopForApproval = false;
-        let resumedToolMessages = []; // eslint-disable-line no-useless-assignment
-        // Resume server tools left pending by a previous approval round-trip.
-        {
-            const resume = await resumePendingToolCalls({ messages, toolMap, options });
-            resumedToolMessages = resume.resumed;
-            if (resume.approvalStillRequired) {
-                pendingApprovalToolCall = resume.approvalStillRequired;
-                loopFinishReason = 'tool_approval_required';
-                stopForApproval = true;
-            }
-        }
-        // Create middleware context
-        const ctx = createMiddlewareContext(messages, modelString, tools, 0);
-        // onConfig — init phase
-        if (middlewares.length > 0) {
-            const configResult = runOnConfig(middlewares, ctx, buildMiddlewareConfig(messages, a), 'init');
-            if (configResult.messages)
-                messages.splice(0, messages.length, ...configResult.messages);
-        }
-        // onStart
-        if (middlewares.length > 0)
-            await runSequential(middlewares, 'onStart', ctx);
+        const { loopCtx, stopConditions } = await initializeLoop(a, input, options);
+        const { ctx, middlewares, messages, steps, totalUsage } = loopCtx;
         try {
-            if (stopForApproval) {
+            if (loopCtx.stopForApproval) {
                 // Resume detected unfulfilled approval — skip the model loop entirely.
             }
             else {
                 for (let iteration = 0; iteration < a.maxSteps(); iteration++) {
-                    ctx.iteration = iteration;
-                    ctx.chunkIndex = 0;
-                    // Check if middleware aborted
-                    if (ctx._aborted) {
-                        await runOnAbort(middlewares, ctx, ctx._abortReason);
+                    const prelude = await runIterationPrelude(loopCtx, iteration);
+                    if ('aborted' in prelude)
                         break;
-                    }
-                    // onIteration
-                    if (middlewares.length > 0)
-                        await runSequential(middlewares, 'onIteration', ctx);
-                    let currentModel = modelString;
-                    if (a.prepareStep) {
-                        const prep = await a.prepareStep({ stepNumber: iteration, steps, messages });
-                        if (prep.model)
-                            currentModel = prep.model;
-                        if (prep.messages)
-                            messages.splice(0, messages.length, ...prep.messages);
-                        if (prep.system)
-                            messages[0] = { role: 'system', content: prep.system };
-                    }
-                    // onConfig — beforeModel phase
-                    if (middlewares.length > 0) {
-                        const configResult = runOnConfig(middlewares, ctx, buildMiddlewareConfig(messages, a), 'beforeModel');
-                        if (configResult.messages)
-                            messages.splice(0, messages.length, ...configResult.messages);
-                    }
-                    const failoverModels = [currentModel, ...a.failover().filter(m => m !== currentModel)];
-                    let streamSource;
-                    let lastError;
-                    for (const tryModel of failoverModels) {
-                        try {
-                            const adapter = AiRegistry.resolve(tryModel);
-                            const [, modelId] = AiRegistry.parseModelString(tryModel);
-                            const opts = {
-                                model: modelId,
-                                messages,
-                                tools: toolSchemas.length > 0 ? toolSchemas : undefined,
-                                temperature: a.temperature(),
-                                maxTokens: a.maxTokens(),
-                            };
-                            streamSource = adapter.stream(opts);
-                            break;
-                        }
-                        catch (err) {
-                            lastError = err instanceof Error ? err : new Error(String(err));
-                            failoverAttempts++;
-                            if (tryModel === failoverModels[failoverModels.length - 1])
-                                throw lastError;
-                        }
-                    }
-                    if (!streamSource)
-                        throw lastError ?? new Error('No provider available');
+                    const { currentModel } = prelude;
+                    const streamSource = await runFailover(loopCtx, currentModel, (adapter, _, opts) => adapter.stream(opts));
                     let text = '';
                     let currentToolCalls = [];
                     let stepUsage = { promptTokens: 0, completionTokens: 0, totalTokens: 0 };
@@ -760,141 +1125,20 @@ function runAgentLoopStreaming(a, input, options) {
                     // onUsage
                     if (middlewares.length > 0)
                         await runOnUsage(middlewares, ctx, stepUsage);
-                    const toolResults = [];
+                    let toolResults = [];
                     if (currentToolCalls.length > 0) {
                         const assistantMsg = { role: 'assistant', content: text, toolCalls: currentToolCalls };
-                        messages.push(assistantMsg);
-                        for (const tc of currentToolCalls) {
-                            const tool = toolMap.get(tc.name);
-                            if (!tool) {
-                                const unknownResult = `Error: Unknown tool "${tc.name}"`;
-                                toolResults.push({ toolCallId: tc.id, result: unknownResult });
-                                messages.push({ role: 'tool', content: unknownResult, toolCallId: tc.id });
-                                yield { type: 'tool-result', toolCall: tc, result: unknownResult };
-                                continue;
-                            }
-                            if (!tool.execute) {
-                                // Client tool — no server-side handler.
-                                if (options?.toolCallStreamingMode === 'stop-on-client-tool') {
-                                    pendingClientToolCalls.push(tc);
-                                    loopFinishReason = 'client_tool_calls';
-                                    stopForClientTools = true;
-                                    yield { type: 'tool-call', toolCall: tc };
-                                    continue;
-                                }
-                                const placeholder = '[client tool — execute on client]';
-                                toolResults.push({ toolCallId: tc.id, result: placeholder });
-                                messages.push({ role: 'tool', content: placeholder, toolCallId: tc.id });
-                                yield { type: 'tool-call', toolCall: tc };
-                                yield { type: 'tool-result', toolCall: tc, result: placeholder };
-                                continue;
-                            }
-                            // needsApproval enforcement
-                            const approvalDecision = await evaluateApproval(tool, tc, options);
-                            if (approvalDecision === 'rejected') {
-                                const rejectionResult = { rejected: true, reason: 'User rejected this tool call' };
-                                toolResults.push({ toolCallId: tc.id, result: rejectionResult });
-                                messages.push({ role: 'tool', content: JSON.stringify(rejectionResult), toolCallId: tc.id });
-                                yield { type: 'tool-result', toolCall: tc, result: rejectionResult };
-                                continue;
-                            }
-                            if (approvalDecision === 'pending') {
-                                pendingApprovalToolCall = { toolCall: tc, isClientTool: false };
-                                loopFinishReason = 'tool_approval_required';
-                                stopForApproval = true;
-                                yield { type: 'tool-call', toolCall: tc };
+                        // Forward chunks from the shared tool-phase generator straight
+                        // through to the stream consumer.
+                        const phaseGen = executeToolPhase(loopCtx, currentToolCalls, assistantMsg);
+                        while (true) {
+                            const next = await phaseGen.next();
+                            if (next.done) {
+                                toolResults = next.value;
                                 break;
                             }
-                            // onBeforeToolCall
-                            let toolArgs = tc.arguments;
-                            if (middlewares.length > 0) {
-                                const beforeResult = await runOnBeforeToolCall(middlewares, ctx, tc.name, toolArgs);
-                                if (beforeResult) {
-                                    if (beforeResult.type === 'skip') {
-                                        const resultStr = typeof beforeResult.result === 'string' ? beforeResult.result : JSON.stringify(beforeResult.result);
-                                        toolResults.push({ toolCallId: tc.id, result: beforeResult.result });
-                                        messages.push({ role: 'tool', content: resultStr, toolCallId: tc.id });
-                                        yield { type: 'tool-result', toolCall: tc, result: beforeResult.result };
-                                        await runOnAfterToolCall(middlewares, ctx, tc.name, toolArgs, beforeResult.result);
-                                        continue;
-                                    }
-                                    if (beforeResult.type === 'abort') {
-                                        await runOnAbort(middlewares, ctx, beforeResult.reason);
-                                        break;
-                                    }
-                                    if (beforeResult.type === 'transformArgs') {
-                                        toolArgs = beforeResult.args;
-                                    }
-                                }
-                            }
-                            try {
-                                // Emit the tool-call marker before execution so the UI sees
-                                // tool-call → tool-update* → tool-result in order. Async-
-                                // generator executes stream their yields as tool-update chunks
-                                // live; plain executes yield nothing here.
-                                //
-                                // Pause detection: a yielded `pause_for_client_tools` control
-                                // chunk halts iteration, propagates the nested calls to the
-                                // parent's pending list, and SKIPS the tool_result emission
-                                // — the yielding tool's own call stays orphaned in the parent
-                                // message history until the caller resolves it on resume.
-                                yield { type: 'tool-call', toolCall: tc };
-                                const execGen = executeMaybeStreaming(tool, toolArgs, { toolCallId: tc.id });
-                                let result;
-                                let paused = false;
-                                while (true) {
-                                    const step = await execGen.next();
-                                    if (step.done) {
-                                        result = step.value;
-                                        break;
-                                    }
-                                    if (isPauseForClientToolsChunk(step.value)) {
-                                        for (const pending of step.value.toolCalls) {
-                                            pendingClientToolCalls.push(pending);
-                                        }
-                                        loopFinishReason = 'client_tool_calls';
-                                        stopForClientTools = true;
-                                        paused = true;
-                                        break;
-                                    }
-                                    const updateChunk = { type: 'tool-update', toolCall: tc, update: step.value };
-                                    if (middlewares.length > 0) {
-                                        const transformed = runOnChunk(middlewares, ctx, updateChunk);
-                                        if (transformed)
-                                            yield transformed;
-                                    }
-                                    else {
-                                        yield updateChunk;
-                                    }
-                                }
-                                if (paused)
-                                    continue; // skip tool_result emission + message push for this tc
-                                // The streamed `tool-result` chunk and `step.toolResults`
-                                // both carry the ORIGINAL value; only the message content
-                                // pushed onto `messages` (next-step model input) is narrowed
-                                // by toModelOutput.
-                                toolResults.push({ toolCallId: tc.id, result });
-                                const resultStr = await applyToModelOutput(tool, result, middlewares.length > 0 ? (e) => runOnError(middlewares, ctx, e) : undefined);
-                                messages.push({ role: 'tool', content: resultStr, toolCallId: tc.id });
-                                yield { type: 'tool-result', toolCall: tc, result };
-                                // onAfterToolCall
-                                if (middlewares.length > 0)
-                                    await runOnAfterToolCall(middlewares, ctx, tc.name, toolArgs, result);
-                            }
-                            catch (err) {
-                                const msg = err instanceof Error ? err.message : String(err);
-                                const errResult = `Error: ${msg}`;
-                                toolResults.push({ toolCallId: tc.id, result: errResult });
-                                messages.push({ role: 'tool', content: errResult, toolCallId: tc.id });
-                                yield { type: 'tool-result', toolCall: tc, result: errResult };
-                                // onAfterToolCall (error case)
-                                if (middlewares.length > 0)
-                                    await runOnAfterToolCall(middlewares, ctx, tc.name, toolArgs, errResult);
-                            }
+                            yield next.value;
                         }
-                        // onToolPhaseComplete
-                        if (middlewares.length > 0)
-                            await runSequential(middlewares, 'onToolPhaseComplete', ctx);
                     }
                     else {
                         messages.push({ role: 'assistant', content: text });
@@ -907,7 +1151,8 @@ function runAgentLoopStreaming(a, input, options) {
                         finishReason,
                     };
                     steps.push(step);
-                    if (stopForClientTools || stopForApproval)
+                    emitObserverStepCompleted(loopCtx, iteration, true);
+                    if (loopCtx.stopForClientTools || loopCtx.stopForApproval)
                         break;
                     const shouldStop = stopConditions.some(cond => cond({ steps, iteration, lastMessage: step.message }));
                     if (shouldStop || finishReason !== 'tool_calls')
@@ -922,77 +1167,38 @@ function runAgentLoopStreaming(a, input, options) {
             // onError
             if (middlewares.length > 0)
                 await runOnError(middlewares, ctx, err);
-            // Emit observer event on failure
-            const obs = _getAiObservers();
-            if (obs) {
-                const inputText = options?.messages ? '' : input;
-                obs.emit({
-                    kind: 'agent.failed',
-                    agentName: a.constructor.name,
-                    model: modelString,
-                    provider: providerName,
-                    input: inputText,
-                    output: '',
-                    steps: _buildObserverSteps(steps, modelString),
-                    tokens: { prompt: totalUsage.promptTokens, completion: totalUsage.completionTokens, total: totalUsage.totalTokens },
-                    duration: Math.round(performance.now() - loopStart),
-                    finishReason: 'error',
-                    streaming: true,
-                    conversationId: null,
-                    failoverAttempts,
-                    error: err instanceof Error ? err.message : String(err),
-                });
-            }
+            emitObserverFailed(loopCtx, err, true);
             throw err;
         }
         // onFinish
         if (middlewares.length > 0)
             await runSequential(middlewares, 'onFinish', ctx);
         // Emit pending state to consumers via dedicated chunk types
-        if (pendingClientToolCalls.length > 0) {
-            yield { type: 'pending-client-tools', toolCalls: pendingClientToolCalls };
+        if (loopCtx.pendingClientToolCalls.length > 0) {
+            yield { type: 'pending-client-tools', toolCalls: loopCtx.pendingClientToolCalls };
         }
-        if (pendingApprovalToolCall) {
-            yield { type: 'pending-approval', toolCall: pendingApprovalToolCall.toolCall, isClientTool: pendingApprovalToolCall.isClientTool };
-        }
-        const lastStep = steps[steps.length - 1];
-        const result = {
-            text: lastStep ? getMessageText(lastStep.message.content) : '',
-            steps,
-            usage: totalUsage,
-        };
-        if (loopFinishReason)
-            result.finishReason = loopFinishReason;
-        if (pendingClientToolCalls.length > 0)
-            result.pendingClientToolCalls = pendingClientToolCalls;
-        if (pendingApprovalToolCall)
-            result.pendingApprovalToolCall = pendingApprovalToolCall;
-        if (resumedToolMessages.length > 0)
-            result.resumedToolMessages = resumedToolMessages;
-        // Emit observer event on success
-        const obs = _getAiObservers();
-        if (obs) {
-            const inputText = options?.messages ? '' : input;
-            obs.emit({
-                kind: 'agent.completed',
-                agentName: a.constructor.name,
-                model: modelString,
-                provider: providerName,
-                input: inputText,
-                output: result.text,
-                steps: _buildObserverSteps(steps, modelString),
-                tokens: { prompt: totalUsage.promptTokens, completion: totalUsage.completionTokens, total: totalUsage.totalTokens },
-                duration: Math.round(performance.now() - loopStart),
-                finishReason: result.finishReason ?? lastStep?.finishReason ?? 'stop',
-                streaming: true,
-                conversationId: null,
-                failoverAttempts,
-            });
+        if (loopCtx.pendingApprovalToolCall) {
+            yield { type: 'pending-approval', toolCall: loopCtx.pendingApprovalToolCall.toolCall, isClientTool: loopCtx.pendingApprovalToolCall.isClientTool };
         }
+        const result = buildAgentResponse(loopCtx);
+        emitObserverCompleted(loopCtx, result, true);
         resolveResponse(result);
     }
+    // Outer wrapper: delegates every chunk of `generateStream` to the
+    // consumer via `yield*`. If `generateStream` throws (e.g. the caller's
+    // AbortSignal fired), reject the `response` promise with the same
+    // reason BEFORE re-throwing into the for-await consumer. Without this,
+    // `await response` would hang forever after a mid-stream abort.
+    //
+    // NOTE(review): a consumer that stops iterating early (i.e. calls
+    // `.return()` on the stream) finishes the generator without entering
+    // the `catch` below — confirm `response` is settled on that path too.
+    async function* withRejectOnError() {
+        try {
+            // Pass-through: chunks are forwarded unchanged; the success path
+            // presumably settles `response` inside `generateStream` itself
+            // (its visible tail calls `resolveResponse(result)`).
+            yield* generateStream();
+        }
+        catch (err) {
+            // Settle the promise first, then surface the original error
+            // object (not a wrapper) to whoever is iterating the stream.
+            rejectResponse(err);
+            throw err;
+        }
+    }
     return {
-        stream: generateStream(),
+        stream: withRejectOnError(),
         response: responsePromise,
     };
 }
@@ -1056,11 +1262,21 @@ async function resumePendingToolCalls(deps) {
             approvalStillRequired = { toolCall: tc, isClientTool: false };
             break;
         }
+        // Validate args before executing on resume. Approval-resume bypasses
+        // middleware so we use the raw tc.arguments. On failure, feed the
+        // structured error to the model so it can correct itself.
+        const validation = validateToolArgs(tool, tc.arguments);
+        if (!validation.ok) {
+            const m = { role: 'tool', content: JSON.stringify(validation.error), toolCallId: tc.id };
+            messages.push(m);
+            resumed.push(m);
+            continue;
+        }
         try {
             // Drain generator yields silently — approval-resume runs outside the
             // stream, so any preliminary updates are discarded; only the final
             // return value is captured.
-            const execGen = executeMaybeStreaming(tool, tc.arguments, { toolCallId: tc.id });
+            const execGen = executeMaybeStreaming(tool, validation.value, { toolCallId: tc.id });
             let result;
             while (true) {
                 const step = await execGen.next();
@@ -1127,6 +1343,30 @@ async function* executeMaybeStreaming(tool, args, ctx) {
     }
     return await ret;
 }
+/**
+ * Validate a tool call's arguments against the tool's `inputSchema`. On
+ * success, the parsed value is returned — zod transforms (`.transform`,
+ * `.default`, type coercion) are applied, so `execute` receives the
+ * canonical shape the schema describes. On failure, a structured error
+ * suitable for feeding back to the model is returned.
+ *
+ * @param tool - Tool whose `definition.inputSchema` provides `safeParse`
+ *   and whose `definition.name` is interpolated into the error message.
+ * @param args - Raw arguments taken from the tool call.
+ * @returns `{ ok: true, value }` carrying the parsed data, or
+ *   `{ ok: false, error }` where `error` is
+ *   `{ error: 'invalid_arguments', message, issues }` and each issue is
+ *   `{ path, message }`.
+ *
+ * NOTE(review): assumes every tool passed here has a
+ * `definition.inputSchema`; a missing schema would throw rather than
+ * return `{ ok: false }` — confirm against callers.
+ */
+function validateToolArgs(tool, args) {
+    // `safeParse` reports failure through its return value, so schema
+    // mismatches are handled as data below rather than via try/catch.
+    const parsed = tool.definition.inputSchema.safeParse(args);
+    if (parsed.success) {
+        return { ok: true, value: parsed.data };
+    }
+    return {
+        ok: false,
+        error: {
+            error: 'invalid_arguments',
+            message: `Tool "${tool.definition.name}" received arguments that did not match its inputSchema.`,
+            // Flatten each issue to a dotted path string plus its message;
+            // an empty path array becomes the empty string.
+            issues: parsed.error.issues.map(i => ({
+                path: i.path.map(seg => String(seg)).join('.'),
+                message: i.message,
+            })),
+        },
+    };
+}
 /**
  * Default stringification used for the `tool` role message content when a
  * tool has no `toModelOutput` transform: pass through strings, JSON-encode