npm - @vellumai/assistant - Versions diffs - 0.8.7 → 0.8.8-dev.202606052332.17fc8ea - Mend

@vellumai/assistant 0.8.7 → 0.8.8-dev.202606052332.17fc8ea

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (570) hide show

package/src/__tests__/conversation-agent-loop.test.ts CHANGED Viewed

@@ -1,26 +1,18 @@
 import { createRequire } from "node:module";
-import { afterAll, beforeEach, describe, expect, mock, test } from "bun:test";
-import { CompactionCircuit } from "../agent/compaction-circuit.js";
-import type {
-  AgentEvent,
-  AgentLoopRunOptions,
-  AgentLoopRunResult,
-  MidLoopCompaction,
-} from "../agent/loop.js";
-import type { ContextWindowResult } from "../context/window-manager.js";
+import {
+  afterAll,
+  beforeEach,
+  describe,
+  expect,
+  mock,
+  spyOn,
+  test,
+} from "bun:test";
+import type { LoopToolExecutor } from "../agent/loop.js";
 import type { ServerMessage } from "../daemon/message-protocol.js";
-import { defaultCompactionTerminal } from "../plugins/defaults/compaction/terminal.js";
 import { resetPluginRegistryAndRegisterDefaults } from "../plugins/defaults/index.js";
-import { DEFAULT_TIMEOUTS, runPipeline } from "../plugins/pipeline.js";
-import { getMiddlewaresFor } from "../plugins/registry.js";
-import type {
-  CompactionArgs,
-  CompactionResult,
-  TurnContext,
-} from "../plugins/types.js";
-import { PluginTimeoutError } from "../plugins/types.js";
-import type { ContentBlock, Message } from "../providers/types.js";
+import type { Message, Provider, ToolDefinition } from "../providers/types.js";
 const conversationCrudRealSnapshot = {
   ...(createRequire(import.meta.url)(
@@ -76,6 +68,7 @@ mock.module("../config/loader.js", () => ({
     memory: { retrieval: { scratchpadInjection: { enabled: true } } },
     ui: mockUiConfig,
     compaction: { enabled: true, autoThreshold: 0.7 },
+    conversations: { skipAutoRetitling: true },
   }),
   loadRawConfig: () => ({}),
   saveRawConfig: () => {},
@@ -86,17 +79,20 @@ mock.module("../config/loader.js", () => ({
 // Token estimator returns a small value by default (well within budget)
 // so preflight does not trigger unless the test overrides it. Both the
-// calibrated entry point (`estimatePromptTokens`, used in the convergence
-// path) and the raw entry point (`estimatePromptTokensRaw`, used by the
-// default `tokenEstimate` plugin pipeline for preflight/mid-loop) are
+// calibrated entry point (`estimatePromptTokens`, which backs the preflight
+// overflow gate and the convergence path) and the raw entry point
+// (`estimatePromptTokensRaw`, used by the pre-send calibration capture) are
 // stubbed so either call site can drive the test.
 let mockEstimateTokens = 1000;
 mock.module("../context/token-estimator.js", () => ({
   estimatePromptTokens: () => mockEstimateTokens,
   estimatePromptTokensRaw: () => mockEstimateTokens,
-  // Pass-through: the default plugin computes `toolTokenBudget` via this
-  // helper before delegating to the raw estimator. Return 0 so the mocked
-  // raw estimate is not perturbed.
+  // The preflight overflow gate calls this calibrated wrapper directly, so it
+  // must honor `mockEstimateTokens` too rather than fall through to the real
+  // implementation.
+  estimatePromptTokensWithTools: () => mockEstimateTokens,
+  // Pass-through: `estimatePromptTokensWithTools` computes `toolTokenBudget`
+  // via this helper. Return 0 so the mocked estimate is not perturbed.
   estimateToolsTokens: () => 0,
 }));
@@ -320,12 +316,14 @@ const buildUnifiedTurnContextBlockMock = mock(
   (options: Record<string, unknown>) =>
     `<turn_context>\ncurrent_time: ${String(options.timestamp)}\n</turn_context>`,
 );
-const applyRuntimeInjectionsMock = mock(
-  async (msgs: Message[], _options?: unknown) => ({
-    messages: msgs,
-    blocks: { ...mockInjectionBlocks },
-  }),
-);
+const defaultApplyRuntimeInjectionsImpl = async (
+  msgs: Message[],
+  _options?: unknown,
+) => ({
+  messages: msgs,
+  blocks: { ...mockInjectionBlocks },
+});
+const applyRuntimeInjectionsMock = mock(defaultApplyRuntimeInjectionsImpl);
 let mockSlackChronologicalContext: {
   renderedMessages: Array<{
     message: Message;
@@ -364,15 +362,6 @@ mock.module("../daemon/conversation-runtime-assembly.js", () => ({
   applyRuntimeInjections: applyRuntimeInjectionsMock,
   buildUnifiedTurnContextBlock: buildUnifiedTurnContextBlockMock,
   stripInjectionsForCompaction: (msgs: Message[]) => msgs,
-  findLastInjectedNowContent: () => null,
-  readNowScratchpad: () => null,
-  readPkbContext: () => null,
-  getPkbAutoInjectList: () => [
-    "INDEX.md",
-    "essentials.md",
-    "threads.md",
-    "buffer.md",
-  ],
   isSlackChannelConversation: () => false,
   getSlackCompactionWatermarkForPrefix:
     getSlackCompactionWatermarkForPrefixMock,
@@ -549,195 +538,78 @@ mock.module("../proactive-artifact/index.js", () => ({
 // ── Imports (after mocks) ────────────────────────────────────────────
+import { AgentLoop } from "../agent/loop.js";
 import {
   type AgentLoopConversationContext,
   applyCompactionResult,
   runAgentLoopImpl,
 } from "../daemon/conversation-agent-loop.js";
+import {
+  createMockProvider,
+  type ScriptedResponse,
+  textResponse,
+  toolUseResponse,
+} from "./helpers/mock-provider.js";
 // ── Test helpers ─────────────────────────────────────────────────────
-type AgentLoopRun = (
-  messages: Message[],
-  onEvent: (event: AgentEvent) => void | Promise<void>,
-  options?: AgentLoopRunOptions,
-) => Promise<Message[]>;
-/**
- * Faithful re-implementation of `AgentLoop.compact()` for the mock loop: run
- * the compaction pipeline against the supplied turn context (which carries the
- * test's `contextWindowManager`), invoke the orchestrator-supplied hooks, and
- * return the continuation history — or `null` on timeout/exhaustion so the
- * caller yields "budget".
- */
-async function simulateInlineCompaction(
-  compaction: MidLoopCompaction,
-  history: Message[],
-  turnContext: TurnContext | undefined,
-  signal: AbortSignal | undefined,
-  onEvent: (event: AgentEvent) => void | Promise<void>,
-  compactionCircuit: CompactionCircuit,
-): Promise<Message[] | null> {
-  await onEvent({ type: "context_compacting" });
-  const { rawHistory, options } = compaction.prepare(history);
-  let result: CompactionResult;
-  try {
-    result = await runPipeline<CompactionArgs, CompactionResult>(
-      "compaction",
-      getMiddlewaresFor("compaction"),
-      (args) => defaultCompactionTerminal(args, turnContext as TurnContext),
-      { messages: rawHistory, signal, options },
-      turnContext as TurnContext,
-      DEFAULT_TIMEOUTS.compaction,
-    );
-  } catch (error) {
-    if (error instanceof PluginTimeoutError) {
-      await compactionCircuit.recordOutcome(
-        {
-          currentRequestId: turnContext?.requestId,
-          currentTurnTrustContext: turnContext?.trust,
-          turnCount: turnContext?.turnIndex ?? 0,
-        },
-        true,
-        onEvent,
-      );
-      return null;
-    }
-    throw error;
-  }
-  const compactResult = result as ContextWindowResult;
-  if (compactResult.summaryFailed !== undefined) {
-    await compactionCircuit.recordOutcome(
-      {
-        currentRequestId: turnContext?.requestId,
-        currentTurnTrustContext: turnContext?.trust,
-        turnCount: turnContext?.turnIndex ?? 0,
-      },
-      compactResult.summaryFailed,
-      onEvent,
-    );
-  }
-  if (compactResult.compacted) {
-    await compaction.applyResult(compactResult, rawHistory);
-  }
-  if (compactResult.exhausted ?? false) {
-    return null;
-  }
-  return compaction.reinject();
-}
-/**
- * Adapt a `Message[]`-returning mock loop body into `run()`'s real result
- * shape. Mirrors the production loop: the pause-reason carried back is
- * whatever the most recent `onCheckpoint` call yielded with (null when it
- * never yielded), so the orchestrator derives its yield bookkeeping the same
- * way it does against the real loop.
- */
-const asAgentLoopRun = (
-  fn: AgentLoopRun,
-  compactionCircuit: CompactionCircuit,
-): ((
-  messages: Message[],
-  onEvent: (event: AgentEvent) => void | Promise<void>,
-  options?: AgentLoopRunOptions,
-) => Promise<AgentLoopRunResult>) => {
-  return async (messages, onEvent, options) => {
-    let exitReason: AgentLoopRunResult["exitReason"] = null;
-    let wrapped = options;
-    if (options?.onCheckpoint) {
-      const inner = options.onCheckpoint;
-      wrapped = {
-        ...options,
-        onCheckpoint: async (info) => {
-          // Handoff is offered first, mirroring the loop's ordering.
-          const decision = await inner(info);
-          if (decision !== "continue") {
-            exitReason = decision;
-            return decision;
-          }
-          // The mid-loop budget gate and inline compaction both live inside
-          // `AgentLoop.run`. Replicate them here — same formula, stubbed
-          // estimator, and the loop's own `compact()` ceremony — so these
-          // orchestrator tests drive the real escalation path now that the
-          // orchestrator's `onCheckpoint` is handoff-only and compaction runs
-          // inline rather than via an orchestrator re-entry loop.
-          const contextWindow = options.resolveContextWindow?.();
-          if (contextWindow?.overflowRecovery.enabled) {
-            const { maxInputTokens, overflowRecovery } = contextWindow;
-            const safetyMargin =
-              info.history.length > 50
-                ? Math.max(overflowRecovery.safetyMarginRatio, 0.15)
-                : overflowRecovery.safetyMarginRatio;
-            const preflightBudget = Math.floor(
-              maxInputTokens * (1 - safetyMargin),
-            );
-            if (mockEstimateTokens > preflightBudget * 0.85) {
-              // Mirror `AgentLoop.compact()`: when a compaction path is
-              // supplied, run it in place and continue; on timeout or
-              // exhaustion it returns null, so the loop yields "budget".
-              const compacted = options.compaction
-                ? await simulateInlineCompaction(
-                    options.compaction,
-                    info.history,
-                    options.turnContext,
-                    options.signal,
-                    onEvent,
-                    compactionCircuit,
-                  )
-                : null;
-              if (compacted) {
-                exitReason = null;
-                return "continue";
-              }
-              exitReason = "budget";
-              return "budget";
-            }
-          }
-          exitReason = null;
-          return "continue";
-        },
-      };
-    }
-    const history = await fn(messages, onEvent, wrapped);
-    return { history, exitReason };
-  };
-};
 function makeCtx(
   overrides?: Partial<AgentLoopConversationContext> & {
-    agentLoopRun?: AgentLoopRun;
+    providerResponses?: ScriptedResponse[];
+    loopProvider?: Provider;
+    loopTools?: ToolDefinition[];
+    toolExecutor?: LoopToolExecutor;
   },
 ): AgentLoopConversationContext {
-  const agentLoopRun =
-    overrides?.agentLoopRun ??
-    (async (messages: Message[]) => [
-      ...messages,
-      {
-        role: "assistant" as const,
-        content: [{ type: "text" as const, text: "response" }],
-      },
-    ]);
-  const compactionCircuit = new CompactionCircuit("test-conv");
+  const {
+    providerResponses,
+    loopProvider,
+    loopTools,
+    toolExecutor,
+    ...ctxOverrides
+  } = overrides ?? {};
+  const conversationId = ctxOverrides.conversationId ?? "test-conv";
+  let processing = true;
+  // Drive the real `AgentLoop` against a scripted provider, mocking only the
+  // provider HTTP boundary. The loop owns its mid-loop budget gate, inline
+  // compaction, and event emission, so these orchestrator tests exercise the
+  // real escalation/persistence path.
+  //
+  // Name the loop's provider after `ctx.provider` so the two stay in sync,
+  // mirroring production where the orchestrator hands the same provider to
+  // the loop. The loop stamps this name onto `usage.actualProvider` whenever
+  // a response omits its own, which is what the request-log fallback reads.
+  // Tests that need to introspect provider calls (or sequence a rejection)
+  // build their own `loopProvider` via `createMockProvider`.
+  const loopProviderName =
+    (ctxOverrides.provider as { name?: string } | undefined)?.name ??
+    "mock-provider";
+  const provider =
+    loopProvider ??
+    createMockProvider(
+      providerResponses ?? [textResponse("response")],
+      loopProviderName,
+    ).provider;
+  const agentLoop = new AgentLoop(provider, "system prompt", {
+    conversationId,
+    tools: loopTools ?? [],
+    toolExecutor,
+  });
   return {
     conversationId: "test-conv",
     messages: [
       { role: "user", content: [{ type: "text", text: "Hello" }] },
     ] as Message[],
-    processing: true,
+    isProcessing: () => processing,
+    setProcessing: (value: boolean) => {
+      processing = value;
+    },
     abortController: new AbortController(),
     currentRequestId: "test-req",
-    agentLoop: {
-      run: asAgentLoopRun(agentLoopRun, compactionCircuit),
-      getToolTokenBudget: () => 0,
-      getResolvedTools: () => [],
-      // Tests here don't exercise calibration; returning undefined makes
-      // the estimator use the per-provider aggregate key.
-      getActiveModel: () => undefined,
-      compactionCircuit,
-    } as unknown as AgentLoopConversationContext["agentLoop"],
+    agentLoop,
     provider: {
       name: "mock-provider",
       sendMessage: async () => ({
@@ -766,8 +638,6 @@ function makeCtx(
     currentTurnSurfaces: [],
     workingDir: "/tmp",
-    workspaceTopLevelContext: null,
-    workspaceTopLevelDirty: false,
     channelCapabilities: undefined,
     commandIntent: undefined,
     trustContext: undefined,
@@ -804,7 +674,6 @@ function makeCtx(
     getWorkspaceGitService: () => ({ ensureInitialized: async () => {} }),
     commitTurnChanges: async () => {},
-    refreshWorkspaceTopLevelContextIfNeeded: () => {},
     markWorkspaceTopLevelDirty: () => {},
     emitActivityState: () => {},
     getQueueDepth: () => 0,
@@ -830,9 +699,10 @@ function makeCtx(
         injectedTokens: 0,
       }),
       retrackCachedNodes: () => {},
+      recordPkbQueryVectors: () => {},
     } as unknown as AgentLoopConversationContext["graphMemory"],
-    ...overrides,
+    ...ctxOverrides,
   } as AgentLoopConversationContext;
 }
@@ -873,6 +743,9 @@ beforeEach(() => {
   setConversationHistoryStrippedAtMock.mockClear();
   setConversationHistoryStrippedAtMock.mockImplementation(() => {});
   applyRuntimeInjectionsMock.mockClear();
+  applyRuntimeInjectionsMock.mockImplementation(
+    defaultApplyRuntimeInjectionsImpl,
+  );
   buildUnifiedTurnContextBlockMock.mockClear();
   resolveTurnTimezoneContextMock.mockClear();
   formatTurnTimestampMock.mockClear();
@@ -886,11 +759,10 @@ beforeEach(() => {
   projectAssistantMessageMock.mockClear();
   publishSyncInvalidationMock.mockClear();
   mockMessageById = null;
-  // Orchestrator pipelines (overflowReduce, persistence, …) run through the
-  // plugin registry; reset and re-register every default so the pipelines
-  // dispatch to middleware backed by the mocked collaborators these tests
-  // install (`reduceContextOverflow`, `syncMessageToDisk`, etc.) instead of
-  // hitting the bare terminals.
+  // The compaction pipeline runs through the plugin registry; reset and
+  // re-register every default so it dispatches to middleware backed by the
+  // mocked collaborators these tests install (`syncMessageToDisk`, etc.)
+  // instead of hitting the bare terminal.
   resetPluginRegistryAndRegisterDefaults();
 });
@@ -970,57 +842,28 @@ describe("session-agent-loop", () => {
       mockHasProactiveArtifactCompleted = false;
       mockTryClaimProactiveArtifactTrigger = true;
-      const agentLoopRun: AgentLoopRun = async (messages, onEvent, options) => {
-        // Prime the assistant row anchor for LLM call 1 — production code
-        // emits this from `AgentLoop.run` just before `provider.sendMessage`.
-        await onEvent({ type: "llm_call_started" });
-        await onEvent({
-          type: "message_complete",
-          message: {
-            role: "assistant",
-            content: [{ type: "text", text: "I'll build that app." }],
-          },
-        });
-        await onEvent({
-          type: "tool_use",
-          id: "tool-1",
-          name: "app_create",
-          input: { name: "Flow" },
-        });
-        await onEvent({
-          type: "tool_result",
-          toolUseId: "tool-1",
-          content: "{}",
-          isError: false,
-        });
-        await options?.onCheckpoint?.({
-          turnIndex: 0,
-          toolCount: 1,
-          hasToolUse: true,
-          history: messages,
-        });
-        // Prime the anchor again for LLM call 2 — multi-call agent turns
-        // reserve a fresh assistant row per LLM call.
-        await onEvent({ type: "llm_call_started" });
-        await onEvent({
-          type: "message_complete",
-          message: {
-            role: "assistant",
-            content: [{ type: "text", text: "Done." }],
-          },
-        });
-        return [
-          ...messages,
-          {
-            role: "assistant" as const,
-            content: [{ type: "text" as const, text: "Done." }],
-          },
-        ];
-      };
+      // A two-call agent turn: the model invokes `app_create`, then wraps up
+      // with a final text reply.
       const ctx = makeCtx({
         conversationId: "test-conv",
-        agentLoopRun,
+        providerResponses: [
+          {
+            content: [
+              { type: "text", text: "I'll build that app." },
+              {
+                type: "tool_use",
+                id: "tool-1",
+                name: "app_create",
+                input: { name: "Flow" },
+              },
+            ],
+            model: "mock-model",
+            usage: { inputTokens: 10, outputTokens: 5 },
+            stopReason: "tool_use",
+          },
+          textResponse("Done."),
+        ],
+        toolExecutor: async () => ({ content: "{}", isError: false }),
       });
       await runAgentLoopImpl(
         ctx,
@@ -1038,7 +881,23 @@ describe("session-agent-loop", () => {
   });
   describe("disk pressure injection context", () => {
-    test("passes cleanup context into runtime injections for cleanup-mode turns", async () => {
+    // The loop sets `ctx.diskPressureCleanupModeActive` for the duration of the
+    // turn (the disk-pressure-warning injector reads it via the per-conversation
+    // registry) and resets it in the turn-end cleanup path. Snapshot the flag at
+    // each `applyRuntimeInjections` call so assertions observe its value while
+    // injection runs, not the post-turn reset.
+    function captureCleanupFlagDuringInjection(ctx: {
+      diskPressureCleanupModeActive?: boolean;
+    }): () => Array<boolean | undefined> {
+      const observed: Array<boolean | undefined> = [];
+      applyRuntimeInjectionsMock.mockImplementation(async (msgs: Message[]) => {
+        observed.push(ctx.diskPressureCleanupModeActive);
+        return { messages: msgs, blocks: { ...mockInjectionBlocks } };
+      });
+      return () => observed;
+    }
+    test("sets the cleanup-mode flag on the conversation for cleanup-mode turns", async () => {
       mockDiskPressureDecision = {
         action: "allow-cleanup-mode",
         reason: "guardian",
@@ -1061,6 +920,7 @@ describe("session-agent-loop", () => {
           trustClass: "guardian",
         } as AgentLoopConversationContext["trustContext"],
       });
+      const cleanupFlagDuringInjection = captureCleanupFlagDuringInjection(ctx);
       await runAgentLoopImpl(ctx, "free up space", "msg-1", () => {});
@@ -1079,21 +939,16 @@ describe("session-agent-loop", () => {
           },
         }),
       );
-      const firstInjectionOptions = applyRuntimeInjectionsMock.mock
-        .calls[0]![1] as {
-        diskPressureContext?: { cleanupModeActive: boolean } | null;
-      };
-      expect(firstInjectionOptions.diskPressureContext).toEqual({
-        cleanupModeActive: true,
-      });
+      expect(cleanupFlagDuringInjection()).toEqual([true]);
     });
-    test("passes cleanup context into runtime injections for local-owner turns", async () => {
+    test("sets the cleanup-mode flag on the conversation for local-owner turns", async () => {
       mockDiskPressureDecision = {
         action: "allow-cleanup-mode",
         reason: "local-owner",
       };
       const ctx = makeCtx();
+      const cleanupFlagDuringInjection = captureCleanupFlagDuringInjection(ctx);
       await runAgentLoopImpl(ctx, "free up space", "msg-1", () => {});
@@ -1105,16 +960,10 @@ describe("session-agent-loop", () => {
           trustContext: null,
         }),
       );
-      const firstInjectionOptions = applyRuntimeInjectionsMock.mock
-        .calls[0]![1] as {
-        diskPressureContext?: { cleanupModeActive: boolean } | null;
-      };
-      expect(firstInjectionOptions.diskPressureContext).toEqual({
-        cleanupModeActive: true,
-      });
+      expect(cleanupFlagDuringInjection()).toEqual([true]);
     });
-    test("keeps cleanup context on overflow recovery reinjection", async () => {
+    test("keeps the cleanup-mode flag set across overflow recovery reinjection", async () => {
       mockDiskPressureDecision = {
         action: "allow-cleanup-mode",
         reason: "guardian",
@@ -1136,18 +985,14 @@ describe("session-agent-loop", () => {
           trustClass: "guardian",
         } as AgentLoopConversationContext["trustContext"],
       });
+      const cleanupFlagDuringInjection = captureCleanupFlagDuringInjection(ctx);
       await runAgentLoopImpl(ctx, "free up space", "msg-1", () => {});
       expect(applyRuntimeInjectionsMock.mock.calls.length).toBeGreaterThan(1);
-      for (const call of applyRuntimeInjectionsMock.mock.calls) {
-        const options = call[1] as {
-          diskPressureContext?: { cleanupModeActive: boolean } | null;
-        };
-        expect(options.diskPressureContext).toEqual({
-          cleanupModeActive: true,
-        });
-      }
+      const flags = cleanupFlagDuringInjection();
+      expect(flags.length).toBeGreaterThan(1);
+      expect(flags.every((flag) => flag === true)).toBe(true);
     });
     test("blocks policy-denied turns before runtime injection or model execution", async () => {
@@ -1156,9 +1001,6 @@ describe("session-agent-loop", () => {
         reason: "trusted-contact",
       };
       const events: ServerMessage[] = [];
-      const agentLoopRun = mock(async (_messages: Message[]) => {
-        throw new Error("agent loop should not run");
-      });
       const activityStates: unknown[][] = [];
       const traceEvents: unknown[][] = [];
       const ctx = makeCtx({
@@ -1171,14 +1013,11 @@ describe("session-agent-loop", () => {
           },
         } as unknown as AgentLoopConversationContext["traceEmitter"],
       });
-      ctx.agentLoop.run = asAgentLoopRun(
-        agentLoopRun,
-        ctx.agentLoop.compactionCircuit,
-      );
+      const runSpy = spyOn(ctx.agentLoop, "run");
       await runAgentLoopImpl(ctx, "hello", "msg-1", (msg) => events.push(msg));
-      expect(agentLoopRun).not.toHaveBeenCalled();
+      expect(runSpy).not.toHaveBeenCalled();
       expect(applyRuntimeInjectionsMock).not.toHaveBeenCalled();
       expect(activityStates).toContainEqual([
         "idle",
@@ -1238,7 +1077,7 @@ describe("session-agent-loop", () => {
       });
       expect(applyRuntimeInjectionsMock).not.toHaveBeenCalled();
-      expect(ctx.processing).toBe(false);
+      expect(ctx.isProcessing()).toBe(false);
       expect(ctx.abortController).toBeNull();
       expect(ctx.currentRequestId).toBeUndefined();
       expect(drainQueue).toHaveBeenCalledWith("loop_complete");
@@ -1254,47 +1093,14 @@ describe("session-agent-loop", () => {
     test("error events from agent loop are classified and emitted", async () => {
       const events: ServerMessage[] = [];
-      const agentLoopRun: AgentLoopRun = async (messages, onEvent) => {
-        // Prime the assistant row anchor — production code emits this from
-        // `AgentLoop.run` just before `provider.sendMessage`.
-        await onEvent({ type: "llm_call_started" });
-        // Simulate tool_use + error during execution
-        onEvent({
-          type: "tool_use",
-          id: "tu-1",
-          name: "bash",
-          input: { cmd: "ls" },
-        });
-        onEvent({
-          type: "error",
-          error: new Error("Tool execution failed: permission denied"),
-        });
-        onEvent({
-          type: "message_complete",
-          message: {
-            role: "assistant",
-            content: [{ type: "text", text: "I encountered an error" }],
-          },
-        });
-        onEvent({
-          type: "usage",
-          inputTokens: 100,
-          outputTokens: 50,
-          model: "test-model",
-          providerDurationMs: 200,
-        });
-        return [
-          ...messages,
-          {
-            role: "assistant" as const,
-            content: [
-              { type: "text", text: "I encountered an error" },
-            ] as ContentBlock[],
-          },
-        ];
-      };
-      const ctx = makeCtx({ agentLoopRun });
+      // The model calls a tool whose executor throws, surfacing an `error`
+      // event from the loop's catch handler.
+      const ctx = makeCtx({
+        providerResponses: [toolUseResponse("tu-1", "bash", { cmd: "ls" })],
+        toolExecutor: async () => {
+          throw new Error("Tool execution failed: permission denied");
+        },
+      });
       await runAgentLoopImpl(ctx, "run ls", "msg-1", (msg) => events.push(msg));
       const conversationError = events.find(
@@ -1306,34 +1112,9 @@ describe("session-agent-loop", () => {
     test("non-error agent loop completion does not emit conversation_error", async () => {
       const events: ServerMessage[] = [];
-      const agentLoopRun: AgentLoopRun = async (messages, onEvent) => {
-        // Prime the assistant row anchor — production code emits this from
-        // `AgentLoop.run` just before `provider.sendMessage`.
-        await onEvent({ type: "llm_call_started" });
-        onEvent({
-          type: "message_complete",
-          message: {
-            role: "assistant",
-            content: [{ type: "text", text: "All good" }],
-          },
-        });
-        onEvent({
-          type: "usage",
-          inputTokens: 50,
-          outputTokens: 25,
-          model: "test-model",
-          providerDurationMs: 100,
-        });
-        return [
-          ...messages,
-          {
-            role: "assistant" as const,
-            content: [{ type: "text", text: "All good" }] as ContentBlock[],
-          },
-        ];
-      };
-      const ctx = makeCtx({ agentLoopRun });
+      const ctx = makeCtx({
+        providerResponses: [textResponse("All good")],
+      });
       await runAgentLoopImpl(ctx, "hello", "msg-1", (msg) => events.push(msg));
       const conversationError = events.find(
@@ -1369,38 +1150,20 @@ describe("session-agent-loop", () => {
         },
       };
-      const agentLoopRun: AgentLoopRun = async (messages, onEvent) => {
-        // Prime the assistant row anchor — production code emits this from
-        // `AgentLoop.run` just before `provider.sendMessage`.
-        await onEvent({ type: "llm_call_started" });
-        onEvent({
-          type: "message_complete",
-          message: {
-            role: "assistant",
-            content: [{ type: "text", text: "Hi there." }],
-          },
-        });
-        onEvent({
-          type: "usage",
-          inputTokens: 12,
-          outputTokens: 3,
-          model: "gpt-4.1-2026-03-01",
-          actualProvider: "fireworks",
-          providerDurationMs: 45,
-          rawRequest,
-          rawResponse,
-        });
-        return [
-          ...messages,
+      // The provider response carries its own `actualProvider`, so the logged
+      // row should record that name rather than the runtime provider.
+      const ctx = makeCtx({
+        providerResponses: [
           {
-            role: "assistant" as const,
-            content: [{ type: "text", text: "Hi there." }] as ContentBlock[],
+            content: [{ type: "text", text: "Hi there." }],
+            model: "gpt-4.1-2026-03-01",
+            usage: { inputTokens: 12, outputTokens: 3 },
+            stopReason: "end_turn",
+            actualProvider: "fireworks",
+            rawRequest,
+            rawResponse,
           },
-        ];
-      };
-      const ctx = makeCtx({
-        agentLoopRun,
+        ],
         provider: {
           name: "openrouter",
           sendMessage: async () => ({
@@ -1437,37 +1200,19 @@ describe("session-agent-loop", () => {
         ],
       };
-      const agentLoopRun: AgentLoopRun = async (messages, onEvent) => {
-        // Prime the assistant row anchor — production code emits this from
-        // `AgentLoop.run` just before `provider.sendMessage`.
-        await onEvent({ type: "llm_call_started" });
-        onEvent({
-          type: "message_complete",
-          message: {
-            role: "assistant",
-            content: [{ type: "text", text: "Hi there." }],
-          },
-        });
-        onEvent({
-          type: "usage",
-          inputTokens: 12,
-          outputTokens: 3,
-          model: "gpt-4.1-2026-03-01",
-          providerDurationMs: 45,
-          rawRequest,
-          rawResponse,
-        });
-        return [
-          ...messages,
+      // The provider response omits `actualProvider`, so the loop stamps the
+      // runtime provider name onto the usage event and the row records it.
+      const ctx = makeCtx({
+        providerResponses: [
           {
-            role: "assistant" as const,
-            content: [{ type: "text", text: "Hi there." }] as ContentBlock[],
+            content: [{ type: "text", text: "Hi there." }],
+            model: "gpt-4.1-2026-03-01",
+            usage: { inputTokens: 12, outputTokens: 3 },
+            stopReason: "end_turn",
+            rawRequest,
+            rawResponse,
           },
-        ];
-      };
-      const ctx = makeCtx({
-        agentLoopRun,
+        ],
         provider: {
           name: "openrouter",
           sendMessage: async () => ({
@@ -1522,38 +1267,18 @@ describe("session-agent-loop", () => {
         status: "completed",
       };
-      const agentLoopRun: AgentLoopRun = async (messages, onEvent) => {
-        // Prime the assistant row anchor — production code emits this from
-        // `AgentLoop.run` just before `provider.sendMessage`.
-        await onEvent({ type: "llm_call_started" });
-        onEvent({
-          type: "message_complete",
-          message: {
-            role: "assistant",
-            content: [{ type: "text", text: "Hi there." }],
-          },
-        });
-        onEvent({
-          type: "usage",
-          inputTokens: 12,
-          outputTokens: 3,
-          model: "gpt-5.4",
-          actualProvider: "openai",
-          providerDurationMs: 45,
-          rawRequest,
-          rawResponse,
-        });
-        return [
-          ...messages,
+      const ctx = makeCtx({
+        providerResponses: [
           {
-            role: "assistant" as const,
-            content: [{ type: "text", text: "Hi there." }] as ContentBlock[],
+            content: [{ type: "text", text: "Hi there." }],
+            model: "gpt-5.4",
+            usage: { inputTokens: 12, outputTokens: 3 },
+            stopReason: "end_turn",
+            actualProvider: "openai",
+            rawRequest,
+            rawResponse,
           },
-        ];
-      };
-      const ctx = makeCtx({
-        agentLoopRun,
+        ],
         provider: {
           name: "openai",
           sendMessage: async () => ({
@@ -1593,37 +1318,17 @@ describe("session-agent-loop", () => {
         attrs: Record<string, unknown>;
       }> = [];
-      const agentLoopRun: AgentLoopRun = async (messages, onEvent) => {
-        // Prime the assistant row anchor — production code emits this from
-        // `AgentLoop.run` just before `provider.sendMessage`.
-        await onEvent({ type: "llm_call_started" });
-        onEvent({ type: "text_delta", text: "Hi." });
-        onEvent({
-          type: "message_complete",
-          message: {
-            role: "assistant",
-            content: [{ type: "text", text: "Hi." }],
-          },
-        });
-        onEvent({
-          type: "usage",
-          inputTokens: 10,
-          outputTokens: 2,
-          model: "gpt-5.5-2026-04-23",
-          actualProvider: "openai",
-          providerDurationMs: 100,
-        });
-        return [
-          ...messages,
+      const ctx = makeCtx({
+        // The loop replays the text block as a `text_delta` before `usage`.
+        providerResponses: [
           {
-            role: "assistant" as const,
-            content: [{ type: "text", text: "Hi." }] as ContentBlock[],
+            content: [{ type: "text", text: "Hi." }],
+            model: "gpt-5.5-2026-04-23",
+            usage: { inputTokens: 10, outputTokens: 2 },
+            stopReason: "end_turn",
+            actualProvider: "openai",
           },
-        ];
-      };
-      const ctx = makeCtx({
-        agentLoopRun,
+        ],
         // Provider name matches actualProvider so both paths agree.
         provider: {
           name: "openai",
@@ -1671,31 +1376,18 @@ describe("session-agent-loop", () => {
         attrs: Record<string, unknown>;
       }> = [];
-      const agentLoopRun: AgentLoopRun = async (messages, onEvent) => {
-        // Prime the assistant row anchor — production code emits this from
-        // `AgentLoop.run` just before `provider.sendMessage`.
-        await onEvent({ type: "llm_call_started" });
-        // No text_delta — pure tool-call response
-        onEvent({
-          type: "message_complete",
-          message: {
-            role: "assistant",
+      const ctx = makeCtx({
+        // An empty-content response: no text block fires `text_delta`, so the
+        // started event falls back to the resolved usage provider name.
+        providerResponses: [
+          {
             content: [],
+            model: "gpt-5.5-2026-04-23",
+            usage: { inputTokens: 10, outputTokens: 2 },
+            stopReason: "end_turn",
+            actualProvider: "openai",
           },
-        });
-        onEvent({
-          type: "usage",
-          inputTokens: 10,
-          outputTokens: 2,
-          model: "gpt-5.5-2026-04-23",
-          actualProvider: "openai",
-          providerDurationMs: 100,
-        });
-        return messages;
-      };
-      const ctx = makeCtx({
-        agentLoopRun,
+        ],
         provider: {
           name: "anthropic",
           sendMessage: async () => ({
@@ -1737,52 +1429,32 @@ describe("session-agent-loop", () => {
     test("records the actual provider for usage accounting", async () => {
       const events: ServerMessage[] = [];
-      const agentLoopRun: AgentLoopRun = async (messages, onEvent) => {
-        // Prime the assistant row anchor — production code emits this from
-        // `AgentLoop.run` just before `provider.sendMessage`.
-        await onEvent({ type: "llm_call_started" });
-        onEvent({
-          type: "message_complete",
-          message: {
-            role: "assistant",
+      const ctx = makeCtx({
+        providerResponses: [
+          {
             content: [{ type: "text", text: "Hi there." }],
-          },
-        });
-        onEvent({
-          type: "usage",
-          inputTokens: 12,
-          outputTokens: 3,
-          model: "gpt-4.1-2026-03-01",
-          actualProvider: "fireworks",
-          providerDurationMs: 45,
-          rawRequest: {
-            model: "gpt-4.1",
-            messages: [{ role: "user", content: "Hello" }],
-          },
-          rawResponse: {
             model: "gpt-4.1-2026-03-01",
-            choices: [
-              {
-                finish_reason: "stop",
-                message: {
-                  role: "assistant",
-                  content: "Hi there.",
+            usage: { inputTokens: 12, outputTokens: 3 },
+            stopReason: "end_turn",
+            actualProvider: "fireworks",
+            rawRequest: {
+              model: "gpt-4.1",
+              messages: [{ role: "user", content: "Hello" }],
+            },
+            rawResponse: {
+              model: "gpt-4.1-2026-03-01",
+              choices: [
+                {
+                  finish_reason: "stop",
+                  message: {
+                    role: "assistant",
+                    content: "Hi there.",
+                  },
                 },
-              },
-            ],
-          },
-        });
-        return [
-          ...messages,
-          {
-            role: "assistant" as const,
-            content: [{ type: "text", text: "Hi there." }] as ContentBlock[],
+              ],
+            },
           },
-        ];
-      };
-      const ctx = makeCtx({
-        agentLoopRun,
+        ],
         provider: {
           name: "openrouter",
           sendMessage: async () => ({
@@ -1852,27 +1524,9 @@ describe("session-agent-loop", () => {
         },
       });
-      const agentLoopRun: AgentLoopRun = async (messages, onEvent) => {
-        // Prime the assistant row anchor — production code emits this from
-        // `AgentLoop.run` just before `provider.sendMessage`.
-        await onEvent({ type: "llm_call_started" });
-        onEvent({
-          type: "message_complete",
-          message: {
-            role: "assistant",
-            content: [{ type: "text", text: "recovered" }],
-          },
-        });
-        return [
-          ...messages,
-          {
-            role: "assistant" as const,
-            content: [{ type: "text", text: "recovered" }] as ContentBlock[],
-          },
-        ];
-      };
-      const ctx = makeCtx({ agentLoopRun });
+      // After the orchestrator's preflight compaction runs, the loop completes
+      // the turn normally.
+      const ctx = makeCtx({ providerResponses: [textResponse("recovered")] });
       await runAgentLoopImpl(ctx, "hello", "msg-1", (msg) => events.push(msg));
       const compactorCall = recordUsageMock.mock.calls.find(
@@ -1911,7 +1565,6 @@ describe("session-agent-loop", () => {
     test("convergence loop applies reducer and retries when context-too-large is detected", async () => {
       const events: ServerMessage[] = [];
-      let callCount = 0;
       let reducerCalled = false;
       // Configure reducer to succeed on first call — return reduced messages
@@ -1945,53 +1598,15 @@ describe("session-agent-loop", () => {
         };
       };
-      const agentLoopRun: AgentLoopRun = async (messages, onEvent) => {
-        // Prime the assistant row anchor — production code emits this from
-        // `AgentLoop.run` just before `provider.sendMessage`. Retry branches
-        // need this on every invocation: each agent-loop iteration reserves
-        // its own row.
-        await onEvent({ type: "llm_call_started" });
-        callCount++;
-        if (callCount === 1) {
-          onEvent({
-            type: "error",
-            error: new Error("context_length_exceeded"),
-          });
-          onEvent({
-            type: "usage",
-            inputTokens: 100,
-            outputTokens: 0,
-            model: "test-model",
-            providerDurationMs: 50,
-          });
-          return messages;
-        }
-        // Second call (after reducer): succeed
-        onEvent({
-          type: "message_complete",
-          message: {
-            role: "assistant",
-            content: [{ type: "text", text: "recovered" }],
-          },
-        });
-        onEvent({
-          type: "usage",
-          inputTokens: 50,
-          outputTokens: 25,
-          model: "test-model",
-          providerDurationMs: 100,
-        });
-        return [
-          ...messages,
-          {
-            role: "assistant" as const,
-            content: [{ type: "text", text: "recovered" }] as ContentBlock[],
-          },
-        ];
-      };
+      // The provider rejects the first call with a context-too-large error,
+      // then succeeds once the orchestrator has reduced the context.
+      const { provider, calls } = createMockProvider([
+        new Error("context_length_exceeded"),
+        textResponse("recovered"),
+      ]);
       const ctx = makeCtx({
-        agentLoopRun,
+        loopProvider: provider,
         contextWindowManager: {
           shouldCompact: () => ({ needed: false, estimatedTokens: 0 }),
           maybeCompact: async () => ({ compacted: false }),
@@ -2001,7 +1616,7 @@ describe("session-agent-loop", () => {
       await runAgentLoopImpl(ctx, "hello", "msg-1", (msg) => events.push(msg));
       expect(reducerCalled).toBe(true);
-      expect(callCount).toBe(2);
+      expect(calls.length).toBe(2);
       const compactEvent = events.find((e) => e.type === "context_compacted");
       expect(compactEvent).toBeDefined();
     });
@@ -2009,23 +1624,10 @@ describe("session-agent-loop", () => {
     test("emits conversation_error when context stays too large after all recovery attempts", async () => {
       const events: ServerMessage[] = [];
-      const agentLoopRun: AgentLoopRun = async (messages, onEvent) => {
-        onEvent({
-          type: "error",
-          error: new Error("context_length_exceeded"),
-        });
-        onEvent({
-          type: "usage",
-          inputTokens: 100,
-          outputTokens: 0,
-          model: "test-model",
-          providerDurationMs: 50,
-        });
-        return messages;
-      };
+      // The provider rejects every call with a context-too-large error, so the
+      // orchestrator exhausts its recovery attempts.
       const ctx = makeCtx({
-        agentLoopRun,
+        providerResponses: [new Error("context_length_exceeded")],
         contextWindowManager: {
           shouldCompact: () => ({ needed: false, estimatedTokens: 0 }),
           // Compaction succeeds but context is still too large
@@ -2059,7 +1661,6 @@ describe("session-agent-loop", () => {
     test("bounded convergence loop applies reducer tiers and recovers", async () => {
       const events: ServerMessage[] = [];
-      let callCount = 0;
       let reducerCalls = 0;
       // Reducer: succeed on first call, returning reduced messages
@@ -2077,55 +1678,15 @@ describe("session-agent-loop", () => {
         };
       };
-      const agentLoopRun: AgentLoopRun = async (messages, onEvent) => {
-        // Prime the assistant row anchor — production code emits this from
-        // `AgentLoop.run` just before `provider.sendMessage`. Retry branches
-        // need this on every invocation: each agent-loop iteration reserves
-        // its own row.
-        await onEvent({ type: "llm_call_started" });
-        callCount++;
-        if (callCount === 1) {
-          onEvent({
-            type: "error",
-            error: new Error("context_length_exceeded"),
-          });
-          onEvent({
-            type: "usage",
-            inputTokens: 100,
-            outputTokens: 0,
-            model: "test-model",
-            providerDurationMs: 50,
-          });
-          return messages;
-        }
-        // After reducer runs, succeed
-        onEvent({
-          type: "message_complete",
-          message: {
-            role: "assistant",
-            content: [{ type: "text", text: "recovered via convergence" }],
-          },
-        });
-        onEvent({
-          type: "usage",
-          inputTokens: 50,
-          outputTokens: 25,
-          model: "test-model",
-          providerDurationMs: 100,
-        });
-        return [
-          ...messages,
-          {
-            role: "assistant" as const,
-            content: [
-              { type: "text", text: "recovered via convergence" },
-            ] as ContentBlock[],
-          },
-        ];
-      };
+      // The provider rejects the first call with a context-too-large error,
+      // then succeeds once the orchestrator has reduced the context.
+      const { provider, calls } = createMockProvider([
+        new Error("context_length_exceeded"),
+        textResponse("recovered via convergence"),
+      ]);
       const ctx = makeCtx({
-        agentLoopRun,
+        loopProvider: provider,
         contextWindowManager: {
           shouldCompact: () => ({ needed: false, estimatedTokens: 0 }),
           maybeCompact: async () => ({ compacted: false }),
@@ -2135,7 +1696,7 @@ describe("session-agent-loop", () => {
       await runAgentLoopImpl(ctx, "hello", "msg-1", (msg) => events.push(msg));
       expect(reducerCalls).toBeGreaterThanOrEqual(1);
-      expect(callCount).toBe(2);
+      expect(calls.length).toBe(2);
       const conversationError = events.find(
         (e) => e.type === "conversation_error",
       );
@@ -2146,7 +1707,6 @@ describe("session-agent-loop", () => {
     test("non-interactive auto-compress continues without approval prompt", async () => {
       const events: ServerMessage[] = [];
-      let callCount = 0;
       // Reducer exhausts all tiers
       mockReducerStepFn = (msgs: Message[]) => ({
@@ -2167,54 +1727,14 @@ describe("session-agent-loop", () => {
       mockOverflowAction = "auto_compress_latest_turn";
-      const agentLoopRun: AgentLoopRun = async (messages, onEvent) => {
-        // Prime the assistant row anchor — production code emits this from
-        // `AgentLoop.run` just before `provider.sendMessage`. Retry branches
-        // need this on every invocation: each agent-loop iteration reserves
-        // its own row.
-        await onEvent({ type: "llm_call_started" });
-        callCount++;
-        if (callCount <= 2) {
-          onEvent({
-            type: "error",
-            error: new Error("context_length_exceeded"),
-          });
-          onEvent({
-            type: "usage",
-            inputTokens: 100,
-            outputTokens: 0,
-            model: "test-model",
-            providerDurationMs: 50,
-          });
-          return messages;
-        }
-        onEvent({
-          type: "message_complete",
-          message: {
-            role: "assistant",
-            content: [{ type: "text", text: "auto-recovered" }],
-          },
-        });
-        onEvent({
-          type: "usage",
-          inputTokens: 50,
-          outputTokens: 25,
-          model: "test-model",
-          providerDurationMs: 100,
-        });
-        return [
-          ...messages,
-          {
-            role: "assistant" as const,
-            content: [
-              { type: "text", text: "auto-recovered" },
-            ] as ContentBlock[],
-          },
-        ];
-      };
+      // The provider rejects the first two calls with context-too-large errors,
+      // then succeeds after the emergency auto-compress runs.
       const ctx = makeCtx({
-        agentLoopRun,
+        providerResponses: [
+          new Error("context_length_exceeded"),
+          new Error("context_length_exceeded"),
+          textResponse("auto-recovered"),
+        ],
         hasNoClient: true,
         contextWindowManager: {
           shouldCompact: () => ({ needed: false, estimatedTokens: 0 }),
@@ -2261,7 +1781,6 @@ describe("session-agent-loop", () => {
       // `budget_yield_unrecovered` so the inspector and dashboards can
       // attribute the silent stall.
       const events: ServerMessage[] = [];
-      let callCount = 0;
       // Reducer exhausts all 4 tiers on first call so the convergence
       // loop runs exactly one iteration before falling through to
@@ -2292,43 +1811,30 @@ describe("session-agent-loop", () => {
       // call). 90k satisfies both so the path reaches call 3.
       mockEstimateTokens = 90_000;
-      const agentLoopRun: AgentLoopRun = async (messages, onEvent, options) => {
-        callCount++;
-        if (callCount <= 2) {
-          // Calls 1 (initial) and 2 (convergence rerun): error so
-          // `state.contextTooLargeDetected` stays true through
-          // convergence exit and we enter the auto_compress branch.
-          onEvent({
-            type: "error",
-            error: new Error("context_length_exceeded"),
-          });
-          onEvent({
-            type: "usage",
-            inputTokens: 100,
-            outputTokens: 0,
-            model: "test-model",
-            providerDurationMs: 50,
-          });
-          return messages;
-        }
-        // Call 3: the auto_compress_latest_turn rerun. Invoke
-        // onCheckpoint so the orchestrator's mid-loop budget check
-        // flips `yieldedForBudget` to true, then return without
-        // finishing — mirroring what AgentLoop.run does when its
-        // checkpoint returns "yield".
-        if (options?.onCheckpoint) {
-          await options.onCheckpoint({
-            turnIndex: 0,
-            toolCount: 1,
-            hasToolUse: true,
-            history: messages,
-          });
-        }
-        return messages;
-      };
+      // Calls 1 (initial) and 2 (convergence rerun) reject with
+      // context-too-large so `contextTooLargeDetected` stays true through the
+      // convergence exit and the orchestrator enters the auto_compress branch.
+      // Call 3 (the auto_compress rerun) is a tool turn: the loop runs it
+      // without a compaction hook, so when its mid-loop budget gate trips on
+      // the still-oversized estimate it yields `exitReason = "budget"` rather
+      // than recovering — the silent-stall path under test.
       const ctx = makeCtx({
-        agentLoopRun,
+        providerResponses: [
+          new Error("context_length_exceeded"),
+          new Error("context_length_exceeded"),
+          toolUseResponse("t1", "read_file", { path: "/a.txt" }),
+        ],
+        loopTools: [
+          {
+            name: "read_file",
+            description: "Read a file",
+            input_schema: {
+              type: "object",
+              properties: { path: { type: "string" } },
+            },
+          },
+        ],
+        toolExecutor: async () => ({ content: "data", isError: false }),
         hasNoClient: true,
         contextWindowManager: {
           shouldCompact: () => ({ needed: false, estimatedTokens: 0 }),
@@ -2411,23 +1917,10 @@ describe("session-agent-loop", () => {
         };
       };
-      const agentLoopRun: AgentLoopRun = async (messages, onEvent) => {
-        onEvent({
-          type: "error",
-          error: new Error("context_length_exceeded"),
-        });
-        onEvent({
-          type: "usage",
-          inputTokens: 100,
-          outputTokens: 0,
-          model: "test-model",
-          providerDurationMs: 50,
-        });
-        return messages;
-      };
+      // The provider rejects every call with a context-too-large error, so the
+      // orchestrator keeps retrying until it hits the attempt ceiling.
       const ctx = makeCtx({
-        agentLoopRun,
+        providerResponses: [new Error("context_length_exceeded")],
         contextWindowManager: {
           shouldCompact: () => ({ needed: false, estimatedTokens: 0 }),
           maybeCompact: async () => ({ compacted: false }),
@@ -2443,7 +1936,6 @@ describe("session-agent-loop", () => {
     test("preflight budget evaluation invokes reducer before provider call", async () => {
       const events: ServerMessage[] = [];
       let reducerCalls = 0;
-      let agentLoopCalls = 0;
       // Set token estimate above budget (100000 * 0.95 = 95000)
       mockEstimateTokens = 96000;
@@ -2462,36 +1954,11 @@ describe("session-agent-loop", () => {
         };
       };
-      const agentLoopRun: AgentLoopRun = async (messages, onEvent) => {
-        agentLoopCalls++;
-        // Prime the assistant row anchor — production code emits this from
-        // `AgentLoop.run` just before `provider.sendMessage`.
-        await onEvent({ type: "llm_call_started" });
-        onEvent({
-          type: "message_complete",
-          message: {
-            role: "assistant",
-            content: [{ type: "text", text: "ok" }],
-          },
-        });
-        onEvent({
-          type: "usage",
-          inputTokens: 50,
-          outputTokens: 25,
-          model: "test-model",
-          providerDurationMs: 100,
-        });
-        return [
-          ...messages,
-          {
-            role: "assistant" as const,
-            content: [{ type: "text", text: "ok" }] as ContentBlock[],
-          },
-        ];
-      };
+      // After the preflight reducer brings the estimate under budget, the loop
+      // completes the turn in a single provider call.
+      const { provider, calls } = createMockProvider([textResponse("ok")]);
       const ctx = makeCtx({
-        agentLoopRun,
+        loopProvider: provider,
         contextWindowManager: {
           shouldCompact: () => ({ needed: false, estimatedTokens: 0 }),
           maybeCompact: async () => ({ compacted: false }),
@@ -2502,8 +1969,8 @@ describe("session-agent-loop", () => {
       // Reducer should have been called during preflight
       expect(reducerCalls).toBeGreaterThanOrEqual(1);
-      // Agent loop should still succeed
-      expect(agentLoopCalls).toBe(1);
+      // Agent loop should still succeed in a single provider call
+      expect(calls.length).toBe(1);
       const complete = events.find((e) => e.type === "message_complete");
       expect(complete).toBeDefined();
     });
@@ -2512,78 +1979,28 @@ describe("session-agent-loop", () => {
   describe("provider ordering error retry", () => {
     test("retries with deep repair when ordering error is detected", async () => {
       const events: ServerMessage[] = [];
-      let callCount = 0;
-      const agentLoopRun: AgentLoopRun = async (messages, onEvent) => {
-        // Prime the assistant row anchor — production code emits this from
-        // `AgentLoop.run` just before `provider.sendMessage`. Retry branches
-        // need this on every invocation: each agent-loop iteration reserves
-        // its own row.
-        await onEvent({ type: "llm_call_started" });
-        callCount++;
-        if (callCount === 1) {
-          onEvent({
-            type: "error",
-            error: new Error("messages ordering error"),
-          });
-          onEvent({
-            type: "usage",
-            inputTokens: 100,
-            outputTokens: 0,
-            model: "test-model",
-            providerDurationMs: 50,
-          });
-          return messages;
-        }
-        // Retry succeeds
-        onEvent({
-          type: "message_complete",
-          message: {
-            role: "assistant",
-            content: [{ type: "text", text: "fixed" }],
-          },
-        });
-        onEvent({
-          type: "usage",
-          inputTokens: 50,
-          outputTokens: 25,
-          model: "test-model",
-          providerDurationMs: 100,
-        });
-        return [
-          ...messages,
-          {
-            role: "assistant" as const,
-            content: [{ type: "text", text: "fixed" }] as ContentBlock[],
-          },
-        ];
-      };
-      const ctx = makeCtx({ agentLoopRun });
+      // The provider rejects the first call with an ordering error, then
+      // succeeds once the orchestrator's deep repair re-sends the turn.
+      const { provider, calls } = createMockProvider([
+        new Error("messages ordering error"),
+        textResponse("fixed"),
+      ]);
+      const ctx = makeCtx({ loopProvider: provider });
       await runAgentLoopImpl(ctx, "hello", "msg-1", (msg) => events.push(msg));
-      expect(callCount).toBe(2);
+      expect(calls.length).toBe(2);
     });
     test("emits deferred ordering error when retry also fails", async () => {
       const events: ServerMessage[] = [];
-      const agentLoopRun: AgentLoopRun = async (messages, onEvent) => {
-        onEvent({
-          type: "error",
-          error: new Error("messages ordering error"),
-        });
-        onEvent({
-          type: "usage",
-          inputTokens: 100,
-          outputTokens: 0,
-          model: "test-model",
-          providerDurationMs: 50,
-        });
-        return messages;
-      };
-      const ctx = makeCtx({ agentLoopRun });
+      // The provider rejects every call with an ordering error, so even the
+      // deep-repair retry fails and the orchestrator surfaces the error.
+      const ctx = makeCtx({
+        providerResponses: [new Error("messages ordering error")],
+      });
       await runAgentLoopImpl(ctx, "hello", "msg-1", (msg) => events.push(msg));
       const conversationError = events.find(
@@ -2597,62 +2014,18 @@ describe("session-agent-loop", () => {
     test("yields at checkpoint when canHandoffAtCheckpoint returns true", async () => {
       const events: ServerMessage[] = [];
-      const agentLoopRun: AgentLoopRun = async (messages, onEvent, options) => {
-        // Prime the assistant row anchor — production code emits this from
-        // `AgentLoop.run` just before `provider.sendMessage`. Retry branches
-        // need this on every invocation: each agent-loop iteration reserves
-        // its own row.
-        await onEvent({ type: "llm_call_started" });
-        // Simulate tool use followed by checkpoint
-        onEvent({ type: "tool_use", id: "tu-1", name: "file_read", input: {} });
-        onEvent({
-          type: "tool_result",
-          toolUseId: "tu-1",
-          content: "file content",
-          isError: false,
-        });
-        onEvent({
-          type: "message_complete",
-          message: {
-            role: "assistant",
-            content: [{ type: "text", text: "partial" }],
-          },
-        });
-        onEvent({
-          type: "usage",
-          inputTokens: 100,
-          outputTokens: 50,
-          model: "test-model",
-          providerDurationMs: 100,
-        });
-        if (options?.onCheckpoint) {
-          const decision = await options.onCheckpoint({
-            turnIndex: 0,
-            toolCount: 1,
-            hasToolUse: true,
-            history: messages,
-          });
-          if (decision !== "continue") {
-            return [
-              ...messages,
-              {
-                role: "assistant" as const,
-                content: [{ type: "text", text: "partial" }] as ContentBlock[],
-              },
-            ];
-          }
-        }
-        return [
-          ...messages,
+      // A tool turn drives the loop to its first mid-loop checkpoint, where the
+      // orchestrator yields for a queued handoff.
+      const ctx = makeCtx({
+        providerResponses: [toolUseResponse("tu-1", "file_read", {})],
+        loopTools: [
           {
-            role: "assistant" as const,
-            content: [{ type: "text", text: "partial" }] as ContentBlock[],
+            name: "file_read",
+            description: "Read a file",
+            input_schema: { type: "object", properties: {} },
           },
-        ];
-      };
-      const ctx = makeCtx({
-        agentLoopRun,
+        ],
+        toolExecutor: async () => ({ content: "file content", isError: false }),
         canHandoffAtCheckpoint: () => true,
       } as unknown as Partial<AgentLoopConversationContext>);
@@ -2669,52 +2042,21 @@ describe("session-agent-loop", () => {
     test("continues when canHandoffAtCheckpoint returns false", async () => {
       const events: ServerMessage[] = [];
-      const agentLoopRun: AgentLoopRun = async (messages, onEvent, options) => {
-        // Prime the assistant row anchor — production code emits this from
-        // `AgentLoop.run` just before `provider.sendMessage`. Retry branches
-        // need this on every invocation: each agent-loop iteration reserves
-        // its own row.
-        await onEvent({ type: "llm_call_started" });
-        onEvent({ type: "tool_use", id: "tu-1", name: "file_read", input: {} });
-        onEvent({
-          type: "tool_result",
-          toolUseId: "tu-1",
-          content: "content",
-          isError: false,
-        });
-        onEvent({
-          type: "message_complete",
-          message: {
-            role: "assistant",
-            content: [{ type: "text", text: "done" }],
-          },
-        });
-        onEvent({
-          type: "usage",
-          inputTokens: 100,
-          outputTokens: 50,
-          model: "test-model",
-          providerDurationMs: 100,
-        });
-        if (options?.onCheckpoint) {
-          await options.onCheckpoint({
-            turnIndex: 0,
-            toolCount: 1,
-            hasToolUse: true,
-            history: messages,
-          });
-        }
-        return [
-          ...messages,
+      // The tool turn reaches a checkpoint, but with handoff disabled the loop
+      // continues to the next turn and completes normally.
+      const ctx = makeCtx({
+        providerResponses: [
+          toolUseResponse("tu-1", "file_read", {}),
+          textResponse("done"),
+        ],
+        loopTools: [
           {
-            role: "assistant" as const,
-            content: [{ type: "text", text: "done" }] as ContentBlock[],
+            name: "file_read",
+            description: "Read a file",
+            input_schema: { type: "object", properties: {} },
           },
-        ];
-      };
-      const ctx = makeCtx({
-        agentLoopRun,
+        ],
+        toolExecutor: async () => ({ content: "content", isError: false }),
         canHandoffAtCheckpoint: () => false,
       } as unknown as Partial<AgentLoopConversationContext>);
@@ -2736,36 +2078,18 @@ describe("session-agent-loop", () => {
       const events: ServerMessage[] = [];
       const abortController = new AbortController();
-      const agentLoopRun: AgentLoopRun = async (messages, onEvent) => {
-        // Prime the assistant row anchor — production code emits this from
-        // `AgentLoop.run` just before `provider.sendMessage`.
-        await onEvent({ type: "llm_call_started" });
-        onEvent({
-          type: "message_complete",
-          message: {
-            role: "assistant",
-            content: [{ type: "text", text: "partial" }],
-          },
-        });
-        onEvent({
-          type: "usage",
-          inputTokens: 100,
-          outputTokens: 50,
-          model: "test-model",
-          providerDurationMs: 100,
-        });
-        // Simulate abort after processing
-        abortController.abort();
-        return [
-          ...messages,
-          {
-            role: "assistant" as const,
-            content: [{ type: "text", text: "partial" }] as ContentBlock[],
-          },
-        ];
+      // The provider completes its response but the user cancels mid-turn, so
+      // the orchestrator observes the aborted signal once the loop returns.
+      const provider: Provider = {
+        name: "mock",
+        async sendMessage(_messages, options) {
+          options?.onEvent?.({ type: "text_delta", text: "partial" });
+          abortController.abort();
+          return textResponse("partial");
+        },
       };
-      const ctx = makeCtx({ agentLoopRun, abortController });
+      const ctx = makeCtx({ loopProvider: provider, abortController });
       await runAgentLoopImpl(ctx, "hello", "msg-1", (msg) => events.push(msg));
       const cancelled = events.find((e) => e.type === "generation_cancelled");
@@ -2776,13 +2100,16 @@ describe("session-agent-loop", () => {
       const events: ServerMessage[] = [];
       const abortController = new AbortController();
-      const agentLoopRun: AgentLoopRun = async () => {
-        abortController.abort();
-        const err = new DOMException("The operation was aborted", "AbortError");
-        throw err;
+      // The provider rejects with an AbortError after the user cancels.
+      const provider: Provider = {
+        name: "mock",
+        async sendMessage() {
+          abortController.abort();
+          throw new DOMException("The operation was aborted", "AbortError");
+        },
       };
-      const ctx = makeCtx({ agentLoopRun, abortController });
+      const ctx = makeCtx({ loopProvider: provider, abortController });
       await runAgentLoopImpl(ctx, "hello", "msg-1", (msg) => events.push(msg));
       const cancelled = events.find((e) => e.type === "generation_cancelled");
@@ -2799,36 +2126,17 @@ describe("session-agent-loop", () => {
       const abortController = new AbortController();
       resolveAssistantAttachmentsMock.mockClear();
-      const agentLoopRun: AgentLoopRun = async (messages, onEvent) => {
-        // Prime the assistant row anchor — production code emits this from
-        // `AgentLoop.run` just before `provider.sendMessage`.
-        await onEvent({ type: "llm_call_started" });
-        onEvent({
-          type: "message_complete",
-          message: {
-            role: "assistant",
-            content: [{ type: "text", text: "partial" }],
-          },
-        });
-        onEvent({
-          type: "usage",
-          inputTokens: 100,
-          outputTokens: 50,
-          model: "test-model",
-          providerDurationMs: 100,
-        });
-        // Simulate abort after processing
-        abortController.abort();
-        return [
-          ...messages,
-          {
-            role: "assistant" as const,
-            content: [{ type: "text", text: "partial" }] as ContentBlock[],
-          },
-        ];
+      // The provider completes its response but the user cancels mid-turn.
+      const provider: Provider = {
+        name: "mock",
+        async sendMessage(_messages, options) {
+          options?.onEvent?.({ type: "text_delta", text: "partial" });
+          abortController.abort();
+          return textResponse("partial");
+        },
       };
-      const ctx = makeCtx({ agentLoopRun, abortController });
+      const ctx = makeCtx({ loopProvider: provider, abortController });
       await runAgentLoopImpl(ctx, "hello", "msg-1", (msg) => events.push(msg));
       const cancelled = events.find((e) => e.type === "generation_cancelled");
@@ -2840,96 +2148,50 @@ describe("session-agent-loop", () => {
   describe("finally block cleanup", () => {
     test("increments turnCount after successful run", async () => {
-      const ctx = makeCtx({
-        agentLoopRun: async (messages, onEvent) => {
-          // Prime the assistant row anchor — production code emits this from
-          // `AgentLoop.run` just before `provider.sendMessage`.
-          await onEvent({ type: "llm_call_started" });
-          onEvent({
-            type: "message_complete",
-            message: {
-              role: "assistant",
-              content: [{ type: "text", text: "hi" }],
-            },
-          });
-          onEvent({
-            type: "usage",
-            inputTokens: 10,
-            outputTokens: 5,
-            model: "test",
-            providerDurationMs: 50,
-          });
-          return [
-            ...messages,
-            {
-              role: "assistant" as const,
-              content: [{ type: "text", text: "hi" }] as ContentBlock[],
-            },
-          ];
-        },
-      });
+      // GIVEN a real loop that answers in a single text turn
+      const ctx = makeCtx({ providerResponses: [textResponse("hi")] });
       expect(ctx.turnCount).toBe(0);
+      // WHEN the orchestrator runs the turn to completion
       await runAgentLoopImpl(ctx, "hi", "msg-1", () => {});
+      // THEN the finally block increments the turn count
       expect(ctx.turnCount).toBe(1);
     });
     test("clears processing state and abort controller", async () => {
-      const ctx = makeCtx({
-        agentLoopRun: async (messages, onEvent) => {
-          // Prime the assistant row anchor — production code emits this from
-          // `AgentLoop.run` just before `provider.sendMessage`.
-          await onEvent({ type: "llm_call_started" });
-          onEvent({
-            type: "message_complete",
-            message: {
-              role: "assistant",
-              content: [{ type: "text", text: "hi" }],
-            },
-          });
-          onEvent({
-            type: "usage",
-            inputTokens: 10,
-            outputTokens: 5,
-            model: "test",
-            providerDurationMs: 50,
-          });
-          return [
-            ...messages,
-            {
-              role: "assistant" as const,
-              content: [{ type: "text", text: "hi" }] as ContentBlock[],
-            },
-          ];
-        },
-      });
+      // GIVEN a real loop that answers in a single text turn
+      const ctx = makeCtx({ providerResponses: [textResponse("hi")] });
+      // WHEN the orchestrator runs the turn to completion
       await runAgentLoopImpl(ctx, "hi", "msg-1", () => {});
-      expect(ctx.processing).toBe(false);
+      // THEN the finally block clears all per-turn processing state
+      expect(ctx.isProcessing()).toBe(false);
       expect(ctx.abortController).toBeNull();
       expect(ctx.currentRequestId).toBeUndefined();
       expect(ctx.commandIntent).toBeUndefined();
     });
-    test("clears state even when agent loop throws", async () => {
+    test("clears state and surfaces a processing error when the provider call fails", async () => {
+      // GIVEN a real loop whose provider rejects with an unexpected error
       const events: ServerMessage[] = [];
       const ctx = makeCtx({
-        agentLoopRun: async () => {
-          throw new Error("unexpected crash");
-        },
+        loopProvider: {
+          name: "mock-provider",
+          async sendMessage() {
+            throw new Error("unexpected crash");
+          },
+        } as unknown as Provider,
       });
+      // WHEN the orchestrator runs the turn
       await runAgentLoopImpl(ctx, "hi", "msg-1", (msg) => events.push(msg));
-      expect(ctx.processing).toBe(false);
+      // THEN the finally block clears per-turn state and the failure is
+      // surfaced as a processing-failed conversation error
+      expect(ctx.isProcessing()).toBe(false);
       expect(ctx.abortController).toBeNull();
-      expect(events.find((event) => event.type === "error")).toMatchObject({
-        type: "error",
-        code: "CONVERSATION_PROCESSING_FAILED",
-        errorCategory: "processing_failed",
-      });
       expect(
         events.find((event) => event.type === "conversation_error"),
       ).toMatchObject({
@@ -2940,46 +2202,19 @@ describe("session-agent-loop", () => {
     });
     test("drains queue after completion", async () => {
+      // GIVEN a real loop that answers in a single text turn
       let drainReason: string | undefined;
       const ctx = makeCtx({
-        agentLoopRun: async (
-          messages: Message[],
-          onEvent: (event: AgentEvent) => void | Promise<void>,
-        ) => {
-          // Prime the assistant row anchor — production code emits this from
-          // `AgentLoop.run` just before `provider.sendMessage`. Must be
-          // awaited so the assistant row is reserved before message_complete
-          // tries to write into it.
-          await onEvent({ type: "llm_call_started" });
-          onEvent({
-            type: "message_complete",
-            message: {
-              role: "assistant",
-              content: [{ type: "text", text: "ok" }],
-            },
-          });
-          onEvent({
-            type: "usage",
-            inputTokens: 10,
-            outputTokens: 5,
-            model: "test",
-            providerDurationMs: 50,
-          });
-          return [
-            ...messages,
-            {
-              role: "assistant" as const,
-              content: [{ type: "text", text: "ok" }] as ContentBlock[],
-            },
-          ];
-        },
+        providerResponses: [textResponse("ok")],
         drainQueue: (reason: string) => {
           drainReason = reason;
         },
       } as unknown as Partial<AgentLoopConversationContext>);
+      // WHEN the orchestrator runs the turn to completion
       await runAgentLoopImpl(ctx, "hi", "msg-1", () => {});
+      // THEN the queue is drained with the loop-complete reason
       expect(drainReason).toBe("loop_complete");
     });
   });
@@ -3098,7 +2333,7 @@ describe("session-agent-loop", () => {
         isUserMessage: true,
       });
-      expect(ctx.processing).toBe(false);
+      expect(ctx.isProcessing()).toBe(false);
       expect(ctx.abortController).toBeNull();
       expect(ctx.currentRequestId).toBeUndefined();
     });
@@ -3208,24 +2443,17 @@ describe("session-agent-loop", () => {
     test("synthesizes error assistant message when provider returns no response", async () => {
       const events: ServerMessage[] = [];
-      const agentLoopRun: AgentLoopRun = async (messages, onEvent) => {
-        // Emit a non-ordering, non-context-too-large error that sets providerErrorUserMessage
-        onEvent({
-          type: "error",
-          error: new Error("Internal processing failure"),
-        });
-        onEvent({
-          type: "usage",
-          inputTokens: 100,
-          outputTokens: 0,
-          model: "test-model",
-          providerDurationMs: 50,
-        });
-        // Return same messages (no assistant message appended)
-        return messages;
-      };
-      const ctx = makeCtx({ agentLoopRun });
+      // GIVEN a real loop whose provider rejects with a generic error
+      // (non-ordering, non-context-too-large) so the loop emits `error` and
+      // the orchestrator sets `providerErrorUserMessage`.
+      const ctx = makeCtx({
+        loopProvider: {
+          name: "mock-provider",
+          async sendMessage() {
+            throw new Error("Internal processing failure");
+          },
+        } as unknown as Provider,
+      });
       await runAgentLoopImpl(ctx, "hello", "msg-1", (msg) => events.push(msg));
       // The error should be sent as a conversation_error (not as an
@@ -3249,26 +2477,19 @@ describe("session-agent-loop", () => {
       // sweep would wrong-attach this row to the wrong assistant message.
       const events: ServerMessage[] = [];
-      const agentLoopRun: AgentLoopRun = async (messages, onEvent) => {
-        // 1) handleProviderError -> writes an `llm_request_logs` row with
-        //    messageId=null (the orphan we are trying to link).
-        onEvent({
-          type: "provider_error",
-          error: new Error("upstream 500"),
-          rawRequest: { model: "gpt-4.1", messages: [] },
-          actualProvider: "openai",
-        });
-        // 2) handleError -> sets `state.providerErrorUserMessage`, which
-        //    activates the synthetic-message branch below the loop.
-        onEvent({
-          type: "error",
-          error: new Error("upstream 500"),
-        });
-        // Provider returned no assistant content — same messages back.
-        return messages;
-      };
-      const ctx = makeCtx({ agentLoopRun });
+      // GIVEN a real loop whose provider rejects: the loop emits
+      // `provider_error` (writing an `llm_request_logs` row with
+      // messageId=null — the orphan we link) then `error` (which sets
+      // `state.providerErrorUserMessage`, activating the synthetic-message
+      // branch below the loop).
+      const ctx = makeCtx({
+        loopProvider: {
+          name: "mock-provider",
+          async sendMessage() {
+            throw new Error("upstream 500");
+          },
+        } as unknown as Provider,
+      });
       await runAgentLoopImpl(ctx, "hello", "msg-1", (msg) => events.push(msg));
       // The orphan was written with messageId=undefined.
@@ -3315,39 +2536,10 @@ describe("session-agent-loop", () => {
       // observe the sync-invalidation publish path on the same turn.
       projectAssistantMessageMock.mockImplementationOnce(() => true);
-      const agentLoopRun: AgentLoopRun = async (messages, onEvent) => {
-        await onEvent({ type: "llm_call_started" });
-        // `message_complete` is awaited so `handleMessageComplete` (and its
-        // async indexer + projector chain) completes before the next event
-        // or before the loop returns. Without the await the projector's
-        // synchronous call still races against the test's assertion phase
-        // because the indexer's `await` yields microtasks.
-        await onEvent({
-          type: "message_complete",
-          message: {
-            role: "assistant",
-            content: [{ type: "text", text: "indexed reply" }],
-          },
-        });
-        onEvent({
-          type: "usage",
-          inputTokens: 10,
-          outputTokens: 5,
-          model: "test",
-          providerDurationMs: 50,
-        });
-        return [
-          ...messages,
-          {
-            role: "assistant" as const,
-            content: [
-              { type: "text", text: "indexed reply" },
-            ] as ContentBlock[],
-          },
-        ];
-      };
-      const ctx = makeCtx({ agentLoopRun });
+      // GIVEN a real loop that answers with a single finalized assistant turn
+      const ctx = makeCtx({
+        providerResponses: [textResponse("indexed reply")],
+      });
       await runAgentLoopImpl(ctx, "hi", "msg-1", () => {});
       // Indexer fired with the reserved row's id + the finalized content.
@@ -3410,34 +2602,8 @@ describe("session-agent-loop", () => {
         metadata: null,
       };
-      const agentLoopRun: AgentLoopRun = async (messages, onEvent) => {
-        await onEvent({ type: "llm_call_started" });
-        // See sibling test — `message_complete` must be awaited so the
-        // projector call lands before the assertion phase.
-        await onEvent({
-          type: "message_complete",
-          message: {
-            role: "assistant",
-            content: [{ type: "text", text: "quiet" }],
-          },
-        });
-        onEvent({
-          type: "usage",
-          inputTokens: 1,
-          outputTokens: 1,
-          model: "test",
-          providerDurationMs: 1,
-        });
-        return [
-          ...messages,
-          {
-            role: "assistant" as const,
-            content: [{ type: "text", text: "quiet" }] as ContentBlock[],
-          },
-        ];
-      };
-      const ctx = makeCtx({ agentLoopRun });
+      // GIVEN a real loop that answers with a single finalized assistant turn
+      const ctx = makeCtx({ providerResponses: [textResponse("quiet")] });
       await runAgentLoopImpl(ctx, "hi", "msg-1", () => {});
       expect(projectAssistantMessageMock).toHaveBeenCalledTimes(1);
@@ -3462,40 +2628,33 @@ describe("session-agent-loop", () => {
       // Indexer/projector mocks default to no-op; no finalized row in this
       // test, so `mockMessageById` stays null.
-      const agentLoopRun: AgentLoopRun = async (messages, onEvent) => {
-        // First LLM call: reserve msg-strand-A, never finalize.
-        await onEvent({ type: "llm_call_started" });
-        // Second LLM call: should delete msg-strand-A before reserving
-        // msg-strand-B.
-        await onEvent({ type: "llm_call_started" });
-        // Finalize the second one so the loop has a valid assistant message
-        // and exits cleanly.
-        onEvent({
-          type: "message_complete",
-          message: {
-            role: "assistant",
-            content: [{ type: "text", text: "retry succeeded" }],
-          },
-        });
-        onEvent({
-          type: "usage",
-          inputTokens: 5,
-          outputTokens: 3,
-          model: "test",
-          providerDurationMs: 25,
-        });
-        return [
-          ...messages,
-          {
-            role: "assistant" as const,
-            content: [
-              { type: "text", text: "retry succeeded" },
-            ] as ContentBlock[],
-          },
-        ];
-      };
+      // A single reducer tier converges the oversized context so the
+      // orchestrator re-enters the loop after the first call fails.
+      mockReducerStepFn = (msgs: Message[]) => ({
+        messages: msgs,
+        tier: "forced_compaction",
+        state: {
+          appliedTiers: ["forced_compaction"],
+          injectionMode: "full",
+          exhausted: false,
+        },
+        estimatedTokens: 5000,
+      });
-      const ctx = makeCtx({ agentLoopRun });
+      // GIVEN a real loop whose first call rejects with context-too-large
+      // (reserving msg-strand-A but never finalizing it), then recovers via
+      // convergence on re-entry. The re-entry's `llm_call_started` must
+      // delete the stranded msg-strand-A before reserving msg-strand-B.
+      const ctx = makeCtx({
+        providerResponses: [
+          new Error("context_length_exceeded"),
+          textResponse("retry succeeded"),
+        ],
+        contextWindowManager: {
+          shouldCompact: () => ({ needed: false, estimatedTokens: 0 }),
+          maybeCompact: async () => ({ compacted: false }),
+        } as unknown as AgentLoopConversationContext["contextWindowManager"],
+      });
       await runAgentLoopImpl(ctx, "hi", "msg-1", () => {});
       // Exactly one delete fires — for msg-strand-A, before the second
@@ -3523,27 +2682,20 @@ describe("session-agent-loop", () => {
         id: "msg-orphaned-reservation",
       }));
-      const agentLoopRun: AgentLoopRun = async (messages, onEvent) => {
-        // Reserve the orphan.
-        await onEvent({ type: "llm_call_started" });
-        // Provider rejects — writes the llm_request_log row and arms
-        // `state.providerErrorUserMessage` via `handleError`.
-        onEvent({
-          type: "provider_error",
-          error: new Error("upstream 500"),
-          rawRequest: { model: "gpt-4.1", messages: [] },
-          actualProvider: "openai",
-        });
-        onEvent({
-          type: "error",
-          error: new Error("upstream 500"),
-        });
-        // No assistant message in the result — the synthetic-error branch
-        // below the agent loop fires.
-        return messages;
-      };
-      const ctx = makeCtx({ agentLoopRun });
+      // GIVEN a real loop that reserves an assistant row at
+      // `llm_call_started`, then whose provider rejects: the loop emits
+      // `provider_error` (writing the llm_request_log row) and `error`
+      // (arming `state.providerErrorUserMessage`), exiting with no
+      // `message_complete` so the synthetic-error branch below the loop
+      // fires.
+      const ctx = makeCtx({
+        loopProvider: {
+          name: "mock-provider",
+          async sendMessage() {
+            throw new Error("upstream 500");
+          },
+        } as unknown as Provider,
+      });
       await runAgentLoopImpl(ctx, "hi", "msg-1", () => {});
       // The orphan was deleted exactly once, before the synthetic error
@@ -3599,40 +2751,23 @@ describe("session-agent-loop", () => {
         metadata: null,
       };
-      const agentLoopRun: AgentLoopRun = async (messages, onEvent) => {
-        await onEvent({ type: "llm_call_started" });
-        // Two small deltas — well under the 1024-char size gate — should
-        // schedule a single debounced flush.
-        onEvent({ type: "text_delta", text: "Hello, " });
-        onEvent({ type: "text_delta", text: "world." });
-        // Wait long enough for the 250ms debounce to fire.
-        await new Promise((resolve) => setTimeout(resolve, 1100));
-        await onEvent({
-          type: "message_complete",
-          message: {
-            role: "assistant",
-            content: [{ type: "text", text: "Hello, world." }],
-          },
-        });
-        onEvent({
-          type: "usage",
-          inputTokens: 10,
-          outputTokens: 5,
-          model: "test",
-          providerDurationMs: 50,
-        });
-        return [
-          ...messages,
-          {
-            role: "assistant" as const,
-            content: [
-              { type: "text", text: "Hello, world." },
-            ] as ContentBlock[],
+      // GIVEN a real loop whose provider streams two small deltas (each under
+      // the 1024-char size gate) then holds the turn open past the 250ms
+      // debounce window before completing, so a single debounced partial
+      // flush lands before `message_complete`.
+      const ctx = makeCtx({
+        loopProvider: {
+          name: "mock-provider",
+          async sendMessage(_messages, options) {
+            options?.onEvent?.({ type: "text_delta", text: "Hello, " });
+            options?.onEvent?.({ type: "text_delta", text: "world." });
+            await new Promise((resolve) => setTimeout(resolve, 1100));
+            return textResponse("Hello, world.");
           },
-        ];
-      };
+        },
+      });
-      const ctx = makeCtx({ agentLoopRun });
+      // WHEN the orchestrator runs the turn to completion
       await runAgentLoopImpl(ctx, "hi", "msg-1", () => {});
       // Exactly two `updateContent` calls land:
@@ -3668,70 +2803,38 @@ describe("session-agent-loop", () => {
         metadata: null,
       };
-      const agentLoopRun: AgentLoopRun = async (messages, onEvent) => {
-        await onEvent({ type: "llm_call_started" });
-        // No text delta — only a tool_use. If `handleToolUse` were
-        // flushing, this would land a partial write before
-        // `message_complete`.
-        onEvent({
-          type: "tool_use",
-          id: "tu-no-flush",
-          name: "file_read",
-          input: { path: "/foo" },
-        });
-        // Yield a microtask so any (incorrectly) fire-and-forget
-        // pipeline call has a chance to land before message_complete.
-        await new Promise((resolve) => setImmediate(resolve));
-        onEvent({
-          type: "tool_result",
-          toolUseId: "tu-no-flush",
-          content: "ok",
-          isError: false,
-        });
-        await onEvent({
-          type: "message_complete",
-          message: {
-            role: "assistant",
-            content: [
-              {
-                type: "tool_use",
-                id: "tu-no-flush",
-                name: "file_read",
-                input: { path: "/foo" },
-              },
-            ],
-          },
-        });
-        onEvent({
-          type: "usage",
-          inputTokens: 10,
-          outputTokens: 5,
-          model: "test",
-          providerDurationMs: 50,
-        });
-        return [
-          ...messages,
+      // GIVEN a real loop that runs one tool turn — the loop emits `tool_use`
+      // strictly AFTER `message_complete` — and then answers with a final
+      // text turn. The tool executor returns immediately.
+      const ctx = makeCtx({
+        providerResponses: [
+          toolUseResponse("tu-no-flush", "file_read", { path: "/foo" }),
+          textResponse("done"),
+        ],
+        loopTools: [
           {
-            role: "assistant" as const,
-            content: [
-              {
-                type: "tool_use",
-                id: "tu-no-flush",
-                name: "file_read",
-                input: { path: "/foo" },
-              },
-            ] as ContentBlock[],
+            name: "file_read",
+            description: "Read a file",
+            input_schema: {
+              type: "object",
+              properties: { path: { type: "string" } },
+            },
           },
-        ];
-      };
+        ],
+        toolExecutor: async () => ({ content: "ok", isError: false }),
+      });
-      const ctx = makeCtx({ agentLoopRun });
+      // WHEN the orchestrator runs the turn to completion
       await runAgentLoopImpl(ctx, "hi", "msg-1", () => {});
-      // Only the authoritative final flush from `handleMessageComplete`
-      // lands. A partial flush from `handleToolUse` would have made this
-      // 2; that's the regression this test guards against.
-      expect(updateMessageContentMock).toHaveBeenCalledTimes(1);
+      // Four authoritative writes land and no stray partial flush:
+      //   - one final flush per `message_complete` (the tool turn and the final
+      //     text turn), plus
+      //   - two grouped tool-result user-row writes (persist-on-arrival and the
+      //     turn-boundary finalize).
+      // `handleToolUse` contributes no partial flush of its own; one would make
+      // this 5. That stray flush is the regression this test guards against.
+      expect(updateMessageContentMock).toHaveBeenCalledTimes(4);
     });
     test("handleMessageComplete clears any pending debounce timer before the final flush", async () => {
@@ -3744,45 +2847,53 @@ describe("session-agent-loop", () => {
         metadata: null,
       };
-      const agentLoopRun: AgentLoopRun = async (messages, onEvent) => {
-        await onEvent({ type: "llm_call_started" });
-        // Short delta — schedules a debounce timer but does NOT trip the
-        // size gate. message_complete then arrives immediately after,
-        // before the 250ms timer can fire.
-        onEvent({ type: "text_delta", text: "Quick reply." });
-        await onEvent({
-          type: "message_complete",
-          message: {
-            role: "assistant",
-            content: [{ type: "text", text: "Quick reply." }],
+      // GIVEN a real loop whose first turn streams a short delta (scheduling a
+      // debounce timer) and completes as a tool turn — so `message_complete`
+      // arrives before the 250ms timer and clears it. The tool executor then
+      // holds the loop open well past the original debounce window, proving a
+      // late timer does NOT fire a stray partial flush, before a final text
+      // turn ends the run.
+      const ctx = makeCtx({
+        providerResponses: [
+          {
+            content: [
+              { type: "text", text: "Quick reply." },
+              {
+                type: "tool_use",
+                id: "tu-keep-alive",
+                name: "file_read",
+                input: {},
+              },
+            ],
+            model: "mock-model",
+            usage: { inputTokens: 10, outputTokens: 5 },
+            stopReason: "tool_use",
           },
-        });
-        onEvent({
-          type: "usage",
-          inputTokens: 10,
-          outputTokens: 5,
-          model: "test",
-          providerDurationMs: 50,
-        });
-        // Wait past the original debounce window to prove a late timer
-        // does NOT fire a stray partial flush.
-        await new Promise((resolve) => setTimeout(resolve, 1100));
-        return [
-          ...messages,
+          textResponse("done"),
+        ],
+        loopTools: [
           {
-            role: "assistant" as const,
-            content: [{ type: "text", text: "Quick reply." }] as ContentBlock[],
+            name: "file_read",
+            description: "Read a file",
+            input_schema: { type: "object", properties: {} },
           },
-        ];
-      };
+        ],
+        toolExecutor: async () => {
+          await new Promise((resolve) => setTimeout(resolve, 1100));
+          return { content: "ok", isError: false };
+        },
+      });
-      const ctx = makeCtx({ agentLoopRun });
+      // WHEN the orchestrator runs the turn to completion
       await runAgentLoopImpl(ctx, "hi", "msg-1", () => {});
-      // Only the final flush from `handleMessageComplete` lands. The
-      // debounced partial would have fired around T+250ms; the timer-clear
-      // at the top of `handleMessageComplete` cancels it.
-      expect(updateMessageContentMock).toHaveBeenCalledTimes(1);
+      // Four authoritative writes land: one final flush per `message_complete`
+      // (the tool turn and the final text turn) plus two grouped tool-result
+      // user-row writes (persist-on-arrival and the turn-boundary finalize).
+      // The debounced partial would have fired around T+250ms — during the tool
+      // executor's hold — but the timer-clear at the top of
+      // `handleMessageComplete` cancels it, so no stray fifth flush appears.
+      expect(updateMessageContentMock).toHaveBeenCalledTimes(4);
     });
     test("partial flushes never trigger the indexer or attention projector", async () => {
@@ -3795,54 +2906,29 @@ describe("session-agent-loop", () => {
         metadata: null,
       };
-      const agentLoopRun: AgentLoopRun = async (messages, onEvent) => {
-        await onEvent({ type: "llm_call_started" });
-        onEvent({ type: "text_delta", text: "hello world" });
-        // Wait past the 250ms debounce so the partial flush definitely
-        // lands BEFORE message_complete fires.
-        await new Promise((resolve) => setTimeout(resolve, 1100));
-        // Snapshot the indexer/projector call counts AFTER the partial
-        // flush has run but BEFORE message_complete. They must be zero.
-        const indexerCallsBeforeComplete =
-          indexMessageNowMock.mock.calls.length;
-        const projectorCallsBeforeComplete =
-          projectAssistantMessageMock.mock.calls.length;
-        // Stash on a side channel the assertion phase can read.
-        (
-          ctx as unknown as { __partialSnapshot?: [number, number] }
-        ).__partialSnapshot = [
-          indexerCallsBeforeComplete,
-          projectorCallsBeforeComplete,
-        ];
-        await onEvent({
-          type: "message_complete",
-          message: {
-            role: "assistant",
-            content: [{ type: "text", text: "hello world" }],
-          },
-        });
-        onEvent({
-          type: "usage",
-          inputTokens: 10,
-          outputTokens: 5,
-          model: "test",
-          providerDurationMs: 50,
-        });
-        return [
-          ...messages,
-          {
-            role: "assistant" as const,
-            content: [{ type: "text", text: "hello world" }] as ContentBlock[],
+      // GIVEN a real loop whose provider streams a delta then holds the turn
+      // open past the 250ms debounce window so the partial flush lands BEFORE
+      // `message_complete`. The indexer/projector counts are snapshotted at
+      // that mid-turn point (after the partial flush, before completion).
+      let snapshot: [number, number] | undefined;
+      const ctx = makeCtx({
+        loopProvider: {
+          name: "mock-provider",
+          async sendMessage(_messages, options) {
+            options?.onEvent?.({ type: "text_delta", text: "hello world" });
+            await new Promise((resolve) => setTimeout(resolve, 1100));
+            snapshot = [
+              indexMessageNowMock.mock.calls.length,
+              projectAssistantMessageMock.mock.calls.length,
+            ];
+            return textResponse("hello world");
           },
-        ];
-      };
+        },
+      });
-      const ctx = makeCtx({ agentLoopRun });
+      // WHEN the orchestrator runs the turn to completion
       await runAgentLoopImpl(ctx, "hi", "msg-1", () => {});
-      const snapshot = (
-        ctx as unknown as { __partialSnapshot?: [number, number] }
-      ).__partialSnapshot;
       expect(snapshot).toBeDefined();
       // Indexer + projector were both ZERO during the mid-turn partial
       // flush — they only fire from `handleMessageComplete` after the
@@ -3870,35 +2956,21 @@ describe("session-agent-loop", () => {
       const ghToken = "ghp_" + "a".repeat(36);
       const payload = "Here's the key: " + ghToken + " enjoy.";
-      const agentLoopRun: AgentLoopRun = async (messages, onEvent) => {
-        await onEvent({ type: "llm_call_started" });
-        onEvent({ type: "text_delta", text: payload });
-        // Wait past the 250ms debounce so the partial flush lands.
-        await new Promise((resolve) => setTimeout(resolve, 1100));
-        await onEvent({
-          type: "message_complete",
-          message: {
-            role: "assistant",
-            content: [{ type: "text", text: payload }],
-          },
-        });
-        onEvent({
-          type: "usage",
-          inputTokens: 10,
-          outputTokens: 5,
-          model: "test",
-          providerDurationMs: 50,
-        });
-        return [
-          ...messages,
-          {
-            role: "assistant" as const,
-            content: [{ type: "text", text: payload }] as ContentBlock[],
+      // GIVEN a real loop whose provider streams the PAT-bearing payload as a
+      // delta then holds the turn open past the 250ms debounce window so the
+      // partial flush lands before `message_complete`.
+      const ctx = makeCtx({
+        loopProvider: {
+          name: "mock-provider",
+          async sendMessage(_messages, options) {
+            options?.onEvent?.({ type: "text_delta", text: payload });
+            await new Promise((resolve) => setTimeout(resolve, 1100));
+            return textResponse(payload);
           },
-        ];
-      };
+        },
+      });
-      const ctx = makeCtx({ agentLoopRun });
+      // WHEN the orchestrator runs the turn to completion
       await runAgentLoopImpl(ctx, "hi", "msg-1", () => {});
       expect(updateMessageContentMock).toHaveBeenCalledTimes(2);
@@ -3922,26 +2994,21 @@ describe("session-agent-loop", () => {
         id: "msg-orphan-with-partial",
       }));
-      const agentLoopRun: AgentLoopRun = async (messages, onEvent) => {
-        await onEvent({ type: "llm_call_started" });
-        // A debounced delta lands a partial flush BEFORE the provider
-        // error fires.
-        onEvent({ type: "text_delta", text: "hello world" });
-        await new Promise((resolve) => setTimeout(resolve, 1100));
-        onEvent({
-          type: "provider_error",
-          error: new Error("upstream 500"),
-          rawRequest: { model: "gpt-4.1", messages: [] },
-          actualProvider: "openai",
-        });
-        onEvent({
-          type: "error",
-          error: new Error("upstream 500"),
-        });
-        return messages;
-      };
+      // GIVEN a real loop whose provider streams a delta — landing a debounced
+      // partial flush on the reserved row — then rejects, so the loop emits
+      // `provider_error` and `error` and exits with no `message_complete`.
+      const ctx = makeCtx({
+        loopProvider: {
+          name: "mock-provider",
+          async sendMessage(_messages, options) {
+            options?.onEvent?.({ type: "text_delta", text: "hello world" });
+            await new Promise((resolve) => setTimeout(resolve, 1100));
+            throw new Error("upstream 500");
+          },
+        },
+      });
-      const ctx = makeCtx({ agentLoopRun });
+      // WHEN the orchestrator runs the turn
       await runAgentLoopImpl(ctx, "hi", "msg-1", () => {});
       // Partial flush fired exactly once (before the provider error).
@@ -4442,51 +3509,32 @@ describe("session-agent-loop", () => {
         compactableStartIndex: 0,
       };
-      const rawMidLoopBasis: Message[] = [
-        {
-          role: "user",
-          content: [{ type: "text", text: "fresh DB basis user row" }],
-        },
-        {
-          role: "assistant",
-          content: [{ type: "text", text: "partial assistant response" }],
-        },
-      ];
       const maybeCompactInputs: Message[][] = [];
-      let runCount = 0;
-      const agentLoopRun: AgentLoopRun = async (
-        messages,
-        _onEvent,
-        options,
-      ) => {
-        runCount++;
-        if (runCount === 1) {
-          // The loop reaches its mid-loop budget checkpoint with the raw
-          // persistent basis as its in-loop history; the wrapped onCheckpoint
-          // trips the gate and runs inline compaction over that basis.
-          mockEstimateTokens = 90_000;
-          const decision = await options?.onCheckpoint?.({
-            turnIndex: 0,
-            toolCount: 1,
-            hasToolUse: true,
-            history: rawMidLoopBasis,
-          });
-          mockEstimateTokens = 1000;
-          if (decision !== "continue") {
-            return rawMidLoopBasis;
-          }
-        }
-        return [
-          ...messages,
-          {
-            role: "assistant" as const,
-            content: [{ type: "text" as const, text: "final response" }],
-          },
-        ];
-      };
+      // AND a real loop that runs one tool turn and then a final text turn.
+      // The tool executor raises the token estimate above the mid-loop budget
+      // threshold so the loop compacts in place at the post-tool checkpoint —
+      // over its own in-loop history, which does not match the loaded Slack
+      // rows.
       const ctx = makeCtx({
-        agentLoopRun,
+        providerResponses: [
+          toolUseResponse("tu-mid-loop", "file_read", { path: "/foo" }),
+          textResponse("final response"),
+        ],
+        loopTools: [
+          {
+            name: "file_read",
+            description: "Read a file",
+            input_schema: {
+              type: "object",
+              properties: { path: { type: "string" } },
+            },
+          },
+        ],
+        toolExecutor: async () => {
+          mockEstimateTokens = 90_000;
+          return { content: "ok", isError: false };
+        },
         channelCapabilities: {
           channel: "slack",
           dashboardCapable: false,
@@ -4523,6 +3571,9 @@ describe("session-agent-loop", () => {
                 summaryText: "",
               };
             }
+            // The mid-loop gate compacted its in-loop basis; drop the estimate
+            // back under budget so the post-compaction provider call proceeds.
+            mockEstimateTokens = 1000;
             return {
               compacted: true,
               messages: [
@@ -4551,7 +3602,9 @@ describe("session-agent-loop", () => {
       await runAgentLoopImpl(ctx, "next reply", "user-msg-mid-loop", () => {});
       expect(maybeCompactInputs[0]).toBe(renderedSlackMessages);
-      expect(maybeCompactInputs[1]).toBe(rawMidLoopBasis);
+      // The mid-loop gate compacts the loop's own in-loop history, never the
+      // loaded Slack rows — the mismatch this test guards against.
+      expect(maybeCompactInputs[1]).not.toBe(renderedSlackMessages);
       expect(getSlackCompactionWatermarkForPrefixMock).toHaveBeenCalledWith(
         null,
         2,
@@ -4824,67 +3877,32 @@ describe("session-agent-loop", () => {
         estimatedTokens: 5000,
       });
-      let callCount = 0;
-      const agentLoopRun: AgentLoopRun = async (messages, onEvent) => {
-        callCount++;
-        // Prime the assistant row anchor — production code emits this from
-        // `AgentLoop.run` just before `provider.sendMessage`. Retry branches
-        // need this on every invocation: each agent-loop iteration reserves
-        // its own row.
-        await onEvent({ type: "llm_call_started" });
-        if (callCount === 1) {
-          // Trigger convergence path: error + appended assistant message so
-          // updatedHistory.length > preRunHistoryLength at the strip site.
-          onEvent({
-            type: "error",
-            error: new Error("context_length_exceeded"),
-          });
-          onEvent({
-            type: "usage",
-            inputTokens: 100,
-            outputTokens: 0,
-            model: "test-model",
-            providerDurationMs: 50,
-          });
-          return [
-            ...messages,
-            {
-              role: "assistant" as const,
-              content: [{ type: "text", text: "partial" }] as ContentBlock[],
-            },
-          ];
-        }
-        onEvent({
-          type: "message_complete",
-          message: {
-            role: "assistant",
-            content: [{ type: "text", text: "recovered" }],
-          },
-        });
-        onEvent({
-          type: "usage",
-          inputTokens: 50,
-          outputTokens: 25,
-          model: "test-model",
-          providerDurationMs: 100,
-        });
-        return [
-          ...messages,
+      // GIVEN a real loop that appends a tool turn (so the run reports
+      // `appendedNewMessages`) and then rejects with a context-too-large
+      // error on the following call — the orchestrator strips that appended
+      // history during its bounded convergence path before a final call
+      // recovers.
+      const ctx = makeCtx({
+        providerResponses: [
+          toolUseResponse("t1", "file_read", {}),
+          new Error("context_length_exceeded"),
+          textResponse("recovered"),
+        ],
+        loopTools: [
           {
-            role: "assistant" as const,
-            content: [{ type: "text", text: "recovered" }] as ContentBlock[],
+            name: "file_read",
+            description: "Read a file",
+            input_schema: { type: "object", properties: {} },
           },
-        ];
-      };
-      const ctx = makeCtx({
-        agentLoopRun,
+        ],
+        toolExecutor: async () => ({ content: "ok", isError: false }),
         contextWindowManager: {
           shouldCompact: () => ({ needed: false, estimatedTokens: 0 }),
           maybeCompact: async () => ({ compacted: false }),
         } as unknown as AgentLoopConversationContext["contextWindowManager"],
       });
+      // WHEN the orchestrator runs the turn to completion
       await runAgentLoopImpl(ctx, "hello", "msg-1", () => {});
       const stripCalls = setConversationHistoryStrippedAtMock.mock.calls.filter(
@@ -4909,59 +3927,24 @@ describe("session-agent-loop", () => {
         estimatedTokens: 5000,
       });
-      let callCount = 0;
-      const agentLoopRun: AgentLoopRun = async (messages, onEvent) => {
-        callCount++;
-        // Prime the assistant row anchor — production code emits this from
-        // `AgentLoop.run` just before `provider.sendMessage`. Retry branches
-        // need this on every invocation: each agent-loop iteration reserves
-        // its own row.
-        await onEvent({ type: "llm_call_started" });
-        if (callCount === 1) {
-          onEvent({
-            type: "error",
-            error: new Error("context_length_exceeded"),
-          });
-          onEvent({
-            type: "usage",
-            inputTokens: 100,
-            outputTokens: 0,
-            model: "test-model",
-            providerDurationMs: 50,
-          });
-          return [
-            ...messages,
-            {
-              role: "assistant" as const,
-              content: [{ type: "text", text: "partial" }] as ContentBlock[],
-            },
-          ];
-        }
-        onEvent({
-          type: "message_complete",
-          message: {
-            role: "assistant",
-            content: [{ type: "text", text: "recovered" }],
-          },
-        });
-        onEvent({
-          type: "usage",
-          inputTokens: 50,
-          outputTokens: 25,
-          model: "test-model",
-          providerDurationMs: 100,
-        });
-        return [
-          ...messages,
+      // GIVEN a real loop that appends a tool turn and then rejects with a
+      // context-too-large error on the following call, driving the
+      // convergence strip whose marker-write helper is stubbed to throw,
+      // before a final call recovers.
+      const ctx = makeCtx({
+        providerResponses: [
+          toolUseResponse("t1", "file_read", {}),
+          new Error("context_length_exceeded"),
+          textResponse("recovered"),
+        ],
+        loopTools: [
           {
-            role: "assistant" as const,
-            content: [{ type: "text", text: "recovered" }] as ContentBlock[],
+            name: "file_read",
+            description: "Read a file",
+            input_schema: { type: "object", properties: {} },
           },
-        ];
-      };
-      const ctx = makeCtx({
-        agentLoopRun,
+        ],
+        toolExecutor: async () => ({ content: "ok", isError: false }),
         contextWindowManager: {
           shouldCompact: () => ({ needed: false, estimatedTokens: 0 }),
           maybeCompact: async () => ({ compacted: false }),