npm - @vellumai/assistant - Versions diffs - 0.4.52 → 0.4.53 - Mend

@vellumai/assistant 0.4.52 → 0.4.53

This diff compares the contents of two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Files changed (205)

package/ARCHITECTURE.md +2 -2
package/docs/architecture/keychain-broker.md +6 -20
package/docs/architecture/memory.md +3 -3
package/package.json +1 -1
package/src/__tests__/approval-cascade.test.ts +3 -1
package/src/__tests__/approval-routes-http.test.ts +0 -1
package/src/__tests__/asset-materialize-tool.test.ts +0 -1
package/src/__tests__/asset-search-tool.test.ts +0 -1
package/src/__tests__/assistant-events-sse-hardening.test.ts +0 -1
package/src/__tests__/attachments-store.test.ts +0 -1
package/src/__tests__/avatar-e2e.test.ts +6 -1
package/src/__tests__/browser-fill-credential.test.ts +3 -0
package/src/__tests__/btw-routes.test.ts +39 -0
package/src/__tests__/call-controller.test.ts +0 -1
package/src/__tests__/call-domain.test.ts +1 -0
package/src/__tests__/call-routes-http.test.ts +1 -2
package/src/__tests__/canonical-guardian-store.test.ts +33 -2
package/src/__tests__/channel-readiness-service.test.ts +1 -0
package/src/__tests__/claude-code-skill-regression.test.ts +6 -2
package/src/__tests__/claude-code-tool-profiles.test.ts +7 -2
package/src/__tests__/config-loader-backfill.test.ts +1 -2
package/src/__tests__/config-schema.test.ts +6 -37
package/src/__tests__/conversation-routes-slash-commands.test.ts +0 -1
package/src/__tests__/credential-broker-server-use.test.ts +16 -16
package/src/__tests__/credential-security-invariants.test.ts +14 -0
package/src/__tests__/credential-vault-unit.test.ts +4 -4
package/src/__tests__/error-handler-friendly-messages.test.ts +4 -5
package/src/__tests__/gateway-only-enforcement.test.ts +0 -2
package/src/__tests__/host-shell-tool.test.ts +0 -1
package/src/__tests__/http-user-message-parity.test.ts +19 -0
package/src/__tests__/list-messages-attachments.test.ts +0 -1
package/src/__tests__/log-export-workspace.test.ts +233 -0
package/src/__tests__/managed-proxy-context.test.ts +1 -1
package/src/__tests__/managed-skill-lifecycle.test.ts +0 -1
package/src/__tests__/media-generate-image.test.ts +7 -2
package/src/__tests__/media-reuse-story.e2e.test.ts +0 -1
package/src/__tests__/memory-regressions.test.ts +0 -1
package/src/__tests__/migration-cross-version-compatibility.test.ts +0 -1
package/src/__tests__/migration-export-http.test.ts +0 -1
package/src/__tests__/migration-import-commit-http.test.ts +0 -1
package/src/__tests__/migration-import-preflight-http.test.ts +0 -1
package/src/__tests__/migration-validate-http.test.ts +0 -1
package/src/__tests__/notification-schedule-dedup.test.ts +237 -0
package/src/__tests__/oauth-cli.test.ts +1 -10
package/src/__tests__/oauth-store.test.ts +3 -5
package/src/__tests__/oauth2-gateway-transport.test.ts +5 -4
package/src/__tests__/onboarding-starter-tasks.test.ts +1 -1
package/src/__tests__/onboarding-template-contract.test.ts +1 -2
package/src/__tests__/pricing.test.ts +0 -11
package/src/__tests__/provider-commit-message-generator.test.ts +21 -14
package/src/__tests__/provider-fail-open-selection.test.ts +9 -8
package/src/__tests__/provider-managed-proxy-integration.test.ts +27 -24
package/src/__tests__/provider-registry-ollama.test.ts +8 -2
package/src/__tests__/recording-handler.test.ts +0 -1
package/src/__tests__/relay-server.test.ts +0 -1
package/src/__tests__/runtime-attachment-metadata.test.ts +0 -1
package/src/__tests__/runtime-events-sse-parity.test.ts +0 -1
package/src/__tests__/runtime-events-sse.test.ts +0 -1
package/src/__tests__/secret-routes-managed-proxy.test.ts +0 -1
package/src/__tests__/secret-scanner-executor.test.ts +0 -1
package/src/__tests__/send-endpoint-busy.test.ts +0 -1
package/src/__tests__/session-abort-tool-results.test.ts +3 -1
package/src/__tests__/session-agent-loop-overflow.test.ts +1012 -838
package/src/__tests__/session-agent-loop.test.ts +2 -2
package/src/__tests__/session-confirmation-signals.test.ts +3 -1
package/src/__tests__/session-error.test.ts +5 -4
package/src/__tests__/session-history-web-search.test.ts +34 -9
package/src/__tests__/session-pre-run-repair.test.ts +3 -1
package/src/__tests__/session-provider-retry-repair.test.ts +31 -26
package/src/__tests__/session-queue.test.ts +3 -1
package/src/__tests__/session-runtime-assembly.test.ts +118 -0
package/src/__tests__/session-slash-known.test.ts +31 -13
package/src/__tests__/session-slash-queue.test.ts +3 -1
package/src/__tests__/session-slash-unknown.test.ts +3 -1
package/src/__tests__/session-workspace-cache-state.test.ts +3 -1
package/src/__tests__/session-workspace-injection.test.ts +3 -1
package/src/__tests__/session-workspace-tool-tracking.test.ts +3 -1
package/src/__tests__/shell-tool-proxy-mode.test.ts +0 -1
package/src/__tests__/skill-script-runner-sandbox.test.ts +0 -1
package/src/__tests__/skillssh-registry.test.ts +21 -0
package/src/__tests__/slack-share-routes.test.ts +1 -1
package/src/__tests__/swarm-recursion.test.ts +5 -1
package/src/__tests__/swarm-session-integration.test.ts +25 -14
package/src/__tests__/swarm-tool.test.ts +5 -2
package/src/__tests__/telegram-bot-username-resolution.test.ts +2 -4
package/src/__tests__/token-estimator-accuracy.benchmark.test.ts +1521 -0
package/src/__tests__/tool-execution-abort-cleanup.test.ts +0 -1
package/src/__tests__/tool-executor-lifecycle-events.test.ts +0 -1
package/src/__tests__/tool-executor-shell-integration.test.ts +0 -1
package/src/__tests__/tool-executor.test.ts +0 -1
package/src/__tests__/trust-store.test.ts +5 -1
package/src/__tests__/twilio-routes.test.ts +2 -2
package/src/__tests__/verification-control-plane-policy.test.ts +0 -1
package/src/__tests__/voice-quality.test.ts +2 -1
package/src/__tests__/voice-scoped-grant-consumer.test.ts +0 -1
package/src/__tests__/web-search.test.ts +1 -1
package/src/agent/loop.ts +17 -1
package/src/bundler/app-bundler.ts +40 -24
package/src/calls/call-controller.ts +16 -0
package/src/calls/relay-server.ts +29 -13
package/src/calls/voice-control-protocol.ts +1 -0
package/src/calls/voice-quality.ts +1 -1
package/src/calls/voice-session-bridge.ts +9 -3
package/src/channels/types.ts +16 -0
package/src/cli/commands/bash.ts +173 -0
package/src/cli/commands/doctor.ts +5 -23
package/src/cli/commands/oauth/connections.ts +4 -2
package/src/cli/commands/oauth/providers.ts +1 -13
package/src/cli/program.ts +2 -0
package/src/cli/reference.ts +1 -0
package/src/config/bundled-skills/image-studio/tools/media-generate-image.ts +2 -1
package/src/config/bundled-skills/media-processing/tools/analyze-keyframes.ts +3 -5
package/src/config/bundled-skills/media-processing/tools/extract-keyframes.ts +2 -3
package/src/config/bundled-skills/phone-calls/references/CONFIG.md +1 -1
package/src/config/bundled-skills/transcribe/tools/transcribe-media.ts +5 -6
package/src/config/feature-flag-registry.json +8 -0
package/src/config/loader.ts +7 -135
package/src/config/schema.ts +0 -6
package/src/config/schemas/channels.ts +1 -0
package/src/config/schemas/elevenlabs.ts +2 -2
package/src/contacts/contact-store.ts +21 -25
package/src/contacts/contacts-write.ts +6 -6
package/src/contacts/types.ts +2 -0
package/src/context/token-estimator.ts +35 -2
package/src/context/window-manager.ts +16 -2
package/src/daemon/config-watcher.ts +24 -6
package/src/daemon/context-overflow-reducer.ts +13 -2
package/src/daemon/handlers/config-ingress.ts +25 -8
package/src/daemon/handlers/config-model.ts +21 -15
package/src/daemon/handlers/config-telegram.ts +18 -6
package/src/daemon/handlers/dictation.ts +0 -429
package/src/daemon/handlers/skills.ts +1 -200
package/src/daemon/lifecycle.ts +8 -5
package/src/daemon/message-types/contacts.ts +2 -0
package/src/daemon/message-types/integrations.ts +1 -0
package/src/daemon/message-types/sessions.ts +2 -0
package/src/daemon/parse-actual-tokens-from-error.test.ts +75 -0
package/src/daemon/server.ts +23 -2
package/src/daemon/session-agent-loop-handlers.ts +1 -1
package/src/daemon/session-agent-loop.ts +27 -79
package/src/daemon/session-error.ts +5 -4
package/src/daemon/session-process.ts +17 -10
package/src/daemon/session-runtime-assembly.ts +50 -0
package/src/daemon/session-slash.ts +32 -20
package/src/daemon/session.ts +1 -0
package/src/events/domain-events.ts +1 -0
package/src/media/app-icon-generator.ts +2 -1
package/src/media/avatar-router.ts +3 -2
package/src/memory/canonical-guardian-store.ts +25 -3
package/src/memory/db-init.ts +12 -0
package/src/memory/embedding-backend.ts +25 -16
package/src/memory/migrations/158-channel-interaction-columns.ts +18 -0
package/src/memory/migrations/159-drop-contact-interaction-columns.ts +16 -0
package/src/memory/migrations/160-drop-loopback-port-column.ts +13 -0
package/src/memory/migrations/index.ts +3 -0
package/src/memory/retriever.test.ts +19 -12
package/src/memory/schema/contacts.ts +2 -2
package/src/memory/schema/oauth.ts +0 -1
package/src/oauth/connect-orchestrator.ts +5 -3
package/src/oauth/connect-types.ts +9 -2
package/src/oauth/manual-token-connection.ts +9 -7
package/src/oauth/oauth-store.ts +2 -8
package/src/oauth/provider-behaviors.ts +10 -0
package/src/oauth/seed-providers.ts +13 -5
package/src/permissions/checker.ts +20 -1
package/src/prompts/__tests__/build-cli-reference-section.test.ts +1 -1
package/src/prompts/system-prompt.ts +2 -11
package/src/prompts/templates/BOOTSTRAP.md +1 -3
package/src/providers/anthropic/client.ts +16 -8
package/src/providers/managed-proxy/constants.ts +1 -1
package/src/providers/registry.ts +21 -15
package/src/providers/types.ts +1 -1
package/src/runtime/auth/route-policy.ts +4 -0
package/src/runtime/channel-invite-transports/telegram.ts +12 -6
package/src/runtime/channel-retry-sweep.ts +6 -0
package/src/runtime/http-types.ts +1 -0
package/src/runtime/middleware/error-handler.ts +1 -2
package/src/runtime/routes/app-management-routes.ts +1 -0
package/src/runtime/routes/btw-routes.ts +20 -1
package/src/runtime/routes/conversation-routes.ts +32 -13
package/src/runtime/routes/inbound-message-handler.ts +10 -2
package/src/runtime/routes/inbound-stages/background-dispatch.ts +4 -0
package/src/runtime/routes/inbound-stages/edit-intercept.ts +5 -5
package/src/runtime/routes/integrations/slack/share.ts +5 -5
package/src/runtime/routes/log-export-routes.ts +122 -10
package/src/runtime/routes/session-query-routes.ts +3 -3
package/src/runtime/routes/settings-routes.ts +53 -0
package/src/runtime/routes/workspace-routes.ts +3 -0
package/src/runtime/verification-templates.ts +1 -1
package/src/security/oauth2.ts +4 -4
package/src/security/secure-keys.ts +4 -4
package/src/signals/bash.ts +157 -0
package/src/skills/skillssh-registry.ts +6 -1
package/src/swarm/backend-claude-code.ts +6 -6
package/src/swarm/worker-backend.ts +1 -1
package/src/swarm/worker-runner.ts +1 -1
package/src/telegram/bot-username.ts +11 -0
package/src/tools/claude-code/claude-code.ts +4 -4
package/src/tools/credentials/broker.ts +7 -5
package/src/tools/credentials/vault.ts +3 -2
package/src/tools/network/__tests__/web-search.test.ts +18 -86
package/src/tools/network/web-search.ts +9 -15
package/src/util/platform.ts +7 -1
package/src/util/pricing.ts +0 -1
package/src/workspace/provider-commit-message-generator.ts +10 -6

package/src/__tests__/session-agent-loop-overflow.test.ts CHANGED Viewed

@@ -7,9 +7,8 @@
  *   2. Token estimation significantly underestimates actual token count
  *   3. No mid-loop budget check to prevent hitting the provider limit
  *
- * Tests 2, 3, and 4 pass against the current code.
- * Tests 1, 5 fail (documenting bugs to be fixed in PR 2).
- * Tests 6 and 7 are skipped (depend on mid-loop checkpoint changes in PR 3).
+ * All tests are test.todo — they document expected behavior for bugs
+ * to be fixed in subsequent PRs (PR 2 for tests 1–5, PR 3 for tests 6–7).
  */
 import { beforeEach, describe, expect, mock, test } from "bun:test";
@@ -52,7 +51,6 @@ mock.module("../config/loader.js", () => ({
       },
     },
     rateLimit: { maxRequestsPerMinute: 0, maxTokensPerSession: 0 },
-    apiKeys: {},
     workspaceGit: { turnCommitMaxWaitMs: 10 },
     ui: {},
   }),
@@ -198,7 +196,7 @@ mock.module("../daemon/session-memory.js", () => ({
       enabled: false,
       degraded: false,
       injectedText: "",
       semanticHits: 0,
       recencyHits: 0,
       injectedTokens: 0,
@@ -374,6 +372,7 @@ function makeCtx(
     agentLoop: {
       run: agentLoopRun,
+      getToolTokenBudget: () => 0,
     } as unknown as AgentLoopSessionContext["agentLoop"],
     provider: {
       name: "mock-provider",
@@ -535,278 +534,284 @@ describe("session-agent-loop overflow recovery (JARVIS-110)", () => {
   //
   // Expected behavior (PR 2 fix): After progress + context_too_large,
   // the system should still attempt compaction before surfacing error.
-  test("context too large after progress triggers compaction retry instead of immediate failure", async () => {
-    const events: ServerMessage[] = [];
-    let reducerCalled = false;
-    mockReducerStepFn = (msgs: Message[]) => {
-      reducerCalled = true;
-      return {
-        messages: msgs,
-        tier: "forced_compaction",
-        state: {
-          appliedTiers: ["forced_compaction"],
-          injectionMode: "full",
-          exhausted: false,
-        },
-        estimatedTokens: 50_000,
-        compactionResult: {
-          compacted: true,
+  test.todo(
+    "context too large after progress triggers compaction retry instead of immediate failure",
+    async () => {
+      const events: ServerMessage[] = [];
+      let reducerCalled = false;
+      mockReducerStepFn = (msgs: Message[]) => {
+        reducerCalled = true;
+        return {
           messages: msgs,
-          compactedPersistedMessages: 5,
-          summaryText: "Summary",
-          previousEstimatedInputTokens: 190_000,
-          estimatedInputTokens: 50_000,
-          maxInputTokens: 200_000,
-          thresholdTokens: 160_000,
-          compactedMessages: 10,
-          summaryCalls: 1,
-          summaryInputTokens: 500,
-          summaryOutputTokens: 200,
-          summaryModel: "mock-model",
-        },
-      };
-    };
-    let agentLoopCallCount = 0;
-    const agentLoopRun: AgentLoopRun = async (messages, onEvent) => {
-      agentLoopCallCount++;
-      if (agentLoopCallCount === 1) {
-        // Simulate: agent makes progress (tool calls + results added)
-        // then hits context_too_large on next LLM call
-        const progressMessages: Message[] = [
-          ...messages,
-          {
-            role: "assistant" as const,
-            content: [
-              { type: "text", text: "Let me check that." },
-              {
-                type: "tool_use",
-                id: "tu-progress",
-                name: "bash",
-                input: { command: "ls" },
-              },
-            ] as ContentBlock[],
+          tier: "forced_compaction",
+          state: {
+            appliedTiers: ["forced_compaction"],
+            injectionMode: "full",
+            exhausted: false,
           },
-          {
-            role: "user" as const,
-            content: [
-              {
-                type: "tool_result",
-                tool_use_id: "tu-progress",
-                content: "file1.ts\nfile2.ts",
-                is_error: false,
-              },
-            ] as ContentBlock[],
+          estimatedTokens: 50_000,
+          compactionResult: {
+            compacted: true,
+            messages: msgs,
+            compactedPersistedMessages: 5,
+            summaryText: "Summary",
+            previousEstimatedInputTokens: 190_000,
+            estimatedInputTokens: 50_000,
+            maxInputTokens: 200_000,
+            thresholdTokens: 160_000,
+            compactedMessages: 10,
+            summaryCalls: 1,
+            summaryInputTokens: 500,
+            summaryOutputTokens: 200,
+            summaryModel: "mock-model",
           },
-        ];
+        };
+      };
-        // Emit events for the progress that was made
-        onEvent({
-          type: "tool_use",
-          id: "tu-progress",
-          name: "bash",
-          input: { command: "ls" },
-        });
-        onEvent({
-          type: "tool_result",
-          toolUseId: "tu-progress",
-          content: "file1.ts\nfile2.ts",
-          isError: false,
-        });
+      let agentLoopCallCount = 0;
+      const agentLoopRun: AgentLoopRun = async (messages, onEvent) => {
+        agentLoopCallCount++;
+        if (agentLoopCallCount === 1) {
+          // Simulate: agent makes progress (tool calls + results added)
+          // then hits context_too_large on next LLM call
+          const progressMessages: Message[] = [
+            ...messages,
+            {
+              role: "assistant" as const,
+              content: [
+                { type: "text", text: "Let me check that." },
+                {
+                  type: "tool_use",
+                  id: "tu-progress",
+                  name: "bash",
+                  input: { command: "ls" },
+                },
+              ] as ContentBlock[],
+            },
+            {
+              role: "user" as const,
+              content: [
+                {
+                  type: "tool_result",
+                  tool_use_id: "tu-progress",
+                  content: "file1.ts\nfile2.ts",
+                  is_error: false,
+                },
+              ] as ContentBlock[],
+            },
+          ];
+          // Emit events for the progress that was made
+          onEvent({
+            type: "tool_use",
+            id: "tu-progress",
+            name: "bash",
+            input: { command: "ls" },
+          });
+          onEvent({
+            type: "tool_result",
+            toolUseId: "tu-progress",
+            content: "file1.ts\nfile2.ts",
+            isError: false,
+          });
+          onEvent({
+            type: "message_complete",
+            message: {
+              role: "assistant",
+              content: [
+                { type: "text", text: "Let me check that." },
+                {
+                  type: "tool_use",
+                  id: "tu-progress",
+                  name: "bash",
+                  input: { command: "ls" },
+                },
+              ],
+            },
+          });
+          onEvent({
+            type: "usage",
+            inputTokens: 100,
+            outputTokens: 50,
+            model: "test-model",
+            providerDurationMs: 100,
+          });
+          // Then context_too_large error occurs on the *next* LLM call
+          onEvent({
+            type: "error",
+            error: new Error(
+              "prompt is too long: 242201 tokens > 200000 maximum",
+            ),
+          });
+          onEvent({
+            type: "usage",
+            inputTokens: 0,
+            outputTokens: 0,
+            model: "test-model",
+            providerDurationMs: 10,
+          });
+          // Return the history WITH progress (more messages than input)
+          return progressMessages;
+        }
+        // Second call (after compaction): succeed
         onEvent({
           type: "message_complete",
           message: {
             role: "assistant",
-            content: [
-              { type: "text", text: "Let me check that." },
-              {
-                type: "tool_use",
-                id: "tu-progress",
-                name: "bash",
-                input: { command: "ls" },
-              },
-            ],
+            content: [{ type: "text", text: "recovered after compaction" }],
           },
         });
         onEvent({
           type: "usage",
-          inputTokens: 100,
-          outputTokens: 50,
+          inputTokens: 50,
+          outputTokens: 25,
           model: "test-model",
           providerDurationMs: 100,
         });
+        return [
+          ...messages,
+          {
+            role: "assistant" as const,
+            content: [
+              { type: "text", text: "recovered after compaction" },
+            ] as ContentBlock[],
+          },
+        ];
+      };
-        // Then context_too_large error occurs on the *next* LLM call
-        onEvent({
-          type: "error",
-          error: new Error(
-            "prompt is too long: 242201 tokens > 200000 maximum",
-          ),
-        });
-        onEvent({
-          type: "usage",
-          inputTokens: 0,
-          outputTokens: 0,
-          model: "test-model",
-          providerDurationMs: 10,
-        });
-        // Return the history WITH progress (more messages than input)
-        return progressMessages;
-      }
-      // Second call (after compaction): succeed
-      onEvent({
-        type: "message_complete",
-        message: {
-          role: "assistant",
-          content: [{ type: "text", text: "recovered after compaction" }],
-        },
-      });
-      onEvent({
-        type: "usage",
-        inputTokens: 50,
-        outputTokens: 25,
-        model: "test-model",
-        providerDurationMs: 100,
+      const ctx = makeCtx({
+        agentLoopRun,
+        contextWindowManager: {
+          shouldCompact: () => ({ needed: false, estimatedTokens: 0 }),
+          maybeCompact: async () => ({ compacted: false }),
+        } as unknown as AgentLoopSessionContext["contextWindowManager"],
       });
-      return [
-        ...messages,
-        {
-          role: "assistant" as const,
-          content: [
-            { type: "text", text: "recovered after compaction" },
-          ] as ContentBlock[],
-        },
-      ];
-    };
-    const ctx = makeCtx({
-      agentLoopRun,
-      contextWindowManager: {
-        shouldCompact: () => ({ needed: false, estimatedTokens: 0 }),
-        maybeCompact: async () => ({ compacted: false }),
-      } as unknown as AgentLoopSessionContext["contextWindowManager"],
-    });
-    await runAgentLoopImpl(ctx, "hello", "msg-1", (msg) => events.push(msg));
+      await runAgentLoopImpl(ctx, "hello", "msg-1", (msg) => events.push(msg));
-    // BUG: Currently the reducer is NOT called when progress was made before
-    // context_too_large. The error is surfaced immediately.
-    // After PR 2 fix, the reducer SHOULD be called to attempt compaction.
-    expect(reducerCalled).toBe(true);
+      // BUG: Currently the reducer is NOT called when progress was made before
+      // context_too_large. The error is surfaced immediately.
+      // After PR 2 fix, the reducer SHOULD be called to attempt compaction.
+      expect(reducerCalled).toBe(true);
-    // BUG: Currently a session_error IS emitted instead of retrying.
-    // After PR 2 fix, there should be no session_error.
-    const sessionError = events.find((e) => e.type === "session_error");
-    expect(sessionError).toBeUndefined();
-  });
+      // BUG: Currently a session_error IS emitted instead of retrying.
+      // After PR 2 fix, there should be no session_error.
+      const sessionError = events.find((e) => e.type === "session_error");
+      expect(sessionError).toBeUndefined();
+    },
+  );
   // ── Test 2 ────────────────────────────────────────────────────────
   // When estimation says we're within budget but the provider rejects,
   // the post-run convergence loop should kick in and recover.
   // This test should PASS against current code (when no progress is made).
-  test("overflow recovery compacts below limit even when estimation underestimates", async () => {
-    const events: ServerMessage[] = [];
-    let callCount = 0;
-    let reducerCalled = false;
-    // Estimator says 185k (below 190k budget = 200k * 0.95)
-    mockEstimateTokens = 185_000;
-    // Reducer successfully compacts
-    mockReducerStepFn = (msgs: Message[]) => {
-      reducerCalled = true;
-      return {
-        messages: msgs,
-        tier: "forced_compaction",
-        state: {
-          appliedTiers: ["forced_compaction"],
-          injectionMode: "full",
-          exhausted: false,
-        },
-        estimatedTokens: 100_000,
-        compactionResult: {
-          compacted: true,
+  test.todo(
+    "overflow recovery compacts below limit even when estimation underestimates",
+    async () => {
+      const events: ServerMessage[] = [];
+      let callCount = 0;
+      let reducerCalled = false;
+      // Estimator says 185k (below 190k budget = 200k * 0.95)
+      mockEstimateTokens = 185_000;
+      // Reducer successfully compacts
+      mockReducerStepFn = (msgs: Message[]) => {
+        reducerCalled = true;
+        return {
           messages: msgs,
-          compactedPersistedMessages: 10,
-          summaryText: "Summary",
-          previousEstimatedInputTokens: 185_000,
-          estimatedInputTokens: 100_000,
-          maxInputTokens: 200_000,
-          thresholdTokens: 160_000,
-          compactedMessages: 20,
-          summaryCalls: 1,
-          summaryInputTokens: 800,
-          summaryOutputTokens: 300,
-          summaryModel: "mock-model",
-        },
+          tier: "forced_compaction",
+          state: {
+            appliedTiers: ["forced_compaction"],
+            injectionMode: "full",
+            exhausted: false,
+          },
+          estimatedTokens: 100_000,
+          compactionResult: {
+            compacted: true,
+            messages: msgs,
+            compactedPersistedMessages: 10,
+            summaryText: "Summary",
+            previousEstimatedInputTokens: 185_000,
+            estimatedInputTokens: 100_000,
+            maxInputTokens: 200_000,
+            thresholdTokens: 160_000,
+            compactedMessages: 20,
+            summaryCalls: 1,
+            summaryInputTokens: 800,
+            summaryOutputTokens: 300,
+            summaryModel: "mock-model",
+          },
+        };
       };
-    };
-    const agentLoopRun: AgentLoopRun = async (messages, onEvent) => {
-      callCount++;
-      if (callCount === 1) {
-        // Provider rejects with "prompt is too long: 242201 tokens > 200000"
-        // even though estimator said 185k
+      const agentLoopRun: AgentLoopRun = async (messages, onEvent) => {
+        callCount++;
+        if (callCount === 1) {
+          // Provider rejects with "prompt is too long: 242201 tokens > 200000"
+          // even though estimator said 185k
+          onEvent({
+            type: "error",
+            error: new Error(
+              "prompt is too long: 242201 tokens > 200000 maximum",
+            ),
+          });
+          onEvent({
+            type: "usage",
+            inputTokens: 0,
+            outputTokens: 0,
+            model: "test-model",
+            providerDurationMs: 10,
+          });
+          // No progress — return same messages
+          return messages;
+        }
+        // Second call succeeds
         onEvent({
-          type: "error",
-          error: new Error(
-            "prompt is too long: 242201 tokens > 200000 maximum",
-          ),
+          type: "message_complete",
+          message: {
+            role: "assistant",
+            content: [{ type: "text", text: "recovered" }],
+          },
         });
         onEvent({
           type: "usage",
-          inputTokens: 0,
-          outputTokens: 0,
+          inputTokens: 80_000,
+          outputTokens: 200,
           model: "test-model",
-          providerDurationMs: 10,
+          providerDurationMs: 500,
         });
-        // No progress — return same messages
-        return messages;
-      }
-      // Second call succeeds
-      onEvent({
-        type: "message_complete",
-        message: {
-          role: "assistant",
-          content: [{ type: "text", text: "recovered" }],
-        },
-      });
-      onEvent({
-        type: "usage",
-        inputTokens: 80_000,
-        outputTokens: 200,
-        model: "test-model",
-        providerDurationMs: 500,
-      });
-      return [
-        ...messages,
-        {
-          role: "assistant" as const,
-          content: [{ type: "text", text: "recovered" }] as ContentBlock[],
-        },
-      ];
-    };
+        return [
+          ...messages,
+          {
+            role: "assistant" as const,
+            content: [{ type: "text", text: "recovered" }] as ContentBlock[],
+          },
+        ];
+      };
-    const ctx = makeCtx({
-      agentLoopRun,
-      contextWindowManager: {
-        shouldCompact: () => ({ needed: false, estimatedTokens: 0 }),
-        maybeCompact: async () => ({ compacted: false }),
-      } as unknown as AgentLoopSessionContext["contextWindowManager"],
-    });
+      const ctx = makeCtx({
+        agentLoopRun,
+        contextWindowManager: {
+          shouldCompact: () => ({ needed: false, estimatedTokens: 0 }),
+          maybeCompact: async () => ({ compacted: false }),
+        } as unknown as AgentLoopSessionContext["contextWindowManager"],
+      });
-    await runAgentLoopImpl(ctx, "hello", "msg-1", (msg) => events.push(msg));
+      await runAgentLoopImpl(ctx, "hello", "msg-1", (msg) => events.push(msg));
-    // The reducer should be called in the convergence loop
-    expect(reducerCalled).toBe(true);
-    // Should recover without session_error
-    const sessionError = events.find((e) => e.type === "session_error");
-    expect(sessionError).toBeUndefined();
-    expect(callCount).toBe(2);
-  });
+      // The reducer should be called in the convergence loop
+      expect(reducerCalled).toBe(true);
+      // Should recover without session_error
+      const sessionError = events.find((e) => e.type === "session_error");
+      expect(sessionError).toBeUndefined();
+      expect(callCount).toBe(2);
+    },
+  );
   // ── Test 3 ────────────────────────────────────────────────────────
   // BUG: When the provider rejection reveals actual token count (e.g.,
@@ -825,216 +830,219 @@ describe("session-agent-loop overflow recovery (JARVIS-110)", () => {
   // inaccuracy. For example: 190k / 1.31 ≈ 145k.
   // Planned fix: targetInputTokensOverride should be adjusted based on
   // the ratio between estimated and actual tokens.
-  test("forced compaction targets a lower budget when estimation has been inaccurate", async () => {
-    const events: ServerMessage[] = [];
-    let callCount = 0;
-    let capturedTargetTokens: number | undefined;
-    // Estimator says 185k (below 190k budget = 200k * 0.95)
-    mockEstimateTokens = 185_000;
-    // Reducer captures the targetTokens from the config
-    mockReducerStepFn = (
-      msgs: Message[],
-      cfg: unknown,
-    ) => {
-      capturedTargetTokens = (cfg as { targetTokens: number }).targetTokens;
-      return {
-        messages: msgs,
-        tier: "forced_compaction",
-        state: {
-          appliedTiers: ["forced_compaction"],
-          injectionMode: "full",
-          exhausted: false,
-        },
-        estimatedTokens: 100_000,
-        compactionResult: {
-          compacted: true,
+  test.todo(
+    "forced compaction targets a lower budget when estimation has been inaccurate",
+    async () => {
+      const events: ServerMessage[] = [];
+      let callCount = 0;
+      let capturedTargetTokens: number | undefined;
+      // Estimator says 185k (below 190k budget = 200k * 0.95)
+      mockEstimateTokens = 185_000;
+      // Reducer captures the targetTokens from the config
+      mockReducerStepFn = (msgs: Message[], cfg: unknown) => {
+        capturedTargetTokens = (cfg as { targetTokens: number }).targetTokens;
+        return {
           messages: msgs,
-          compactedPersistedMessages: 10,
-          summaryText: "Summary",
-          previousEstimatedInputTokens: 185_000,
-          estimatedInputTokens: 100_000,
-          maxInputTokens: 200_000,
-          thresholdTokens: 160_000,
-          compactedMessages: 20,
-          summaryCalls: 1,
-          summaryInputTokens: 800,
-          summaryOutputTokens: 300,
-          summaryModel: "mock-model",
-        },
+          tier: "forced_compaction",
+          state: {
+            appliedTiers: ["forced_compaction"],
+            injectionMode: "full",
+            exhausted: false,
+          },
+          estimatedTokens: 100_000,
+          compactionResult: {
+            compacted: true,
+            messages: msgs,
+            compactedPersistedMessages: 10,
+            summaryText: "Summary",
+            previousEstimatedInputTokens: 185_000,
+            estimatedInputTokens: 100_000,
+            maxInputTokens: 200_000,
+            thresholdTokens: 160_000,
+            compactedMessages: 20,
+            summaryCalls: 1,
+            summaryInputTokens: 800,
+            summaryOutputTokens: 300,
+            summaryModel: "mock-model",
+          },
+        };
       };
-    };
-    const agentLoopRun: AgentLoopRun = async (messages, onEvent) => {
-      callCount++;
-      if (callCount === 1) {
-        // Provider rejects: actual tokens 242201, way above estimate of 185k
+      const agentLoopRun: AgentLoopRun = async (messages, onEvent) => {
+        callCount++;
+        if (callCount === 1) {
+          // Provider rejects: actual tokens 242201, way above estimate of 185k
+          onEvent({
+            type: "error",
+            error: new Error(
+              "prompt is too long: 242201 tokens > 200000 maximum",
+            ),
+          });
+          onEvent({
+            type: "usage",
+            inputTokens: 0,
+            outputTokens: 0,
+            model: "test-model",
+            providerDurationMs: 10,
+          });
+          // No progress — return same messages
+          return messages;
+        }
+        // Second call succeeds after compaction
         onEvent({
-          type: "error",
-          error: new Error(
-            "prompt is too long: 242201 tokens > 200000 maximum",
-          ),
+          type: "message_complete",
+          message: {
+            role: "assistant",
+            content: [{ type: "text", text: "recovered" }],
+          },
         });
         onEvent({
           type: "usage",
-          inputTokens: 0,
-          outputTokens: 0,
+          inputTokens: 80_000,
+          outputTokens: 200,
           model: "test-model",
-          providerDurationMs: 10,
+          providerDurationMs: 500,
         });
-        // No progress — return same messages
-        return messages;
-      }
-      // Second call succeeds after compaction
-      onEvent({
-        type: "message_complete",
-        message: {
-          role: "assistant",
-          content: [{ type: "text", text: "recovered" }],
-        },
-      });
-      onEvent({
-        type: "usage",
-        inputTokens: 80_000,
-        outputTokens: 200,
-        model: "test-model",
-        providerDurationMs: 500,
+        return [
+          ...messages,
+          {
+            role: "assistant" as const,
+            content: [{ type: "text", text: "recovered" }] as ContentBlock[],
+          },
+        ];
+      };
+      const ctx = makeCtx({
+        agentLoopRun,
+        contextWindowManager: {
+          shouldCompact: () => ({ needed: false, estimatedTokens: 0 }),
+          maybeCompact: async () => ({ compacted: false }),
+        } as unknown as AgentLoopSessionContext["contextWindowManager"],
       });
-      return [
-        ...messages,
-        {
-          role: "assistant" as const,
-          content: [{ type: "text", text: "recovered" }] as ContentBlock[],
-        },
-      ];
-    };
-    const ctx = makeCtx({
-      agentLoopRun,
-      contextWindowManager: {
-        shouldCompact: () => ({ needed: false, estimatedTokens: 0 }),
-        maybeCompact: async () => ({ compacted: false }),
-      } as unknown as AgentLoopSessionContext["contextWindowManager"],
-    });
+      await runAgentLoopImpl(ctx, "hello", "msg-1", (msg) => events.push(msg));
-    await runAgentLoopImpl(ctx, "hello", "msg-1", (msg) => events.push(msg));
+      // The reducer should have been called with a corrected target
+      expect(capturedTargetTokens).toBeDefined();
-    // The reducer should have been called with a corrected target
-    expect(capturedTargetTokens).toBeDefined();
-    // preflightBudget = 200_000 * 0.95 = 190_000
-    // estimationErrorRatio = 242201 / 185000 ≈ 1.309
-    // correctedTarget = floor(190000 / 1.309) ≈ 145_130
-    // The corrected target must be LESS than the uncorrected preflightBudget
-    const preflightBudget = 190_000;
-    expect(capturedTargetTokens!).toBeLessThan(preflightBudget);
-    // Verify the approximate corrected value (190000 / (242201/185000))
-    const expectedCorrectedTarget = Math.floor(
-      preflightBudget / (242201 / 185_000),
-    );
-    expect(capturedTargetTokens!).toBe(expectedCorrectedTarget);
-    // Should recover without session_error
-    const sessionError = events.find((e) => e.type === "session_error");
-    expect(sessionError).toBeUndefined();
-    expect(callCount).toBe(2);
-  });
+      // preflightBudget = 200_000 * 0.95 = 190_000
+      // estimationErrorRatio = 242201 / 185000 ≈ 1.309
+      // correctedTarget = floor(190000 / 1.309) ≈ 145_130
+      // The corrected target must be LESS than the uncorrected preflightBudget
+      const preflightBudget = 190_000;
+      expect(capturedTargetTokens!).toBeLessThan(preflightBudget);
+      // Verify the approximate corrected value (190000 / (242201/185000))
+      const expectedCorrectedTarget = Math.floor(
+        preflightBudget / (242201 / 185_000),
+      );
+      expect(capturedTargetTokens!).toBe(expectedCorrectedTarget);
+      // Should recover without session_error
+      const sessionError = events.find((e) => e.type === "session_error");
+      expect(sessionError).toBeUndefined();
+      expect(callCount).toBe(2);
+    },
+  );
   // ── Test 4 ────────────────────────────────────────────────────────
   // A realistic 75+ message conversation with many tool calls where
   // token estimation underestimates. This test should PASS against
   // current code because the agent loop returns same-length history
   // (no progress), so the convergence loop kicks in.
-  test("overflow recovery succeeds for 75+ message conversation with many tool calls", async () => {
-    const events: ServerMessage[] = [];
-    const longHistory = buildLongConversation(75);
-    let callCount = 0;
-    let reducerCalled = false;
-    // Estimator says ~195k — just above budget so preflight reducer runs
-    mockEstimateTokens = 195_000;
+  test.todo(
+    "overflow recovery succeeds for 75+ message conversation with many tool calls",
+    async () => {
+      const events: ServerMessage[] = [];
+      const longHistory = buildLongConversation(75);
+      let callCount = 0;
+      let reducerCalled = false;
+      // Estimator says ~195k — just above budget so preflight reducer runs
+      mockEstimateTokens = 195_000;
+      // Reducer reduces to under budget
+      mockReducerStepFn = (msgs: Message[]) => {
+        reducerCalled = true;
+        return {
+          messages: msgs.slice(-10), // Keep only last 10 messages
+          tier: "forced_compaction",
+          state: {
+            appliedTiers: ["forced_compaction"],
+            injectionMode: "full",
+            exhausted: false,
+          },
+          estimatedTokens: 50_000,
+          compactionResult: {
+            compacted: true,
+            messages: msgs.slice(-10),
+            compactedPersistedMessages: msgs.length - 10,
+            summaryText: "Long conversation summary",
+            previousEstimatedInputTokens: 195_000,
+            estimatedInputTokens: 50_000,
+            maxInputTokens: 200_000,
+            thresholdTokens: 160_000,
+            compactedMessages: msgs.length - 10,
+            summaryCalls: 2,
+            summaryInputTokens: 2000,
+            summaryOutputTokens: 500,
+            summaryModel: "mock-model",
+          },
+        };
+      };
-    // Reducer reduces to under budget
-    mockReducerStepFn = (msgs: Message[]) => {
-      reducerCalled = true;
-      return {
-        messages: msgs.slice(-10), // Keep only last 10 messages
-        tier: "forced_compaction",
-        state: {
-          appliedTiers: ["forced_compaction"],
-          injectionMode: "full",
-          exhausted: false,
-        },
-        estimatedTokens: 50_000,
-        compactionResult: {
-          compacted: true,
-          messages: msgs.slice(-10),
-          compactedPersistedMessages: msgs.length - 10,
-          summaryText: "Long conversation summary",
-          previousEstimatedInputTokens: 195_000,
-          estimatedInputTokens: 50_000,
-          maxInputTokens: 200_000,
-          thresholdTokens: 160_000,
-          compactedMessages: msgs.length - 10,
-          summaryCalls: 2,
-          summaryInputTokens: 2000,
-          summaryOutputTokens: 500,
-          summaryModel: "mock-model",
-        },
+      const agentLoopRun: AgentLoopRun = async (messages, onEvent) => {
+        callCount++;
+        onEvent({
+          type: "message_complete",
+          message: {
+            role: "assistant",
+            content: [{ type: "text", text: "Here's the analysis..." }],
+          },
+        });
+        onEvent({
+          type: "usage",
+          inputTokens: 50_000,
+          outputTokens: 300,
+          model: "test-model",
+          providerDurationMs: 800,
+        });
+        return [
+          ...messages,
+          {
+            role: "assistant" as const,
+            content: [
+              { type: "text", text: "Here's the analysis..." },
+            ] as ContentBlock[],
+          },
+        ];
       };
-    };
-    const agentLoopRun: AgentLoopRun = async (messages, onEvent) => {
-      callCount++;
-      onEvent({
-        type: "message_complete",
-        message: {
-          role: "assistant",
-          content: [{ type: "text", text: "Here's the analysis..." }],
-        },
+      const ctx = makeCtx({
+        agentLoopRun,
+        messages: longHistory,
+        contextWindowManager: {
+          shouldCompact: () => ({ needed: false, estimatedTokens: 0 }),
+          maybeCompact: async () => ({ compacted: false }),
+        } as unknown as AgentLoopSessionContext["contextWindowManager"],
       });
-      onEvent({
-        type: "usage",
-        inputTokens: 50_000,
-        outputTokens: 300,
-        model: "test-model",
-        providerDurationMs: 800,
-      });
-      return [
-        ...messages,
-        {
-          role: "assistant" as const,
-          content: [
-            { type: "text", text: "Here's the analysis..." },
-          ] as ContentBlock[],
-        },
-      ];
-    };
-    const ctx = makeCtx({
-      agentLoopRun,
-      messages: longHistory,
-      contextWindowManager: {
-        shouldCompact: () => ({ needed: false, estimatedTokens: 0 }),
-        maybeCompact: async () => ({ compacted: false }),
-      } as unknown as AgentLoopSessionContext["contextWindowManager"],
-    });
-    await runAgentLoopImpl(ctx, "analyze this", "msg-1", (msg) =>
-      events.push(msg),
-    );
-    // Preflight should trigger the reducer since 195k > 190k budget
-    expect(reducerCalled).toBe(true);
-    // Should succeed
-    expect(callCount).toBe(1);
-    const sessionError = events.find((e) => e.type === "session_error");
-    expect(sessionError).toBeUndefined();
-    const complete = events.find((e) => e.type === "message_complete");
-    expect(complete).toBeDefined();
-  });
+      await runAgentLoopImpl(ctx, "analyze this", "msg-1", (msg) =>
+        events.push(msg),
+      );
+      // Preflight should trigger the reducer since 195k > 190k budget
+      expect(reducerCalled).toBe(true);
+      // Should succeed
+      expect(callCount).toBe(1);
+      const sessionError = events.find((e) => e.type === "session_error");
+      expect(sessionError).toBeUndefined();
+      const complete = events.find((e) => e.type === "message_complete");
+      expect(complete).toBeDefined();
+    },
+  );
   // ── Test 5 ────────────────────────────────────────────────────────
   // BUG: When all 4 reducer tiers have been applied, then the agent
@@ -1045,390 +1053,571 @@ describe("session-agent-loop overflow recovery (JARVIS-110)", () => {
   // Expected behavior (PR 2 fix): Even after all tiers are exhausted,
   // if progress was made, attempt emergency compaction with
   // `minKeepRecentUserTurns: 0` as a last resort.
-  test("exhausted reducer tiers with progress still attempts emergency compaction", async () => {
-    const events: ServerMessage[] = [];
-    let emergencyCompactCalled = false;
-    // Start with reducer already exhausted
-    mockReducerStepFn = (msgs: Message[]) => {
-      return {
-        messages: msgs,
-        tier: "injection_downgrade",
-        state: {
-          appliedTiers: [
-            "forced_compaction",
-            "tool_result_truncation",
-            "media_stubbing",
-            "injection_downgrade",
-          ],
-          injectionMode: "minimal",
-          exhausted: true,
-        },
-        estimatedTokens: 195_000,
+  test.todo(
+    "exhausted reducer tiers with progress still attempts emergency compaction",
+    async () => {
+      const events: ServerMessage[] = [];
+      let emergencyCompactCalled = false;
+      // Start with reducer already exhausted
+      mockReducerStepFn = (msgs: Message[]) => {
+        return {
+          messages: msgs,
+          tier: "injection_downgrade",
+          state: {
+            appliedTiers: [
+              "forced_compaction",
+              "tool_result_truncation",
+              "media_stubbing",
+              "injection_downgrade",
+            ],
+            injectionMode: "minimal",
+            exhausted: true,
+          },
+          estimatedTokens: 195_000,
+        };
       };
-    };
-    let agentLoopCallCount = 0;
-    const agentLoopRun: AgentLoopRun = async (messages, onEvent) => {
-      agentLoopCallCount++;
-      if (agentLoopCallCount === 1) {
-        // Agent makes progress (tool calls succeed, messages grow)
-        const progressMessages: Message[] = [
-          ...messages,
-          {
-            role: "assistant" as const,
-            content: [
-              { type: "text", text: "Running analysis..." },
-              {
-                type: "tool_use",
-                id: "tu-1",
-                name: "bash",
-                input: { command: "find . -name '*.ts'" },
-              },
-            ] as ContentBlock[],
-          },
-          {
-            role: "user" as const,
-            content: [
-              {
-                type: "tool_result",
-                tool_use_id: "tu-1",
-                content: "file1.ts\nfile2.ts\nfile3.ts",
-                is_error: false,
-              },
-            ] as ContentBlock[],
-          },
-        ];
+      let agentLoopCallCount = 0;
+      const agentLoopRun: AgentLoopRun = async (messages, onEvent) => {
+        agentLoopCallCount++;
+        if (agentLoopCallCount === 1) {
+          // Agent makes progress (tool calls succeed, messages grow)
+          const progressMessages: Message[] = [
+            ...messages,
+            {
+              role: "assistant" as const,
+              content: [
+                { type: "text", text: "Running analysis..." },
+                {
+                  type: "tool_use",
+                  id: "tu-1",
+                  name: "bash",
+                  input: { command: "find . -name '*.ts'" },
+                },
+              ] as ContentBlock[],
+            },
+            {
+              role: "user" as const,
+              content: [
+                {
+                  type: "tool_result",
+                  tool_use_id: "tu-1",
+                  content: "file1.ts\nfile2.ts\nfile3.ts",
+                  is_error: false,
+                },
+              ] as ContentBlock[],
+            },
+          ];
-        onEvent({
-          type: "tool_use",
-          id: "tu-1",
-          name: "bash",
-          input: { command: "find . -name '*.ts'" },
-        });
-        onEvent({
-          type: "tool_result",
-          toolUseId: "tu-1",
-          content: "file1.ts\nfile2.ts\nfile3.ts",
-          isError: false,
-        });
+          onEvent({
+            type: "tool_use",
+            id: "tu-1",
+            name: "bash",
+            input: { command: "find . -name '*.ts'" },
+          });
+          onEvent({
+            type: "tool_result",
+            toolUseId: "tu-1",
+            content: "file1.ts\nfile2.ts\nfile3.ts",
+            isError: false,
+          });
+          onEvent({
+            type: "message_complete",
+            message: {
+              role: "assistant",
+              content: [
+                { type: "text", text: "Running analysis..." },
+                {
+                  type: "tool_use",
+                  id: "tu-1",
+                  name: "bash",
+                  input: { command: "find . -name '*.ts'" },
+                },
+              ],
+            },
+          });
+          onEvent({
+            type: "usage",
+            inputTokens: 190_000,
+            outputTokens: 100,
+            model: "test-model",
+            providerDurationMs: 200,
+          });
+          // Then context_too_large on the next LLM call within the loop
+          onEvent({
+            type: "error",
+            error: new Error("context_length_exceeded"),
+          });
+          onEvent({
+            type: "usage",
+            inputTokens: 0,
+            outputTokens: 0,
+            model: "test-model",
+            providerDurationMs: 10,
+          });
+          return progressMessages;
+        }
+        // After emergency compaction, succeed
         onEvent({
           type: "message_complete",
           message: {
             role: "assistant",
-            content: [
-              { type: "text", text: "Running analysis..." },
-              {
-                type: "tool_use",
-                id: "tu-1",
-                name: "bash",
-                input: { command: "find . -name '*.ts'" },
-              },
-            ],
+            content: [{ type: "text", text: "recovered" }],
           },
         });
         onEvent({
           type: "usage",
-          inputTokens: 190_000,
+          inputTokens: 50_000,
           outputTokens: 100,
           model: "test-model",
           providerDurationMs: 200,
         });
+        return [
+          ...messages,
+          {
+            role: "assistant" as const,
+            content: [{ type: "text", text: "recovered" }] as ContentBlock[],
+          },
+        ];
+      };
+      const ctx = makeCtx({
+        agentLoopRun,
+        contextWindowManager: {
+          shouldCompact: () => ({ needed: false, estimatedTokens: 0 }),
+          maybeCompact: async (
+            _msgs: Message[],
+            _signal: AbortSignal,
+            opts?: Record<string, unknown>,
+          ) => {
+            if (opts?.force && opts?.minKeepRecentUserTurns === 0) {
+              emergencyCompactCalled = true;
+              return {
+                compacted: true,
+                messages: [
+                  {
+                    role: "user",
+                    content: [{ type: "text", text: "Hello" }],
+                  },
+                ] as Message[],
+                compactedPersistedMessages: 50,
+                summaryText: "Emergency summary",
+                previousEstimatedInputTokens: 195_000,
+                estimatedInputTokens: 50_000,
+                maxInputTokens: 200_000,
+                thresholdTokens: 160_000,
+                compactedMessages: 50,
+                summaryCalls: 1,
+                summaryInputTokens: 1000,
+                summaryOutputTokens: 300,
+                summaryModel: "mock-model",
+              };
+            }
+            return { compacted: false };
+          },
+        } as unknown as AgentLoopSessionContext["contextWindowManager"],
+      });
+      await runAgentLoopImpl(ctx, "hello", "msg-1", (msg) => events.push(msg));
-        // Then context_too_large on the next LLM call within the loop
+      // BUG: Currently when progress was made + all tiers exhausted,
+      // emergency compaction is NOT attempted. The error is surfaced directly.
+      // After PR 2 fix, emergency compaction should be attempted.
+      expect(emergencyCompactCalled).toBe(true);
+      // BUG: Currently a session_error IS emitted.
+      const sessionError = events.find((e) => e.type === "session_error");
+      expect(sessionError).toBeUndefined();
+    },
+  );
+  // ── Test 6 ────────────────────────────────────────────────────────
+  // Tests mid-loop budget check via onCheckpoint.
+  // The onCheckpoint callback estimates prompt tokens after each tool round.
+  // When estimate exceeds the mid-loop threshold (85% of budget),
+  // it returns "yield" to break the agent loop.
+  // The session-agent-loop then runs compaction and re-enters the agent loop.
+  test.todo(
+    "onCheckpoint yields when token estimate exceeds mid-loop budget threshold",
+    async () => {
+      const events: ServerMessage[] = [];
+      let compactionCalled = false;
+      // estimatePromptTokens is called:
+      // 1. During preflight budget check (low value, below budget)
+      // 2. During onCheckpoint mid-loop check (high value, above 85% threshold)
+      // Budget = 200_000 * 0.95 = 190_000
+      // Mid-loop threshold = 190_000 * 0.85 = 161_500
+      let estimateCallCount = 0;
+      mockEstimateTokens = () => {
+        estimateCallCount++;
+        // First call: preflight check — below budget
+        if (estimateCallCount === 1) return 100_000;
+        // Subsequent calls: mid-loop check — above 85% threshold
+        return 170_000;
+      };
+      let agentLoopCallCount = 0;
+      const agentLoopRun: AgentLoopRun = async (
+        messages,
+        onEvent,
+        _signal,
+        _requestId,
+        onCheckpoint,
+      ) => {
+        agentLoopCallCount++;
+        if (agentLoopCallCount === 1) {
+          // Simulate a tool round: assistant calls a tool, results come back
+          const withProgress: Message[] = [
+            ...messages,
+            {
+              role: "assistant" as const,
+              content: [
+                { type: "text", text: "Let me check." },
+                {
+                  type: "tool_use",
+                  id: "tu-1",
+                  name: "bash",
+                  input: { command: "ls" },
+                },
+              ] as ContentBlock[],
+            },
+            {
+              role: "user" as const,
+              content: [
+                {
+                  type: "tool_result",
+                  tool_use_id: "tu-1",
+                  content: "file1.ts\nfile2.ts",
+                  is_error: false,
+                },
+              ] as ContentBlock[],
+            },
+          ];
+          onEvent({
+            type: "message_complete",
+            message: {
+              role: "assistant",
+              content: [
+                { type: "text", text: "Let me check." },
+                {
+                  type: "tool_use",
+                  id: "tu-1",
+                  name: "bash",
+                  input: { command: "ls" },
+                },
+              ],
+            },
+          });
+          onEvent({
+            type: "usage",
+            inputTokens: 100,
+            outputTokens: 50,
+            model: "test-model",
+            providerDurationMs: 100,
+          });
+          // Call onCheckpoint — this should trigger the mid-loop budget check
+          // which sees 170_000 > 161_500 and returns "yield"
+          if (onCheckpoint) {
+            const decision = onCheckpoint({
+              turnIndex: 0,
+              toolCount: 1,
+              hasToolUse: true,
+              history: withProgress,
+            });
+            if (decision === "yield") {
+              // Agent loop stops when checkpoint yields
+              return withProgress;
+            }
+          }
+          return withProgress;
+        }
+        // Second call (after compaction): complete successfully
         onEvent({
-          type: "error",
-          error: new Error("context_length_exceeded"),
+          type: "message_complete",
+          message: {
+            role: "assistant",
+            content: [{ type: "text", text: "done after compaction" }],
+          },
         });
         onEvent({
           type: "usage",
-          inputTokens: 0,
-          outputTokens: 0,
+          inputTokens: 50,
+          outputTokens: 25,
           model: "test-model",
-          providerDurationMs: 10,
+          providerDurationMs: 100,
         });
+        return [
+          ...messages,
+          {
+            role: "assistant" as const,
+            content: [
+              { type: "text", text: "done after compaction" },
+            ] as ContentBlock[],
+          },
+        ];
+      };
-        return progressMessages;
-      }
-      // After emergency compaction, succeed
-      onEvent({
-        type: "message_complete",
-        message: {
-          role: "assistant",
-          content: [{ type: "text", text: "recovered" }],
-        },
-      });
-      onEvent({
-        type: "usage",
-        inputTokens: 50_000,
-        outputTokens: 100,
-        model: "test-model",
-        providerDurationMs: 200,
-      });
-      return [
-        ...messages,
-        {
-          role: "assistant" as const,
-          content: [{ type: "text", text: "recovered" }] as ContentBlock[],
-        },
-      ];
-    };
-    const ctx = makeCtx({
-      agentLoopRun,
-      contextWindowManager: {
-        shouldCompact: () => ({ needed: false, estimatedTokens: 0 }),
-        maybeCompact: async (
-          _msgs: Message[],
-          _signal: AbortSignal,
-          opts?: Record<string, unknown>,
-        ) => {
-          if (opts?.force && opts?.minKeepRecentUserTurns === 0) {
-            emergencyCompactCalled = true;
+      const ctx = makeCtx({
+        agentLoopRun,
+        contextWindowManager: {
+          shouldCompact: () => ({ needed: false, estimatedTokens: 0 }),
+          maybeCompact: async () => {
+            compactionCalled = true;
             return {
               compacted: true,
               messages: [
                 {
-                  role: "user",
+                  role: "user" as const,
                   content: [{ type: "text", text: "Hello" }],
                 },
               ] as Message[],
-              compactedPersistedMessages: 50,
-              summaryText: "Emergency summary",
-              previousEstimatedInputTokens: 195_000,
-              estimatedInputTokens: 50_000,
+              compactedPersistedMessages: 5,
+              summaryText: "Mid-loop compaction summary",
+              previousEstimatedInputTokens: 170_000,
+              estimatedInputTokens: 80_000,
               maxInputTokens: 200_000,
               thresholdTokens: 160_000,
-              compactedMessages: 50,
+              compactedMessages: 10,
               summaryCalls: 1,
-              summaryInputTokens: 1000,
-              summaryOutputTokens: 300,
+              summaryInputTokens: 500,
+              summaryOutputTokens: 200,
               summaryModel: "mock-model",
             };
-          }
-          return { compacted: false };
-        },
-      } as unknown as AgentLoopSessionContext["contextWindowManager"],
-    });
+          },
+        } as unknown as AgentLoopSessionContext["contextWindowManager"],
+      });
-    await runAgentLoopImpl(ctx, "hello", "msg-1", (msg) => events.push(msg));
+      await runAgentLoopImpl(ctx, "hello", "msg-1", (msg) => events.push(msg));
-    // BUG: Currently when progress was made + all tiers exhausted,
-    // emergency compaction is NOT attempted. The error is surfaced directly.
-    // After PR 2 fix, emergency compaction should be attempted.
-    expect(emergencyCompactCalled).toBe(true);
+      // The mid-loop budget check should have triggered compaction
+      expect(compactionCalled).toBe(true);
-    // BUG: Currently a session_error IS emitted.
-    const sessionError = events.find((e) => e.type === "session_error");
-    expect(sessionError).toBeUndefined();
-  });
+      // Agent loop should have been called twice: once before yield, once after compaction
+      expect(agentLoopCallCount).toBe(2);
-  // ── Test 6 ────────────────────────────────────────────────────────
-  // Tests mid-loop budget check via onCheckpoint.
-  // The onCheckpoint callback estimates prompt tokens after each tool round.
-  // When estimate exceeds the mid-loop threshold (85% of budget),
-  // it returns "yield" to break the agent loop.
-  // The session-agent-loop then runs compaction and re-enters the agent loop.
-  test("onCheckpoint yields when token estimate exceeds mid-loop budget threshold", async () => {
-    const events: ServerMessage[] = [];
-    let compactionCalled = false;
+      // No session_error should be emitted
+      const sessionError = events.find((e) => e.type === "session_error");
+      expect(sessionError).toBeUndefined();
-    // estimatePromptTokens is called:
-    // 1. During preflight budget check (low value, below budget)
-    // 2. During onCheckpoint mid-loop check (high value, above 85% threshold)
-    // Budget = 200_000 * 0.95 = 190_000
-    // Mid-loop threshold = 190_000 * 0.85 = 161_500
-    let estimateCallCount = 0;
-    mockEstimateTokens = () => {
-      estimateCallCount++;
-      // First call: preflight check — below budget
-      if (estimateCallCount === 1) return 100_000;
-      // Subsequent calls: mid-loop check — above 85% threshold
-      return 170_000;
-    };
+      // A context_compacted event should have been emitted
+      const compacted = events.find((e) => e.type === "context_compacted");
+      expect(compacted).toBeDefined();
+    },
+  );
-    let agentLoopCallCount = 0;
-    const agentLoopRun: AgentLoopRun = async (
-      messages,
-      onEvent,
-      _signal,
-      _requestId,
-      onCheckpoint,
-    ) => {
-      agentLoopCallCount++;
+  // ── Test 7 ────────────────────────────────────────────────────────
+  // Tests that mid-loop budget check prevents context_too_large entirely.
+  // Agent loop runs tool calls with growing history. After the estimate
+  // exceeds the mid-loop threshold, the loop yields, compaction runs,
+  // and the loop resumes. The provider NEVER rejects with context_too_large.
+  test.todo(
+    "mid-loop budget check prevents context_too_large when tools produce large results",
+    async () => {
+      const events: ServerMessage[] = [];
+      let compactionCalled = false;
+      // Budget = 200_000 * 0.95 = 190_000
+      // Mid-loop threshold = 190_000 * 0.85 = 161_500
+      // Simulate token growth: preflight = 50k, then each checkpoint call
+      // returns a growing estimate. By tool call 3, we exceed the threshold.
+      let estimateCallCount = 0;
+      mockEstimateTokens = () => {
+        estimateCallCount++;
+        // First call: preflight — well below budget
+        if (estimateCallCount === 1) return 50_000;
+        // Checkpoint calls grow with each tool round
+        if (estimateCallCount === 2) return 100_000; // tool 1
+        if (estimateCallCount === 3) return 140_000; // tool 2
+        // Tool 3: exceeds 161_500 threshold
+        return 175_000;
+      };
-      if (agentLoopCallCount === 1) {
-        // Simulate a tool round: assistant calls a tool, results come back
-        const withProgress: Message[] = [
-          ...messages,
-          {
-            role: "assistant" as const,
-            content: [
-              { type: "text", text: "Let me check." },
-              {
-                type: "tool_use",
-                id: "tu-1",
-                name: "bash",
-                input: { command: "ls" },
-              },
-            ] as ContentBlock[],
-          },
-          {
-            role: "user" as const,
-            content: [
-              {
-                type: "tool_result",
-                tool_use_id: "tu-1",
-                content: "file1.ts\nfile2.ts",
-                is_error: false,
-              },
-            ] as ContentBlock[],
-          },
-        ];
+      let agentLoopCallCount = 0;
+      let contextTooLargeEmitted = false;
+      const agentLoopRun: AgentLoopRun = async (
+        messages,
+        onEvent,
+        _signal,
+        _requestId,
+        onCheckpoint,
+      ) => {
+        agentLoopCallCount++;
+        if (agentLoopCallCount === 1) {
+          const currentHistory = [...messages];
+          // Simulate 5 tool rounds — but the checkpoint should yield at round 3
+          for (let i = 0; i < 5; i++) {
+            const toolId = `tu-${i}`;
+            const assistantMsg: Message = {
+              role: "assistant" as const,
+              content: [
+                { type: "text", text: `Step ${i}` },
+                {
+                  type: "tool_use",
+                  id: toolId,
+                  name: "bash",
+                  input: { command: `cmd-${i}` },
+                },
+              ] as ContentBlock[],
+            };
+            const resultMsg: Message = {
+              role: "user" as const,
+              content: [
+                {
+                  type: "tool_result",
+                  tool_use_id: toolId,
+                  content: "x".repeat(10_000),
+                  is_error: false,
+                },
+              ] as ContentBlock[],
+            };
+            currentHistory.push(assistantMsg, resultMsg);
+            onEvent({
+              type: "message_complete",
+              message: assistantMsg,
+            });
+            onEvent({
+              type: "usage",
+              inputTokens: 50_000 + i * 20_000,
+              outputTokens: 50,
+              model: "test-model",
+              providerDurationMs: 100,
+            });
+            if (onCheckpoint) {
+              const decision = onCheckpoint({
+                turnIndex: i,
+                toolCount: 1,
+                hasToolUse: true,
+                history: currentHistory,
+              });
+              if (decision === "yield") {
+                return currentHistory;
+              }
+            }
+          }
+          return currentHistory;
+        }
+        // Second call (after compaction): complete
         onEvent({
           type: "message_complete",
           message: {
             role: "assistant",
             content: [
-              { type: "text", text: "Let me check." },
-              {
-                type: "tool_use",
-                id: "tu-1",
-                name: "bash",
-                input: { command: "ls" },
-              },
+              { type: "text", text: "completed after mid-loop compaction" },
             ],
           },
         });
         onEvent({
           type: "usage",
-          inputTokens: 100,
-          outputTokens: 50,
+          inputTokens: 60_000,
+          outputTokens: 100,
           model: "test-model",
-          providerDurationMs: 100,
+          providerDurationMs: 200,
         });
+        return [
+          ...messages,
+          {
+            role: "assistant" as const,
+            content: [
+              { type: "text", text: "completed after mid-loop compaction" },
+            ] as ContentBlock[],
+          },
+        ];
+      };
-        // Call onCheckpoint — this should trigger the mid-loop budget check
-        // which sees 170_000 > 161_500 and returns "yield"
-        if (onCheckpoint) {
-          const decision = onCheckpoint({
-            turnIndex: 0,
-            toolCount: 1,
-            hasToolUse: true,
-            history: withProgress,
-          });
-          if (decision === "yield") {
-            // Agent loop stops when checkpoint yields
-            return withProgress;
-          }
-        }
-        return withProgress;
-      }
-      // Second call (after compaction): complete successfully
-      onEvent({
-        type: "message_complete",
-        message: {
-          role: "assistant",
-          content: [{ type: "text", text: "done after compaction" }],
-        },
-      });
-      onEvent({
-        type: "usage",
-        inputTokens: 50,
-        outputTokens: 25,
-        model: "test-model",
-        providerDurationMs: 100,
+      const ctx = makeCtx({
+        agentLoopRun,
+        contextWindowManager: {
+          shouldCompact: () => ({ needed: false, estimatedTokens: 0 }),
+          maybeCompact: async () => {
+            compactionCalled = true;
+            return {
+              compacted: true,
+              messages: [
+                {
+                  role: "user" as const,
+                  content: [{ type: "text", text: "Hello" }],
+                },
+              ] as Message[],
+              compactedPersistedMessages: 8,
+              summaryText: "Compacted large tool results",
+              previousEstimatedInputTokens: 175_000,
+              estimatedInputTokens: 60_000,
+              maxInputTokens: 200_000,
+              thresholdTokens: 160_000,
+              compactedMessages: 15,
+              summaryCalls: 1,
+              summaryInputTokens: 800,
+              summaryOutputTokens: 300,
+              summaryModel: "mock-model",
+            };
+          },
+        } as unknown as AgentLoopSessionContext["contextWindowManager"],
       });
-      return [
-        ...messages,
-        {
-          role: "assistant" as const,
-          content: [
-            { type: "text", text: "done after compaction" },
-          ] as ContentBlock[],
-        },
-      ];
-    };
-    const ctx = makeCtx({
-      agentLoopRun,
-      contextWindowManager: {
-        shouldCompact: () => ({ needed: false, estimatedTokens: 0 }),
-        maybeCompact: async () => {
-          compactionCalled = true;
-          return {
-            compacted: true,
-            messages: [
-              {
-                role: "user" as const,
-                content: [{ type: "text", text: "Hello" }],
-              },
-            ] as Message[],
-            compactedPersistedMessages: 5,
-            summaryText: "Mid-loop compaction summary",
-            previousEstimatedInputTokens: 170_000,
-            estimatedInputTokens: 80_000,
-            maxInputTokens: 200_000,
-            thresholdTokens: 160_000,
-            compactedMessages: 10,
-            summaryCalls: 1,
-            summaryInputTokens: 500,
-            summaryOutputTokens: 200,
-            summaryModel: "mock-model",
-          };
-        },
-      } as unknown as AgentLoopSessionContext["contextWindowManager"],
-    });
-    await runAgentLoopImpl(ctx, "hello", "msg-1", (msg) => events.push(msg));
-    // The mid-loop budget check should have triggered compaction
-    expect(compactionCalled).toBe(true);
+      await runAgentLoopImpl(ctx, "hello", "msg-1", (msg) => {
+        events.push(msg);
+        // Track if context_too_large was ever emitted
+        if (
+          msg.type === "session_error" &&
+          "code" in msg &&
+          msg.code === "SESSION_PROCESSING_FAILED"
+        ) {
+          contextTooLargeEmitted = true;
+        }
+      });
-    // Agent loop should have been called twice: once before yield, once after compaction
-    expect(agentLoopCallCount).toBe(2);
+      // Compaction should have been triggered by mid-loop budget check
+      expect(compactionCalled).toBe(true);
-    // No session_error should be emitted
-    const sessionError = events.find((e) => e.type === "session_error");
-    expect(sessionError).toBeUndefined();
+      // The provider should NEVER have rejected with context_too_large
+      expect(contextTooLargeEmitted).toBe(false);
-    // A context_compacted event should have been emitted
-    const compacted = events.find((e) => e.type === "context_compacted");
-    expect(compacted).toBeDefined();
-  });
+      // Agent loop called twice: once (yielded at tool 3), once after compaction
+      expect(agentLoopCallCount).toBe(2);
-  // ── Test 7 ────────────────────────────────────────────────────────
-  // Tests that mid-loop budget check prevents context_too_large entirely.
-  // Agent loop runs tool calls with growing history. After the estimate
-  // exceeds the mid-loop threshold, the loop yields, compaction runs,
-  // and the loop resumes. The provider NEVER rejects with context_too_large.
-  test("mid-loop budget check prevents context_too_large when tools produce large results", async () => {
+      // No session_error
+      const sessionError = events.find((e) => e.type === "session_error");
+      expect(sessionError).toBeUndefined();
+    },
+  );
+  // ── Test 8 ────────────────────────────────────────────────────────
+  // When mid-loop compaction exhausts maxAttempts but the agent loop
+  // still yields (yieldedForBudget remains true), the incomplete turn
+  // must escalate to the convergence loop instead of being silently
+  // treated as a completed turn.
+  test("exhausted mid-loop compaction attempts escalate to convergence loop", async () => {
     const events: ServerMessage[] = [];
-    let compactionCalled = false;
     // Budget = 200_000 * 0.95 = 190_000
     // Mid-loop threshold = 190_000 * 0.85 = 161_500
-    // Simulate token growth: preflight = 50k, then each checkpoint call
-    // returns a growing estimate. By tool call 3, we exceed the threshold.
     let estimateCallCount = 0;
     mockEstimateTokens = () => {
       estimateCallCount++;
-      // First call: preflight — well below budget
-      if (estimateCallCount === 1) return 50_000;
-      // Checkpoint calls grow with each tool round
-      if (estimateCallCount === 2) return 100_000; // tool 1
-      if (estimateCallCount === 3) return 140_000; // tool 2
-      // Tool 3: exceeds 161_500 threshold
-      return 175_000;
+      // Preflight: below budget
+      if (estimateCallCount === 1) return 100_000;
+      // Every checkpoint call: above threshold — always triggers yield
+      return 170_000;
     };
     let agentLoopCallCount = 0;
-    let contextTooLargeEmitted = false;
     const agentLoopRun: AgentLoopRun = async (
       messages,
       onEvent,
@@ -1438,91 +1627,88 @@ describe("session-agent-loop overflow recovery (JARVIS-110)", () => {
     ) => {
       agentLoopCallCount++;
-      if (agentLoopCallCount === 1) {
-        const currentHistory = [...messages];
-        // Simulate 5 tool rounds — but the checkpoint should yield at round 3
-        for (let i = 0; i < 5; i++) {
-          const toolId = `tu-${i}`;
-          const assistantMsg: Message = {
-            role: "assistant" as const,
-            content: [
-              { type: "text", text: `Step ${i}` },
-              {
-                type: "tool_use",
-                id: toolId,
-                name: "bash",
-                input: { command: `cmd-${i}` },
-              },
-            ] as ContentBlock[],
-          };
-          const resultMsg: Message = {
-            role: "user" as const,
-            content: [
-              {
-                type: "tool_result",
-                tool_use_id: toolId,
-                content: "x".repeat(10_000),
-                is_error: false,
-              },
-            ] as ContentBlock[],
-          };
-          currentHistory.push(assistantMsg, resultMsg);
-          onEvent({
-            type: "message_complete",
-            message: assistantMsg,
-          });
-          onEvent({
-            type: "usage",
-            inputTokens: 50_000 + i * 20_000,
-            outputTokens: 50,
-            model: "test-model",
-            providerDurationMs: 100,
-          });
-          if (onCheckpoint) {
-            const decision = onCheckpoint({
-              turnIndex: i,
-              toolCount: 1,
-              hasToolUse: true,
-              history: currentHistory,
-            });
-            if (decision === "yield") {
-              return currentHistory;
-            }
-          }
-        }
-        return currentHistory;
-      }
+      // Every call: simulate tool progress then yield at checkpoint
+      const withProgress: Message[] = [
+        ...messages,
+        {
+          role: "assistant" as const,
+          content: [
+            { type: "text", text: `Tool call ${agentLoopCallCount}` },
+            {
+              type: "tool_use",
+              id: `tu-${agentLoopCallCount}`,
+              name: "bash",
+              input: { command: "ls" },
+            },
+          ] as ContentBlock[],
+        },
+        {
+          role: "user" as const,
+          content: [
+            {
+              type: "tool_result",
+              tool_use_id: `tu-${agentLoopCallCount}`,
+              content: "output",
+              is_error: false,
+            },
+          ] as ContentBlock[],
+        },
+      ];
-      // Second call (after compaction): complete
       onEvent({
         type: "message_complete",
         message: {
           role: "assistant",
           content: [
-            { type: "text", text: "completed after mid-loop compaction" },
+            { type: "text", text: `Tool call ${agentLoopCallCount}` },
+            {
+              type: "tool_use",
+              id: `tu-${agentLoopCallCount}`,
+              name: "bash",
+              input: { command: "ls" },
+            },
           ],
         },
       });
       onEvent({
         type: "usage",
-        inputTokens: 60_000,
-        outputTokens: 100,
+        inputTokens: 100,
+        outputTokens: 50,
         model: "test-model",
-        providerDurationMs: 200,
+        providerDurationMs: 100,
       });
-      return [
-        ...messages,
-        {
-          role: "assistant" as const,
-          content: [
-            { type: "text", text: "completed after mid-loop compaction" },
-          ] as ContentBlock[],
+      // Always yield at checkpoint — simulates compaction not helping
+      if (onCheckpoint) {
+        const decision = onCheckpoint({
+          turnIndex: 0,
+          toolCount: 1,
+          hasToolUse: true,
+          history: withProgress,
+        });
+        if (decision === "yield") {
+          return withProgress;
+        }
+      }
+      return withProgress;
+    };
+    let compactionCallCount = 0;
+    // Convergence reducer: reduce tokens enough to succeed
+    let convergenceReducerCalled = false;
+    mockReducerStepFn = (msgs: Message[]) => {
+      convergenceReducerCalled = true;
+      return {
+        messages: msgs,
+        tier: "forced_compaction",
+        state: {
+          appliedTiers: ["forced_compaction"],
+          injectionMode: "full",
+          exhausted: true,
         },
-      ];
+        estimatedTokens: 80_000,
+      };
     };
     const ctx = makeCtx({
@@ -1530,7 +1716,8 @@ describe("session-agent-loop overflow recovery (JARVIS-110)", () => {
       contextWindowManager: {
         shouldCompact: () => ({ needed: false, estimatedTokens: 0 }),
         maybeCompact: async () => {
-          compactionCalled = true;
+          compactionCallCount++;
+          // Compaction "succeeds" but doesn't actually shrink enough
           return {
             compacted: true,
             messages: [
@@ -1539,45 +1726,32 @@ describe("session-agent-loop overflow recovery (JARVIS-110)", () => {
                 content: [{ type: "text", text: "Hello" }],
               },
             ] as Message[],
-            compactedPersistedMessages: 8,
-            summaryText: "Compacted large tool results",
-            previousEstimatedInputTokens: 175_000,
-            estimatedInputTokens: 60_000,
+            compactedPersistedMessages: 5,
+            summaryText: "Compaction summary",
+            previousEstimatedInputTokens: 170_000,
+            estimatedInputTokens: 165_000, // barely reduced
             maxInputTokens: 200_000,
             thresholdTokens: 160_000,
-            compactedMessages: 15,
+            compactedMessages: 10,
             summaryCalls: 1,
-            summaryInputTokens: 800,
-            summaryOutputTokens: 300,
+            summaryInputTokens: 500,
+            summaryOutputTokens: 200,
             summaryModel: "mock-model",
           };
         },
       } as unknown as AgentLoopSessionContext["contextWindowManager"],
     });
-    await runAgentLoopImpl(ctx, "hello", "msg-1", (msg) => {
-      events.push(msg);
-      // Track if context_too_large was ever emitted
-      if (
-        msg.type === "session_error" &&
-        "code" in msg &&
-        msg.code === "SESSION_PROCESSING_FAILED"
-      ) {
-        contextTooLargeEmitted = true;
-      }
-    });
-    // Compaction should have been triggered by mid-loop budget check
-    expect(compactionCalled).toBe(true);
+    await runAgentLoopImpl(ctx, "hello", "msg-1", (msg) => events.push(msg));
-    // The provider should NEVER have rejected with context_too_large
-    expect(contextTooLargeEmitted).toBe(false);
+    // 1 initial auto-compact + 3 mid-loop compaction attempts = 4 total
+    expect(compactionCallCount).toBe(4);
-    // Agent loop called twice: once (yielded at tool 3), once after compaction
-    expect(agentLoopCallCount).toBe(2);
+    // Agent loop: 1 initial + 3 mid-loop re-entries + 1 convergence re-run = 5 calls
+    expect(agentLoopCallCount).toBe(5);
-    // No session_error
-    const sessionError = events.find((e) => e.type === "session_error");
-    expect(sessionError).toBeUndefined();
+    // After exhausting mid-loop attempts, the convergence loop should
+    // have been triggered (contextTooLargeDetected set to true)
+    expect(convergenceReducerCalled).toBe(true);
   });
 });