npm - @chances-ai/engine - Versions diffs - 26.0.0 → 27.0.0 - Mend

@chances-ai/engine 26.0.0 → 27.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (60)

package/dist/ai/adapters/ai-sdk-stream.d.ts.map +1 -1
package/dist/ai/adapters/ai-sdk-stream.js +6 -1
package/dist/ai/adapters/ai-sdk-stream.js.map +1 -1
package/dist/ai/index.d.ts +1 -0
package/dist/ai/index.d.ts.map +1 -1
package/dist/ai/index.js +1 -0
package/dist/ai/index.js.map +1 -1
package/dist/ai/overflow.d.ts +40 -0
package/dist/ai/overflow.d.ts.map +1 -0
package/dist/ai/overflow.js +84 -0
package/dist/ai/overflow.js.map +1 -0
package/dist/ai/types.d.ts +8 -1
package/dist/ai/types.d.ts.map +1 -1
package/dist/core/engine.d.ts +205 -10
package/dist/core/engine.d.ts.map +1 -1
package/dist/core/engine.js +539 -272
package/dist/core/engine.js.map +1 -1
package/dist/core/index.d.ts +1 -1
package/dist/core/index.d.ts.map +1 -1
package/dist/core/index.js.map +1 -1
package/dist/core/task-tool.d.ts.map +1 -1
package/dist/core/task-tool.js +6 -0
package/dist/core/task-tool.js.map +1 -1
package/dist/session/index.d.ts +11 -0
package/dist/session/index.d.ts.map +1 -1
package/dist/session/index.js +22 -1
package/dist/session/index.js.map +1 -1
package/dist/tools/bash-readonly.d.ts +26 -0
package/dist/tools/bash-readonly.d.ts.map +1 -0
package/dist/tools/bash-readonly.js +130 -0
package/dist/tools/bash-readonly.js.map +1 -0
package/dist/tools/builtins/bash.d.ts.map +1 -1
package/dist/tools/builtins/bash.js +12 -0
package/dist/tools/builtins/bash.js.map +1 -1
package/dist/tools/builtins/edit.d.ts.map +1 -1
package/dist/tools/builtins/edit.js +18 -12
package/dist/tools/builtins/edit.js.map +1 -1
package/dist/tools/builtins/todo.d.ts +33 -0
package/dist/tools/builtins/todo.d.ts.map +1 -0
package/dist/tools/builtins/todo.js +245 -0
package/dist/tools/builtins/todo.js.map +1 -0
package/dist/tools/builtins/write.d.ts.map +1 -1
package/dist/tools/builtins/write.js +10 -5
package/dist/tools/builtins/write.js.map +1 -1
package/dist/tools/concurrency.d.ts +37 -0
package/dist/tools/concurrency.d.ts.map +1 -0
package/dist/tools/concurrency.js +50 -0
package/dist/tools/concurrency.js.map +1 -0
package/dist/tools/file-lock.d.ts +22 -0
package/dist/tools/file-lock.d.ts.map +1 -0
package/dist/tools/file-lock.js +85 -0
package/dist/tools/file-lock.js.map +1 -0
package/dist/tools/index.d.ts +4 -0
package/dist/tools/index.d.ts.map +1 -1
package/dist/tools/index.js +4 -0
package/dist/tools/index.js.map +1 -1
package/dist/tools/types.d.ts +31 -0
package/dist/tools/types.d.ts.map +1 -1
package/dist/tools/types.js.map +1 -1
package/package.json +3 -3

package/dist/core/engine.js CHANGED

@@ -1,24 +1,57 @@
-import { AppError, ErrorCode, ModelSelection, createId, runWithCwd, } from "@chances-ai/runtime";
+import { AppError, ErrorCode, ModelSelection, createId, runConcurrent, runWithCwd, } from "@chances-ai/runtime";
 import { refreshActiveMarker } from "./worktree/index.js";
-import { classifyProviderError, defaultRetryConfig, estimateCost, } from "../ai/index.js";
-import { ASK_USER_QUESTION_TOOL_NAME, READONLY_CATEGORIES } from "../tools/index.js";
-/**
- * (3.5 — codex Round-1 SHOULD-FIX #4) Anthropic-only overflow
- * detection. Three patterns from pi's overflow catalogue
- * (`pi/packages/ai/src/utils/overflow.ts:11`). The other 10
- * providers we ship stay deferred until real-world telemetry shows
- * a hit; their error shapes are less stable and a wrong regex would
- * surface as silent overflow loops.
- */
-function isAnthropicOverflowError(adapterId, message) {
-    if (adapterId !== "anthropic")
-        return false;
-    return (/prompt is too long/i.test(message) ||
-        /request_too_large/i.test(message) ||
-        /maximum.*context.*length/i.test(message));
+import { classifyProviderError, defaultRetryConfig, estimateCost, isContextOverflow, } from "../ai/index.js";
+import { ASK_USER_QUESTION_TOOL_NAME, READONLY_CATEGORIES, TODO_TOOL_NAME, isCallConcurrencySafe, partitionToolCalls, } from "../tools/index.js";
+/** (7.7 §6) Soft turn ceiling — raised from the old 12 (too low for a
+ *  daily-driver agent). At this many tool iterations without a final answer the
+ *  interactive engine PAUSES gracefully ("continue?") rather than throwing. The
+ *  real runaway guard is the token/compaction machinery + the absolute hard cap
+ *  below, not a low turn count. */
+export const DEFAULT_MAX_TURNS = 50;
+/** (7.7 §6) Absolute runaway backstop. Even a caller that sets a huge `maxTurns`
+ *  can't loop past this; hitting it always errors (`hard-cap`), never a silent
+ *  pause. Env-tunable. */
+function hardMaxTurns() {
+    const raw = Number.parseInt(process.env.CHANCES_HARD_MAX_TURNS ?? "", 10);
+    return Number.isFinite(raw) && raw > 0 ? raw : 500;
+}
+/** (7.7 §3.3) Max tool calls that run concurrently within one parallel batch.
+ *  Env-tunable (claude-code's `CLAUDE_CODE_MAX_TOOL_USE_CONCURRENCY` analog);
+ *  defaults to 10. A non-numeric / non-positive value falls back to 10. */
+function maxToolConcurrency() {
+    const raw = Number.parseInt(process.env.CHANCES_MAX_TOOL_CONCURRENCY ?? "", 10);
+    return Number.isFinite(raw) && raw > 0 ? raw : 10;
+}
+/** (7.7 §3.5) Aggregate-size budget for one batch's tool results, in chars.
+ *  Env-tunable; defaults to 200_000 (claude-code MAX_TOOL_RESULTS_PER_MESSAGE_CHARS). */
+function batchResultBudget() {
+    const raw = Number.parseInt(process.env.CHANCES_MAX_BATCH_RESULT_CHARS ?? "", 10);
+    return Number.isFinite(raw) && raw > 0 ? raw : 200_000;
+}
+/** (7.7 §3.5) When the combined size of a batch's results exceeds the budget,
+ *  truncate the LARGEST results (in place) until under budget, leaving a note.
+ *  Bounds a fan-out of parallel reads from blowing the next request's context in
+ *  one user message. Mutates the passed map's value `.output` fields. */
+function applyBatchResultBudget(resultByCall) {
+    const budget = batchResultBudget();
+    let total = 0;
+    for (const r of resultByCall.values())
+        total += r.output.length;
+    if (total <= budget)
+        return;
+    const bySize = [...resultByCall.values()].sort((a, b) => b.output.length - a.output.length);
+    for (const r of bySize) {
+        if (total <= budget)
+            break;
+        const over = total - budget;
+        const keep = Math.max(2048, r.output.length - over);
+        if (keep >= r.output.length)
+            continue;
+        const omitted = r.output.length - keep;
+        r.output = `${r.output.slice(0, keep)}\n[…${omitted} chars truncated — batch result budget (${budget}) exceeded]`;
+        total -= omitted;
+    }
 }
-/** Engine default when no caller-supplied or config-supplied value applies. */
-export const DEFAULT_MAX_TURNS = 12;
 /** Default base prompt the engine uses when no `systemBaseOverride` is set.
  * Exported so tests can assert "is this the default or an agent override?" and
  * so the doc + plugin authors can read the exact text. */
@@ -56,6 +89,69 @@ export class AgentEngine {
     getSelection() {
         return this.selection;
     }
+    /**
+     * (7.7 §4) Queue a user steering message to be injected at the next turn
+     * boundary of the in-flight turn (or the top of the next turn if idle). A
+     * no-op when no `steering` queue was provided. The CLI / serve driver call
+     * this on a submit-while-busy instead of rejecting the input.
+     */
+    enqueueSteering(text) {
+        this.opts.steering?.enqueue(text);
+    }
+    /** (7.7 §4) Peek the steering queue for entries not yet injected this turn,
+     *  render each as a user message, and append its id to `injectedSteerIds` for
+     *  post-persist ack. Peek-not-drain: a cancelled turn re-delivers. */
+    drainSteering(injectedSteerIds) {
+        const seen = new Set(injectedSteerIds);
+        const out = [];
+        for (const e of this.opts.steering?.peek() ?? []) {
+            if (seen.has(e.id))
+                continue;
+            out.push({ role: "user", content: [{ type: "text", text: e.text }] });
+            injectedSteerIds.push(e.id);
+        }
+        return out;
+    }
+    /** (7.7 §4) Iteration-boundary drain: background-task notifications that
+     *  arrived mid-turn (combined render, same as turn-top) PLUS steering. Both
+     *  peek-not-drain; ids recorded for post-persist ack. */
+    drainBoundaryInjections(injectedNotifIds, injectedSteerIds) {
+        const out = [];
+        const fresh = (this.opts.backgroundTasks?.peekPendingNotifications() ?? []).filter((n) => !injectedNotifIds.has(n.taskId));
+        if (fresh.length > 0) {
+            out.push({
+                role: "user",
+                content: [{ type: "text", text: fresh.map(renderTaskNotificationXml).join("\n") }],
+            });
+            for (const n of fresh)
+                injectedNotifIds.add(n.taskId);
+        }
+        out.push(...this.drainSteering(injectedSteerIds));
+        return out;
+    }
+    /** (7.7 §5.3) Build an incomplete-todos reminder when the model stopped with
+     *  open (pending/in_progress) todos and the per-turn cap isn't exhausted.
+     *  Returns a `user`-role system-reminder message (no `developer` role exists),
+     *  or null to let the turn resolve. */
+    maybeTodoReminder(count) {
+        const max = this.opts.todoReminderMax ?? 3;
+        if (max <= 0 || count >= max)
+            return null;
+        const open = [];
+        for (const p of this.opts.session.getTodoPhases()) {
+            for (const t of p.tasks) {
+                if (t.status === "pending" || t.status === "in_progress")
+                    open.push(t.content);
+            }
+        }
+        if (open.length === 0)
+            return null;
+        const list = open.map((c) => `"${c}"`).join(", ");
+        const text = `<system-reminder>You stopped with ${open.length} incomplete todo item(s): ${list}. ` +
+            `Continue working through them, or mark each done/abandoned with the todo tool. ` +
+            `(Reminder ${count + 1}/${max})</system-reminder>`;
+        return { role: "user", content: [{ type: "text", text }] };
+    }
     /** Bus-emit wrapper. Three responsibilities (3.4):
      *  1. Suppress lifecycle frames (`turn:*`, `error`) when the engine is a
      *     child (`suppressLifecycleEvents=true`). Codex Round-1 MUST-FIX #2.
@@ -115,7 +211,7 @@ export class AgentEngine {
         return this.runTurnImpl(prompt, token, opts.expandMentions !== false, opts.trustedContext);
     }
     async runTurnImpl(prompt, token, expandMentions, trustedContext) {
-        const { router, tools, gate, session, plugins, backgroundTasks } = this.opts;
+        const { tools, session, plugins, backgroundTasks } = this.opts;
         const turnId = createId("turn");
         // (3.6) Carry the active session id on `turn:start` so the OTel
         // exporter can stamp `chances.gen_ai.session.id` correctly across
@@ -174,248 +270,41 @@ export class AgentEngine {
             turnMessages.push({ role: "user", content: [{ type: "text", text: trustedContext }] });
         }
         turnMessages.push({ role: "user", content: [{ type: "text", text: prompt }] });
+        // (7.7 §4) Drain any steering queued before this turn started, plus track
+        // which notifications/steering ids have been injected so the iteration-
+        // boundary drain (and the post-persist ack) don't double-count.
+        const injectedNotifIds = new Set(notificationIds);
+        const injectedSteerIds = [];
+        turnMessages.push(...this.drainSteering(injectedSteerIds));
         const result = { text: "", inputTokens: 0, outputTokens: 0, costUsd: 0 };
-        const maxTurns = this.opts.maxTurns ?? this.opts.maxIterations ?? DEFAULT_MAX_TURNS;
-        let resolved = false;
-        // (3.5 — codex Round-1 MUST-FIX #1) `result.inputTokens` aggregates
-        // every `usage` event across the multi-step tool loop. The compactor's
-        // threshold check needs the LAST stream's input only — that's what
-        // the provider will count for the NEXT request, plus the new user
-        // prompt. Tracked separately here; emitted via `usage:turn`.
-        let lastRequestInputTokens = 0;
-        // (3.5 — codex Round-1 SHOULD-FIX #4) Per-turn flag. Anthropic
-        // overflow recovery fires AT MOST ONCE per turn — a second 413 after
-        // we already compacted is an actual ceiling we can't paper over.
-        let recoveredFromOverflow = false;
-        // (3.5) Tracked at the outer scope so the post-turn compaction check
-        // can read `route.model`. The for-loop reuses the variable across
-        // iterations; we just need the most recent value to query the model
-        // descriptor for `contextWindow`.
-        let lastRoute;
-        for (let i = 0; i < maxTurns; i++) {
-            token.throwIfCancelled();
-            // Re-read selection per turn so a `/model` switch between turns lands on
-            // the next request without rebuilding the engine.
-            const choice = this.selection.get();
-            const route = router.pick({
-                preferredModel: choice.model,
-                preferredProvider: choice.provider,
-                needsTools: toolDefs.length > 0,
-            });
-            lastRoute = route;
-            const retry = this.opts.retry ?? defaultRetryConfig;
-            let textBuffer = "";
-            let calls = [];
-            let attempt = 0;
-            while (true) {
-                token.throwIfCancelled();
-                textBuffer = "";
-                calls = [];
-                // (3.5) Reset per attempt — only the LAST successful stream's
-                // last `usage.inputTokens` carries forward into the post-turn
-                // compactor check.
-                let attemptLastInputTokens = 0;
-                // (6.5b review) Stage usage in attempt-local accumulators instead of
-                // folding it straight into the turn-level `result`. A retryable
-                // mid-stream error (e.g. ECONNRESET after a partial stream) discards
-                // the attempt and restreams; folding here would double-count tokens
-                // and double-emit `usage`. We only merge into `result` + emit once
-                // the stream completes (`streamError === null`, below).
-                let attemptInputTokens = 0;
-                let attemptOutputTokens = 0;
-                let attemptCostUsd = 0;
-                let streamError = null;
-                const stream = route.adapter.stream({ model: route.model.id, system, messages: [...session.messages(), ...turnMessages], tools: toolDefs }, token.signal);
-                for await (const event of stream) {
-                    // Enforce cancellation per-event so a provider that ignores or
-                    // queues past the AbortSignal can't keep dripping text/tool-calls
-                    // into a turn the user already abandoned. Particularly important
-                    // for subagents: the parent's abort must stop the child instantly,
-                    // not wait until the child stream naturally ends.
-                    token.throwIfCancelled();
-                    switch (event.type) {
-                        case "text-delta":
-                            textBuffer += event.text;
-                            this.emit({ type: "assistant:delta", turnId, text: event.text });
-                            break;
-                        case "tool-call":
-                            // Defer the `tool:call` bus emit until the execution loop
-                            // below — pairs each emit atomically with its matching
-                            // `tool:result`. Emitting here would leave orphan call frames
-                            // on the bus whenever the turn aborts between stream-end and
-                            // tool execution (Ctrl-C) or a retry attempt discards the
-                            // collected calls and tries again on attempt N+1.
-                            calls.push(event.call);
-                            break;
-                        case "usage": {
-                            const costUsd = estimateCost(route.model, event.usage);
-                            // (6.5b review) Accumulate into attempt-local totals; the merge
-                            // into `result` + the `usage` emit happen once the stream
-                            // succeeds, so a discarded retry attempt can't double-count.
-                            attemptInputTokens += event.usage.inputTokens;
-                            attemptOutputTokens += event.usage.outputTokens;
-                            attemptCostUsd += costUsd;
-                            // (3.5) Track most recent stream's last input count for the
-                            // post-turn compaction threshold check. NOT the aggregate.
-                            attemptLastInputTokens = event.usage.inputTokens;
-                            break;
-                        }
-                        case "error":
-                            // Defer the bus emit until after cancellation check — if the
-                            // user just hit Ctrl-C, the SDK's abort path surfaces as a
-                            // stream error and we shouldn't shout "PROVIDER" at them.
-                            streamError = event.message;
-                            break;
-                        case "done":
-                            break;
-                    }
-                    if (streamError !== null)
-                        break;
-                }
-                if (streamError === null) {
-                    // Stream completed successfully — NOW fold this attempt's usage
-                    // into the turn-level `result` and emit the (aggregated) `usage`
-                    // frame. Deferred to here so a discarded retry attempt's partial
-                    // usage never double-counts (6.5b review).
-                    result.inputTokens += attemptInputTokens;
-                    result.outputTokens += attemptOutputTokens;
-                    result.costUsd += attemptCostUsd;
-                    if (attemptInputTokens > 0 || attemptOutputTokens > 0 || attemptCostUsd > 0) {
-                        this.emit({
-                            type: "usage",
-                            model: route.model.id,
-                            inputTokens: attemptInputTokens,
-                            outputTokens: attemptOutputTokens,
-                            costUsd: attemptCostUsd,
-                        });
-                    }
-                    // Persist this attempt's last input-token count for the post-turn
-                    // compaction check.
-                    lastRequestInputTokens = attemptLastInputTokens;
-                    break;
-                }
-                // If the abort signal fired during the stream, the error we just
-                // captured is the SDK reacting to the cancellation — treat it as
-                // Cancelled rather than misclassifying as a provider error.
-                token.throwIfCancelled();
-                const decision = classifyProviderError(streamError);
-                const terminal = !decision.retryable || attempt >= retry.delaysMs.length;
-                if (terminal) {
-                    // (3.5 — codex Round-1 SHOULD-FIX #4) Anthropic-only reactive
-                    // overflow recovery. Catches the 413 BEFORE the terminal throw,
-                    // runs compaction with `reason: "overflow"` (bypasses circuit
-                    // breaker), and retries the stream once with the compacted
-                    // history. Wider 10-provider catalogue stays deferred until
-                    // telemetry shows a real-world miss.
-                    if (this.opts.compactor &&
-                        !recoveredFromOverflow &&
-                        isAnthropicOverflowError(route.adapter.id, streamError)) {
-                        recoveredFromOverflow = true;
-                        try {
-                            const recovery = await this.opts.compactor.compact("overflow", token.signal);
-                            if (recovery.ok) {
-                                // Reset the attempt counter so we get the full retry
-                                // budget against the now-smaller request, AND clear the
-                                // accumulated message buffer that the failed attempt
-                                // wrote into `turnMessages`. The retry rebuilds from
-                                // `session.messages()` (which now reflects compaction)
-                                // plus this turn's prepended user/notification messages.
-                                // (6.5b follow-up) Mirror the normal retry path's partial-undo:
-                                // if this attempt streamed partial text before the overflow, drop
-                                // it so the post-compaction restream doesn't append onto a stale
-                                // partial. A 413 usually precedes any delta, so this is typically
-                                // a no-op — emitted only when text was actually shown.
-                                if (textBuffer.length > 0) {
-                                    this.emit({ type: "assistant:reset", turnId });
-                                }
-                                attempt = 0;
-                                continue;
-                            }
-                        }
-                        catch (e) {
-                            // Compactor.compact never throws by contract, but be defensive:
-                            // a malformed Compactor implementation shouldn't break the
-                            // original error path.
-                            this.emit({
-                                type: "log",
-                                level: "warn",
-                                message: `overflow compaction unexpectedly threw: ${e.message ?? e}`,
-                            });
-                        }
-                    }
-                    // Emit the bus `error` ONLY when we're about to throw. Emitting on
-                    // every retry attempt would cause `runPrompt`'s `lastError`
-                    // listener to record a transient failure as the turn's exit code
-                    // even after a later attempt succeeded (codex re-review finding).
-                    // Subagent engines suppress this emit — see `suppressTerminalErrors`.
-                    if (!this.opts.suppressTerminalErrors) {
-                        this.emit({ type: "error", code: "PROVIDER", message: streamError });
-                    }
-                    throw new AppError(ErrorCode.Provider, `Provider error (${decision.reason}): ${streamError}`);
-                }
-                const delayMs = retry.delaysMs[attempt] ?? 0;
-                this.emit({
-                    type: "log",
-                    level: "warn",
-                    message: `provider stream errored (${decision.reason}); retry ${attempt + 1}/${retry.delaysMs.length} after ${delayMs}ms; original: ${streamError}`,
-                });
-                // (6.5b review) If this attempt already streamed partial assistant text
-                // to the bus, the upcoming restream would APPEND a fresh copy on top
-                // (consumers don't replace) — duplicating it on screen. Tell consumers
-                // to drop the in-flight partial first. `usage`/`tool-call` are deferred
-                // (not yet on the bus), so only `textBuffer` needs undoing.
-                if (textBuffer.length > 0) {
-                    this.emit({ type: "assistant:reset", turnId });
-                }
-                attempt += 1;
-                await sleepCancellable(delayMs, token);
-            }
-            if (calls.length === 0) {
-                const content = [{ type: "text", text: textBuffer }];
-                turnMessages.push({ role: "assistant", content });
-                result.text = textBuffer;
-                this.emit({ type: "assistant:message", turnId, text: textBuffer });
-                await safeRunHook(plugins, "afterResponse", { text: textBuffer }, this.opts.bus);
-                resolved = true;
-                break;
-            }
-            // Record the assistant message that requested the tools.
-            const assistantContent = [];
-            if (textBuffer)
-                assistantContent.push({ type: "text", text: textBuffer });
-            for (const call of calls) {
-                assistantContent.push({ type: "tool-call", callId: call.callId, name: call.name, args: call.args });
-            }
-            turnMessages.push({ role: "assistant", content: assistantContent });
-            // Execute each tool through the permission gate, then feed results back.
-            for (const call of calls) {
-                // Check cancellation before each tool — a long batch of tool-calls
-                // from one model turn shouldn't keep running after the user aborts.
-                token.throwIfCancelled();
-                // Emit `tool:call` here (not in the stream loop) so each call is
-                // paired with its `tool:result` from runTool — keeps the bus
-                // observably balanced for subscribers (TUI, NDJSON, telemetry).
-                this.emit({ type: "tool:call", callId: call.callId, name: call.name, args: call.args });
-                const outcome = await this.runTool(call, token);
-                turnMessages.push({
-                    role: "tool",
-                    content: [{ type: "tool-result", callId: call.callId, name: call.name, output: outcome.output, ok: outcome.ok }],
-                });
-                // Round 3 codex SHOULD-FIX: check cancellation AFTER each tool
-                // result too. A tool that catches cancellation internally and
-                // returns `ok:false` (e.g. `bash` returning `(cancelled)`) does
-                // NOT re-throw, so without this check the loop would continue
-                // to the next turn and could exhaust `maxTurns`, surfacing as
-                // a misleading `PROVIDER: Reached maximum number of turns`
-                // instead of the user's actual `Cancelled` intent.
-                token.throwIfCancelled();
-            }
-        }
+        // (7.7 §6) Soft ceiling (default 50), clamped to the absolute hard cap so a
+        // huge caller-supplied `maxTurns` can't loop forever. `hitHardCap` records
+        // whether the loop will stop at the hard backstop (always errors) vs the
+        // soft ceiling (interactive pauses).
+        const softMax = this.opts.maxTurns ?? this.opts.maxIterations ?? DEFAULT_MAX_TURNS;
+        const hardMax = hardMaxTurns();
+        const maxTurns = Math.min(softMax, hardMax);
+        const hitHardCap = softMax > hardMax;
+        // (7.7 §3.8) Per-turn mutable accumulators (formerly a fistful of inline
+        // `let`s). The loop units fold usage / track overflow recovery / record the
+        // last route through this. `result` aliases `state.result`, read back below
+        // to finalize the turn.
+        const state = { result, lastRequestInputTokens: 0, recoveredFromOverflow: false };
+        // (7.7 §3.8) Hand the assembled turn to the hook-driven loop. The default
+        // hooks bind this engine's own units (stream / tool-batch / steering / todo
+        // reminder); task 08's coordinator can pass ALTERNATIVE hooks WITHOUT
+        // forking the loop body — that injectable seam is the Axis 3.8 deliverable.
+        const { resolved } = await this.runAgentLoop({ turnId, system, toolDefs, turnMessages, injectedNotifIds, injectedSteerIds, maxTurns, state, token }, this.defaultLoopHooks());
         session.appendTurn(turnMessages);
-        // Codex Round-2 MUST-FIX #3: ack notifications AFTER appendTurn so a
-        // cancellation between peek and persist leaves the queue intact.
-        if (notificationIds.length > 0) {
-            backgroundTasks?.acknowledgeNotifications(notificationIds);
+        // Codex Round-2 MUST-FIX #3: ack notifications + steering AFTER appendTurn so
+        // a cancellation between peek and persist leaves both queues intact (the
+        // next turn re-delivers). `injectedNotifIds` covers turn-top AND mid-turn
+        // notifications; `injectedSteerIds` covers all injected steering.
+        if (injectedNotifIds.size > 0) {
+            backgroundTasks?.acknowledgeNotifications([...injectedNotifIds]);
+        }
+        if (injectedSteerIds.length > 0) {
+            this.opts.steering?.acknowledge(injectedSteerIds);
         }
         // (3.5) Per-turn aggregate event. Lifecycle suppression honored —
         // child engines (with `suppressLifecycleEvents`) skip this too.
@@ -428,7 +317,7 @@ export class AgentEngine {
                 inputTokens: result.inputTokens,
                 outputTokens: result.outputTokens,
                 costUsd: result.costUsd,
-                lastRequestInputTokens,
+                lastRequestInputTokens: state.lastRequestInputTokens,
             });
         }
         // (3.5 — codex Round-1 MUST-FIX #4) Threshold-triggered compaction.
@@ -437,10 +326,10 @@ export class AgentEngine {
         // `compaction:start` / `compaction:end` frames inside this await.
         // The compactor itself swallows all failures into ok:false (never
         // throws by contract); `Cancelled` propagates as `cancelled` reason.
-        if (this.opts.compactor && resolved && lastRoute) {
+        if (this.opts.compactor && resolved && state.lastRoute) {
             const should = this.opts.compactor.shouldCompact({
-                lastRequestInputTokens,
-                model: lastRoute.model,
+                lastRequestInputTokens: state.lastRequestInputTokens,
+                model: state.lastRoute.model,
             });
             if (should) {
                 await this.opts.compactor.compact("threshold", token.signal);
@@ -462,20 +351,398 @@ export class AgentEngine {
         }
         this.emit({ type: "turn:end", turnId });
         if (!resolved) {
-            // Loop exhausted the turn budget without the model returning a final
-            // answer. Match claude-code's terminal-error pattern (`QueryEngine.ts:914`)
-            // — emit a bus error and throw so the caller sees a concrete signal
-            // instead of an empty result. The turn is still persisted above so
-            // `/resume` can pick up the partial work.
-            // Subagent engines suppress this emit — see `suppressTerminalErrors`.
+            // (7.7 §6) The loop exhausted its budget without a final answer. The turn
+            // is already persisted above, so `/resume` (or the next message / steering)
+            // picks up the partial work. Two outcomes:
+            //   - SOFT pause (interactive default): emit `turn:paused` and return
+            //     cleanly — "reached max actions, continue?". No scary throw.
+            //   - THROW: when the caller opted out of pausing (`pauseOnMaxTurns:false`,
+            //     i.e. `-p`/serve/SDK automation) OR the absolute hard cap was hit.
+            //     A precise `MaxTurns` code (not the misleading PROVIDER) → non-zero
+            //     exit for automation.
+            const pause = (this.opts.pauseOnMaxTurns ?? true) && !hitHardCap;
+            if (pause) {
+                // (codex R2 MUST-FIX) Emit `turn:paused` ONLY on the actual pause — the
+                // throw path (automation / hard-cap) must NOT signal a "continue?"
+                // affordance it then contradicts with an error. Soft pause always means
+                // the soft ceiling (pause requires !hitHardCap), hence reason max-turns.
+                if (!this.opts.suppressLifecycleEvents) {
+                    this.emit({ type: "turn:paused", turnId, reason: "max-turns", turnsTaken: maxTurns });
+                }
+                return result;
+            }
+            // Throw path: `-p`/serve/SDK automation, or the absolute hard cap. A
+            // precise `MaxTurns` error (not the misleading PROVIDER) → non-zero exit.
             const message = `Reached maximum number of turns (${maxTurns})`;
             if (!this.opts.suppressTerminalErrors) {
-                this.emit({ type: "error", code: "PROVIDER", message });
+                this.emit({ type: "error", code: ErrorCode.MaxTurns, message });
             }
-            throw new AppError(ErrorCode.Provider, message);
+            throw new AppError(ErrorCode.MaxTurns, message);
         }
         return result;
     }
+    /**
+     * (7.7 §3.8) The hook-driven turn loop — a thin orchestrator over
+     * {@link AgentLoopHooks}, mirroring pi's `runLoop`
+     * (`packages/agent/src/agent-loop.ts`). Each iteration streams one assistant
+     * response, executes its tool batch, then drains boundary injections (steering
+     * + mid-turn notifications) before the next stream. Returns whether the turn
+     * reached a final answer (`resolved`) or exhausted `maxTurns`.
+     *
+     * Behaviour is IDENTICAL to the pre-refactor inline loop — `defaultLoopHooks`
+     * binds the same units; the seam exists so task 08's coordinator can inject
+     * alternatives without copying this body.
+     */
+    async runAgentLoop(run, hooks) {
+        const { token, turnId } = run;
+        // (7.7 §5.3) Per-turn incomplete-todos reminder counter. Resets every
+        // runTurn (= every new user prompt), matching oh-my-pi.
+        let todoReminderCount = 0;
+        for (let i = 0; i < run.maxTurns; i++) {
+            token.throwIfCancelled();
+            // Re-read selection per turn so a `/model` switch between turns lands on
+            // the next request without rebuilding the engine.
+            const choice = this.selection.get();
+            const route = this.opts.router.pick({
+                preferredModel: choice.model,
+                preferredProvider: choice.provider,
+                needsTools: run.toolDefs.length > 0,
+            });
+            run.state.lastRoute = route;
+            const { text, calls } = await hooks.streamAssistant(route, run);
+            if (calls.length === 0) {
+                run.turnMessages.push({ role: "assistant", content: [{ type: "text", text }] });
+                run.state.result.text = text;
+                this.emit({ type: "assistant:message", turnId, text });
+                // (7.7 §5.3) If the model stopped with incomplete todos, nudge it to
+                // finish (capped). Inject a user-role reminder and continue the loop
+                // instead of resolving — boosts autonomous completion (oh-my-pi
+                // `#checkTodoCompletion`). The reminder rides the maxTurns budget.
+                const reminder = hooks.incompleteTodoReminder(todoReminderCount);
+                if (reminder) {
+                    todoReminderCount += 1;
+                    run.turnMessages.push(reminder);
+                    continue;
+                }
+                await safeRunHook(this.opts.plugins, "afterResponse", { text }, this.opts.bus);
+                return { resolved: true };
+            }
+            // Record the assistant message that requested the tools.
+            const assistantContent = [];
+            if (text)
+                assistantContent.push({ type: "text", text });
+            for (const call of calls) {
+                assistantContent.push({ type: "tool-call", callId: call.callId, name: call.name, args: call.args });
+            }
+            run.turnMessages.push({ role: "assistant", content: assistantContent });
+            // (7.7 §3) Execute the batch (parallel safe-batch / serial unsafe;
+            // submission order preserved; all-settled on cancellation).
+            const batch = await hooks.executeToolBatch(calls, token, turnId);
+            run.turnMessages.push(...batch.messages);
+            // (7.7 §6.2) Model-signalled terminate: EVERY result asked to stop → end
+            // gracefully (transcript intact) instead of streaming another turn.
+            if (batch.terminate) {
+                run.state.result.text = text;
+                return { resolved: true };
+            }
+            // (7.7 §4) Iteration boundary: inject steering the user typed mid-turn +
+            // bg-task notifications that completed mid-turn, so they reach the model on
+            // the NEXT stream of this turn (claude-code mid-turn drain).
+            run.turnMessages.push(...hooks.getBoundaryMessages(run.injectedNotifIds, run.injectedSteerIds));
+        }
+        return { resolved: false };
+    }
+    /**
+     * (7.7 §3.8) The default loop hooks — an object binding the engine's own
+     * units. An object (not the methods passed directly) so the shape is a
+     * documented, swappable seam: task 08 supplies alternatives; a test can wrap a
+     * single hook to assert the loop dispatches through it.
+     */
+    defaultLoopHooks() {
+        return {
+            streamAssistant: (route, run) => this.streamAssistantResponse(route, run),
+            executeToolBatch: (calls, token, turnId) => this.executeToolBatch(calls, token, turnId),
+            getBoundaryMessages: (notifIds, steerIds) => this.drainBoundaryInjections(notifIds, steerIds),
+            incompleteTodoReminder: (count) => this.maybeTodoReminder(count),
+        };
+    }
+    /**
+     * (7.7 §3.8) Stream ONE assistant response — the careful inner unit (pi
+     * `streamAssistantResponse`). Owns: per-attempt classified retry with backoff,
+     * attempt-local usage staging (a discarded retry never double-counts),
+     * `assistant:reset` partial-undo, and provider-agnostic overflow recovery on
+     * BOTH the terminal-error path (a 413 before the throw) and the success path
+     * (z.ai silent truncation / MiMo length-stop). Folds usage into `state.result`
+     * and records `state.lastRequestInputTokens` once the stream completes. Throws
+     * `AppError(Provider|Cancelled)` on a terminal failure; otherwise returns the
+     * streamed text + requested tool calls.
+     *
+     * @param route Routing decision for this request. Reads `route.adapter.stream`,
+     *   `route.model.id` and `route.model.contextWindow`; `route.model` is also
+     *   passed to `estimateCost`.
+     * @param run Per-turn context. Reads `token`, `turnId`, `system`, `toolDefs`
+     *   and `turnMessages`; mutates `state.result` and
+     *   `state.lastRequestInputTokens`, and hands `state` to
+     *   `maybeRecoverFromOverflow`.
+     * @returns `{ text, calls }` collected by the last completed stream attempt.
+     * @throws AppError with `ErrorCode.Provider` on a terminal stream error that
+     *   overflow recovery did not absorb; cancellation surfaces through
+     *   `token.throwIfCancelled()`.
+     */
+    async streamAssistantResponse(route, run) {
+        const { token, turnId, system, toolDefs, turnMessages, state } = run;
+        const { session } = this.opts;
+        const retry = this.opts.retry ?? defaultRetryConfig;
+        let textBuffer = "";
+        let calls = [];
+        let attempt = 0;
+        while (true) { // one pass per stream attempt; left only via `break` or a throw
+            token.throwIfCancelled();
+            textBuffer = "";
+            calls = [];
+            // (3.5) Reset per attempt — only the LAST successful stream's last
+            // `usage.inputTokens` carries forward into the post-turn compactor check.
+            let attemptLastInputTokens = 0;
+            // (6.5b review) Stage usage in attempt-local accumulators instead of
+            // folding straight into the turn-level `result`. A retryable mid-stream
+            // error discards the attempt and restreams; folding here would
+            // double-count. Merge into `result` + emit once the stream completes.
+            let attemptInputTokens = 0;
+            let attemptOutputTokens = 0;
+            let attemptCostUsd = 0;
+            let streamError = null;
+            // (7.7 §7) The provider's finish reason (when surfaced) — drives the
+            // length-stop overflow signal on the success path.
+            let finishReason;
+            const stream = route.adapter.stream({ model: route.model.id, system, messages: [...session.messages(), ...turnMessages], tools: toolDefs }, token.signal);
+            for await (const event of stream) {
+                // Enforce cancellation per-event so a provider that ignores or queues
+                // past the AbortSignal can't keep dripping into a turn the user already
+                // abandoned. Critical for subagents: the parent's abort stops the child
+                // instantly, not when the child stream naturally ends.
+                token.throwIfCancelled();
+                switch (event.type) {
+                    case "text-delta":
+                        textBuffer += event.text;
+                        this.emit({ type: "assistant:delta", turnId, text: event.text });
+                        break;
+                    case "tool-call":
+                        // Defer the `tool:call` bus emit until the execution loop — pairs
+                        // each emit atomically with its `tool:result`. Emitting here would
+                        // orphan call frames whenever the turn aborts between stream-end and
+                        // tool execution (Ctrl-C) or a retry discards the collected calls.
+                        calls.push(event.call);
+                        break;
+                    case "usage": {
+                        const costUsd = estimateCost(route.model, event.usage);
+                        attemptInputTokens += event.usage.inputTokens;
+                        attemptOutputTokens += event.usage.outputTokens;
+                        attemptCostUsd += costUsd;
+                        // (3.5) Track most recent stream's last input count for the post-turn
+                        // compaction threshold check. NOT the aggregate.
+                        attemptLastInputTokens = event.usage.inputTokens;
+                        break;
+                    }
+                    case "error":
+                        // Defer the bus emit until after the cancellation check — a Ctrl-C
+                        // surfaces as a stream error and we shouldn't shout "PROVIDER".
+                        streamError = event.message;
+                        break;
+                    case "done":
+                        finishReason = event.finishReason;
+                        break;
+                }
+                if (streamError !== null)
+                    break; // stop consuming the stream as soon as an error event was captured
+            }
+            if (streamError === null) {
+                // Stream completed — NOW fold this attempt's usage into the turn-level
+                // `result` and emit the aggregated `usage` frame (deferred so a discarded
+                // retry attempt's partial usage never double-counts).
+                state.result.inputTokens += attemptInputTokens;
+                state.result.outputTokens += attemptOutputTokens;
+                state.result.costUsd += attemptCostUsd;
+                if (attemptInputTokens > 0 || attemptOutputTokens > 0 || attemptCostUsd > 0) {
+                    this.emit({
+                        type: "usage",
+                        model: route.model.id,
+                        inputTokens: attemptInputTokens,
+                        outputTokens: attemptOutputTokens,
+                        costUsd: attemptCostUsd,
+                    });
+                }
+                state.lastRequestInputTokens = attemptLastInputTokens;
+                // (7.7 §7) Success-path overflow: a NORMAL stop whose reported input
+                // exceeds the window (z.ai silently truncated the prompt), or a `length`
+                // stop with zero output (MiMo truncated input, no room to generate). Both
+                // mean the model never saw the full context — recover (compact +
+                // restream) once, exactly like the error path. A no-op for every healthy
+                // stream: `isContextOverflow` needs a real `contextWindow` AND input over
+                // it (or `length` + zero output).
+                const recovered = await this.maybeRecoverFromOverflow({
+                    stopReason: finishReason === "length" ? "length" : "stop",
+                    usage: { input: attemptLastInputTokens, output: attemptOutputTokens },
+                    contextWindow: route.model.contextWindow,
+                }, route, textBuffer, turnId, state, token);
+                if (recovered) {
+                    attempt = 0; // the restream after compaction gets a fresh retry budget
+                    continue;
+                }
+                break;
+            }
+            // If the abort signal fired during the stream, the captured error is the
+            // SDK reacting to cancellation — treat as Cancelled, not a provider error.
+            token.throwIfCancelled();
+            const decision = classifyProviderError(streamError);
+            const terminal = !decision.retryable || attempt >= retry.delaysMs.length; // not retryable, or every configured delay is used up
+            if (terminal) {
+                // (7.7 §7) Error-path overflow recovery: catch a 413 BEFORE the terminal
+                // throw, compact (bypasses the circuit breaker), and restream once with
+                // the now-smaller history. pi's full catalogue replaces the old
+                // Anthropic-only regex so any provider's overflow recovers the same way.
+                const recovered = await this.maybeRecoverFromOverflow({ stopReason: "error", errorMessage: streamError }, route, textBuffer, turnId, state, token);
+                if (recovered) {
+                    attempt = 0; // the restream after compaction gets a fresh retry budget
+                    continue;
+                }
+                // Emit the bus `error` ONLY when about to throw — emitting on every retry
+                // would let `runPrompt`'s `lastError` listener record a transient failure
+                // as the exit code even after a later attempt succeeded. Subagent engines
+                // suppress this (see `suppressTerminalErrors`).
+                if (!this.opts.suppressTerminalErrors) {
+                    this.emit({ type: "error", code: "PROVIDER", message: streamError });
+                }
+                throw new AppError(ErrorCode.Provider, `Provider error (${decision.reason}): ${streamError}`);
+            }
+            const delayMs = retry.delaysMs[attempt] ?? 0; // wait before the next attempt, indexed by attempts already made
+            this.emit({
+                type: "log",
+                level: "warn",
+                message: `provider stream errored (${decision.reason}); retry ${attempt + 1}/${retry.delaysMs.length} after ${delayMs}ms; original: ${streamError}`,
+            });
+            // (6.5b review) If this attempt streamed partial text, the upcoming
+            // restream APPENDS a fresh copy (consumers don't replace) — duplicating it
+            // on screen. Tell consumers to drop the in-flight partial first.
+            if (textBuffer.length > 0) {
+                this.emit({ type: "assistant:reset", turnId });
+            }
+            attempt += 1;
+            await sleepCancellable(delayMs, token);
+        }
+        return { text: textBuffer, calls };
+    }
+    /**
+     * (7.7 §7) Provider-agnostic reactive overflow recovery, shared by the
+     * terminal-error path and the success path. When `signal` reads as a context
+     * overflow AND a compactor is wired AND we haven't already recovered this turn,
+     * compact with `reason: "overflow"` (bypasses the circuit breaker) and report
+     * whether the caller should restream. Telemetry-gated: a non-Anthropic recovery
+     * emits a `log info` so a wrong pattern surfaces as an observable event, not a
+     * silent loop. Fires AT MOST ONCE per turn.
+     */
+    async maybeRecoverFromOverflow(signal, route, textBuffer, turnId, state, token) {
+        if (!this.opts.compactor || state.recoveredFromOverflow)
+            return false;
+        if (!isContextOverflow(signal))
+            return false;
+        state.recoveredFromOverflow = true;
+        // (7.7 §7) Telemetry-gated: flag a non-Anthropic recovery so a wrong pattern
+        // surfaces as an observable event, not a silent overflow loop.
+        if (route.adapter.id !== "anthropic") {
+            this.emit({
+                type: "log",
+                level: "info",
+                message: `reactive overflow recovery fired for adapter '${route.adapter.id}'`,
+            });
+        }
+        try {
+            const recovery = await this.opts.compactor.compact("overflow", token.signal);
+            if (recovery.ok) {
+                // (6.5b follow-up) Mirror the retry path's partial-undo: if this attempt
+                // streamed partial text before the overflow, drop it so the
+                // post-compaction restream doesn't append onto a stale partial. Usually a
+                // no-op (a 413 precedes any delta) — emitted only when text was shown.
+                if (textBuffer.length > 0) {
+                    this.emit({ type: "assistant:reset", turnId });
+                }
+                return true;
+            }
+        }
+        catch (e) {
+            // Compactor.compact never throws by contract, but be defensive: a malformed
+            // implementation shouldn't break the original error path.
+            this.emit({
+                type: "log",
+                level: "warn",
+                message: `overflow compaction unexpectedly threw: ${e.message ?? e}`,
+            });
+        }
+        return false;
+    }
+    /**
+     * (7.7 §3.3) Execute one assistant turn's tool calls. Partitions into
+     * batches (consecutive concurrency-safe → one parallel batch; any unsafe →
+     * its own serial batch), preserving submission order so a write is never
+     * reordered before a preceding read. Returns the `tool` messages in
+     * submission order plus whether the model signalled `terminate`.
+     *
+     * Cancellation is ALL-SETTLED: `runConcurrent` drains every thunk before it
+     * propagates, and `runTool` emits a `tool:result` on every path (success /
+     * denied / error / cancelled), so each emitted `tool:call` always has its
+     * paired result on the bus even when a sibling cancels (codex 7.7 R1
+     * MUST-FIX #3).
+     */
+    async executeToolBatch(calls, token, turnId) {
+        const { tools } = this.opts;
+        const cap = maxToolConcurrency();
+        const batches = partitionToolCalls(calls, (call) => {
+            const tool = tools.get(call.name);
+            return tool ? isCallConcurrencySafe(tool, call.args) : false;
+        });
+        const resultByCall = new Map();
+        const runOne = async (call) => {
+            // Emit `tool:call` here (not in the stream loop) so each call is paired
+            // with its `tool:result` from runTool — keeps the bus observably balanced
+            // for subscribers (TUI, NDJSON, telemetry, serve replay).
+            this.emit({ type: "tool:call", callId: call.callId, name: call.name, args: call.args });
+            resultByCall.set(call.callId, await this.runTool(call, token));
+        };
+        for (const b of batches) {
+            if (b.safe && b.calls.length > 1) {
+                // Parallel, bounded, drain-all-before-throw (no orphan tool:call).
+                await runConcurrent(b.calls.map((c) => () => runOne(c)), cap);
+            }
+            else {
+                // Serial: a single safe call, or an unsafe/write call. Preserve the
+                // pre-7.7 per-call cancellation checks (before AND after — a tool that
+                // catches cancellation internally and returns ok:false doesn't rethrow).
+                for (const c of b.calls) {
+                    token.throwIfCancelled();
+                    await runOne(c);
+                    token.throwIfCancelled();
+                }
+            }
+        }
+        // (7.7 §3.5) Bound the aggregate size of one batch's results so a fan-out of
+        // parallel reads can't blow the next request's context in a single user
+        // message (claude-code MAX_TOOL_RESULTS_PER_MESSAGE_CHARS). Inline
+        // truncation of the largest results; disk-spill via the artifact mechanism
+        // is a future enhancement.
+        applyBatchResultBudget(resultByCall);
+        // Reassemble in submission order + aggregate terminate.
+        const messages = [];
+        let allTerminate = calls.length > 0;
+        for (const call of calls) {
+            const r = resultByCall.get(call.callId);
+            // A call whose runTool threw (cancellation) has no entry; the turn aborts
+            // and won't be persisted, so skipping it is correct.
+            if (!r) {
+                allTerminate = false;
+                continue;
+            }
+            messages.push({
+                role: "tool",
+                content: [{ type: "tool-result", callId: call.callId, name: call.name, output: r.output, ok: r.ok }],
+            });
+            if (!r.terminate)
+                allTerminate = false;
+        }
+        // (7.7 §5, OQ-5) Single todo emit point: after a successful `todo` tool call
+        // the session phases changed — project the full state to all four surfaces.
+        const todoChanged = calls.some((c) => c.name === TODO_TOOL_NAME && resultByCall.get(c.callId)?.ok);
+        if (todoChanged) {
+            this.emit({ type: "todo", turnId, phases: this.opts.session.getTodoPhases() });
+        }
+        return { messages, terminate: allTerminate };
+    }
     async runTool(call, token) {
         const { bus, tools, gate, plugins, workspaceRoot } = this.opts;
         const tool = tools.get(call.name);