npm - @zhixuan92/multi-model-agent-core - Versions diffs - 0.1.0 → 0.2.0 - Mend

@zhixuan92/multi-model-agent-core 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (82)

package/README.md +0 -6
package/dist/config/schema.d.ts +27 -0
package/dist/config/schema.d.ts.map +1 -1
package/dist/config/schema.js +13 -0
package/dist/config/schema.js.map +1 -1
package/dist/context/context-block-store.d.ts +75 -0
package/dist/context/context-block-store.d.ts.map +1 -0
package/dist/context/context-block-store.js +82 -0
package/dist/context/context-block-store.js.map +1 -0
package/dist/context/expand-context-blocks.d.ts +20 -0
package/dist/context/expand-context-blocks.d.ts.map +1 -0
package/dist/context/expand-context-blocks.js +46 -0
package/dist/context/expand-context-blocks.js.map +1 -0
package/dist/delegate-with-escalation.d.ts +34 -0
package/dist/delegate-with-escalation.d.ts.map +1 -0
package/dist/delegate-with-escalation.js +168 -0
package/dist/delegate-with-escalation.js.map +1 -0
package/dist/index.d.ts +4 -1
package/dist/index.d.ts.map +1 -1
package/dist/index.js +3 -0
package/dist/index.js.map +1 -1
package/dist/model-profiles.json +8 -4
package/dist/provider.d.ts.map +1 -1
package/dist/provider.js +7 -1
package/dist/provider.js.map +1 -1
package/dist/routing/model-profiles.d.ts +1 -0
package/dist/routing/model-profiles.d.ts.map +1 -1
package/dist/routing/model-profiles.js +4 -0
package/dist/routing/model-profiles.js.map +1 -1
package/dist/run-tasks.d.ts +26 -2
package/dist/run-tasks.d.ts.map +1 -1
package/dist/run-tasks.js +61 -19
package/dist/run-tasks.js.map +1 -1
package/dist/runners/claude-runner.d.ts.map +1 -1
package/dist/runners/claude-runner.js +643 -32
package/dist/runners/claude-runner.js.map +1 -1
package/dist/runners/codex-runner.d.ts.map +1 -1
package/dist/runners/codex-runner.js +473 -48
package/dist/runners/codex-runner.js.map +1 -1
package/dist/runners/error-classification.d.ts +30 -0
package/dist/runners/error-classification.d.ts.map +1 -0
package/dist/runners/error-classification.js +72 -0
package/dist/runners/error-classification.js.map +1 -0
package/dist/runners/injection-type.d.ts +17 -0
package/dist/runners/injection-type.d.ts.map +1 -0
package/dist/runners/injection-type.js +27 -0
package/dist/runners/injection-type.js.map +1 -0
package/dist/runners/openai-runner.d.ts +5 -0
package/dist/runners/openai-runner.d.ts.map +1 -1
package/dist/runners/openai-runner.js +508 -36
package/dist/runners/openai-runner.js.map +1 -1
package/dist/runners/prevention.d.ts +41 -0
package/dist/runners/prevention.d.ts.map +1 -0
package/dist/runners/prevention.js +68 -0
package/dist/runners/prevention.js.map +1 -0
package/dist/runners/supervision.d.ts +130 -0
package/dist/runners/supervision.d.ts.map +1 -0
package/dist/runners/supervision.js +238 -0
package/dist/runners/supervision.js.map +1 -0
package/dist/tools/claude-adapter.d.ts.map +1 -1
package/dist/tools/claude-adapter.js +6 -3
package/dist/tools/claude-adapter.js.map +1 -1
package/dist/tools/definitions.d.ts +3 -1
package/dist/tools/definitions.d.ts.map +1 -1
package/dist/tools/definitions.js +56 -5
package/dist/tools/definitions.js.map +1 -1
package/dist/tools/openai-adapter.d.ts.map +1 -1
package/dist/tools/openai-adapter.js +6 -3
package/dist/tools/openai-adapter.js.map +1 -1
package/dist/tools/scratchpad.d.ts +28 -0
package/dist/tools/scratchpad.d.ts.map +1 -0
package/dist/tools/scratchpad.js +49 -0
package/dist/tools/scratchpad.js.map +1 -0
package/dist/tools/tracker.d.ts +38 -2
package/dist/tools/tracker.d.ts.map +1 -1
package/dist/tools/tracker.js +54 -5
package/dist/tools/tracker.js.map +1 -1
package/dist/types.d.ts +184 -2
package/dist/types.d.ts.map +1 -1
package/dist/types.js +17 -1
package/dist/types.js.map +1 -1
package/package.json +9 -15

package/dist/runners/codex-runner.js CHANGED

@@ -1,9 +1,16 @@
 import OpenAI from 'openai';
 import { z } from 'zod';
+import { createHash } from 'node:crypto';
 import { getCodexAuth } from '../auth/codex-oauth.js';
-import { withTimeout } from '../types.js';
+import { withTimeout, computeCostUSD, } from '../types.js';
 import { FileTracker } from '../tools/tracker.js';
 import { createToolImplementations } from '../tools/definitions.js';
+import { TextScratchpad } from '../tools/scratchpad.js';
+import { buildSystemPrompt, buildBudgetHint, buildReGroundingMessage, buildBudgetPressureNudge, RE_GROUNDING_INTERVAL_TURNS, } from './prevention.js';
+import { validateCompletion, buildRePrompt, sameDegenerateOutput, resolveInputTokenSoftLimit, checkWatchdogThreshold, logWatchdogEvent, } from './supervision.js';
+import { injectionTypeFor } from './injection-type.js';
+import { classifyError } from './error-classification.js';
+import { findModelProfile } from '../routing/model-profiles.js';
 // CODEX_DEBUG=1 causes the runner to log raw HTTP request/response bodies to
 // stderr. Those bodies routinely include the user's prompt, file contents,
 // tool arguments, and other sensitive data — fine for local debugging,
@@ -16,6 +23,11 @@ if (process.env.CODEX_DEBUG === '1') {
         'bodies (including prompts and file contents) will be logged to stderr. ' +
         'Disable in any environment where logs may be retained or shared.');
 }
+/**
+ * Hard cap on supervision re-prompts before we give up and salvage. Three is
+ * the value chosen in the spec (A.2.2); mirrors openai-runner and claude-runner.
+ */
+const MAX_SUPERVISION_RETRIES = 3;
 export function createCodexClient(capture) {
     const debug = process.env.CODEX_DEBUG === '1';
     // A custom fetch that tees error-response bodies into `capture`.
@@ -146,7 +158,33 @@ export async function runCodex(prompt, options, providerConfig, defaults) {
     const sandboxPolicy = options.sandboxPolicy ?? providerConfig.sandboxPolicy ?? 'cwd-only';
     const effort = options.effort ?? providerConfig.effort;
     const abortController = new AbortController();
-    const tracker = new FileTracker();
+    // --- Progress event emission (Task 11) ----------------------------------
+    //
+    // `onProgress` is already wrapped in `safeSink` by the orchestrator
+    // (Task 8), so any throw from the consumer callback is swallowed
+    // upstream and cannot corrupt this loop. We do not need to wrap it
+    // again here.
+    const onProgress = options.onProgress;
+    const emit = (event) => {
+        if (onProgress)
+            onProgress(event);
+    };
+    // Accumulated state (hoisted so the timeout callback can read partial
+    // progress, AND so the FileTracker callback closure — constructed below
+    // — can read the running turn count at firing time).
+    //
+    // Turn attribution for tool calls: in codex-runner, tool calls fire in
+    // the tool-execution loop AFTER the model's stream for that turn has
+    // completed but BEFORE the next iteration of `while` starts. The `turns`
+    // variable already reflects the current turn at that point (it was
+    // incremented at the top of the iteration), so the callback can read it
+    // directly — no +1 offset.
+    let inputTokens = 0;
+    let outputTokens = 0;
+    let turns = 0;
+    const tracker = new FileTracker((summary) => {
+        emit({ kind: 'tool_call', turn: turns, toolSummary: summary });
+    });
     const toolImpls = createToolImplementations(tracker, cwd, sandboxPolicy, abortController.signal);
     const codexTools = toolMode === 'full' ? buildCodexTools(toolImpls, sandboxPolicy) : [];
     const toolsByName = new Map(codexTools.map(t => [t.name, t]));
@@ -167,29 +205,108 @@ export async function runCodex(prompt, options, providerConfig, defaults) {
         ? configuredHostedTools.map(t => ({ type: t }))
         : [];
     const allTools = [...responsesTools, ...hostedTools];
-    // Accumulated state (hoisted so the timeout callback can read partial progress)
-    let inputTokens = 0;
-    let outputTokens = 0;
-    let turns = 0;
+    // --- Prevention layer: system prompt + budget hint ---
+    //
+    // buildSystemPrompt() is deliberately static and parameter-free (same
+    // decision as openai-runner and claude-runner: Task 1 review rejected
+    // provider/maxTurns options). The budget hint is prepended to the user
+    // prompt so the model sees it as part of its task brief, while the system
+    // prompt is threaded through the Responses API `instructions` field.
+    const systemPrompt = buildSystemPrompt();
+    const budgetHint = buildBudgetHint({ maxTurns });
+    const promptWithBudgetHint = `${budgetHint}\n\n${prompt}`;
+    // --- onInitialRequest (Task 12) ----------------------------------------
+    //
+    // Fire once per attempt with the canonical orchestrator-side initial
+    // brief: `${systemPrompt}\n\n${promptWithBudgetHint}`. This is NOT the
+    // literal request body the OpenAI Responses API transmits — codex
+    // sends the systemPrompt via the Responses API `instructions` field
+    // and the user prompt as a structured `input` message array. We hash
+    // the canonical form instead so the hash is cross-runner stable:
+    // openai-runner and claude-runner compute the same hash from the same
+    // canonical string. See `AttemptRecord.initialPromptHash` in types.ts
+    // for the full wire-level caveat.
+    if (options.onInitialRequest) {
+        const canonicalInitialBrief = `${systemPrompt}\n\n${promptWithBudgetHint}`;
+        try {
+            options.onInitialRequest({
+                lengthChars: canonicalInitialBrief.length,
+                sha256: createHash('sha256').update(canonicalInitialBrief).digest('hex'),
+            });
+        }
+        catch {
+            // Swallow — a broken callback must not affect dispatch.
+        }
+    }
+    // --- Scratchpad: buffers every text emission the codex backend streams
+    // through our loop. Every termination path (ok / incomplete / max_turns /
+    // error / timeout / force_salvage) salvages `scratchpad.latest()` when
+    // the final message is empty or degenerate. ---
+    const scratchpad = new TextScratchpad();
+    // --- Watchdog: resolve the input-token soft limit once per run ---
+    const profile = findModelProfile(providerConfig.model);
+    const softLimit = resolveInputTokenSoftLimit(providerConfig, profile);
     const run = async () => {
         const capture = {};
         const client = createCodexClient(capture);
         const input = [
             // eslint-disable-next-line @typescript-eslint/no-explicit-any
-            { role: 'user', content: prompt },
+            { role: 'user', content: promptWithBudgetHint },
         ];
         let output = '';
+        // --- Abort-path investigation (plan Step 2) ---------------------------
+        //
+        // The 2026-04-10 Fate dispatch captured an error "Request was aborted |
+        // last response status: completed". The "completed" suffix was
+        // misleading: it was captured from a PREVIOUS successful turn, not the
+        // failed one. Mechanism:
+        //
+        //   1. Turn N's stream emits `response.completed` with status
+        //      `'completed'`. We update `lastResponseStatus = 'completed'`.
+        //   2. Turn N+1 starts; `client.responses.create(...)` opens a new
+        //      stream, but the abort signal fires before any
+        //      `response.completed` event is received.
+        //   3. The thrown error is caught below. The catch branch reads
+        //      `lastResponseStatus` — which is STILL `'completed'` from turn N
+        //      — and appends it as "last response status: completed", making
+        //      the error look like it originated from a successful response.
+        //
+        // Fix: track which turn the status was captured on. If the status was
+        // NOT captured on the current (failed) turn, drop the suffix. That way
+        // we never emit a status that belongs to a different, already-
+        // concluded request. Users saw the misleading suffix and wasted time
+        // debugging a phantom "the request completed but was aborted" condition
+        // that doesn't exist.
         let lastResponseStatus = null;
+        let lastResponseStatusTurn = null;
+        // --- Supervision / watchdog bookkeeping ---
+        let supervisionRetries = 0;
+        // Initialised to `null` (NOT ''): on the first turn there is no
+        // previous degenerate output to compare against, so the same-output
+        // early-out must be skipped. Initialising to '' would cause
+        // sameDegenerateOutput('', '') to fire on a first-turn empty output
+        // and break the loop before any retries run. See openai-runner
+        // regression #5.
+        let lastDegenerateOutput = null;
+        // High-watermark guard for the watchdog warning nudge — fire at most
+        // once per distinct input-token level. Mirrors openai-runner and
+        // claude-runner.
+        let lastWarnedInputTokens = -1;
         try {
             while (turns < maxTurns) {
                 turns++;
+                // Emit turn_start AFTER incrementing so `turn` matches the 1-indexed
+                // turn number we use everywhere else in this runner (the scratchpad
+                // append, watchdog logs, error diagnostics, result.turns).
+                emit({ kind: 'turn_start', turn: turns, provider: 'codex' });
                 // Codex backend requires streaming. The Codex backend's
                 // `response.completed` event does NOT populate `response.output` —
                 // we must accumulate content from individual stream events.
-                // `instructions` is required (mirrors gumi-agent's proven shape).
+                // `instructions` carries the prevention-layer system prompt; the
+                // per-run budget hint is already prepended to the first user input.
                 const stream = await client.responses.create({
                     model: providerConfig.model,
-                    instructions: prompt,
+                    instructions: systemPrompt,
                     input,
                     stream: true,
                     store: false,
@@ -241,8 +358,10 @@ export async function runCodex(prompt, options, providerConfig, defaults) {
                             inputTokens += r.usage.input_tokens ?? 0;
                             outputTokens += r.usage.output_tokens ?? 0;
                         }
-                        if (r?.status)
+                        if (r?.status) {
                             lastResponseStatus = r.status;
+                            lastResponseStatusTurn = turns;
+                        }
                     }
                 }
                 if (process.env.CODEX_DEBUG === '1') {
@@ -256,6 +375,20 @@ export async function runCodex(prompt, options, providerConfig, defaults) {
                 if (!sawCompleted) {
                     throw new Error('Codex stream ended without a response.completed event');
                 }
+                // Buffer this turn's text into the scratchpad BEFORE any exit so
+                // every termination path (including supervision exhaustion and
+                // force_salvage) can salvage it. Codex does not emit <think> tags
+                // by default, so there is no stripping step here.
+                if (textThisTurn) {
+                    scratchpad.append(turns, textThisTurn);
+                    emit({
+                        kind: 'text_emission',
+                        turn: turns,
+                        chars: textThisTurn.length,
+                        preview: textThisTurn.slice(0, 200),
+                    });
+                    output = textThisTurn;
+                }
                 // Replay only function_call items into the next turn's input.
                 //
                 // We send `store: false` to the Responses API, which means the server
@@ -287,26 +420,156 @@ export async function runCodex(prompt, options, providerConfig, defaults) {
                         });
                     }
                 }
-                if (textThisTurn) {
-                    output = textThisTurn;
+                // --- Watchdog checks after tokens are updated -------------------
+                const watchdogStatus = checkWatchdogThreshold(inputTokens, softLimit);
+                if (watchdogStatus !== 'ok') {
+                    logWatchdogEvent(watchdogStatus, {
+                        provider: 'codex',
+                        model: providerConfig.model,
+                        turn: turns,
+                        inputTokens,
+                        softLimit,
+                        scratchpadChars: scratchpad.toString().length,
+                    });
                 }
-                else if (toolCalls.length === 0) {
-                    output = `[codex returned no text — items streamed: ${itemTypesSeen.join(', ') || '(none)'}]`;
+                if (watchdogStatus === 'force_salvage') {
+                    // `watchdog_force_salvage` is not an injected message — no
+                    // re-prompt is sent — but observers still want to see exactly
+                    // why the run is being killed. Emit with contentLengthChars: 0
+                    // to reflect the "nothing was injected, we just terminated"
+                    // semantics (mirrors openai-runner and claude-runner).
+                    emit({
+                        kind: 'injection',
+                        injectionType: 'watchdog_force_salvage',
+                        turn: turns,
+                        contentLengthChars: 0,
+                    });
+                    const salvaged = buildCodexForceSalvageResult({
+                        tracker,
+                        scratchpad,
+                        providerConfig,
+                        inputTokens,
+                        outputTokens,
+                        turns,
+                        softLimit,
+                    });
+                    emit({ kind: 'done', status: salvaged.status });
+                    return salvaged;
+                }
+                // Warning-band nudge: fire at most once per distinct input-token
+                // high-watermark. Pushed as a user message so the next turn of
+                // the codex loop addresses the budget-pressure prompt. We use
+                // the shared prevention helper (NOT an inline string) so every
+                // runner emits byte-identical wording.
+                if (watchdogStatus === 'warning' && inputTokens > lastWarnedInputTokens) {
+                    lastWarnedInputTokens = inputTokens;
+                    const warning = buildBudgetPressureNudge({ inputTokens, softLimit });
+                    input.push({
+                        // eslint-disable-next-line @typescript-eslint/no-explicit-any
+                        role: 'user',
+                        content: warning,
+                        // eslint-disable-next-line @typescript-eslint/no-explicit-any
+                    });
+                    emit({
+                        kind: 'injection',
+                        injectionType: 'watchdog_warning',
+                        turn: turns,
+                        contentLengthChars: warning.length,
+                    });
                 }
-                // If the model made no tool calls, it's done
+                // --- Periodic re-grounding inside the loop ---------------------
+                if (turns > 0 && turns % RE_GROUNDING_INTERVAL_TURNS === 0) {
+                    const reground = buildReGroundingMessage({
+                        originalPromptExcerpt: prompt,
+                        currentTurn: turns,
+                        maxTurns,
+                        toolCallsSoFar: tracker.getToolCalls().length,
+                        filesReadSoFar: tracker.getReads().length,
+                    });
+                    input.push({
+                        // eslint-disable-next-line @typescript-eslint/no-explicit-any
+                        role: 'user',
+                        content: reground,
+                        // eslint-disable-next-line @typescript-eslint/no-explicit-any
+                    });
+                    emit({
+                        kind: 'injection',
+                        injectionType: 'reground',
+                        turn: turns,
+                        contentLengthChars: reground.length,
+                    });
+                }
+                // --- turn_complete: one event per while-iteration. Fires after the
+                // watchdog + re-grounding checks have run (so cumulative token
+                // counts and any injection events are already on the wire) and
+                // BEFORE the supervision branching / tool-execution loop. Every
+                // continue/return in the branches below happens AFTER this event,
+                // so the sequence "turn_start ... text_emission ... turn_complete"
+                // is guaranteed per iteration.
+                emit({
+                    kind: 'turn_complete',
+                    turn: turns,
+                    cumulativeInputTokens: inputTokens,
+                    cumulativeOutputTokens: outputTokens,
+                });
+                // If the model made no tool calls, the turn ended with either a
+                // final answer or a degenerate emission. Wrap in the supervision
+                // state machine: valid text is an immediate ok-exit; degenerate
+                // either re-prompts (and continues the loop) or — if the retry
+                // budget is spent / same-output early-out fires — exits as
+                // incomplete with scratchpad salvage.
                 if (toolCalls.length === 0) {
-                    return {
-                        output,
-                        status: 'ok',
-                        usage: {
+                    const stripped = textThisTurn; // codex does not emit <think> tags
+                    const validation = validateCompletion(stripped);
+                    if (validation.valid) {
+                        const ok = buildCodexOkResult({
+                            tracker,
+                            scratchpad,
+                            providerConfig,
                             inputTokens,
                             outputTokens,
-                            totalTokens: inputTokens + outputTokens,
-                            costUSD: null,
-                        },
-                        turns,
-                        files: tracker.getFiles(),
-                    };
+                            turns,
+                            output: stripped,
+                        });
+                        emit({ kind: 'done', status: ok.status });
+                        return ok;
+                    }
+                    // Same-output early-out: only compare when we have a previous
+                    // degenerate output. First-turn degeneracy must still get
+                    // retries — see openai-runner regression #5.
+                    if ((lastDegenerateOutput !== null &&
+                        sameDegenerateOutput(stripped, lastDegenerateOutput)) ||
+                        supervisionRetries >= MAX_SUPERVISION_RETRIES) {
+                        const exhausted = buildCodexIncompleteResult({
+                            tracker,
+                            scratchpad,
+                            providerConfig,
+                            inputTokens,
+                            outputTokens,
+                            turns,
+                        });
+                        emit({ kind: 'done', status: exhausted.status });
+                        return exhausted;
+                    }
+                    // Inject the re-prompt as the next user input and continue
+                    // the loop. The next turn of the codex backend will respond
+                    // to the re-prompt directly.
+                    lastDegenerateOutput = stripped;
+                    supervisionRetries++;
+                    const rePrompt = buildRePrompt(validation);
+                    input.push({
+                        // eslint-disable-next-line @typescript-eslint/no-explicit-any
+                        role: 'user',
+                        content: rePrompt,
+                        // eslint-disable-next-line @typescript-eslint/no-explicit-any
+                    });
+                    emit({
+                        kind: 'injection',
+                        injectionType: injectionTypeFor(validation.kind),
+                        turn: turns,
+                        contentLengthChars: rePrompt.length,
+                    });
+                    continue;
                 }
                 // Execute tool calls and feed outputs back
                 for (const call of toolCalls) {
@@ -331,19 +594,19 @@ export async function runCodex(prompt, options, providerConfig, defaults) {
                     });
                 }
             }
-            // Max turns exhausted
-            return {
-                output: output || `Agent exceeded max turns (${maxTurns}).`,
-                status: 'max_turns',
-                usage: {
-                    inputTokens,
-                    outputTokens,
-                    totalTokens: inputTokens + outputTokens,
-                    costUSD: null,
-                },
+            // Max turns exhausted — salvage any buffered text.
+            const maxTurnsResult = buildCodexMaxTurnsResult({
+                tracker,
+                scratchpad,
+                providerConfig,
+                inputTokens,
+                outputTokens,
                 turns,
-                files: tracker.getFiles(),
-            };
+                maxTurns,
+                lastOutput: output,
+            });
+            emit({ kind: 'done', status: maxTurnsResult.status });
+            return maxTurnsResult;
         }
         catch (err) {
             // OpenAI SDK's APIError carries status/body/headers — surface them
@@ -374,30 +637,192 @@ export async function runCodex(prompt, options, providerConfig, defaults) {
             }
             if (e?.requestID)
                 pieces.push(`req_id=${e.requestID}`);
-            if (lastResponseStatus)
+            // Only include `last response status` when it was captured on the
+            // CURRENT (failing) turn — otherwise it belongs to a previous,
+            // separate request and appending it is actively misleading. See the
+            // abort-path investigation comment at the top of `run()`.
+            if (lastResponseStatus && lastResponseStatusTurn === turns) {
                 pieces.push(`last response status: ${lastResponseStatus}`);
+            }
+            else if (lastResponseStatus && lastResponseStatusTurn !== turns) {
+                pieces.push(`note: a previous request (turn ${lastResponseStatusTurn}) completed with status ` +
+                    `"${lastResponseStatus}" — it is unrelated to this failure`);
+            }
             const detailed = pieces.join(' | ') || String(err);
+            // Classify the thrown error into a finer-grained RunStatus. Task 7
+            // introduces api_aborted / api_error / network_error alongside the
+            // catch-all 'error' status. The turn-scoped `lastResponseStatus`
+            // disambiguation above is ORTHOGONAL to this classification: the
+            // `detailed` message is still the rich operator-facing diagnostic,
+            // and `classifyError` only decides which RunStatus bucket the
+            // failure lands in.
+            const { status } = classifyError(err);
+            // Salvage: if the scratchpad has buffered text from earlier turns,
+            // return it as the output. Pre-Task-5 behavior returned only the
+            // error string, losing 30k+ tokens of work on abort.
+            emit({ kind: 'done', status });
+            const hasSalvage = !scratchpad.isEmpty();
             return {
-                output: `Sub-agent error: ${detailed}`,
-                status: 'error',
+                output: hasSalvage ? scratchpad.latest() : `Sub-agent error: ${detailed}`,
+                status,
                 usage: {
                     inputTokens,
                     outputTokens,
                     totalTokens: inputTokens + outputTokens,
-                    costUSD: null,
+                    costUSD: computeCostUSD(inputTokens, outputTokens, providerConfig),
                 },
                 turns,
-                files: tracker.getFiles(),
+                filesRead: tracker.getReads(),
+                filesWritten: tracker.getWrites(),
+                toolCalls: tracker.getToolCalls(),
+                outputIsDiagnostic: !hasSalvage,
+                escalationLog: [],
                 error: detailed,
             };
         }
     };
-    return withTimeout(run(), timeoutMs, () => ({
-        output: `Agent timed out after ${timeoutMs}ms.`,
-        status: 'timeout',
-        files: tracker.getFiles(),
-        usage: { inputTokens, outputTokens, totalTokens: inputTokens + outputTokens, costUSD: null },
+    return withTimeout(run(), timeoutMs, () => {
+        emit({ kind: 'done', status: 'timeout' });
+        const hasSalvage = !scratchpad.isEmpty();
+        return {
+            // Preserve any text the scratchpad buffered before the timeout fired.
+            // Partial usage is read from the running accumulators hoisted above —
+            // hardcoded zeros would discard every token counted on partial turns.
+            output: hasSalvage ? scratchpad.latest() : `Agent timed out after ${timeoutMs}ms.`,
+            status: 'timeout',
+            filesRead: tracker.getReads(),
+            filesWritten: tracker.getWrites(),
+            toolCalls: tracker.getToolCalls(),
+            usage: {
+                inputTokens,
+                outputTokens,
+                totalTokens: inputTokens + outputTokens,
+                costUSD: computeCostUSD(inputTokens, outputTokens, providerConfig),
+            },
+            turns,
+            outputIsDiagnostic: !hasSalvage,
+            escalationLog: [],
+        };
+    }, abortController);
+}
+function buildCodexOkResult(args) {
+    const { tracker, providerConfig, inputTokens, outputTokens, turns, output } = args;
+    return {
+        output,
+        status: 'ok',
+        usage: {
+            inputTokens,
+            outputTokens,
+            totalTokens: inputTokens + outputTokens,
+            costUSD: computeCostUSD(inputTokens, outputTokens, providerConfig),
+        },
+        turns,
+        filesRead: tracker.getReads(),
+        filesWritten: tracker.getWrites(),
+        toolCalls: tracker.getToolCalls(),
+        // `ok` always carries a real model answer — never a diagnostic.
+        outputIsDiagnostic: false,
+        escalationLog: [],
+    };
+}
+/**
+ * Supervision-exhausted path: retry cap hit or same-output early-out. Prefer
+ * scratchpad salvage; fall back to the incomplete diagnostic.
+ */
+function buildCodexIncompleteResult(args) {
+    const { tracker, scratchpad, providerConfig, inputTokens, outputTokens, turns } = args;
+    const filesRead = tracker.getReads();
+    const filesWritten = tracker.getWrites();
+    const hasSalvage = !scratchpad.isEmpty();
+    return {
+        output: hasSalvage
+            ? scratchpad.latest()
+            : buildCodexIncompleteDiagnostic({
+                turns,
+                inputTokens,
+                outputTokens,
+                filesRead,
+                filesWritten,
+            }),
+        status: 'incomplete',
+        usage: {
+            inputTokens,
+            outputTokens,
+            totalTokens: inputTokens + outputTokens,
+            costUSD: computeCostUSD(inputTokens, outputTokens, providerConfig),
+        },
+        turns,
+        filesRead,
+        filesWritten,
+        toolCalls: tracker.getToolCalls(),
+        outputIsDiagnostic: !hasSalvage,
+        escalationLog: [],
+    };
+}
+function buildCodexForceSalvageResult(args) {
+    const { tracker, scratchpad, providerConfig, inputTokens, outputTokens, turns, softLimit } = args;
+    const hasSalvage = !scratchpad.isEmpty();
+    return {
+        output: hasSalvage
+            ? scratchpad.latest()
+            : `[codex sub-agent forcibly terminated at ${inputTokens} input tokens (soft limit ${softLimit}). No usable text was buffered.]`,
+        status: 'incomplete',
+        usage: {
+            inputTokens,
+            outputTokens,
+            totalTokens: inputTokens + outputTokens,
+            costUSD: computeCostUSD(inputTokens, outputTokens, providerConfig),
+        },
         turns,
-    }), abortController);
+        filesRead: tracker.getReads(),
+        filesWritten: tracker.getWrites(),
+        toolCalls: tracker.getToolCalls(),
+        outputIsDiagnostic: !hasSalvage,
+        escalationLog: [],
+    };
+}
+function buildCodexMaxTurnsResult(args) {
+    const { tracker, scratchpad, providerConfig, inputTokens, outputTokens, turns, maxTurns, lastOutput } = args;
+    const hasSalvage = !scratchpad.isEmpty();
+    // Note: `lastOutput` here is the model's final text for the max-turns
+    // boundary — real model content, not a diagnostic template. Only the
+    // `Agent exceeded max turns…` fallback (empty scratchpad AND empty
+    // lastOutput) is a diagnostic.
+    const output = hasSalvage
+        ? scratchpad.latest()
+        : (lastOutput || `Agent exceeded max turns (${maxTurns}).`);
+    const outputIsDiagnostic = !hasSalvage && !lastOutput;
+    return {
+        output,
+        status: 'max_turns',
+        usage: {
+            inputTokens,
+            outputTokens,
+            totalTokens: inputTokens + outputTokens,
+            costUSD: computeCostUSD(inputTokens, outputTokens, providerConfig),
+        },
+        turns,
+        filesRead: tracker.getReads(),
+        filesWritten: tracker.getWrites(),
+        toolCalls: tracker.getToolCalls(),
+        outputIsDiagnostic,
+        escalationLog: [],
+    };
+}
+function buildCodexIncompleteDiagnostic(opts) {
+    return [
+        '[codex sub-agent terminated without producing a final answer]',
+        '',
+        'The model emitted no tool calls and no usable text on its final turn, and',
+        'supervision re-prompts did not recover a valid response.',
+        '',
+        `Turns used:    ${opts.turns}`,
+        `Input tokens:  ${opts.inputTokens}`,
+        `Output tokens: ${opts.outputTokens}`,
+        `Files read:    ${opts.filesRead.length}`,
+        `Files written: ${opts.filesWritten.length}`,
+        '',
+        'Recommended action: re-dispatch with a tighter brief, or escalate provider tier.',
+    ].join('\n');
 }
 //# sourceMappingURL=codex-runner.js.map