npm - @zhixuan92/multi-model-agent-core - Versions diffs - 3.5.2 → 3.6.0 - Mend

@zhixuan92/multi-model-agent-core 3.5.2 → 3.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (93) hide show

package/dist/config/schema.d.ts +3 -0
package/dist/config/schema.d.ts.map +1 -1
package/dist/config/schema.js +10 -1
package/dist/config/schema.js.map +1 -1
package/dist/delegate-with-escalation.d.ts +14 -0
package/dist/delegate-with-escalation.d.ts.map +1 -1
package/dist/delegate-with-escalation.js +29 -1
package/dist/delegate-with-escalation.js.map +1 -1
package/dist/executors/audit.d.ts.map +1 -1
package/dist/executors/audit.js +16 -2
package/dist/executors/audit.js.map +1 -1
package/dist/executors/debug.d.ts.map +1 -1
package/dist/executors/debug.js +8 -1
package/dist/executors/debug.js.map +1 -1
package/dist/executors/delegate.d.ts.map +1 -1
package/dist/executors/delegate.js +23 -1
package/dist/executors/delegate.js.map +1 -1
package/dist/executors/execute-plan.d.ts.map +1 -1
package/dist/executors/execute-plan.js +16 -2
package/dist/executors/execute-plan.js.map +1 -1
package/dist/executors/execution-context.d.ts.map +1 -1
package/dist/executors/execution-context.js +4 -0
package/dist/executors/execution-context.js.map +1 -1
package/dist/executors/investigate.d.ts.map +1 -1
package/dist/executors/investigate.js +24 -1
package/dist/executors/investigate.js.map +1 -1
package/dist/executors/retry.d.ts.map +1 -1
package/dist/executors/retry.js +25 -2
package/dist/executors/retry.js.map +1 -1
package/dist/executors/review.d.ts.map +1 -1
package/dist/executors/review.js +16 -2
package/dist/executors/review.js.map +1 -1
package/dist/executors/types.d.ts +35 -0
package/dist/executors/types.d.ts.map +1 -1
package/dist/executors/verify.d.ts.map +1 -1
package/dist/executors/verify.js +16 -2
package/dist/executors/verify.js.map +1 -1
package/dist/heartbeat.d.ts +1 -1
package/dist/heartbeat.d.ts.map +1 -1
package/dist/heartbeat.js +10 -3
package/dist/heartbeat.js.map +1 -1
package/dist/routing/model-profiles.d.ts +1 -0
package/dist/routing/model-profiles.d.ts.map +1 -1
package/dist/routing/model-profiles.js +3 -0
package/dist/routing/model-profiles.js.map +1 -1
package/dist/run-tasks/index.d.ts +17 -0
package/dist/run-tasks/index.d.ts.map +1 -1
package/dist/run-tasks/index.js +1 -1
package/dist/run-tasks/index.js.map +1 -1
package/dist/run-tasks/reviewed-lifecycle.d.ts +24 -2
package/dist/run-tasks/reviewed-lifecycle.d.ts.map +1 -1
package/dist/run-tasks/reviewed-lifecycle.js +191 -13
package/dist/run-tasks/reviewed-lifecycle.js.map +1 -1
package/dist/runners/claude-runner.d.ts.map +1 -1
package/dist/runners/claude-runner.js +8 -5
package/dist/runners/claude-runner.js.map +1 -1
package/dist/runners/codex-runner.d.ts.map +1 -1
package/dist/runners/codex-runner.js +6 -3
package/dist/runners/codex-runner.js.map +1 -1
package/dist/runners/error-classification.d.ts +8 -0
package/dist/runners/error-classification.d.ts.map +1 -1
package/dist/runners/error-classification.js +17 -0
package/dist/runners/error-classification.js.map +1 -1
package/dist/runners/openai-runner.d.ts.map +1 -1
package/dist/runners/openai-runner.js +6 -3
package/dist/runners/openai-runner.js.map +1 -1
package/dist/runners/types.d.ts +6 -1
package/dist/runners/types.d.ts.map +1 -1
package/dist/telemetry/bucketing.d.ts +11 -0
package/dist/telemetry/bucketing.d.ts.map +1 -0
package/dist/telemetry/bucketing.js +52 -0
package/dist/telemetry/bucketing.js.map +1 -0
package/dist/telemetry/concern-classifier.d.ts +9 -0
package/dist/telemetry/concern-classifier.d.ts.map +1 -0
package/dist/telemetry/concern-classifier.js +21 -0
package/dist/telemetry/concern-classifier.js.map +1 -0
package/dist/telemetry/consent-rules.d.ts +17 -0
package/dist/telemetry/consent-rules.d.ts.map +1 -0
package/dist/telemetry/consent-rules.js +32 -0
package/dist/telemetry/consent-rules.js.map +1 -0
package/dist/telemetry/event-builder.d.ts +23 -0
package/dist/telemetry/event-builder.d.ts.map +1 -0
package/dist/telemetry/event-builder.js +321 -0
package/dist/telemetry/event-builder.js.map +1 -0
package/dist/telemetry/types.d.ts +1870 -0
package/dist/telemetry/types.d.ts.map +1 -0
package/dist/telemetry/types.js +373 -0
package/dist/telemetry/types.js.map +1 -0
package/dist/types.d.ts +81 -2
package/dist/types.d.ts.map +1 -1
package/dist/types.js +18 -2
package/dist/types.js.map +1 -1
package/package.json +13 -1

package/dist/run-tasks/reviewed-lifecycle.js CHANGED Viewed

@@ -5,6 +5,7 @@ import { createProvider } from '../provider.js';
 import { delegateWithEscalation } from '../delegate-with-escalation.js';
 import { pickEscalation, pickReviewer, maxRowsFor, } from '../escalation/policy.js';
 import { runWithFallback, makeSyntheticRunResult, TRANSPORT_FAILURES, isReviewTransportFailure, } from '../escalation/fallback.js';
+import { findModelCapabilities } from '../routing/model-profiles.js';
 import { HeartbeatTimer } from '../heartbeat.js';
 import { runSpecReview } from '../review/spec-reviewer.js';
 import { makeSkippedReviewResult } from '../review/skipped-result.js';
@@ -22,7 +23,63 @@ import { buildFallbackImplReport, readImplementerFileContents } from './fallback
 import { composeVerboseLine, toVerboseFields } from '../diagnostics/verbose-line.js';
 import { withDoneCondition } from './execute-task.js';
 const exec = promisify(execFile);
-export async function executeReviewedLifecycle(task, resolved, config, taskIndex, onProgress, heartbeatWiring, diagnostics) {
+export function emptyStats() {
+    return {
+        implementing: { stage: 'implementing', entered: false, durationMs: null, costUSD: null, agentTier: null, modelFamily: null, model: null },
+        spec_rework: { stage: 'spec_rework', entered: false, durationMs: null, costUSD: null, agentTier: null, modelFamily: null, model: null },
+        quality_rework: { stage: 'quality_rework', entered: false, durationMs: null, costUSD: null, agentTier: null, modelFamily: null, model: null },
+        committing: { stage: 'committing', entered: false, durationMs: null, costUSD: null, agentTier: null, modelFamily: null, model: null },
+        verifying: { stage: 'verifying', entered: false, durationMs: null, costUSD: null, agentTier: null, modelFamily: null, model: null, outcome: null, skipReason: null },
+        spec_review: { stage: 'spec_review', entered: false, durationMs: null, costUSD: null, agentTier: null, modelFamily: null, model: null, verdict: null, roundsUsed: null },
+        quality_review: { stage: 'quality_review', entered: false, durationMs: null, costUSD: null, agentTier: null, modelFamily: null, model: null, verdict: null, roundsUsed: null },
+        diff_review: { stage: 'diff_review', entered: false, durationMs: null, costUSD: null, agentTier: null, modelFamily: null, model: null, verdict: null, roundsUsed: null },
+    };
+}
+function modelFamily(model) {
+    const dash = model.indexOf('-');
+    return dash > 0 ? model.slice(0, dash) : model;
+}
+export function endBaseStage(stats, name, t0, c0, agent, finalCostUSD) {
+    // Cast through unknown — TS can't narrow stats[name] on a union-typed index;
+    // the runtime invariant (set name's slot to its matching variant) is enforced
+    // by the helper signature and tested by tests/run-tasks/stage-stats.test.ts.
+    stats[name] = {
+        stage: name,
+        entered: true,
+        durationMs: Date.now() - t0,
+        costUSD: finalCostUSD !== null && c0 !== null ? finalCostUSD - c0 : null,
+        agentTier: agent.tier,
+        modelFamily: modelFamily(agent.model),
+        model: agent.model,
+    };
+}
+export function endReviewStage(stats, name, t0, c0, agent, finalCostUSD, verdict, roundsUsed) {
+    stats[name] = {
+        stage: name,
+        entered: true,
+        durationMs: Date.now() - t0,
+        costUSD: finalCostUSD !== null && c0 !== null ? finalCostUSD - c0 : null,
+        agentTier: agent.tier,
+        modelFamily: modelFamily(agent.model),
+        model: agent.model,
+        verdict,
+        roundsUsed,
+    };
+}
+export function endVerifyStage(stats, t0, c0, agent, finalCostUSD, outcome, skipReason) {
+    stats.verifying = {
+        stage: 'verifying',
+        entered: true,
+        durationMs: Date.now() - t0,
+        costUSD: finalCostUSD !== null && c0 !== null ? finalCostUSD - c0 : null,
+        agentTier: agent.tier,
+        modelFamily: modelFamily(agent.model),
+        model: agent.model,
+        outcome,
+        skipReason,
+    };
+}
+export async function executeReviewedLifecycle(task, resolved, config, taskIndex, onProgress, heartbeatWiring, diagnostics, recorder, _route, _client, _triggeringSkill) {
     const reviewPolicy = task.reviewPolicy ?? 'full';
     const otherSlot = resolved.slot === 'standard' ? 'complex' : 'standard';
     let escalationProvider;
@@ -123,6 +180,20 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
         tick_ms: heartbeat ? 5000 : undefined,
         reason: heartbeat ? undefined : 'no_consumer',
     });
+    // Stall watchdog: poll every 5s; abort if no runner event has fired for
+    // stallTimeoutMs. Stops at lifecycle exit (cleared in the finally block
+    // at the end of executeReviewedLifecycle — see end-of-function teardown).
+    const stallWatchdogInterval = setInterval(() => {
+        if (stallFired)
+            return;
+        const idleMs = Date.now() - lastRunnerEventAtMs;
+        if (idleMs >= stallTimeoutMs) {
+            stallFired = true;
+            emitTaskEvent('stall_abort', { idle_ms: idleMs, threshold_ms: stallTimeoutMs });
+            stallController.abort();
+        }
+    }, 5000);
+    stallWatchdogInterval.unref?.();
     const implModel = resolved.provider.config.model;
     const progressCounters = { filesRead: 0, filesWritten: 0, toolCalls: 0 };
     const verboseStream = verboseStreamRaw;
@@ -133,8 +204,12 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
     // the caller passed onProgress, so --verbose + HTTP handlers (which don't
     // pass onProgress) silently dropped every tool_call / turn_complete event.
     let textEmissionChars = 0;
+    const markRunnerEvent = () => { lastRunnerEventAtMs = Date.now(); };
     const wrappedOnProgress = needHeartbeat
         ? (event) => {
+            if (event.kind === 'turn_start' || event.kind === 'text_emission' || event.kind === 'tool_call' || event.kind === 'turn_complete') {
+                markRunnerEvent();
+            }
             if (event.kind === 'turn_start') {
                 heartbeat?.markEvent('llm');
                 if (verbose)
@@ -143,6 +218,7 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
                     emitTaskEvent('turn_start', {
                         turn: event.turn,
                         provider: event.provider,
+                        model: event.model,
                     });
                 }
             }
@@ -205,6 +281,21 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
         : undefined;
     const cwd = task.cwd ?? process.cwd();
     const taskStartMs = Date.now();
+    // Hard task-level wall-clock cap. Once Date.now() crosses this, no new
+    // provider.run is dispatched (retries / tier-fallback short-circuit) and
+    // any in-flight call gets a per-call timeoutMs clamped to remaining
+    // budget so it returns its salvage promptly. The user gets *something*
+    // back instead of an open-ended retry storm.
+    const taskTimeoutMs = task.timeoutMs ?? config.defaults.timeoutMs ?? 1_800_000;
+    const taskDeadlineMs = taskStartMs + taskTimeoutMs;
+    // Stall watchdog: when no LLM / tool / text event has fired for this
+    // many ms, the in-flight runner is force-aborted via `stallController`.
+    // Catches "model is silently thinking forever" and "transport hung" —
+    // both invisible to the wall-clock cap until the very end.
+    const stallTimeoutMs = config.defaults.stallTimeoutMs ?? 600_000;
+    const stallController = new AbortController();
+    let lastRunnerEventAtMs = taskStartMs;
+    let stallFired = false;
     const commits = [];
     let commitError;
     let specAttemptIndex = 0;
@@ -223,6 +314,15 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
     let lastNonRejectedImpl;
     const reviewRounds = () => ({ spec: specAttemptIndex, quality: qualityAttemptIndex, metadata: metadataRepair, cap: Math.max(maxSpecRows, maxQualityRows) });
     const taskCostUSD = () => (heartbeat ? heartbeat.getHeartbeatTickInfo().costUSD : null);
+    // Per-stage stats tracking
+    const stats = emptyStats();
+    const resolvedModel = config.agents[resolved.slot].model;
+    const implementerAgentInfo = {
+        tier: resolved.slot,
+        family: modelFamily(resolvedModel),
+        model: resolvedModel,
+    };
+    const runningCostUSD = () => taskCostUSD();
     const policyEscalated = { spec: false, quality: false, diff: false };
     const emitFallback = (p) => {
         diagnostics?.logger?.fallback(p);
@@ -259,17 +359,23 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
             reviewRounds: reviewRounds(),
             error: `runWithFallback: both tiers unavailable (loop=${loop}, attempt=${attempt}, role=implementer)`,
             agents: agentEnvelope(specReviewerHistory[specReviewerHistory.length - 1] ?? 'not_applicable', qualityReviewerHistory[qualityReviewerHistory.length - 1] ?? (reviewPolicy === 'full' ? 'not_applicable' : 'skipped')),
+            stageStats: stats,
         };
     }
     function reviewDidNotReject(status) {
         return status === 'approved' || status === 'skipped';
     }
+    const implementerToolMode = task.tools ?? config.defaults.tools;
+    const agentConfig = config.agents[resolved.slot];
+    const implementerCapabilities = (agentConfig.capabilities ?? findModelCapabilities(agentConfig.model) ?? []);
     const agentEnvelope = (specReviewer, qualityReviewer) => {
         const selectedImpl = latestAttemptedImpl ?? lastNonRejectedImpl;
         const implementer = selectedImpl?.tier ?? resolved.slot;
         return {
             implementer,
             ...(implementerHistory.length > 1 || implementerHistory.some(t => t !== implementer) ? { implementerHistory } : {}),
+            implementerToolMode,
+            implementerCapabilities,
             specReviewer,
             ...(specReviewerHistory.length > 0 && (specReviewerHistory.length > 1 || specReviewerHistory.some(t => t === 'skipped')) ? { specReviewerHistory } : {}),
             qualityReviewer,
@@ -287,16 +393,15 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
         specReviewStatus: aborting === 'spec' ? 'changes_required' : (base.specReviewStatus ?? 'approved'),
         qualityReviewStatus: aborting === 'quality' ? 'changes_required' : (base.qualityReviewStatus ?? 'skipped'),
         agents: agentEnvelope(specReviewerHistory[specReviewerHistory.length - 1] ?? 'not_applicable', qualityReviewerHistory[qualityReviewerHistory.length - 1] ?? (reviewPolicy === 'full' ? 'not_applicable' : 'skipped')),
+        stageStats: stats,
     });
     const defaultVerification = { status: 'skipped', steps: [], totalDurationMs: 0, skipReason: 'no_command' };
     let latestVerification = defaultVerification;
     async function runVerificationStage() {
-        emitTaskEvent('stage_change', { from: 'committing', to: 'verifying' });
-        heartbeat?.transition({
-            stage: 'verifying',
-            stageIndex: 4,
-            reviewRound: undefined,
-        });
+        emitTaskEvent('stage_change', { from: 'implementing', to: 'verifying' });
+        heartbeat?.setStage('verifying', 4);
+        const overallVerificationStart = Date.now();
+        const verifyCostStart = runningCostUSD();
         const verification = await runVerifyStage({
             cwd,
             verifyCommand: task.verifyCommand,
@@ -304,6 +409,10 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
             taskStartMs,
         });
         latestVerification = verification;
+        endVerifyStage(stats, overallVerificationStart, verifyCostStart, implementerAgentInfo, runningCostUSD(), verification.status === 'passed' ? 'passed'
+            : verification.status === 'failed' ? 'failed'
+                : verification.status === 'skipped' ? 'skipped'
+                    : 'not_applicable', verification.skipReason ?? null);
         for (const step of verification.steps) {
             emitTaskEvent('verify_step', {
                 command: step.command,
@@ -354,7 +463,7 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
         });
     }
     function withVerification(result, verification = latestVerification) {
-        return signalize({ ...result, verification });
+        return signalize({ ...result, verification, stageStats: stats });
     }
     function verificationErrorResult(base, verification) {
         if (verification.status !== 'error')
@@ -405,6 +514,7 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
             commits,
             commitError,
             verification,
+            stageStats: stats,
         }, verification);
     }
     function resolveDiffOnlyTerminal(base, verdict, verification, diffTruncated) {
@@ -520,8 +630,12 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
             const validCommit = implReport?.commit ?? await repairCommitMetadata(implReport?.commitDiagnostic ?? 'no commit block emitted');
             if (!validCommit)
                 return;
+            heartbeat?.setStage('committing', 7);
+            const commitT0 = Date.now();
+            const commitC0 = runningCostUSD();
             const c = await runCommitStage({ cwd, filesWritten: implResult.filesWritten, commit: validCommit });
             commits.push(c);
+            endBaseStage(stats, 'committing', commitT0, commitC0, implementerAgentInfo, runningCostUSD());
         }
     }
     try {
@@ -557,6 +671,8 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
             attemptIndex: 0,
             baseTier: resolved.slot,
         });
+        const implT0 = Date.now();
+        const implC0 = runningCostUSD();
         const initialImpl = await runWithFallback({
             assigned: initialDecision.impl,
             providerFor,
@@ -564,7 +680,7 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
             isTransportFailure: (r) => TRANSPORT_FAILURES.has(r.status) && r.capExhausted === undefined,
             getStatus: (r) => r.status,
             makeSyntheticFailure: (assigned) => makeSyntheticRunResult(assigned, 'all_tiers_unavailable'),
-            call: (provider) => delegateWithEscalation(withDoneCondition(task), [provider], { explicitlyPinned: false, onProgress: wrappedOnProgress }),
+            call: (provider) => delegateWithEscalation(withDoneCondition(task), [provider], { explicitlyPinned: false, onProgress: wrappedOnProgress, taskDeadlineMs, abortSignal: stallController.signal }),
         });
         if (initialImpl.fallbackFired || initialImpl.bothUnavailable) {
             fallbackOverrides.push({
@@ -602,6 +718,7 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
         latestAttemptedImpl = { tier: initialImpl.usedTier, result: implResult };
         lastNonRejectedImpl = { tier: initialImpl.usedTier, result: implResult };
         implementerHistory.push(initialImpl.usedTier);
+        endBaseStage(stats, 'implementing', implT0, implC0, implementerAgentInfo, runningCostUSD());
         specAttemptIndex = 1;
         const implReport = implResult.status === 'ok' ? parseStructuredReport(implResult.output) : undefined;
         const workerStatus = extractWorkerStatus(implReport);
@@ -673,6 +790,7 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
                 commits,
                 commitError,
                 verification,
+                stageStats: stats,
             };
         }
         if (workerStatus === 'needs_context' || workerStatus === 'blocked') {
@@ -693,6 +811,7 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
                 commits,
                 commitError,
                 verification,
+                stageStats: stats,
             };
         }
         if (reviewPolicy === 'off') {
@@ -730,7 +849,11 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
             const diffUnavailable = new Map();
             const diffReviewerTier = pickReviewer({ loop: 'spec', attemptIndex: 0, baseTier: resolved.slot });
             emitTaskEvent('stage_change', { from: 'verifying', to: 'diff_review' });
+            const diffReviewT0 = Date.now();
+            const diffReviewC0 = runningCostUSD();
             heartbeat?.transition({ stage: 'diff_review', stageIndex: 2, reviewRound: 1, attemptCap: 1 });
+            const diffReviewT0_commit = Date.now();
+            const diffReviewC0_commit = runningCostUSD();
             const diffCall = await runWithFallback({
                 assigned: diffReviewerTier,
                 providerFor,
@@ -749,6 +872,15 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
             }
             const verdict = diffCall.bothUnavailable || isReviewTransportFailure(diffCall.result) ? makeSkippedReviewResult('all_tiers_unavailable') : diffCall.result;
             emitTaskEvent('review_decision', { stage: 'diff_review', verdict: 'kind' in verdict ? verdict.kind : 'skipped', round: 1 });
+            endReviewStage(stats, 'diff_review', diffReviewT0_commit, diffReviewC0_commit, implementerAgentInfo, runningCostUSD(),
+            // Diff review uses 'approve' | 'concerns' | 'reject' | 'transport_failure' (DiffReviewVerdict),
+            // distinct from spec/quality verdicts. Map to the telemetry verdict enum here.
+            'kind' in verdict
+                ? (verdict.kind === 'approve' ? 'approved'
+                    : verdict.kind === 'concerns' ? 'concerns'
+                        : verdict.kind === 'reject' ? 'changes_required'
+                            : 'error')
+                : 'skipped', 0);
             return resolveDiffOnlyTerminal({
                 ...implResult,
                 workerStatus,
@@ -770,6 +902,8 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
         let specReviewReason;
         heartbeat?.transition({ stage: 'spec_review', stageIndex: 2, reviewRound: 1, attemptCap: maxSpecRows });
         const initialReviewerTier = pickReviewer({ loop: 'spec', attemptIndex: 0, baseTier: resolved.slot });
+        const specReviewT0 = Date.now();
+        const specReviewC0 = runningCostUSD();
         const initialSpecReview = await runWithFallback({
             assigned: initialReviewerTier,
             providerFor,
@@ -813,7 +947,7 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
             heartbeat?.transition({ stage: 'spec_rework', stageIndex: 3, reviewRound: specAttemptIndex, attemptCap: maxSpecRows });
             const feedback = specResult.findings.length > 0 ? `\n\n## Spec Review Feedback (round ${specAttemptIndex}):\n${specResult.findings.map(f => `- ${f}`).join('\n')}` : '';
             const reworkTask = withDoneCondition({ ...task, prompt: `${task.prompt}${feedback}` });
-            const reworkCall = await runWithFallback({ assigned: decision.impl, providerFor, unavailableTiers: specUnavailable, isTransportFailure: (r) => TRANSPORT_FAILURES.has(r.status) && r.capExhausted === undefined, getStatus: (r) => r.status, makeSyntheticFailure: (assigned) => makeSyntheticRunResult(assigned, 'all_tiers_unavailable'), call: (provider) => delegateWithEscalation(reworkTask, [provider], { explicitlyPinned: true, onProgress: wrappedOnProgress }) });
+            const reworkCall = await runWithFallback({ assigned: decision.impl, providerFor, unavailableTiers: specUnavailable, isTransportFailure: (r) => TRANSPORT_FAILURES.has(r.status) && r.capExhausted === undefined, getStatus: (r) => r.status, makeSyntheticFailure: (assigned) => makeSyntheticRunResult(assigned, 'all_tiers_unavailable'), call: (provider) => delegateWithEscalation(reworkTask, [provider], { explicitlyPinned: true, onProgress: wrappedOnProgress, taskDeadlineMs, abortSignal: stallController.signal }) });
             if (reworkCall.fallbackFired || reworkCall.bothUnavailable)
                 fallbackOverrides.push({ role: 'implementer', loop: 'spec', attempt: specAttemptIndex, assigned: decision.impl, used: reworkCall.usedTier, reason: (reworkCall.fallbackReason ?? reworkCall.unavailableReason), triggeringStatus: reworkCall.fallbackTriggeringStatus, bothUnavailable: reworkCall.bothUnavailable });
             if (reworkCall.fallbackFired) {
@@ -863,10 +997,18 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
             prevSpecFindings = [...(specResult.findings ?? [])];
         }
         let qualityResult = { status: 'skipped', report: undefined, findings: [], errorReason: reviewPolicy === 'full' ? 'all_tiers_unavailable' : 'skipped: reviewPolicy is spec_only' };
+        // Hoisted so the endReviewStage call after this block can read them. When the
+        // quality review is skipped (`reviewPolicy !== 'full'`) they stay at 0/null.
+        // NOTE(review): that endReviewStage call looks unconditional, so a skipped review
+        // would be recorded as entered with durationMs measured from 0 — confirm and guard.
+        let qualityReviewT0 = 0;
+        let qualityReviewC0 = null;
         if (reviewPolicy === 'full') {
             qualityUnavailable = new Map();
             const qualityReviewerTier = pickReviewer({ loop: 'quality', attemptIndex: 0, baseTier: resolved.slot });
             heartbeat?.transition({ stage: 'quality_review', stageIndex: 4, reviewRound: 1, attemptCap: maxQualityRows });
+            qualityReviewT0 = Date.now();
+            qualityReviewC0 = runningCostUSD();
             const initialQuality = await runWithFallback({ assigned: qualityReviewerTier, providerFor, unavailableTiers: qualityUnavailable, isTransportFailure: (r) => isReviewTransportFailure(r), getStatus: (r) => r.status, makeSyntheticFailure: () => makeSkippedReviewResult('all_tiers_unavailable'), call: (provider) => runQualityReview(provider, packet, specReport ?? finalImplReport, fileContents, finalImplResult.toolCalls, finalImplResult.filesWritten, evidence.block) });
             if (initialQuality.bothUnavailable) {
                 emitFallbackUnavailable({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'quality', attempt: 0, role: 'qualityReviewer', assignedTier: qualityReviewerTier, reason: initialQuality.unavailableReason });
@@ -898,7 +1040,7 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
                 heartbeat?.transition({ stage: 'quality_rework', stageIndex: 5, reviewRound: qualityAttemptIndex, attemptCap: maxQualityRows });
                 const feedback = qualityResult.findings.length > 0 ? `\n\n## Quality Review Feedback (round ${qualityAttemptIndex}):\n${qualityResult.findings.map(f => `- ${f}`).join('\n')}` : '';
                 const reworkTask = withDoneCondition({ ...task, prompt: `${task.prompt}${feedback}` });
-                const reworkCall = await runWithFallback({ assigned: decision.impl, providerFor, unavailableTiers: qualityUnavailable, isTransportFailure: (r) => TRANSPORT_FAILURES.has(r.status) && r.capExhausted === undefined, getStatus: (r) => r.status, makeSyntheticFailure: (assigned) => makeSyntheticRunResult(assigned, 'all_tiers_unavailable'), call: (provider) => delegateWithEscalation(reworkTask, [provider], { explicitlyPinned: true, onProgress: wrappedOnProgress }) });
+                const reworkCall = await runWithFallback({ assigned: decision.impl, providerFor, unavailableTiers: qualityUnavailable, isTransportFailure: (r) => TRANSPORT_FAILURES.has(r.status) && r.capExhausted === undefined, getStatus: (r) => r.status, makeSyntheticFailure: (assigned) => makeSyntheticRunResult(assigned, 'all_tiers_unavailable'), call: (provider) => delegateWithEscalation(reworkTask, [provider], { explicitlyPinned: true, onProgress: wrappedOnProgress, taskDeadlineMs, abortSignal: stallController.signal }) });
                 if (reworkCall.fallbackFired || reworkCall.bothUnavailable)
                     fallbackOverrides.push({ role: 'implementer', loop: 'quality', attempt: qualityAttemptIndex, assigned: decision.impl, used: reworkCall.usedTier, reason: (reworkCall.fallbackReason ?? reworkCall.unavailableReason), triggeringStatus: reworkCall.fallbackTriggeringStatus, bothUnavailable: reworkCall.bothUnavailable });
                 if (reworkCall.fallbackFired)
@@ -962,7 +1104,16 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
             });
         }
         const specAggregateStatus = (['approved', 'changes_required', 'skipped', 'error', 'api_error', 'network_error', 'timeout'].includes(specStatus) ? specStatus : 'error');
+        endReviewStage(stats, 'spec_review', specReviewT0, specReviewC0, implementerAgentInfo, runningCostUSD(), specStatus === 'approved' ? 'approved'
+            : specStatus === 'changes_required' ? 'changes_required'
+                : specStatus === 'skipped' ? 'skipped'
+                    : specStatus === 'not_applicable' ? 'not_applicable'
+                        : 'error', specAttemptIndex - 1);
         const qualityAggregateStatus = qualityResult.status;
+        endReviewStage(stats, 'quality_review', qualityReviewT0, qualityReviewC0, implementerAgentInfo, runningCostUSD(), qualityResult.status === 'approved' ? 'approved'
+            : qualityResult.status === 'changes_required' ? 'changes_required'
+                : qualityResult.status === 'skipped' ? 'skipped'
+                    : 'error', qualityAttemptIndex - 1);
         const aggregated = aggregateResult(finalReport, specReport, qualityResult.report, specAggregateStatus, qualityAggregateStatus);
         // File artifact verification: check whether output targets exist on disk after all work.
         // Only applies when status is ok; non-ok statuses skip verification entirely.
@@ -979,13 +1130,14 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
                 : finalImplResult.status;
         const specEnvelopeStatus = (specStatus === 'api_error' || specStatus === 'network_error' || specStatus === 'timeout' ? 'error' : specStatus);
         const qualityEnvelopeStatus = qualityResult.status === 'api_error' || qualityResult.status === 'network_error' || qualityResult.status === 'timeout' ? 'error' : qualityResult.status;
-        return {
+        const runResult = {
             ...finalImplResult,
             status: finalStatus,
             workerStatus: finalWorkerStatus,
             concerns,
             specReviewStatus: specEnvelopeStatus,
             qualityReviewStatus: qualityEnvelopeStatus,
+            stageStats: stats,
             specReviewReason: 'errorReason' in specResult ? specResult.errorReason : undefined,
             qualityReviewReason: 'errorReason' in qualityResult ? qualityResult.errorReason : undefined,
             structuredReport: aggregated,
@@ -1004,12 +1156,38 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
             commitError,
             verification,
         };
+        try {
+            recorder?.recordTaskCompleted({
+                route: _route ?? 'delegate',
+                taskSpec: task,
+                runResult,
+                client: _client ?? 'claude-code',
+                triggeringSkill: _triggeringSkill ?? 'direct',
+                parentModel: task.parentModel ?? null,
+            });
+        }
+        catch { /* silent — bedrock invariant */ }
+        return runResult;
     }
     catch (err) {
-        return withVerification(workerErrorResult(err));
+        const errorRunResult = withVerification(workerErrorResult(err));
+        try {
+            recorder?.recordTaskCompleted({
+                route: _route ?? 'delegate',
+                taskSpec: task,
+                runResult: errorRunResult,
+                client: _client ?? 'claude-code',
+                triggeringSkill: _triggeringSkill ?? 'direct',
+                parentModel: task.parentModel ?? null,
+            });
+        }
+        catch { /* silent — bedrock invariant */ }
+        return errorRunResult;
     }
     finally {
+        heartbeat?.setStage('terminal', 8);
         heartbeat?.stop();
+        clearInterval(stallWatchdogInterval);
     }
 }
 //# sourceMappingURL=reviewed-lifecycle.js.map