@zhixuan92/multi-model-agent-core 3.12.0 → 3.12.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (115)
  1. package/README.md +1 -1
  2. package/dist/cost/compute.d.ts +33 -0
  3. package/dist/cost/compute.d.ts.map +1 -0
  4. package/dist/cost/compute.js +67 -0
  5. package/dist/cost/compute.js.map +1 -0
  6. package/dist/cost/rollup.d.ts +18 -0
  7. package/dist/cost/rollup.d.ts.map +1 -0
  8. package/dist/cost/rollup.js +43 -0
  9. package/dist/cost/rollup.js.map +1 -0
  10. package/dist/diagnostics/types.d.ts +1 -1
  11. package/dist/diagnostics/types.d.ts.map +1 -1
  12. package/dist/escalation/fallback.d.ts +7 -1
  13. package/dist/escalation/fallback.d.ts.map +1 -1
  14. package/dist/escalation/fallback.js +39 -4
  15. package/dist/escalation/fallback.js.map +1 -1
  16. package/dist/executors/execute-plan.d.ts.map +1 -1
  17. package/dist/executors/execute-plan.js +1 -0
  18. package/dist/executors/execute-plan.js.map +1 -1
  19. package/dist/executors/types.d.ts +4 -0
  20. package/dist/executors/types.d.ts.map +1 -1
  21. package/dist/heartbeat.d.ts +7 -0
  22. package/dist/heartbeat.d.ts.map +1 -1
  23. package/dist/heartbeat.js +18 -5
  24. package/dist/heartbeat.js.map +1 -1
  25. package/dist/intake/compilers/delegate.d.ts +1 -0
  26. package/dist/intake/compilers/delegate.d.ts.map +1 -1
  27. package/dist/intake/compilers/delegate.js +1 -0
  28. package/dist/intake/compilers/delegate.js.map +1 -1
  29. package/dist/intake/compilers/execute-plan.d.ts +1 -0
  30. package/dist/intake/compilers/execute-plan.d.ts.map +1 -1
  31. package/dist/intake/compilers/execute-plan.js +1 -0
  32. package/dist/intake/compilers/execute-plan.js.map +1 -1
  33. package/dist/intake/resolve.d.ts.map +1 -1
  34. package/dist/intake/resolve.js +1 -0
  35. package/dist/intake/resolve.js.map +1 -1
  36. package/dist/intake/types.d.ts +1 -0
  37. package/dist/intake/types.d.ts.map +1 -1
  38. package/dist/model-profiles.json +6 -6
  39. package/dist/observability/events.d.ts +9 -3
  40. package/dist/observability/events.d.ts.map +1 -1
  41. package/dist/observability/events.js +1 -1
  42. package/dist/observability/events.js.map +1 -1
  43. package/dist/provider.d.ts.map +1 -1
  44. package/dist/provider.js +46 -1
  45. package/dist/provider.js.map +1 -1
  46. package/dist/review/diff-review.js +7 -8
  47. package/dist/review/diff-review.js.map +1 -1
  48. package/dist/review/quality-reviewer.d.ts +15 -1
  49. package/dist/review/quality-reviewer.d.ts.map +1 -1
  50. package/dist/review/quality-reviewer.js +12 -7
  51. package/dist/review/quality-reviewer.js.map +1 -1
  52. package/dist/review/reviewer-prompt.d.ts +6 -2
  53. package/dist/review/reviewer-prompt.d.ts.map +1 -1
  54. package/dist/review/reviewer-prompt.js +23 -15
  55. package/dist/review/reviewer-prompt.js.map +1 -1
  56. package/dist/review/spec-reviewer.d.ts.map +1 -1
  57. package/dist/review/spec-reviewer.js +5 -4
  58. package/dist/review/spec-reviewer.js.map +1 -1
  59. package/dist/routing/canonical-model.d.ts +7 -0
  60. package/dist/routing/canonical-model.d.ts.map +1 -0
  61. package/dist/routing/canonical-model.js +11 -0
  62. package/dist/routing/canonical-model.js.map +1 -0
  63. package/dist/routing/model-profiles.d.ts +4 -3
  64. package/dist/routing/model-profiles.d.ts.map +1 -1
  65. package/dist/routing/model-profiles.js +12 -6
  66. package/dist/routing/model-profiles.js.map +1 -1
  67. package/dist/run-tasks/index.d.ts +2 -0
  68. package/dist/run-tasks/index.d.ts.map +1 -1
  69. package/dist/run-tasks/index.js.map +1 -1
  70. package/dist/run-tasks/reviewed-lifecycle.d.ts +2 -0
  71. package/dist/run-tasks/reviewed-lifecycle.d.ts.map +1 -1
  72. package/dist/run-tasks/reviewed-lifecycle.js +270 -102
  73. package/dist/run-tasks/reviewed-lifecycle.js.map +1 -1
  74. package/dist/run-tasks/worker-status.d.ts +6 -0
  75. package/dist/run-tasks/worker-status.d.ts.map +1 -1
  76. package/dist/run-tasks/worker-status.js +6 -0
  77. package/dist/run-tasks/worker-status.js.map +1 -1
  78. package/dist/runners/base/result-builders.d.ts +2 -0
  79. package/dist/runners/base/result-builders.d.ts.map +1 -1
  80. package/dist/runners/base/result-builders.js +2 -0
  81. package/dist/runners/base/result-builders.js.map +1 -1
  82. package/dist/runners/base/usage-accumulator.d.ts +2 -1
  83. package/dist/runners/base/usage-accumulator.d.ts.map +1 -1
  84. package/dist/runners/base/usage-accumulator.js +13 -10
  85. package/dist/runners/base/usage-accumulator.js.map +1 -1
  86. package/dist/runners/claude-runner.d.ts +11 -1
  87. package/dist/runners/claude-runner.d.ts.map +1 -1
  88. package/dist/runners/claude-runner.js +141 -27
  89. package/dist/runners/claude-runner.js.map +1 -1
  90. package/dist/runners/codex-runner.d.ts +10 -1
  91. package/dist/runners/codex-runner.d.ts.map +1 -1
  92. package/dist/runners/codex-runner.js +129 -29
  93. package/dist/runners/codex-runner.js.map +1 -1
  94. package/dist/runners/openai-runner.d.ts +41 -1
  95. package/dist/runners/openai-runner.d.ts.map +1 -1
  96. package/dist/runners/openai-runner.js +146 -48
  97. package/dist/runners/openai-runner.js.map +1 -1
  98. package/dist/runners/types.d.ts +19 -1
  99. package/dist/runners/types.d.ts.map +1 -1
  100. package/dist/telemetry/event-builder.d.ts +2 -2
  101. package/dist/telemetry/event-builder.d.ts.map +1 -1
  102. package/dist/telemetry/event-builder.js +111 -30
  103. package/dist/telemetry/event-builder.js.map +1 -1
  104. package/dist/telemetry/field-coverage.d.ts.map +1 -1
  105. package/dist/telemetry/field-coverage.js +17 -7
  106. package/dist/telemetry/field-coverage.js.map +1 -1
  107. package/dist/telemetry/types.d.ts +488 -233
  108. package/dist/telemetry/types.d.ts.map +1 -1
  109. package/dist/telemetry/types.js +86 -37
  110. package/dist/telemetry/types.js.map +1 -1
  111. package/dist/types.d.ts +14 -26
  112. package/dist/types.d.ts.map +1 -1
  113. package/dist/types.js +0 -73
  114. package/dist/types.js.map +1 -1
  115. package/package.json +1 -1
@@ -1,6 +1,5 @@
1
1
  import { execFile } from 'node:child_process';
2
2
  import { promisify } from 'node:util';
3
- import { computeCostUSD } from '../types.js';
4
3
  import { createProvider } from '../provider.js';
5
4
  import { delegateWithEscalation } from '../delegate-with-escalation.js';
6
5
  import { pickEscalation, pickReviewer, maxRowsFor, } from '../escalation/policy.js';
@@ -9,6 +8,7 @@ import { findModelCapabilities, findModelProfile } from '../routing/model-profil
9
8
  import { canonicalIdentity } from '../routing/canonical-model-identity.js';
10
9
  import { HeartbeatTimer } from '../heartbeat.js';
11
10
  import { newStageIdleTracker, snapshotIdle } from './stage-idle-tracker.js';
11
+ import { priceTokens, subtractTokens, resolveRateCard } from '../cost/compute.js';
12
12
  import { DEFAULT_TASK_TIMEOUT_MS, DEFAULT_STALL_TIMEOUT_MS, MAX_TIME_PRESTOP_RATIO } from '../config/schema.js';
13
13
  import { runSpecReview } from '../review/spec-reviewer.js';
14
14
  import { makeSkippedReviewResult } from '../review/skipped-result.js';
@@ -33,14 +33,14 @@ const READ_ONLY_TOOL_NAMES = new Set([
33
33
  const _emptyMetrics = { inputTokens: null, outputTokens: null, cachedTokens: null, reasoningTokens: null, turnCount: null, toolCallCount: null, filesReadCount: null, filesWrittenCount: null };
34
34
  export function emptyStats() {
35
35
  return {
36
- implementing: { stage: 'implementing', entered: false, durationMs: null, costUSD: null, agentTier: null, modelFamily: null, model: null, maxIdleMs: null, totalIdleMs: null, activityEvents: null, ..._emptyMetrics },
37
- spec_rework: { stage: 'spec_rework', entered: false, durationMs: null, costUSD: null, agentTier: null, modelFamily: null, model: null, maxIdleMs: null, totalIdleMs: null, activityEvents: null, ..._emptyMetrics },
38
- quality_rework: { stage: 'quality_rework', entered: false, durationMs: null, costUSD: null, agentTier: null, modelFamily: null, model: null, maxIdleMs: null, totalIdleMs: null, activityEvents: null, ..._emptyMetrics },
39
- committing: { stage: 'committing', entered: false, durationMs: null, costUSD: null, agentTier: null, modelFamily: null, model: null, maxIdleMs: null, totalIdleMs: null, activityEvents: null, ..._emptyMetrics },
40
- verifying: { stage: 'verifying', entered: false, durationMs: null, costUSD: null, agentTier: null, modelFamily: null, model: null, maxIdleMs: null, totalIdleMs: null, activityEvents: null, outcome: null, skipReason: null, ..._emptyMetrics },
41
- spec_review: { stage: 'spec_review', entered: false, durationMs: null, costUSD: null, agentTier: null, modelFamily: null, model: null, maxIdleMs: null, totalIdleMs: null, activityEvents: null, verdict: null, roundsUsed: null, ..._emptyMetrics },
42
- quality_review: { stage: 'quality_review', entered: false, durationMs: null, costUSD: null, agentTier: null, modelFamily: null, model: null, maxIdleMs: null, totalIdleMs: null, activityEvents: null, verdict: null, roundsUsed: null, ..._emptyMetrics },
43
- diff_review: { stage: 'diff_review', entered: false, durationMs: null, costUSD: null, agentTier: null, modelFamily: null, model: null, maxIdleMs: null, totalIdleMs: null, activityEvents: null, verdict: null, roundsUsed: null, ..._emptyMetrics },
36
+ implementing: { stage: 'implementing', entered: false, durationMs: null, costUSD: null, agentTier: null, modelFamily: null, model: null, maxIdleMs: 0, totalIdleMs: 0, activityEvents: 0, ..._emptyMetrics },
37
+ spec_rework: { stage: 'spec_rework', entered: false, durationMs: null, costUSD: null, agentTier: null, modelFamily: null, model: null, maxIdleMs: 0, totalIdleMs: 0, activityEvents: 0, ..._emptyMetrics },
38
+ quality_rework: { stage: 'quality_rework', entered: false, durationMs: null, costUSD: null, agentTier: null, modelFamily: null, model: null, maxIdleMs: 0, totalIdleMs: 0, activityEvents: 0, ..._emptyMetrics },
39
+ committing: { stage: 'committing', entered: false, durationMs: null, costUSD: null, agentTier: null, modelFamily: null, model: null, maxIdleMs: 0, totalIdleMs: 0, activityEvents: 0, ..._emptyMetrics },
40
+ verifying: { stage: 'verifying', entered: false, durationMs: null, costUSD: null, agentTier: null, modelFamily: null, model: null, maxIdleMs: 0, totalIdleMs: 0, activityEvents: 0, outcome: null, skipReason: null, ..._emptyMetrics },
41
+ spec_review: { stage: 'spec_review', entered: false, durationMs: null, costUSD: null, agentTier: null, modelFamily: null, model: null, maxIdleMs: 0, totalIdleMs: 0, activityEvents: 0, verdict: null, roundsUsed: null, ..._emptyMetrics },
42
+ quality_review: { stage: 'quality_review', entered: false, durationMs: null, costUSD: null, agentTier: null, modelFamily: null, model: null, maxIdleMs: 0, totalIdleMs: 0, activityEvents: 0, verdict: null, roundsUsed: null, ..._emptyMetrics },
43
+ diff_review: { stage: 'diff_review', entered: false, durationMs: null, costUSD: null, agentTier: null, modelFamily: null, model: null, maxIdleMs: 0, totalIdleMs: 0, activityEvents: 0, verdict: null, roundsUsed: null, ..._emptyMetrics },
44
44
  };
45
45
  }
46
46
  function modelFamily(model) {
@@ -59,9 +59,9 @@ export function endBaseStage(stats, name, t0, c0, agent, finalCostUSD, idle, met
59
59
  agentTier: agent.tier,
60
60
  modelFamily: modelFamily(agent.model),
61
61
  model: agent.model,
62
- maxIdleMs: idle?.maxIdleMs ?? null,
63
- totalIdleMs: idle?.totalIdleMs ?? null,
64
- activityEvents: idle?.activityEvents ?? null,
62
+ maxIdleMs: idle?.maxIdleMs ?? 0,
63
+ totalIdleMs: idle?.totalIdleMs ?? 0,
64
+ activityEvents: idle?.activityEvents ?? 0,
65
65
  inputTokens: metrics?.inputTokens ?? null,
66
66
  outputTokens: metrics?.outputTokens ?? null,
67
67
  cachedTokens: metrics?.cachedTokens ?? null,
@@ -83,14 +83,17 @@ metrics) {
83
83
  stage: name,
84
84
  entered: true,
85
85
  durationMs: metrics?.durationMs !== undefined ? metrics.durationMs : Date.now() - t0,
86
- costUSD: metrics?.costUSD !== undefined ? metrics.costUSD
86
+ // Item 7: != null (covers both undefined AND null) — null means
87
+ // "pricing unavailable, fall through to runningCostUSD computation"
88
+ // rather than masking unknown as the literal 0.
89
+ costUSD: metrics?.costUSD != null ? metrics.costUSD
87
90
  : finalCostUSD !== null && c0 !== null ? finalCostUSD - c0 : null,
88
91
  agentTier: agent.tier,
89
92
  modelFamily: modelFamily(agent.model),
90
93
  model: agent.model,
91
- maxIdleMs: idle?.maxIdleMs ?? null,
92
- totalIdleMs: idle?.totalIdleMs ?? null,
93
- activityEvents: idle?.activityEvents ?? null,
94
+ maxIdleMs: idle?.maxIdleMs ?? 0,
95
+ totalIdleMs: idle?.totalIdleMs ?? 0,
96
+ activityEvents: idle?.activityEvents ?? 0,
94
97
  inputTokens: metrics?.inputTokens ?? null,
95
98
  outputTokens: metrics?.outputTokens ?? null,
96
99
  cachedTokens: metrics?.cachedTokens ?? null,
@@ -164,9 +167,9 @@ export function endVerifyStage(stats, t0, c0, agent, finalCostUSD, idle, outcome
164
167
  agentTier: agent.tier,
165
168
  modelFamily: modelFamily(agent.model),
166
169
  model: agent.model,
167
- maxIdleMs: idle?.maxIdleMs ?? null,
168
- totalIdleMs: idle?.totalIdleMs ?? null,
169
- activityEvents: idle?.activityEvents ?? null,
170
+ maxIdleMs: idle?.maxIdleMs ?? 0,
171
+ totalIdleMs: idle?.totalIdleMs ?? 0,
172
+ activityEvents: idle?.activityEvents ?? 0,
170
173
  inputTokens: null,
171
174
  outputTokens: null,
172
175
  cachedTokens: null,
@@ -448,10 +451,32 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
448
451
  if (event.kind === 'turn_complete') {
449
452
  heartbeat?.markEvent('llm');
450
453
  const providerConfig = _activeRunnerProviderConfig ?? resolved.provider.config;
451
- const costUSD = computeCostUSD(event.cumulativeInputTokens, event.cumulativeOutputTokens, providerConfig);
452
- _currentRunnerCostUSD = costUSD ?? 0;
454
+ // §3.5 point 2: per-turn delta tracking from cumulative usage
455
+ const cur = {
456
+ inputTokens: event.cumulativeInputTokens,
457
+ outputTokens: event.cumulativeOutputTokens,
458
+ cachedReadTokens: event.cumulativeCachedReadTokens ?? 0,
459
+ cachedCreationTokens: event.cumulativeCachedCreationTokens ?? 0,
460
+ reasoningTokens: event.cumulativeReasoningTokens ?? 0,
461
+ };
462
+ const turnTokens = subtractTokens(cur, _lastCumulative);
463
+ _lastCumulative = cur;
464
+ const card = resolveRateCard(providerConfig.model, {
465
+ ...(providerConfig.inputCostPerMTok !== undefined && { inputCostPerMTok: providerConfig.inputCostPerMTok }),
466
+ ...(providerConfig.outputCostPerMTok !== undefined && { outputCostPerMTok: providerConfig.outputCostPerMTok }),
467
+ });
468
+ const turnCost = card ? priceTokens(turnTokens, card) : null;
469
+ if (turnCost !== null) {
470
+ _currentRunnerCostUSD = (_currentRunnerCostUSD ?? 0) + turnCost;
471
+ }
472
+ else {
473
+ _rateCardUnresolved = true;
474
+ }
453
475
  const cumulativeCostUSD = (_completedRunnerCostUSD ?? 0) + _currentRunnerCostUSD;
454
476
  heartbeat?.updateCost(cumulativeCostUSD, null);
477
+ if (_rateCardUnresolved) {
478
+ heartbeat?.markRateCardUnresolved();
479
+ }
455
480
  const nowTurn = Date.now();
456
481
  const turnDurMs = nowTurn - prevEventAtMs;
457
482
  prevEventAtMs = nowTurn;
@@ -459,7 +484,7 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
459
484
  emitTaskEvent('turn_complete', {
460
485
  input_tokens: event.cumulativeInputTokens,
461
486
  output_tokens: event.cumulativeOutputTokens,
462
- cost: costUSD,
487
+ cost: turnCost,
463
488
  duration_ms: turnDurMs,
464
489
  provider: providerConfig.model,
465
490
  });
@@ -514,6 +539,24 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
514
539
  const fallbackOverrides = [];
515
540
  let latestAttemptedImpl;
516
541
  let lastNonRejectedImpl;
542
+ // Review-stage timing variables hoisted so deferred-finalizer closures
543
+ // (defined below) can reference them from all early-exit paths.
544
+ let specReviewT0 = 0;
545
+ let specReviewC0 = null;
546
+ let specReviewDurationMs = 0;
547
+ let qualityReviewT0 = 0;
548
+ let qualityReviewC0 = null;
549
+ let qualityReviewDurationMs = 0;
550
+ // Accumulated metrics from spec/quality review results — threaded to
551
+ // the deferred finalizers so early-exit paths carry the same token/turn
552
+ // counts the normal post-loop path always had.
553
+ let specReviewMetrics = {};
554
+ let qualityReviewMetrics = {};
555
+ // Hoisted so deferred-finalizer closures (defined below) can reference
556
+ // these from all early-exit paths. Reassigned after the corresponding
557
+ // review stage runs.
558
+ let specStatus = 'error';
559
+ let qualityResult = { status: 'skipped', report: undefined, findings: [], errorReason: (reviewPolicy === 'full' || reviewPolicy === 'quality_only') ? 'all_tiers_unavailable' : 'skipped: reviewPolicy is spec_only' };
517
560
  const reviewRounds = () => ({ spec: specAttemptIndex, quality: qualityAttemptIndex, metadata: metadataRepair, cap: Math.max(maxSpecRows, maxQualityRows) });
518
561
  const taskCostUSD = () => (heartbeat ? heartbeat.getHeartbeatTickInfo().costUSD : null);
519
562
  // Per-stage stats tracking
@@ -535,6 +578,46 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
535
578
  const model = provider?.config.model ?? config.agents[tier]?.model ?? resolvedModel;
536
579
  return { tier, family: modelFamily(model), model };
537
580
  };
581
+ // Deferred finalizers for spec_review and quality_review. Called from
582
+ // the normal post-loop path AND from every early-exit path
583
+ // (round_cap, cost_ceiling, time_ceiling, all_tiers_unavailable).
584
+ // Idempotent on re-call; no-op when the stage was never started.
585
+ let specReviewFinalized = false;
586
+ let qualityReviewFinalized = false;
587
+ const finalizeSpecReviewStage = () => {
588
+ if (specReviewFinalized)
589
+ return;
590
+ if (specReviewT0 === 0)
591
+ return; // never started
592
+ specReviewFinalized = true;
593
+ const lastReviewer = specReviewerHistory[specReviewerHistory.length - 1];
594
+ const reviewerAgent = (lastReviewer === undefined || lastReviewer === 'skipped')
595
+ ? implementerAgentInfo
596
+ : reviewerAgentInfoFor(lastReviewer);
597
+ endReviewStage(stats, 'spec_review', specReviewT0, specReviewC0, reviewerAgent, runningCostUSD(), snapshotIdle(stageIdle), specStatus === 'approved' ? 'approved'
598
+ : specStatus === 'changes_required' ? 'changes_required'
599
+ : specStatus === 'skipped' ? 'skipped'
600
+ : specStatus === 'not_applicable' ? 'not_applicable'
601
+ : 'error', specAttemptIndex, { ...specReviewMetrics, durationMs: specReviewDurationMs });
602
+ };
603
+ const finalizeQualityReviewStage = () => {
604
+ if (qualityReviewFinalized)
605
+ return;
606
+ if (qualityReviewT0 === 0)
607
+ return;
608
+ if (reviewPolicy !== 'full' && reviewPolicy !== 'quality_only')
609
+ return;
610
+ qualityReviewFinalized = true;
611
+ const lastReviewer = qualityReviewerHistory[qualityReviewerHistory.length - 1];
612
+ const reviewerAgent = (lastReviewer === undefined || lastReviewer === 'skipped')
613
+ ? implementerAgentInfo
614
+ : reviewerAgentInfoFor(lastReviewer);
615
+ endReviewStage(stats, 'quality_review', qualityReviewT0, qualityReviewC0, reviewerAgent, runningCostUSD(), snapshotIdle(stageIdle), qualityResult.status === 'approved' ? 'approved'
616
+ : qualityResult.status === 'changes_required' ? 'changes_required'
617
+ : qualityResult.status === 'annotated' ? 'annotated'
618
+ : qualityResult.status === 'skipped' ? 'skipped'
619
+ : 'error', qualityAttemptIndex, { ...qualityReviewMetrics, durationMs: qualityReviewDurationMs });
620
+ };
538
621
  // §3.9: runningCostUSD must be cumulative and monotonic across explicit
539
622
  // runner boundaries. Runner progress reports per-runner cumulative token
540
623
  // counts, so lifecycle cost is completed runners + current runner partial.
@@ -545,6 +628,13 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
545
628
  let _currentRunnerCostUSD = 0;
546
629
  let _activeRunnerProviderConfig = null;
547
630
  let _prevRunningCost = null;
631
+ // Per-turn delta tracking state (§3.5 point 2). Reset at each
632
+ // provider.run() boundary via `runAccounted`.
633
+ let _lastCumulative = {
634
+ inputTokens: 0, outputTokens: 0,
635
+ cachedReadTokens: 0, cachedCreationTokens: 0, reasoningTokens: 0,
636
+ };
637
+ let _rateCardUnresolved = false;
548
638
  const runningCostUSD = () => {
549
639
  const current = _completedRunnerCostUSD !== null || _currentRunnerCostUSD !== 0
550
640
  ? (_completedRunnerCostUSD ?? 0) + _currentRunnerCostUSD
@@ -563,6 +653,11 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
563
653
  }
564
654
  _activeRunnerProviderConfig = provider.config;
565
655
  _currentRunnerCostUSD = 0;
656
+ _lastCumulative = {
657
+ inputTokens: 0, outputTokens: 0,
658
+ cachedReadTokens: 0, cachedCreationTokens: 0, reasoningTokens: 0,
659
+ };
660
+ _rateCardUnresolved = false;
566
661
  try {
567
662
  const result = await call();
568
663
  const actualCost = result?.usage?.costUSD
@@ -599,7 +694,7 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
599
694
  // on the base result (set by callers via abortReviewLoop({ ...res, specReviewStatus, ... })).
600
695
  // Defaults to 'changes_required' for whichever loop tripped — that's the only state the
601
696
  // loop ever fires from, by construction.
602
- function adaptForAllTiersUnavailable(base, loop, attempt, resolvedModel, salvageSource) {
697
+ function adaptForAllTiersUnavailable(base, loop, attempt, resolvedModel, salvageSource, unavailableReason) {
603
698
  const stageName = loop === 'spec' && attempt === 0 ? 'implementing'
604
699
  : loop === 'spec' ? 'spec_rework'
605
700
  : 'quality_rework';
@@ -623,9 +718,9 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
623
718
  agentTier: implementerAgentInfo.tier,
624
719
  modelFamily: modelFamily(implementerAgentInfo.model),
625
720
  model: implementerAgentInfo.model,
626
- maxIdleMs: null,
627
- totalIdleMs: null,
628
- activityEvents: null,
721
+ maxIdleMs: 0,
722
+ totalIdleMs: 0,
723
+ activityEvents: 0,
629
724
  inputTokens: salvageSource?.usage?.inputTokens ?? null,
630
725
  outputTokens: salvageSource?.usage?.outputTokens ?? null,
631
726
  cachedTokens: salvageSource?.usage?.cachedTokens ?? null,
@@ -636,6 +731,8 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
636
731
  filesWrittenCount: (salvageSource?.filesWritten?.length) || null,
637
732
  };
638
733
  }
734
+ finalizeSpecReviewStage();
735
+ finalizeQualityReviewStage();
639
736
  const ship = salvageSource ?? lastNonRejectedImpl?.result ?? base;
640
737
  return {
641
738
  ...ship,
@@ -644,6 +741,7 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
644
741
  terminationReason: 'all_tiers_unavailable',
645
742
  reviewRounds: reviewRounds(),
646
743
  error: `runWithFallback: both tiers unavailable (loop=${loop}, attempt=${attempt}, role=implementer)`,
744
+ errorCode: unavailableReason === 'reviewer_separation_unsatisfiable' ? 'reviewer_separation_unsatisfiable' : ship.errorCode,
647
745
  agents: agentEnvelope(specReviewerHistory[specReviewerHistory.length - 1] ?? 'not_applicable', qualityReviewerHistory[qualityReviewerHistory.length - 1] ?? ((reviewPolicy === 'full' || reviewPolicy === 'quality_only') ? 'not_applicable' : 'skipped')),
648
746
  stageStats: stats,
649
747
  models: {
@@ -676,28 +774,32 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
676
774
  ...(fallbackOverrides.length > 0 ? { fallbackOverrides } : {}),
677
775
  };
678
776
  };
679
- const abortReviewLoop = (base, terminationReason, message, aborting, wallClockMs) => ({
680
- ...base,
681
- status: 'incomplete',
682
- workerStatus: 'review_loop_aborted',
683
- terminationReason: terminationReason === 'round_cap'
684
- ? 'round_cap'
685
- : {
686
- cause: terminationReason === 'cost_ceiling' ? 'cost_exceeded' : 'time_ceiling',
687
- turnsUsed: base.turns,
688
- hasFileArtifacts: (base.filesWritten ?? []).length > 0,
689
- usedShell: (base.toolCalls ?? []).some(c => c.startsWith('shell') || c.startsWith('runShell')),
690
- workerSelfAssessment: 'review_loop_aborted',
691
- wasPromoted: false,
692
- ...(wallClockMs !== undefined ? { wallClockMs } : {}),
693
- },
694
- reviewRounds: reviewRounds(),
695
- error: message,
696
- specReviewStatus: aborting === 'spec' ? 'changes_required' : (base.specReviewStatus ?? 'approved'),
697
- qualityReviewStatus: aborting === 'quality' ? 'changes_required' : (base.qualityReviewStatus ?? 'skipped'),
698
- agents: agentEnvelope(specReviewerHistory[specReviewerHistory.length - 1] ?? 'not_applicable', qualityReviewerHistory[qualityReviewerHistory.length - 1] ?? ((reviewPolicy === 'full' || reviewPolicy === 'quality_only') ? 'not_applicable' : 'skipped')),
699
- stageStats: stats,
700
- });
777
+ const abortReviewLoop = (base, terminationReason, message, aborting, wallClockMs) => {
778
+ finalizeSpecReviewStage();
779
+ finalizeQualityReviewStage();
780
+ return {
781
+ ...base,
782
+ status: 'incomplete',
783
+ workerStatus: 'review_loop_aborted',
784
+ terminationReason: terminationReason === 'round_cap'
785
+ ? 'round_cap'
786
+ : {
787
+ cause: terminationReason === 'cost_ceiling' ? 'cost_exceeded' : 'time_ceiling',
788
+ turnsUsed: base.turns,
789
+ hasFileArtifacts: (base.filesWritten ?? []).length > 0,
790
+ usedShell: (base.toolCalls ?? []).some(c => c.startsWith('shell') || c.startsWith('runShell')),
791
+ workerSelfAssessment: 'review_loop_aborted',
792
+ wasPromoted: false,
793
+ ...(wallClockMs !== undefined ? { wallClockMs } : {}),
794
+ },
795
+ reviewRounds: reviewRounds(),
796
+ error: message,
797
+ specReviewStatus: aborting === 'spec' ? 'changes_required' : (base.specReviewStatus ?? 'approved'),
798
+ qualityReviewStatus: aborting === 'quality' ? 'changes_required' : (base.qualityReviewStatus ?? 'skipped'),
799
+ agents: agentEnvelope(specReviewerHistory[specReviewerHistory.length - 1] ?? 'not_applicable', qualityReviewerHistory[qualityReviewerHistory.length - 1] ?? ((reviewPolicy === 'full' || reviewPolicy === 'quality_only') ? 'not_applicable' : 'skipped')),
800
+ stageStats: stats,
801
+ };
802
+ };
701
803
  const defaultVerification = { status: 'skipped', steps: [], totalDurationMs: 0, skipReason: 'no_command' };
702
804
  let latestVerification = defaultVerification;
703
805
  async function runVerificationStage() {
@@ -763,6 +865,11 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
763
865
  structuredError: { code: 'runner_crash', message: workerError.message },
764
866
  workerStatus: 'failed',
765
867
  workerError,
868
+ models: {
869
+ implementer: implModel,
870
+ specReviewer: null,
871
+ qualityReviewer: null,
872
+ },
766
873
  });
767
874
  }
768
875
  function withVerification(result, verification = latestVerification) {
@@ -820,6 +927,17 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
820
927
  stageStats: stats,
821
928
  }, verification);
822
929
  }
930
+ function diffReviewErrorTerminationReason(base) {
931
+ return {
932
+ cause: 'error',
933
+ turnsUsed: base.turns,
934
+ hasFileArtifacts: (base.filesWritten ?? []).length > 0,
935
+ usedShell: (base.toolCalls ?? []).some(c => c.startsWith('shell') || c.startsWith('runShell')),
936
+ workerSelfAssessment: 'failed',
937
+ wasPromoted: false,
938
+ ...(base.terminationReason && typeof base.terminationReason === 'object' && base.terminationReason.wallClockMs !== undefined ? { wallClockMs: base.terminationReason.wallClockMs } : {}),
939
+ };
940
+ }
823
941
  function resolveDiffOnlyTerminal(base, verdict, verification, diffTruncated) {
824
942
  const concerns = [...(base.concerns ?? [])];
825
943
  if ('status' in verdict && verdict.status === 'skipped') {
@@ -842,6 +960,7 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
842
960
  code: 'diff_review_rejected',
843
961
  message: verdict.message || 'diff review rejected implementation',
844
962
  },
963
+ terminationReason: diffReviewErrorTerminationReason(base),
845
964
  concerns,
846
965
  commits,
847
966
  commitError,
@@ -854,6 +973,12 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
854
973
  status: verdict.status,
855
974
  workerStatus: 'failed',
856
975
  error: verdict.reason ?? `diff review transport failure: ${verdict.status}`,
976
+ errorCode: verdict.status,
977
+ structuredError: {
978
+ code: verdict.status,
979
+ message: verdict.reason ?? `diff review transport failure: ${verdict.status}`,
980
+ },
981
+ terminationReason: diffReviewErrorTerminationReason(base),
857
982
  concerns: [...concerns, ...verdict.concerns],
858
983
  commits,
859
984
  commitError,
@@ -927,19 +1052,23 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
927
1052
  const treeDirty = porcelain.length > 0;
928
1053
  if (!headMoved && !treeDirty)
929
1054
  return;
1055
+ // Emit committing stage for both worker-committed (headMoved) and
1056
+ // pending-commit (treeDirty) paths. Workers that auto-commit during
1057
+ // turns leave a clean tree but moved HEAD — they must still produce
1058
+ // a committing stage so telemetry includes filesCommittedCount.
1059
+ transitionStage('verifying', 'committing', { stage: 'committing', stageIndex: 7 }, null);
1060
+ const commitT0 = Date.now();
1061
+ const commitC0 = runningCostUSD();
930
1062
  if (headMoved)
931
1063
  await recordWorkerCommits(baselineHead, 'HEAD');
932
1064
  if (treeDirty) {
933
1065
  const validCommit = implReport?.commit ?? await repairCommitMetadata(implReport?.commitDiagnostic ?? 'no commit block emitted');
934
- if (!validCommit)
935
- return;
936
- transitionStage('verifying', 'committing', { stage: 'committing', stageIndex: 7 }, null);
937
- const commitT0 = Date.now();
938
- const commitC0 = runningCostUSD();
939
- const c = await runCommitStage({ cwd, filesWritten: implResult.filesWritten, commit: validCommit });
940
- commits.push(c);
941
- endBaseStage(stats, 'committing', commitT0, commitC0, implementerAgentInfo, runningCostUSD(), snapshotIdle(stageIdle));
1066
+ if (validCommit) {
1067
+ const c = await runCommitStage({ cwd, filesWritten: implResult.filesWritten, commit: validCommit });
1068
+ commits.push(c);
1069
+ }
942
1070
  }
1071
+ endBaseStage(stats, 'committing', commitT0, commitC0, implementerAgentInfo, runningCostUSD(), snapshotIdle(stageIdle));
943
1072
  }
944
1073
  // Tracks the final RunResult across every exit path so the `finally` block
945
1074
  // below fires `recorder.recordTaskCompleted` exactly once regardless of which
@@ -1037,9 +1166,9 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
1037
1166
  assignedTier: initialDecision.impl,
1038
1167
  reason: initialImpl.unavailableReason,
1039
1168
  });
1040
- return __recordOnce(adaptForAllTiersUnavailable(initialImpl.result, 'spec', 0, resolvedModel, initialImpl.salvageResult));
1169
+ return __recordOnce(adaptForAllTiersUnavailable(initialImpl.result, 'spec', 0, resolvedModel, initialImpl.salvageResult, initialImpl.unavailableReason));
1041
1170
  }
1042
- const implResult = initialImpl.result;
1171
+ let implResult = initialImpl.result;
1043
1172
  latestAttemptedImpl = { tier: initialImpl.usedTier, result: implResult };
1044
1173
  lastNonRejectedImpl = { tier: initialImpl.usedTier, result: implResult };
1045
1174
  implementerHistory.push(initialImpl.usedTier);
@@ -1055,8 +1184,29 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
1055
1184
  costUSD: implResult.usage?.costUSD ?? undefined,
1056
1185
  });
1057
1186
  specAttemptIndex = 1;
1058
- const implReport = implResult.status === 'ok' ? parseStructuredReport(implResult.output) : undefined;
1187
+ const implReport = parseStructuredReport(implResult.output);
1059
1188
  const workerStatus = extractWorkerStatus(implReport);
1189
+ // Item 9: surface silent-incomplete via errorCode — the delegation layer
1190
+ // cascades result.status as a fallback errorCode (e.g., 'incomplete'),
1191
+ // which is not an informative error code. Replace it when the runner
1192
+ // produced no parseable summary — the operator can now filter on
1193
+ // 'incomplete_no_summary' instead of guessing.
1194
+ //
1195
+ // parseStructuredReport always returns a report object and has a
1196
+ // last-resort fallback that treats the first paragraph as an implicit
1197
+ // summary, so implReport.summary alone is not a reliable signal. Treat
1198
+ // the run as having a structured summary only when a real ## Summary
1199
+ // section exists and parses to non-placeholder content.
1200
+ const hasSummaryHeader = /\n##\s+summary\s*\n/i.test(implResult.output) || /^##\s+summary\s*\n/im.test(implResult.output);
1201
+ const summaryText = (hasSummaryHeader ? implReport.summary : null)?.trim().toLowerCase() ?? '';
1202
+ const hasStructuredSummary = hasSummaryHeader && summaryText !== ''
1203
+ && !['none', '(none)', 'n/a', 'na', 'todo', 'tbd'].includes(summaryText);
1204
+ if (implResult.status === 'incomplete' && !hasStructuredSummary) {
1205
+ const cascadedFallback = implResult.errorCode === implResult.status;
1206
+ if (!implResult.errorCode || cascadedFallback) {
1207
+ implResult = { ...implResult, errorCode: 'incomplete_no_summary' };
1208
+ }
1209
+ }
1060
1210
  if (implResult.status === 'ok' && isArtifactProducing) {
1061
1211
  await captureCommitsAfterImplementation(implResult, implReport, baselineHead);
1062
1212
  }
@@ -1195,6 +1345,7 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
1195
1345
  getStatus: (r) => r.status,
1196
1346
  makeSyntheticFailure: () => makeSkippedReviewResult('all_tiers_unavailable'),
1197
1347
  forbiddenIdentities: implementerIdentity ? [implementerIdentity] : undefined,
1348
+ forbiddenTiers: [resolved.slot],
1198
1349
  call: (provider) => runAccounted(provider, () => runDiffReview({ cwd, diff: evidence.fullDiff, diffTruncated: evidence.diffTruncated, verification, worker: { call: (prompt, opts) => provider.run(prompt, { cwd: opts?.cwd ?? cwd, abortSignal: opts?.abortSignal, timeoutMs: opts?.timeoutMs }) }, taskDeadlineMs, abortSignal: stallController.signal })),
1199
1350
  });
1200
1351
  if (diffCall.fallbackFired) {
@@ -1203,8 +1354,17 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
1203
1354
  }
1204
1355
  if (diffCall.bothUnavailable) {
1205
1356
  emitFallbackUnavailable({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'diff', attempt: 0, role: 'diffReviewer', assignedTier: diffReviewerTier, reason: diffCall.unavailableReason });
1357
+ if (diffCall.unavailableReason === 'reviewer_separation_unsatisfiable') {
1358
+ return __recordOnce(adaptForAllTiersUnavailable({ ...implResult, errorCode: 'reviewer_separation_unsatisfiable', diffReviewStatus: 'error' }, 'spec', 0, resolvedModel, implResult, diffCall.unavailableReason));
1359
+ }
1206
1360
  }
1207
- const verdict = diffCall.bothUnavailable || isReviewTransportFailure(diffCall.result) ? makeSkippedReviewResult('all_tiers_unavailable') : diffCall.result;
1361
+ const verdict = diffCall.bothUnavailable ? makeSkippedReviewResult('all_tiers_unavailable') : diffCall.result;
1362
+ const diffEnvelopeStatus = 'kind' in verdict
1363
+ ? (verdict.kind === 'approve' ? 'approved'
1364
+ : verdict.kind === 'concerns' ? 'approved'
1365
+ : verdict.kind === 'reject' ? 'changes_required'
1366
+ : 'error')
1367
+ : 'skipped';
1208
1368
  emitTaskEvent('review_decision', {
1209
1369
  stage: 'diff_review',
1210
1370
  verdict: 'kind' in verdict
@@ -1220,7 +1380,7 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
1220
1380
  // distinct from spec/quality verdicts. Map to the telemetry verdict enum here.
1221
1381
  'kind' in verdict
1222
1382
  ? (verdict.kind === 'approve' ? 'approved'
1223
- : verdict.kind === 'concerns' ? 'concerns'
1383
+ : verdict.kind === 'concerns' ? 'approved'
1224
1384
  : verdict.kind === 'reject' ? 'changes_required'
1225
1385
  : 'error')
1226
1386
  : 'skipped', 0);
@@ -1231,6 +1391,7 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
1231
1391
  qualityReviewStatus: 'skipped',
1232
1392
  specReviewReason: 'skipped: reviewPolicy is diff_only',
1233
1393
  qualityReviewReason: 'skipped: reviewPolicy is diff_only',
1394
+ diffReviewStatus: diffEnvelopeStatus,
1234
1395
  implementationReport: effectiveImplReport,
1235
1396
  fileArtifactsMissing: implResult.status === 'ok' ? checkOutputTargets(outputTargets) : undefined,
1236
1397
  agents: agentEnvelope('skipped', 'skipped'),
@@ -1240,19 +1401,8 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
1240
1401
  let finalImplResult = implResult;
1241
1402
  let finalImplReport = effectiveImplReport;
1242
1403
  let specResult;
1243
- let specStatus;
1244
1404
  let specReport;
1245
1405
  let specReviewReason;
1246
- let specReviewT0 = 0;
1247
- let specReviewC0 = null;
1248
- // Delta-only timing: accumulate per-call wall durations across the
1249
- // initial spec_review + every spec_rework round's re-review. This
1250
- // replaces the `Date.now() - specReviewT0` fallback at endReviewStage,
1251
- // which over-counts because endReviewStage runs AFTER spec_rework,
1252
- // quality_review, AND quality_rework all complete. No absolute
1253
- // timestamps go on the wire — Date.now() is used only as a local
1254
- // delta source. Privacy.md guarantees ms-deltas only.
1255
- let specReviewDurationMs = 0;
1256
1406
  if (reviewPolicy !== 'quality_only') {
1257
1407
  transitionStage('verifying', 'spec_review', { stage: 'spec_review', stageIndex: 2, reviewRound: 1, attemptCap: maxSpecRows }, null);
1258
1408
  const initialReviewerTier = pickReviewer({ loop: 'spec', attemptIndex: 0, baseTier: resolved.slot });
@@ -1267,6 +1417,7 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
1267
1417
  getStatus: (r) => r.status,
1268
1418
  makeSyntheticFailure: () => makeSkippedReviewResult('all_tiers_unavailable'),
1269
1419
  forbiddenIdentities: implementerIdentity ? [implementerIdentity] : undefined,
1420
+ forbiddenTiers: [resolved.slot],
1270
1421
  call: (provider) => runAccounted(provider, () => runSpecReview(provider, packet, effectiveImplReport, fileContents, implResult.toolCalls, task.planContext, evidence.block, taskDeadlineMs, stallController.signal, wrappedOnProgress, cwd)),
1271
1422
  });
1272
1423
  specReviewDurationMs += Date.now() - initialSpecReviewIterStart;
@@ -1274,6 +1425,15 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
1274
1425
  emitFallbackUnavailable({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'spec', attempt: 0, role: 'specReviewer', assignedTier: initialReviewerTier, reason: initialSpecReview.unavailableReason });
1275
1426
  fallbackOverrides.push({ role: 'specReviewer', loop: 'spec', attempt: 0, assigned: initialReviewerTier, used: initialSpecReview.usedTier, reason: initialSpecReview.unavailableReason, triggeringStatus: initialSpecReview.fallbackTriggeringStatus, bothUnavailable: true });
1276
1427
  specReviewerHistory.push('skipped');
1428
+ if (initialSpecReview.unavailableReason === 'reviewer_separation_unsatisfiable') {
1429
+ const unavailableBase = {
1430
+ ...implResult,
1431
+ specReviewStatus: 'error',
1432
+ specReviewReason: 'reviewer separation unsatisfiable',
1433
+ errorCode: 'reviewer_separation_unsatisfiable',
1434
+ };
1435
+ return __recordOnce(adaptForAllTiersUnavailable(unavailableBase, 'spec', 0, resolvedModel, implResult, initialSpecReview.unavailableReason));
1436
+ }
1277
1437
  }
1278
1438
  else {
1279
1439
  specReviewerHistory.push(initialSpecReview.usedTier);
@@ -1322,7 +1482,7 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
1322
1482
  emitFallbackUnavailable({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'spec', attempt: specAttemptIndex, role: 'implementer', assignedTier: decision.impl, reason: reworkCall.unavailableReason });
1323
1483
  if (decision.isEscalated)
1324
1484
  emitEscalationUnavailable({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'spec', attempt: specAttemptIndex, role: 'implementer', wantedTier: decision.impl, reason: reworkCall.unavailableReason });
1325
- return __recordOnce(adaptForAllTiersUnavailable(reworkCall.result, 'spec', specAttemptIndex, resolvedModel, reworkCall.salvageResult));
1485
+ return __recordOnce(adaptForAllTiersUnavailable(reworkCall.result, 'spec', specAttemptIndex, resolvedModel, reworkCall.salvageResult, reworkCall.unavailableReason));
1326
1486
  }
1327
1487
  finalImplResult = reworkCall.result;
1328
1488
  latestAttemptedImpl = { tier: reworkCall.usedTier, result: finalImplResult };
@@ -1334,12 +1494,21 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
1334
1494
  commitReworkStage(stats, 'spec_rework', specReworkAcc, implementerAgentInfo);
1335
1495
  transitionStage('spec_rework', 'spec_review', { stage: 'spec_review', stageIndex: 2, reviewRound: specAttemptIndex + 1, attemptCap: maxSpecRows }, null);
1336
1496
  const reReviewIterStart = Date.now();
1337
- const reviewCall = await runWithFallback({ assigned: decision.reviewer, providerFor, unavailableTiers: specUnavailable, isTransportFailure: (r) => isReviewTransportFailure(r), getStatus: (r) => r.status, makeSyntheticFailure: () => makeSkippedReviewResult('all_tiers_unavailable'), forbiddenIdentities: implementerIdentity ? [implementerIdentity] : undefined, call: (provider) => runAccounted(provider, () => runSpecReview(provider, packet, finalImplReport, fileContents, finalImplResult.toolCalls, task.planContext, evidence.block, taskDeadlineMs, stallController.signal, wrappedOnProgress, cwd)) });
1497
+ const reviewCall = await runWithFallback({ assigned: decision.reviewer, providerFor, unavailableTiers: specUnavailable, isTransportFailure: (r) => isReviewTransportFailure(r), getStatus: (r) => r.status, makeSyntheticFailure: () => makeSkippedReviewResult('all_tiers_unavailable'), forbiddenIdentities: implementerIdentity ? [implementerIdentity] : undefined, forbiddenTiers: [resolved.slot], call: (provider) => runAccounted(provider, () => runSpecReview(provider, packet, finalImplReport, fileContents, finalImplResult.toolCalls, task.planContext, evidence.block, taskDeadlineMs, stallController.signal, wrappedOnProgress, cwd)) });
1338
1498
  specReviewDurationMs += Date.now() - reReviewIterStart;
1339
1499
  if (reviewCall.bothUnavailable) {
1340
1500
  emitFallbackUnavailable({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'spec', attempt: specAttemptIndex, role: 'specReviewer', assignedTier: decision.reviewer, reason: reviewCall.unavailableReason });
1341
1501
  fallbackOverrides.push({ role: 'specReviewer', loop: 'spec', attempt: specAttemptIndex, assigned: decision.reviewer, used: reviewCall.usedTier, reason: reviewCall.unavailableReason, triggeringStatus: reviewCall.fallbackTriggeringStatus, bothUnavailable: true });
1342
1502
  specReviewerHistory.push('skipped');
1503
+ if (reviewCall.unavailableReason === 'reviewer_separation_unsatisfiable') {
1504
+ const unavailableBase = {
1505
+ ...finalImplResult,
1506
+ specReviewStatus: 'error',
1507
+ specReviewReason: 'reviewer separation unsatisfiable',
1508
+ errorCode: 'reviewer_separation_unsatisfiable',
1509
+ };
1510
+ return __recordOnce(adaptForAllTiersUnavailable(unavailableBase, 'spec', specAttemptIndex, resolvedModel, finalImplResult, reviewCall.unavailableReason));
1511
+ }
1343
1512
  }
1344
1513
  else {
1345
1514
  specReviewerHistory.push(reviewCall.usedTier);
@@ -1370,17 +1539,6 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
1370
1539
  specReport = undefined;
1371
1540
  specReviewReason = 'skipped: reviewPolicy is quality_only';
1372
1541
  }
1373
- let qualityResult = { status: 'skipped', report: undefined, findings: [], errorReason: (reviewPolicy === 'full' || reviewPolicy === 'quality_only') ? 'all_tiers_unavailable' : 'skipped: reviewPolicy is spec_only' };
1374
- // Hoisted so endReviewStage (called after this block) can read them on the
1375
- // success path. When the quality review is skipped (`reviewPolicy !== 'full'`),
1376
- // the values stay at 0/null and the corresponding stage entry remains in its
1377
- // `entered: false` default — endReviewStage is never called.
1378
- let qualityReviewT0 = 0;
1379
- let qualityReviewC0 = null;
1380
- // Same delta-only timing pattern as spec_review — accumulate per-call
1381
- // wall durations across initial + each rework round's re-review. No
1382
- // raw timestamps cross the wire.
1383
- let qualityReviewDurationMs = 0;
1384
1542
  if (reviewPolicy === 'full' || reviewPolicy === 'quality_only') {
1385
1543
  qualityUnavailable = new Map();
1386
1544
  const qualityReviewerTier = pickReviewer({ loop: 'quality', attemptIndex: 0, baseTier: resolved.slot });
@@ -1388,12 +1546,21 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
1388
1546
  qualityReviewT0 = Date.now();
1389
1547
  qualityReviewC0 = runningCostUSD();
1390
1548
  const initialQualityIterStart = Date.now();
1391
- const initialQuality = await runWithFallback({ assigned: qualityReviewerTier, providerFor, unavailableTiers: qualityUnavailable, isTransportFailure: (r) => isReviewTransportFailure(r), getStatus: (r) => r.status, makeSyntheticFailure: () => makeSkippedReviewResult('all_tiers_unavailable'), forbiddenIdentities: implementerIdentity ? [implementerIdentity] : undefined, call: (provider) => runAccounted(provider, () => runQualityReview(provider, packet, specReport ?? finalImplReport, fileContents, finalImplResult.toolCalls, finalImplResult.filesWritten, evidence.block, qualityReviewPromptBuilder, finalImplResult.output, taskDeadlineMs, stallController.signal, wrappedOnProgress, cwd)) });
1549
+ const initialQuality = await runWithFallback({ assigned: qualityReviewerTier, providerFor, unavailableTiers: qualityUnavailable, isTransportFailure: (r) => isReviewTransportFailure(r), getStatus: (r) => r.status, makeSyntheticFailure: () => makeSkippedReviewResult('all_tiers_unavailable'), forbiddenIdentities: implementerIdentity ? [implementerIdentity] : undefined, forbiddenTiers: [resolved.slot], call: (provider) => runAccounted(provider, () => runQualityReview(provider, packet, specReport ?? finalImplReport, fileContents, finalImplResult.toolCalls, finalImplResult.filesWritten, evidence.block, qualityReviewPromptBuilder, finalImplResult.output, taskDeadlineMs, stallController.signal, wrappedOnProgress, cwd)) });
1392
1550
  qualityReviewDurationMs += Date.now() - initialQualityIterStart;
1393
1551
  if (initialQuality.bothUnavailable) {
1394
1552
  emitFallbackUnavailable({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'quality', attempt: 0, role: 'qualityReviewer', assignedTier: qualityReviewerTier, reason: initialQuality.unavailableReason });
1395
1553
  fallbackOverrides.push({ role: 'qualityReviewer', loop: 'quality', attempt: 0, assigned: qualityReviewerTier, used: initialQuality.usedTier, reason: initialQuality.unavailableReason, triggeringStatus: initialQuality.fallbackTriggeringStatus, bothUnavailable: true });
1396
1554
  qualityReviewerHistory.push('skipped');
1555
+ if (initialQuality.unavailableReason === 'reviewer_separation_unsatisfiable') {
1556
+ const unavailableBase = {
1557
+ ...finalImplResult,
1558
+ qualityReviewStatus: 'error',
1559
+ qualityReviewReason: 'reviewer separation unsatisfiable',
1560
+ errorCode: 'reviewer_separation_unsatisfiable',
1561
+ };
1562
+ return __recordOnce(adaptForAllTiersUnavailable(unavailableBase, 'quality', 0, resolvedModel, finalImplResult, initialQuality.unavailableReason));
1563
+ }
1397
1564
  }
1398
1565
  else {
1399
1566
  qualityReviewerHistory.push(initialQuality.usedTier);
@@ -1480,7 +1647,7 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
1480
1647
  emitFallbackUnavailable({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'quality', attempt: qualityAttemptIndex, role: 'implementer', assignedTier: decision.impl, reason: reworkCall.unavailableReason });
1481
1648
  if (decision.isEscalated)
1482
1649
  emitEscalationUnavailable({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'quality', attempt: qualityAttemptIndex, role: 'implementer', wantedTier: decision.impl, reason: reworkCall.unavailableReason });
1483
- return __recordOnce(adaptForAllTiersUnavailable(reworkCall.result, 'quality', qualityAttemptIndex, resolvedModel, reworkCall.salvageResult));
1650
+ return __recordOnce(adaptForAllTiersUnavailable(reworkCall.result, 'quality', qualityAttemptIndex, resolvedModel, reworkCall.salvageResult, reworkCall.unavailableReason));
1484
1651
  }
1485
1652
  finalImplResult = reworkCall.result;
1486
1653
  latestAttemptedImpl = { tier: reworkCall.usedTier, result: finalImplResult };
@@ -1492,12 +1659,21 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
1492
1659
  commitReworkStage(stats, 'quality_rework', qualityReworkAcc, implementerAgentInfo);
1493
1660
  transitionStage('quality_rework', 'quality_review', { stage: 'quality_review', stageIndex: 4, reviewRound: qualityAttemptIndex + 1, attemptCap: maxQualityRows }, null);
1494
1661
  const qReReviewIterStart = Date.now();
1495
- const reviewCall = await runWithFallback({ assigned: decision.reviewer, providerFor, unavailableTiers: qualityUnavailable, isTransportFailure: (r) => isReviewTransportFailure(r), getStatus: (r) => r.status, makeSyntheticFailure: () => makeSkippedReviewResult('all_tiers_unavailable'), forbiddenIdentities: implementerIdentity ? [implementerIdentity] : undefined, call: (provider) => runAccounted(provider, () => runQualityReview(provider, packet, finalImplReport, fileContents, finalImplResult.toolCalls, finalImplResult.filesWritten, evidence.block, qualityReviewPromptBuilder, finalImplResult.output, taskDeadlineMs, stallController.signal, wrappedOnProgress, cwd)) });
1662
+ const reviewCall = await runWithFallback({ assigned: decision.reviewer, providerFor, unavailableTiers: qualityUnavailable, isTransportFailure: (r) => isReviewTransportFailure(r), getStatus: (r) => r.status, makeSyntheticFailure: () => makeSkippedReviewResult('all_tiers_unavailable'), forbiddenIdentities: implementerIdentity ? [implementerIdentity] : undefined, forbiddenTiers: [resolved.slot], call: (provider) => runAccounted(provider, () => runQualityReview(provider, packet, finalImplReport, fileContents, finalImplResult.toolCalls, finalImplResult.filesWritten, evidence.block, qualityReviewPromptBuilder, finalImplResult.output, taskDeadlineMs, stallController.signal, wrappedOnProgress, cwd)) });
1496
1663
  qualityReviewDurationMs += Date.now() - qReReviewIterStart;
1497
1664
  if (reviewCall.bothUnavailable) {
1498
1665
  emitFallbackUnavailable({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'quality', attempt: qualityAttemptIndex, role: 'qualityReviewer', assignedTier: decision.reviewer, reason: reviewCall.unavailableReason });
1499
1666
  fallbackOverrides.push({ role: 'qualityReviewer', loop: 'quality', attempt: qualityAttemptIndex, assigned: decision.reviewer, used: reviewCall.usedTier, reason: reviewCall.unavailableReason, triggeringStatus: reviewCall.fallbackTriggeringStatus, bothUnavailable: true });
1500
1667
  qualityReviewerHistory.push('skipped');
1668
+ if (reviewCall.unavailableReason === 'reviewer_separation_unsatisfiable') {
1669
+ const unavailableBase = {
1670
+ ...finalImplResult,
1671
+ qualityReviewStatus: 'error',
1672
+ qualityReviewReason: 'reviewer separation unsatisfiable',
1673
+ errorCode: 'reviewer_separation_unsatisfiable',
1674
+ };
1675
+ return __recordOnce(adaptForAllTiersUnavailable(unavailableBase, 'quality', qualityAttemptIndex, resolvedModel, finalImplResult, reviewCall.unavailableReason));
1676
+ }
1501
1677
  }
1502
1678
  else {
1503
1679
  qualityReviewerHistory.push(reviewCall.usedTier);
@@ -1559,21 +1735,11 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
1559
1735
  // override. endReviewStage uses the override when present and falls
1560
1736
  // back to `Date.now() - t0` otherwise (which over-counts review-block
1561
1737
  // span across rework + later stages).
1562
- const specMetrics = { ...(specResult.metrics ?? {}), durationMs: specReviewDurationMs };
1563
- const qualityMetrics = { ...(qualityResult.metrics ?? {}), durationMs: qualityReviewDurationMs };
1564
- if (reviewPolicy !== 'quality_only') {
1565
- endReviewStage(stats, 'spec_review', specReviewT0, specReviewC0, specReviewAgent, runningCostUSD(), snapshotIdle(stageIdle), specStatus === 'approved' ? 'approved'
1566
- : specStatus === 'changes_required' ? 'changes_required'
1567
- : specStatus === 'skipped' ? 'skipped'
1568
- : specStatus === 'not_applicable' ? 'not_applicable'
1569
- : 'error', specAttemptIndex, specMetrics);
1570
- }
1738
+ specReviewMetrics = (specResult.metrics ?? {});
1739
+ qualityReviewMetrics = (qualityResult.metrics ?? {});
1740
+ finalizeSpecReviewStage();
1741
+ finalizeQualityReviewStage();
1571
1742
  const qualityAggregateStatus = qualityResult.status;
1572
- endReviewStage(stats, 'quality_review', qualityReviewT0, qualityReviewC0, qualityReviewAgent, runningCostUSD(), snapshotIdle(stageIdle), qualityResult.status === 'approved' ? 'approved'
1573
- : qualityResult.status === 'changes_required' ? 'changes_required'
1574
- : qualityResult.status === 'annotated' ? 'annotated'
1575
- : qualityResult.status === 'skipped' ? 'skipped'
1576
- : 'error', qualityAttemptIndex, qualityMetrics);
1577
1743
  const aggregated = aggregateResult(finalReport, specReport, qualityResult.report, specAggregateStatus, qualityAggregateStatus);
1578
1744
  // File artifact verification: check whether output targets exist on disk after all work.
1579
1745
  // Only applies when status is ok; non-ok statuses skip verification entirely.
@@ -1655,6 +1821,8 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
1655
1821
  client: _client ?? 'claude-code',
1656
1822
  triggeringSkill: _triggeringSkill ?? 'direct',
1657
1823
  parentModel: task.parentModel ?? null,
1824
+ reviewPolicy,
1825
+ verifyCommandPresent: !!(task.verifyCommand && task.verifyCommand.length > 0),
1658
1826
  });
1659
1827
  }
1660
1828
  catch { /* silent */ }