@zhixuan92/multi-model-agent-core 3.5.2 → 3.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93) hide show
  1. package/dist/config/schema.d.ts +3 -0
  2. package/dist/config/schema.d.ts.map +1 -1
  3. package/dist/config/schema.js +10 -1
  4. package/dist/config/schema.js.map +1 -1
  5. package/dist/delegate-with-escalation.d.ts +14 -0
  6. package/dist/delegate-with-escalation.d.ts.map +1 -1
  7. package/dist/delegate-with-escalation.js +29 -1
  8. package/dist/delegate-with-escalation.js.map +1 -1
  9. package/dist/executors/audit.d.ts.map +1 -1
  10. package/dist/executors/audit.js +16 -2
  11. package/dist/executors/audit.js.map +1 -1
  12. package/dist/executors/debug.d.ts.map +1 -1
  13. package/dist/executors/debug.js +8 -1
  14. package/dist/executors/debug.js.map +1 -1
  15. package/dist/executors/delegate.d.ts.map +1 -1
  16. package/dist/executors/delegate.js +23 -1
  17. package/dist/executors/delegate.js.map +1 -1
  18. package/dist/executors/execute-plan.d.ts.map +1 -1
  19. package/dist/executors/execute-plan.js +16 -2
  20. package/dist/executors/execute-plan.js.map +1 -1
  21. package/dist/executors/execution-context.d.ts.map +1 -1
  22. package/dist/executors/execution-context.js +4 -0
  23. package/dist/executors/execution-context.js.map +1 -1
  24. package/dist/executors/investigate.d.ts.map +1 -1
  25. package/dist/executors/investigate.js +24 -1
  26. package/dist/executors/investigate.js.map +1 -1
  27. package/dist/executors/retry.d.ts.map +1 -1
  28. package/dist/executors/retry.js +25 -2
  29. package/dist/executors/retry.js.map +1 -1
  30. package/dist/executors/review.d.ts.map +1 -1
  31. package/dist/executors/review.js +16 -2
  32. package/dist/executors/review.js.map +1 -1
  33. package/dist/executors/types.d.ts +35 -0
  34. package/dist/executors/types.d.ts.map +1 -1
  35. package/dist/executors/verify.d.ts.map +1 -1
  36. package/dist/executors/verify.js +16 -2
  37. package/dist/executors/verify.js.map +1 -1
  38. package/dist/heartbeat.d.ts +1 -1
  39. package/dist/heartbeat.d.ts.map +1 -1
  40. package/dist/heartbeat.js +10 -3
  41. package/dist/heartbeat.js.map +1 -1
  42. package/dist/routing/model-profiles.d.ts +1 -0
  43. package/dist/routing/model-profiles.d.ts.map +1 -1
  44. package/dist/routing/model-profiles.js +3 -0
  45. package/dist/routing/model-profiles.js.map +1 -1
  46. package/dist/run-tasks/index.d.ts +17 -0
  47. package/dist/run-tasks/index.d.ts.map +1 -1
  48. package/dist/run-tasks/index.js +1 -1
  49. package/dist/run-tasks/index.js.map +1 -1
  50. package/dist/run-tasks/reviewed-lifecycle.d.ts +24 -2
  51. package/dist/run-tasks/reviewed-lifecycle.d.ts.map +1 -1
  52. package/dist/run-tasks/reviewed-lifecycle.js +191 -13
  53. package/dist/run-tasks/reviewed-lifecycle.js.map +1 -1
  54. package/dist/runners/claude-runner.d.ts.map +1 -1
  55. package/dist/runners/claude-runner.js +8 -5
  56. package/dist/runners/claude-runner.js.map +1 -1
  57. package/dist/runners/codex-runner.d.ts.map +1 -1
  58. package/dist/runners/codex-runner.js +6 -3
  59. package/dist/runners/codex-runner.js.map +1 -1
  60. package/dist/runners/error-classification.d.ts +8 -0
  61. package/dist/runners/error-classification.d.ts.map +1 -1
  62. package/dist/runners/error-classification.js +17 -0
  63. package/dist/runners/error-classification.js.map +1 -1
  64. package/dist/runners/openai-runner.d.ts.map +1 -1
  65. package/dist/runners/openai-runner.js +6 -3
  66. package/dist/runners/openai-runner.js.map +1 -1
  67. package/dist/runners/types.d.ts +6 -1
  68. package/dist/runners/types.d.ts.map +1 -1
  69. package/dist/telemetry/bucketing.d.ts +11 -0
  70. package/dist/telemetry/bucketing.d.ts.map +1 -0
  71. package/dist/telemetry/bucketing.js +52 -0
  72. package/dist/telemetry/bucketing.js.map +1 -0
  73. package/dist/telemetry/concern-classifier.d.ts +9 -0
  74. package/dist/telemetry/concern-classifier.d.ts.map +1 -0
  75. package/dist/telemetry/concern-classifier.js +21 -0
  76. package/dist/telemetry/concern-classifier.js.map +1 -0
  77. package/dist/telemetry/consent-rules.d.ts +17 -0
  78. package/dist/telemetry/consent-rules.d.ts.map +1 -0
  79. package/dist/telemetry/consent-rules.js +32 -0
  80. package/dist/telemetry/consent-rules.js.map +1 -0
  81. package/dist/telemetry/event-builder.d.ts +23 -0
  82. package/dist/telemetry/event-builder.d.ts.map +1 -0
  83. package/dist/telemetry/event-builder.js +321 -0
  84. package/dist/telemetry/event-builder.js.map +1 -0
  85. package/dist/telemetry/types.d.ts +1870 -0
  86. package/dist/telemetry/types.d.ts.map +1 -0
  87. package/dist/telemetry/types.js +373 -0
  88. package/dist/telemetry/types.js.map +1 -0
  89. package/dist/types.d.ts +81 -2
  90. package/dist/types.d.ts.map +1 -1
  91. package/dist/types.js +18 -2
  92. package/dist/types.js.map +1 -1
  93. package/package.json +13 -1
@@ -5,6 +5,7 @@ import { createProvider } from '../provider.js';
5
5
  import { delegateWithEscalation } from '../delegate-with-escalation.js';
6
6
  import { pickEscalation, pickReviewer, maxRowsFor, } from '../escalation/policy.js';
7
7
  import { runWithFallback, makeSyntheticRunResult, TRANSPORT_FAILURES, isReviewTransportFailure, } from '../escalation/fallback.js';
8
+ import { findModelCapabilities } from '../routing/model-profiles.js';
8
9
  import { HeartbeatTimer } from '../heartbeat.js';
9
10
  import { runSpecReview } from '../review/spec-reviewer.js';
10
11
  import { makeSkippedReviewResult } from '../review/skipped-result.js';
@@ -22,7 +23,63 @@ import { buildFallbackImplReport, readImplementerFileContents } from './fallback
22
23
  import { composeVerboseLine, toVerboseFields } from '../diagnostics/verbose-line.js';
23
24
  import { withDoneCondition } from './execute-task.js';
24
25
  const exec = promisify(execFile);
25
- export async function executeReviewedLifecycle(task, resolved, config, taskIndex, onProgress, heartbeatWiring, diagnostics) {
26
+ export function emptyStats() {
27
+ return {
28
+ implementing: { stage: 'implementing', entered: false, durationMs: null, costUSD: null, agentTier: null, modelFamily: null, model: null },
29
+ spec_rework: { stage: 'spec_rework', entered: false, durationMs: null, costUSD: null, agentTier: null, modelFamily: null, model: null },
30
+ quality_rework: { stage: 'quality_rework', entered: false, durationMs: null, costUSD: null, agentTier: null, modelFamily: null, model: null },
31
+ committing: { stage: 'committing', entered: false, durationMs: null, costUSD: null, agentTier: null, modelFamily: null, model: null },
32
+ verifying: { stage: 'verifying', entered: false, durationMs: null, costUSD: null, agentTier: null, modelFamily: null, model: null, outcome: null, skipReason: null },
33
+ spec_review: { stage: 'spec_review', entered: false, durationMs: null, costUSD: null, agentTier: null, modelFamily: null, model: null, verdict: null, roundsUsed: null },
34
+ quality_review: { stage: 'quality_review', entered: false, durationMs: null, costUSD: null, agentTier: null, modelFamily: null, model: null, verdict: null, roundsUsed: null },
35
+ diff_review: { stage: 'diff_review', entered: false, durationMs: null, costUSD: null, agentTier: null, modelFamily: null, model: null, verdict: null, roundsUsed: null },
36
+ };
37
+ }
38
+ function modelFamily(model) {
39
+ const dash = model.indexOf('-');
40
+ return dash > 0 ? model.slice(0, dash) : model;
41
+ }
42
+ export function endBaseStage(stats, name, t0, c0, agent, finalCostUSD) {
43
+ // Cast through unknown — TS can't narrow stats[name] on a union-typed index;
44
+ // the runtime invariant (set name's slot to its matching variant) is enforced
45
+ // by the helper signature and tested by tests/run-tasks/stage-stats.test.ts.
46
+ stats[name] = {
47
+ stage: name,
48
+ entered: true,
49
+ durationMs: Date.now() - t0,
50
+ costUSD: finalCostUSD !== null && c0 !== null ? finalCostUSD - c0 : null,
51
+ agentTier: agent.tier,
52
+ modelFamily: modelFamily(agent.model),
53
+ model: agent.model,
54
+ };
55
+ }
56
+ export function endReviewStage(stats, name, t0, c0, agent, finalCostUSD, verdict, roundsUsed) {
57
+ stats[name] = {
58
+ stage: name,
59
+ entered: true,
60
+ durationMs: Date.now() - t0,
61
+ costUSD: finalCostUSD !== null && c0 !== null ? finalCostUSD - c0 : null,
62
+ agentTier: agent.tier,
63
+ modelFamily: modelFamily(agent.model),
64
+ model: agent.model,
65
+ verdict,
66
+ roundsUsed,
67
+ };
68
+ }
69
+ export function endVerifyStage(stats, t0, c0, agent, finalCostUSD, outcome, skipReason) {
70
+ stats.verifying = {
71
+ stage: 'verifying',
72
+ entered: true,
73
+ durationMs: Date.now() - t0,
74
+ costUSD: finalCostUSD !== null && c0 !== null ? finalCostUSD - c0 : null,
75
+ agentTier: agent.tier,
76
+ modelFamily: modelFamily(agent.model),
77
+ model: agent.model,
78
+ outcome,
79
+ skipReason,
80
+ };
81
+ }
82
+ export async function executeReviewedLifecycle(task, resolved, config, taskIndex, onProgress, heartbeatWiring, diagnostics, recorder, _route, _client, _triggeringSkill) {
26
83
  const reviewPolicy = task.reviewPolicy ?? 'full';
27
84
  const otherSlot = resolved.slot === 'standard' ? 'complex' : 'standard';
28
85
  let escalationProvider;
@@ -123,6 +180,20 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
123
180
  tick_ms: heartbeat ? 5000 : undefined,
124
181
  reason: heartbeat ? undefined : 'no_consumer',
125
182
  });
183
+ // Stall watchdog: poll every 5s; abort if no runner event has fired for
184
+ // stallTimeoutMs. Stops at lifecycle exit (cleared in the finally block
185
+ // around runReviewedLifecycle's body — see end-of-function teardown).
186
+ const stallWatchdogInterval = setInterval(() => {
187
+ if (stallFired)
188
+ return;
189
+ const idleMs = Date.now() - lastRunnerEventAtMs;
190
+ if (idleMs >= stallTimeoutMs) {
191
+ stallFired = true;
192
+ emitTaskEvent('stall_abort', { idle_ms: idleMs, threshold_ms: stallTimeoutMs });
193
+ stallController.abort();
194
+ }
195
+ }, 5000);
196
+ stallWatchdogInterval.unref?.();
126
197
  const implModel = resolved.provider.config.model;
127
198
  const progressCounters = { filesRead: 0, filesWritten: 0, toolCalls: 0 };
128
199
  const verboseStream = verboseStreamRaw;
@@ -133,8 +204,12 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
133
204
  // the caller passed onProgress, so --verbose + HTTP handlers (which don't
134
205
  // pass onProgress) silently dropped every tool_call / turn_complete event.
135
206
  let textEmissionChars = 0;
207
+ const markRunnerEvent = () => { lastRunnerEventAtMs = Date.now(); };
136
208
  const wrappedOnProgress = needHeartbeat
137
209
  ? (event) => {
210
+ if (event.kind === 'turn_start' || event.kind === 'text_emission' || event.kind === 'tool_call' || event.kind === 'turn_complete') {
211
+ markRunnerEvent();
212
+ }
138
213
  if (event.kind === 'turn_start') {
139
214
  heartbeat?.markEvent('llm');
140
215
  if (verbose)
@@ -143,6 +218,7 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
143
218
  emitTaskEvent('turn_start', {
144
219
  turn: event.turn,
145
220
  provider: event.provider,
221
+ model: event.model,
146
222
  });
147
223
  }
148
224
  }
@@ -205,6 +281,21 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
205
281
  : undefined;
206
282
  const cwd = task.cwd ?? process.cwd();
207
283
  const taskStartMs = Date.now();
284
+ // Hard task-level wall-clock cap. Once Date.now() crosses this, no new
285
+ // provider.run is dispatched (retries / tier-fallback short-circuit) and
286
+ // any in-flight call gets a per-call timeoutMs clamped to remaining
287
+ // budget so it returns its salvage promptly. The user gets *something*
288
+ // back instead of an open-ended retry storm.
289
+ const taskTimeoutMs = task.timeoutMs ?? config.defaults.timeoutMs ?? 1_800_000;
290
+ const taskDeadlineMs = taskStartMs + taskTimeoutMs;
291
+ // Stall watchdog: when no LLM / tool / text event has fired for this
292
+ // many ms, the in-flight runner is force-aborted via `stallController`.
293
+ // Catches "model is silently thinking forever" and "transport hung" —
294
+ // both invisible to the wall-clock cap until the very end.
295
+ const stallTimeoutMs = config.defaults.stallTimeoutMs ?? 600_000;
296
+ const stallController = new AbortController();
297
+ let lastRunnerEventAtMs = taskStartMs;
298
+ let stallFired = false;
208
299
  const commits = [];
209
300
  let commitError;
210
301
  let specAttemptIndex = 0;
@@ -223,6 +314,15 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
223
314
  let lastNonRejectedImpl;
224
315
  const reviewRounds = () => ({ spec: specAttemptIndex, quality: qualityAttemptIndex, metadata: metadataRepair, cap: Math.max(maxSpecRows, maxQualityRows) });
225
316
  const taskCostUSD = () => (heartbeat ? heartbeat.getHeartbeatTickInfo().costUSD : null);
317
+ // Per-stage stats tracking
318
+ const stats = emptyStats();
319
+ const resolvedModel = config.agents[resolved.slot].model;
320
+ const implementerAgentInfo = {
321
+ tier: resolved.slot,
322
+ family: modelFamily(resolvedModel),
323
+ model: resolvedModel,
324
+ };
325
+ const runningCostUSD = () => taskCostUSD();
226
326
  const policyEscalated = { spec: false, quality: false, diff: false };
227
327
  const emitFallback = (p) => {
228
328
  diagnostics?.logger?.fallback(p);
@@ -259,17 +359,23 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
259
359
  reviewRounds: reviewRounds(),
260
360
  error: `runWithFallback: both tiers unavailable (loop=${loop}, attempt=${attempt}, role=implementer)`,
261
361
  agents: agentEnvelope(specReviewerHistory[specReviewerHistory.length - 1] ?? 'not_applicable', qualityReviewerHistory[qualityReviewerHistory.length - 1] ?? (reviewPolicy === 'full' ? 'not_applicable' : 'skipped')),
362
+ stageStats: stats,
262
363
  };
263
364
  }
264
365
  function reviewDidNotReject(status) {
265
366
  return status === 'approved' || status === 'skipped';
266
367
  }
368
+ const implementerToolMode = task.tools ?? config.defaults.tools;
369
+ const agentConfig = config.agents[resolved.slot];
370
+ const implementerCapabilities = (agentConfig.capabilities ?? findModelCapabilities(agentConfig.model) ?? []);
267
371
  const agentEnvelope = (specReviewer, qualityReviewer) => {
268
372
  const selectedImpl = latestAttemptedImpl ?? lastNonRejectedImpl;
269
373
  const implementer = selectedImpl?.tier ?? resolved.slot;
270
374
  return {
271
375
  implementer,
272
376
  ...(implementerHistory.length > 1 || implementerHistory.some(t => t !== implementer) ? { implementerHistory } : {}),
377
+ implementerToolMode,
378
+ implementerCapabilities,
273
379
  specReviewer,
274
380
  ...(specReviewerHistory.length > 0 && (specReviewerHistory.length > 1 || specReviewerHistory.some(t => t === 'skipped')) ? { specReviewerHistory } : {}),
275
381
  qualityReviewer,
@@ -287,16 +393,15 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
287
393
  specReviewStatus: aborting === 'spec' ? 'changes_required' : (base.specReviewStatus ?? 'approved'),
288
394
  qualityReviewStatus: aborting === 'quality' ? 'changes_required' : (base.qualityReviewStatus ?? 'skipped'),
289
395
  agents: agentEnvelope(specReviewerHistory[specReviewerHistory.length - 1] ?? 'not_applicable', qualityReviewerHistory[qualityReviewerHistory.length - 1] ?? (reviewPolicy === 'full' ? 'not_applicable' : 'skipped')),
396
+ stageStats: stats,
290
397
  });
291
398
  const defaultVerification = { status: 'skipped', steps: [], totalDurationMs: 0, skipReason: 'no_command' };
292
399
  let latestVerification = defaultVerification;
293
400
  async function runVerificationStage() {
294
- emitTaskEvent('stage_change', { from: 'committing', to: 'verifying' });
295
- heartbeat?.transition({
296
- stage: 'verifying',
297
- stageIndex: 4,
298
- reviewRound: undefined,
299
- });
401
+ emitTaskEvent('stage_change', { from: 'implementing', to: 'verifying' });
402
+ heartbeat?.setStage('verifying', 4);
403
+ const overallVerificationStart = Date.now();
404
+ const verifyCostStart = runningCostUSD();
300
405
  const verification = await runVerifyStage({
301
406
  cwd,
302
407
  verifyCommand: task.verifyCommand,
@@ -304,6 +409,10 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
304
409
  taskStartMs,
305
410
  });
306
411
  latestVerification = verification;
412
+ endVerifyStage(stats, overallVerificationStart, verifyCostStart, implementerAgentInfo, runningCostUSD(), verification.status === 'passed' ? 'passed'
413
+ : verification.status === 'failed' ? 'failed'
414
+ : verification.status === 'skipped' ? 'skipped'
415
+ : 'not_applicable', verification.skipReason ?? null);
307
416
  for (const step of verification.steps) {
308
417
  emitTaskEvent('verify_step', {
309
418
  command: step.command,
@@ -354,7 +463,7 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
354
463
  });
355
464
  }
356
465
  function withVerification(result, verification = latestVerification) {
357
- return signalize({ ...result, verification });
466
+ return signalize({ ...result, verification, stageStats: stats });
358
467
  }
359
468
  function verificationErrorResult(base, verification) {
360
469
  if (verification.status !== 'error')
@@ -405,6 +514,7 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
405
514
  commits,
406
515
  commitError,
407
516
  verification,
517
+ stageStats: stats,
408
518
  }, verification);
409
519
  }
410
520
  function resolveDiffOnlyTerminal(base, verdict, verification, diffTruncated) {
@@ -520,8 +630,12 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
520
630
  const validCommit = implReport?.commit ?? await repairCommitMetadata(implReport?.commitDiagnostic ?? 'no commit block emitted');
521
631
  if (!validCommit)
522
632
  return;
633
+ heartbeat?.setStage('committing', 7);
634
+ const commitT0 = Date.now();
635
+ const commitC0 = runningCostUSD();
523
636
  const c = await runCommitStage({ cwd, filesWritten: implResult.filesWritten, commit: validCommit });
524
637
  commits.push(c);
638
+ endBaseStage(stats, 'committing', commitT0, commitC0, implementerAgentInfo, runningCostUSD());
525
639
  }
526
640
  }
527
641
  try {
@@ -557,6 +671,8 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
557
671
  attemptIndex: 0,
558
672
  baseTier: resolved.slot,
559
673
  });
674
+ const implT0 = Date.now();
675
+ const implC0 = runningCostUSD();
560
676
  const initialImpl = await runWithFallback({
561
677
  assigned: initialDecision.impl,
562
678
  providerFor,
@@ -564,7 +680,7 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
564
680
  isTransportFailure: (r) => TRANSPORT_FAILURES.has(r.status) && r.capExhausted === undefined,
565
681
  getStatus: (r) => r.status,
566
682
  makeSyntheticFailure: (assigned) => makeSyntheticRunResult(assigned, 'all_tiers_unavailable'),
567
- call: (provider) => delegateWithEscalation(withDoneCondition(task), [provider], { explicitlyPinned: false, onProgress: wrappedOnProgress }),
683
+ call: (provider) => delegateWithEscalation(withDoneCondition(task), [provider], { explicitlyPinned: false, onProgress: wrappedOnProgress, taskDeadlineMs, abortSignal: stallController.signal }),
568
684
  });
569
685
  if (initialImpl.fallbackFired || initialImpl.bothUnavailable) {
570
686
  fallbackOverrides.push({
@@ -602,6 +718,7 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
602
718
  latestAttemptedImpl = { tier: initialImpl.usedTier, result: implResult };
603
719
  lastNonRejectedImpl = { tier: initialImpl.usedTier, result: implResult };
604
720
  implementerHistory.push(initialImpl.usedTier);
721
+ endBaseStage(stats, 'implementing', implT0, implC0, implementerAgentInfo, runningCostUSD());
605
722
  specAttemptIndex = 1;
606
723
  const implReport = implResult.status === 'ok' ? parseStructuredReport(implResult.output) : undefined;
607
724
  const workerStatus = extractWorkerStatus(implReport);
@@ -673,6 +790,7 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
673
790
  commits,
674
791
  commitError,
675
792
  verification,
793
+ stageStats: stats,
676
794
  };
677
795
  }
678
796
  if (workerStatus === 'needs_context' || workerStatus === 'blocked') {
@@ -693,6 +811,7 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
693
811
  commits,
694
812
  commitError,
695
813
  verification,
814
+ stageStats: stats,
696
815
  };
697
816
  }
698
817
  if (reviewPolicy === 'off') {
@@ -730,7 +849,11 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
730
849
  const diffUnavailable = new Map();
731
850
  const diffReviewerTier = pickReviewer({ loop: 'spec', attemptIndex: 0, baseTier: resolved.slot });
732
851
  emitTaskEvent('stage_change', { from: 'verifying', to: 'diff_review' });
852
+ const diffReviewT0 = Date.now();
853
+ const diffReviewC0 = runningCostUSD();
733
854
  heartbeat?.transition({ stage: 'diff_review', stageIndex: 2, reviewRound: 1, attemptCap: 1 });
855
+ const diffReviewT0_commit = Date.now();
856
+ const diffReviewC0_commit = runningCostUSD();
734
857
  const diffCall = await runWithFallback({
735
858
  assigned: diffReviewerTier,
736
859
  providerFor,
@@ -749,6 +872,15 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
749
872
  }
750
873
  const verdict = diffCall.bothUnavailable || isReviewTransportFailure(diffCall.result) ? makeSkippedReviewResult('all_tiers_unavailable') : diffCall.result;
751
874
  emitTaskEvent('review_decision', { stage: 'diff_review', verdict: 'kind' in verdict ? verdict.kind : 'skipped', round: 1 });
875
+ endReviewStage(stats, 'diff_review', diffReviewT0_commit, diffReviewC0_commit, implementerAgentInfo, runningCostUSD(),
876
+ // Diff review uses 'approve' | 'concerns' | 'reject' | 'transport_failure' (DiffReviewVerdict),
877
+ // distinct from spec/quality verdicts. Map to the telemetry verdict enum here.
878
+ 'kind' in verdict
879
+ ? (verdict.kind === 'approve' ? 'approved'
880
+ : verdict.kind === 'concerns' ? 'concerns'
881
+ : verdict.kind === 'reject' ? 'changes_required'
882
+ : 'error')
883
+ : 'skipped', 0);
752
884
  return resolveDiffOnlyTerminal({
753
885
  ...implResult,
754
886
  workerStatus,
@@ -770,6 +902,8 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
770
902
  let specReviewReason;
771
903
  heartbeat?.transition({ stage: 'spec_review', stageIndex: 2, reviewRound: 1, attemptCap: maxSpecRows });
772
904
  const initialReviewerTier = pickReviewer({ loop: 'spec', attemptIndex: 0, baseTier: resolved.slot });
905
+ const specReviewT0 = Date.now();
906
+ const specReviewC0 = runningCostUSD();
773
907
  const initialSpecReview = await runWithFallback({
774
908
  assigned: initialReviewerTier,
775
909
  providerFor,
@@ -813,7 +947,7 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
813
947
  heartbeat?.transition({ stage: 'spec_rework', stageIndex: 3, reviewRound: specAttemptIndex, attemptCap: maxSpecRows });
814
948
  const feedback = specResult.findings.length > 0 ? `\n\n## Spec Review Feedback (round ${specAttemptIndex}):\n${specResult.findings.map(f => `- ${f}`).join('\n')}` : '';
815
949
  const reworkTask = withDoneCondition({ ...task, prompt: `${task.prompt}${feedback}` });
816
- const reworkCall = await runWithFallback({ assigned: decision.impl, providerFor, unavailableTiers: specUnavailable, isTransportFailure: (r) => TRANSPORT_FAILURES.has(r.status) && r.capExhausted === undefined, getStatus: (r) => r.status, makeSyntheticFailure: (assigned) => makeSyntheticRunResult(assigned, 'all_tiers_unavailable'), call: (provider) => delegateWithEscalation(reworkTask, [provider], { explicitlyPinned: true, onProgress: wrappedOnProgress }) });
950
+ const reworkCall = await runWithFallback({ assigned: decision.impl, providerFor, unavailableTiers: specUnavailable, isTransportFailure: (r) => TRANSPORT_FAILURES.has(r.status) && r.capExhausted === undefined, getStatus: (r) => r.status, makeSyntheticFailure: (assigned) => makeSyntheticRunResult(assigned, 'all_tiers_unavailable'), call: (provider) => delegateWithEscalation(reworkTask, [provider], { explicitlyPinned: true, onProgress: wrappedOnProgress, taskDeadlineMs, abortSignal: stallController.signal }) });
817
951
  if (reworkCall.fallbackFired || reworkCall.bothUnavailable)
818
952
  fallbackOverrides.push({ role: 'implementer', loop: 'spec', attempt: specAttemptIndex, assigned: decision.impl, used: reworkCall.usedTier, reason: (reworkCall.fallbackReason ?? reworkCall.unavailableReason), triggeringStatus: reworkCall.fallbackTriggeringStatus, bothUnavailable: reworkCall.bothUnavailable });
819
953
  if (reworkCall.fallbackFired) {
@@ -863,10 +997,18 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
863
997
  prevSpecFindings = [...(specResult.findings ?? [])];
864
998
  }
865
999
  let qualityResult = { status: 'skipped', report: undefined, findings: [], errorReason: reviewPolicy === 'full' ? 'all_tiers_unavailable' : 'skipped: reviewPolicy is spec_only' };
1000
+ // Hoisted so endReviewStage (called after this block) can read them on the
1001
+ // success path. When the quality review is skipped (`reviewPolicy !== 'full'`),
1002
+ // the values stay at 0/null and the corresponding stage entry remains in its
1003
+ // `entered: false` default — endReviewStage is never called.
1004
+ let qualityReviewT0 = 0;
1005
+ let qualityReviewC0 = null;
866
1006
  if (reviewPolicy === 'full') {
867
1007
  qualityUnavailable = new Map();
868
1008
  const qualityReviewerTier = pickReviewer({ loop: 'quality', attemptIndex: 0, baseTier: resolved.slot });
869
1009
  heartbeat?.transition({ stage: 'quality_review', stageIndex: 4, reviewRound: 1, attemptCap: maxQualityRows });
1010
+ qualityReviewT0 = Date.now();
1011
+ qualityReviewC0 = runningCostUSD();
870
1012
  const initialQuality = await runWithFallback({ assigned: qualityReviewerTier, providerFor, unavailableTiers: qualityUnavailable, isTransportFailure: (r) => isReviewTransportFailure(r), getStatus: (r) => r.status, makeSyntheticFailure: () => makeSkippedReviewResult('all_tiers_unavailable'), call: (provider) => runQualityReview(provider, packet, specReport ?? finalImplReport, fileContents, finalImplResult.toolCalls, finalImplResult.filesWritten, evidence.block) });
871
1013
  if (initialQuality.bothUnavailable) {
872
1014
  emitFallbackUnavailable({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'quality', attempt: 0, role: 'qualityReviewer', assignedTier: qualityReviewerTier, reason: initialQuality.unavailableReason });
@@ -898,7 +1040,7 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
898
1040
  heartbeat?.transition({ stage: 'quality_rework', stageIndex: 5, reviewRound: qualityAttemptIndex, attemptCap: maxQualityRows });
899
1041
  const feedback = qualityResult.findings.length > 0 ? `\n\n## Quality Review Feedback (round ${qualityAttemptIndex}):\n${qualityResult.findings.map(f => `- ${f}`).join('\n')}` : '';
900
1042
  const reworkTask = withDoneCondition({ ...task, prompt: `${task.prompt}${feedback}` });
901
- const reworkCall = await runWithFallback({ assigned: decision.impl, providerFor, unavailableTiers: qualityUnavailable, isTransportFailure: (r) => TRANSPORT_FAILURES.has(r.status) && r.capExhausted === undefined, getStatus: (r) => r.status, makeSyntheticFailure: (assigned) => makeSyntheticRunResult(assigned, 'all_tiers_unavailable'), call: (provider) => delegateWithEscalation(reworkTask, [provider], { explicitlyPinned: true, onProgress: wrappedOnProgress }) });
1043
+ const reworkCall = await runWithFallback({ assigned: decision.impl, providerFor, unavailableTiers: qualityUnavailable, isTransportFailure: (r) => TRANSPORT_FAILURES.has(r.status) && r.capExhausted === undefined, getStatus: (r) => r.status, makeSyntheticFailure: (assigned) => makeSyntheticRunResult(assigned, 'all_tiers_unavailable'), call: (provider) => delegateWithEscalation(reworkTask, [provider], { explicitlyPinned: true, onProgress: wrappedOnProgress, taskDeadlineMs, abortSignal: stallController.signal }) });
902
1044
  if (reworkCall.fallbackFired || reworkCall.bothUnavailable)
903
1045
  fallbackOverrides.push({ role: 'implementer', loop: 'quality', attempt: qualityAttemptIndex, assigned: decision.impl, used: reworkCall.usedTier, reason: (reworkCall.fallbackReason ?? reworkCall.unavailableReason), triggeringStatus: reworkCall.fallbackTriggeringStatus, bothUnavailable: reworkCall.bothUnavailable });
904
1046
  if (reworkCall.fallbackFired)
@@ -962,7 +1104,16 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
962
1104
  });
963
1105
  }
964
1106
  const specAggregateStatus = (['approved', 'changes_required', 'skipped', 'error', 'api_error', 'network_error', 'timeout'].includes(specStatus) ? specStatus : 'error');
1107
+ endReviewStage(stats, 'spec_review', specReviewT0, specReviewC0, implementerAgentInfo, runningCostUSD(), specStatus === 'approved' ? 'approved'
1108
+ : specStatus === 'changes_required' ? 'changes_required'
1109
+ : specStatus === 'skipped' ? 'skipped'
1110
+ : specStatus === 'not_applicable' ? 'not_applicable'
1111
+ : 'error', specAttemptIndex - 1);
965
1112
  const qualityAggregateStatus = qualityResult.status;
1113
+ endReviewStage(stats, 'quality_review', qualityReviewT0, qualityReviewC0, implementerAgentInfo, runningCostUSD(), qualityResult.status === 'approved' ? 'approved'
1114
+ : qualityResult.status === 'changes_required' ? 'changes_required'
1115
+ : qualityResult.status === 'skipped' ? 'skipped'
1116
+ : 'error', qualityAttemptIndex - 1);
966
1117
  const aggregated = aggregateResult(finalReport, specReport, qualityResult.report, specAggregateStatus, qualityAggregateStatus);
967
1118
  // File artifact verification: check whether output targets exist on disk after all work.
968
1119
  // Only applies when status is ok; non-ok statuses skip verification entirely.
@@ -979,13 +1130,14 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
979
1130
  : finalImplResult.status;
980
1131
  const specEnvelopeStatus = (specStatus === 'api_error' || specStatus === 'network_error' || specStatus === 'timeout' ? 'error' : specStatus);
981
1132
  const qualityEnvelopeStatus = qualityResult.status === 'api_error' || qualityResult.status === 'network_error' || qualityResult.status === 'timeout' ? 'error' : qualityResult.status;
982
- return {
1133
+ const runResult = {
983
1134
  ...finalImplResult,
984
1135
  status: finalStatus,
985
1136
  workerStatus: finalWorkerStatus,
986
1137
  concerns,
987
1138
  specReviewStatus: specEnvelopeStatus,
988
1139
  qualityReviewStatus: qualityEnvelopeStatus,
1140
+ stageStats: stats,
989
1141
  specReviewReason: 'errorReason' in specResult ? specResult.errorReason : undefined,
990
1142
  qualityReviewReason: 'errorReason' in qualityResult ? qualityResult.errorReason : undefined,
991
1143
  structuredReport: aggregated,
@@ -1004,12 +1156,38 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
1004
1156
  commitError,
1005
1157
  verification,
1006
1158
  };
1159
+ try {
1160
+ recorder?.recordTaskCompleted({
1161
+ route: _route ?? 'delegate',
1162
+ taskSpec: task,
1163
+ runResult,
1164
+ client: _client ?? 'claude-code',
1165
+ triggeringSkill: _triggeringSkill ?? 'direct',
1166
+ parentModel: task.parentModel ?? null,
1167
+ });
1168
+ }
1169
+ catch { /* silent — bedrock invariant */ }
1170
+ return runResult;
1007
1171
  }
1008
1172
  catch (err) {
1009
- return withVerification(workerErrorResult(err));
1173
+ const errorRunResult = withVerification(workerErrorResult(err));
1174
+ try {
1175
+ recorder?.recordTaskCompleted({
1176
+ route: _route ?? 'delegate',
1177
+ taskSpec: task,
1178
+ runResult: errorRunResult,
1179
+ client: _client ?? 'claude-code',
1180
+ triggeringSkill: _triggeringSkill ?? 'direct',
1181
+ parentModel: task.parentModel ?? null,
1182
+ });
1183
+ }
1184
+ catch { /* silent — bedrock invariant */ }
1185
+ return errorRunResult;
1010
1186
  }
1011
1187
  finally {
1188
+ heartbeat?.setStage('terminal', 8);
1012
1189
  heartbeat?.stop();
1190
+ clearInterval(stallWatchdogInterval);
1013
1191
  }
1014
1192
  }
1015
1193
  //# sourceMappingURL=reviewed-lifecycle.js.map