@zhixuan92/multi-model-agent-core 3.8.0 → 3.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (91) hide show
  1. package/README.md +1 -1
  2. package/dist/config/schema.d.ts +12 -0
  3. package/dist/config/schema.d.ts.map +1 -1
  4. package/dist/config/schema.js +16 -9
  5. package/dist/config/schema.js.map +1 -1
  6. package/dist/executors/_shared/findings-schema.d.ts +61 -21
  7. package/dist/executors/_shared/findings-schema.d.ts.map +1 -1
  8. package/dist/executors/_shared/findings-schema.js +27 -15
  9. package/dist/executors/_shared/findings-schema.js.map +1 -1
  10. package/dist/executors/audit.d.ts.map +1 -1
  11. package/dist/executors/audit.js +2 -1
  12. package/dist/executors/audit.js.map +1 -1
  13. package/dist/executors/debug.d.ts.map +1 -1
  14. package/dist/executors/debug.js +2 -1
  15. package/dist/executors/debug.js.map +1 -1
  16. package/dist/executors/execute-plan.d.ts.map +1 -1
  17. package/dist/executors/execute-plan.js +2 -1
  18. package/dist/executors/execute-plan.js.map +1 -1
  19. package/dist/executors/investigate.d.ts.map +1 -1
  20. package/dist/executors/investigate.js +4 -2
  21. package/dist/executors/investigate.js.map +1 -1
  22. package/dist/executors/review.d.ts.map +1 -1
  23. package/dist/executors/review.js +2 -1
  24. package/dist/executors/review.js.map +1 -1
  25. package/dist/executors/verify.d.ts.map +1 -1
  26. package/dist/executors/verify.js +2 -1
  27. package/dist/executors/verify.js.map +1 -1
  28. package/dist/heartbeat.d.ts +3 -0
  29. package/dist/heartbeat.d.ts.map +1 -1
  30. package/dist/heartbeat.js +5 -0
  31. package/dist/heartbeat.js.map +1 -1
  32. package/dist/intake/resolve.d.ts.map +1 -1
  33. package/dist/intake/resolve.js +21 -6
  34. package/dist/intake/resolve.js.map +1 -1
  35. package/dist/observability/events.d.ts +192 -23
  36. package/dist/observability/events.d.ts.map +1 -1
  37. package/dist/observability/events.js +50 -11
  38. package/dist/observability/events.js.map +1 -1
  39. package/dist/review/aggregate-result.d.ts +1 -1
  40. package/dist/review/aggregate-result.d.ts.map +1 -1
  41. package/dist/review/aggregate-result.js.map +1 -1
  42. package/dist/review/diff-review.d.ts +7 -2
  43. package/dist/review/diff-review.d.ts.map +1 -1
  44. package/dist/review/diff-review.js +11 -2
  45. package/dist/review/diff-review.js.map +1 -1
  46. package/dist/review/quality-only-prompts.d.ts +11 -5
  47. package/dist/review/quality-only-prompts.d.ts.map +1 -1
  48. package/dist/review/quality-only-prompts.js +98 -51
  49. package/dist/review/quality-only-prompts.js.map +1 -1
  50. package/dist/review/quality-reviewer.d.ts +51 -10
  51. package/dist/review/quality-reviewer.d.ts.map +1 -1
  52. package/dist/review/quality-reviewer.js +163 -21
  53. package/dist/review/quality-reviewer.js.map +1 -1
  54. package/dist/review/spec-reviewer.d.ts +1 -1
  55. package/dist/review/spec-reviewer.d.ts.map +1 -1
  56. package/dist/review/spec-reviewer.js +4 -3
  57. package/dist/review/spec-reviewer.js.map +1 -1
  58. package/dist/run-tasks/index.d.ts +1 -0
  59. package/dist/run-tasks/index.d.ts.map +1 -1
  60. package/dist/run-tasks/reviewed-lifecycle.d.ts +16 -3
  61. package/dist/run-tasks/reviewed-lifecycle.d.ts.map +1 -1
  62. package/dist/run-tasks/reviewed-lifecycle.js +278 -212
  63. package/dist/run-tasks/reviewed-lifecycle.js.map +1 -1
  64. package/dist/run-tasks/stage-idle-tracker.d.ts +14 -0
  65. package/dist/run-tasks/stage-idle-tracker.d.ts.map +1 -0
  66. package/dist/run-tasks/stage-idle-tracker.js +17 -0
  67. package/dist/run-tasks/stage-idle-tracker.js.map +1 -0
  68. package/dist/tool-schemas/audit.d.ts +2 -0
  69. package/dist/tool-schemas/audit.d.ts.map +1 -1
  70. package/dist/tool-schemas/debug.d.ts +2 -0
  71. package/dist/tool-schemas/debug.d.ts.map +1 -1
  72. package/dist/tool-schemas/delegate.d.ts +2 -0
  73. package/dist/tool-schemas/delegate.d.ts.map +1 -1
  74. package/dist/tool-schemas/execute-plan.d.ts +2 -0
  75. package/dist/tool-schemas/execute-plan.d.ts.map +1 -1
  76. package/dist/tool-schemas/investigate.d.ts +2 -0
  77. package/dist/tool-schemas/investigate.d.ts.map +1 -1
  78. package/dist/tool-schemas/retry.d.ts +2 -0
  79. package/dist/tool-schemas/retry.d.ts.map +1 -1
  80. package/dist/tool-schemas/review.d.ts +2 -0
  81. package/dist/tool-schemas/review.d.ts.map +1 -1
  82. package/dist/tool-schemas/shared-output.d.ts +2 -0
  83. package/dist/tool-schemas/shared-output.d.ts.map +1 -1
  84. package/dist/tool-schemas/shared-output.js +1 -1
  85. package/dist/tool-schemas/shared-output.js.map +1 -1
  86. package/dist/tool-schemas/verify.d.ts +2 -0
  87. package/dist/tool-schemas/verify.d.ts.map +1 -1
  88. package/dist/types.d.ts +8 -2
  89. package/dist/types.d.ts.map +1 -1
  90. package/dist/types.js.map +1 -1
  91. package/package.json +1 -1
@@ -7,6 +7,8 @@ import { pickEscalation, pickReviewer, maxRowsFor, } from '../escalation/policy.
7
7
  import { runWithFallback, makeSyntheticRunResult, TRANSPORT_FAILURES, isReviewTransportFailure, } from '../escalation/fallback.js';
8
8
  import { findModelCapabilities, extractCanonicalModelName } from '../routing/model-profiles.js';
9
9
  import { HeartbeatTimer } from '../heartbeat.js';
10
+ import { newStageIdleTracker, snapshotIdle } from './stage-idle-tracker.js';
11
+ import { DEFAULT_TASK_TIMEOUT_MS, DEFAULT_STALL_TIMEOUT_MS } from '../config/schema.js';
10
12
  import { runSpecReview } from '../review/spec-reviewer.js';
11
13
  import { makeSkippedReviewResult } from '../review/skipped-result.js';
12
14
  import { runQualityReview } from '../review/quality-reviewer.js';
@@ -28,14 +30,14 @@ const READ_ONLY_TOOL_NAMES = new Set([
28
30
  ]);
29
31
  export function emptyStats() {
30
32
  return {
31
- implementing: { stage: 'implementing', entered: false, durationMs: null, costUSD: null, agentTier: null, modelFamily: null, model: null },
32
- spec_rework: { stage: 'spec_rework', entered: false, durationMs: null, costUSD: null, agentTier: null, modelFamily: null, model: null },
33
- quality_rework: { stage: 'quality_rework', entered: false, durationMs: null, costUSD: null, agentTier: null, modelFamily: null, model: null },
34
- committing: { stage: 'committing', entered: false, durationMs: null, costUSD: null, agentTier: null, modelFamily: null, model: null },
35
- verifying: { stage: 'verifying', entered: false, durationMs: null, costUSD: null, agentTier: null, modelFamily: null, model: null, outcome: null, skipReason: null },
36
- spec_review: { stage: 'spec_review', entered: false, durationMs: null, costUSD: null, agentTier: null, modelFamily: null, model: null, verdict: null, roundsUsed: null },
37
- quality_review: { stage: 'quality_review', entered: false, durationMs: null, costUSD: null, agentTier: null, modelFamily: null, model: null, verdict: null, roundsUsed: null },
38
- diff_review: { stage: 'diff_review', entered: false, durationMs: null, costUSD: null, agentTier: null, modelFamily: null, model: null, verdict: null, roundsUsed: null },
33
+ implementing: { stage: 'implementing', entered: false, durationMs: null, costUSD: null, agentTier: null, modelFamily: null, model: null, maxIdleMs: null, totalIdleMs: null, activityEvents: null },
34
+ spec_rework: { stage: 'spec_rework', entered: false, durationMs: null, costUSD: null, agentTier: null, modelFamily: null, model: null, maxIdleMs: null, totalIdleMs: null, activityEvents: null },
35
+ quality_rework: { stage: 'quality_rework', entered: false, durationMs: null, costUSD: null, agentTier: null, modelFamily: null, model: null, maxIdleMs: null, totalIdleMs: null, activityEvents: null },
36
+ committing: { stage: 'committing', entered: false, durationMs: null, costUSD: null, agentTier: null, modelFamily: null, model: null, maxIdleMs: null, totalIdleMs: null, activityEvents: null },
37
+ verifying: { stage: 'verifying', entered: false, durationMs: null, costUSD: null, agentTier: null, modelFamily: null, model: null, maxIdleMs: null, totalIdleMs: null, activityEvents: null, outcome: null, skipReason: null },
38
+ spec_review: { stage: 'spec_review', entered: false, durationMs: null, costUSD: null, agentTier: null, modelFamily: null, model: null, maxIdleMs: null, totalIdleMs: null, activityEvents: null, verdict: null, roundsUsed: null },
39
+ quality_review: { stage: 'quality_review', entered: false, durationMs: null, costUSD: null, agentTier: null, modelFamily: null, model: null, maxIdleMs: null, totalIdleMs: null, activityEvents: null, verdict: null, roundsUsed: null },
40
+ diff_review: { stage: 'diff_review', entered: false, durationMs: null, costUSD: null, agentTier: null, modelFamily: null, model: null, maxIdleMs: null, totalIdleMs: null, activityEvents: null, verdict: null, roundsUsed: null },
39
41
  };
40
42
  }
41
43
  const FAMILY_MAP = {
@@ -50,7 +52,7 @@ function modelFamily(model) {
50
52
  const raw = dash > 0 ? canonical.slice(0, dash) : canonical;
51
53
  return FAMILY_MAP[raw.toLowerCase()] ?? 'other';
52
54
  }
53
- export function endBaseStage(stats, name, t0, c0, agent, finalCostUSD) {
55
+ export function endBaseStage(stats, name, t0, c0, agent, finalCostUSD, idle) {
54
56
  // Cast through unknown — TS can't narrow stats[name] on a union-typed index;
55
57
  // the runtime invariant (set name's slot to its matching variant) is enforced
56
58
  // by the helper signature and tested by tests/run-tasks/stage-stats.test.ts.
@@ -62,9 +64,12 @@ export function endBaseStage(stats, name, t0, c0, agent, finalCostUSD) {
62
64
  agentTier: agent.tier,
63
65
  modelFamily: modelFamily(agent.model),
64
66
  model: agent.model,
67
+ maxIdleMs: idle?.maxIdleMs ?? null,
68
+ totalIdleMs: idle?.totalIdleMs ?? null,
69
+ activityEvents: idle?.activityEvents ?? null,
65
70
  };
66
71
  }
67
- export function endReviewStage(stats, name, t0, c0, agent, finalCostUSD, verdict, roundsUsed) {
72
+ export function endReviewStage(stats, name, t0, c0, agent, finalCostUSD, idle, verdict, roundsUsed) {
68
73
  stats[name] = {
69
74
  stage: name,
70
75
  entered: true,
@@ -73,11 +78,14 @@ export function endReviewStage(stats, name, t0, c0, agent, finalCostUSD, verdict
73
78
  agentTier: agent.tier,
74
79
  modelFamily: modelFamily(agent.model),
75
80
  model: agent.model,
81
+ maxIdleMs: idle?.maxIdleMs ?? null,
82
+ totalIdleMs: idle?.totalIdleMs ?? null,
83
+ activityEvents: idle?.activityEvents ?? null,
76
84
  verdict,
77
85
  roundsUsed,
78
86
  };
79
87
  }
80
- export function endVerifyStage(stats, t0, c0, agent, finalCostUSD, outcome, skipReason) {
88
+ export function endVerifyStage(stats, t0, c0, agent, finalCostUSD, idle, outcome, skipReason) {
81
89
  stats.verifying = {
82
90
  stage: 'verifying',
83
91
  entered: true,
@@ -86,6 +94,9 @@ export function endVerifyStage(stats, t0, c0, agent, finalCostUSD, outcome, skip
86
94
  agentTier: agent.tier,
87
95
  modelFamily: modelFamily(agent.model),
88
96
  model: agent.model,
97
+ maxIdleMs: idle?.maxIdleMs ?? null,
98
+ totalIdleMs: idle?.totalIdleMs ?? null,
99
+ activityEvents: idle?.activityEvents ?? null,
89
100
  outcome,
90
101
  skipReason,
91
102
  };
@@ -151,8 +162,9 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
151
162
  diagnostics?.logger !== undefined ||
152
163
  bus !== undefined;
153
164
  // Synthesize an onProgress sink when the caller didn't pass one — the
154
- // heartbeat needs a place to emit heartbeat events so the stage-change
155
- // detector below fires. Discards events if there is no external consumer.
165
+ // heartbeat needs a place to emit heartbeat events. Discards events if
166
+ // there is no external consumer. wrappedOnProgress (defined below) is
167
+ // ALWAYS defined and feeds the stall watchdog regardless of consumers.
156
168
  const synthOnProgress = onProgress ?? (() => { });
157
169
  const heartbeat = needHeartbeat
158
170
  ? new HeartbeatTimer((event) => {
@@ -162,6 +174,7 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
162
174
  // only via explicit emit calls at lifecycle points; the
163
175
  // heartbeat tick no longer infers transitions (P5).
164
176
  const sinceLastMs = Date.now() - prevEventAtMs;
177
+ const tickInfo = heartbeat?.getHeartbeatTickInfo();
165
178
  emitTaskEvent('heartbeat', {
166
179
  elapsed: event.elapsed,
167
180
  stage: event.stage,
@@ -173,6 +186,7 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
173
186
  text: textEmissionChars,
174
187
  cost: event.costUSD,
175
188
  idle_ms: sinceLastMs,
189
+ stage_idle_ms: tickInfo?.stageIdleMs ?? sinceLastMs,
176
190
  });
177
191
  }
178
192
  synthOnProgress(taskIndex, event);
@@ -207,94 +221,108 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
207
221
  const implModel = resolved.provider.config.model;
208
222
  const progressCounters = { filesRead: 0, filesWritten: 0, toolCalls: 0 };
209
223
  const verboseStream = verboseStreamRaw;
210
- let prevEventAtMs = verbose ? Date.now() : 0;
224
+ let prevEventAtMs = Date.now();
211
225
  // Wrap whenever we have ANY consumer for InternalRunnerEvent (heartbeat,
212
226
  // verbose stream, or verbose logger). Previously this only wrapped when
213
227
  // the caller passed onProgress, so --verbose + HTTP handlers (which don't
214
228
  // pass onProgress) silently dropped every tool_call / turn_complete event.
215
229
  let textEmissionChars = 0;
216
- const markRunnerEvent = () => { lastRunnerEventAtMs = Date.now(); };
217
- const wrappedOnProgress = needHeartbeat
218
- ? (event) => {
219
- if (event.kind === 'turn_start' || event.kind === 'text_emission' || event.kind === 'tool_call' || event.kind === 'turn_complete') {
220
- markRunnerEvent();
221
- }
222
- if (event.kind === 'worker_start') {
223
- emitTaskEvent('worker_start', {
230
+ const markRunnerEvent = () => {
231
+ const now = Date.now();
232
+ const gap = now - stageIdle.stageLastEventMs;
233
+ if (gap > stageIdle.stageMaxIdleMs)
234
+ stageIdle.stageMaxIdleMs = gap;
235
+ if (gap > taskMaxIdleMs)
236
+ taskMaxIdleMs = gap;
237
+ if (gap > 1000)
238
+ stageIdle.stageTotalIdleMs += gap;
239
+ stageIdle.stageActivityCount += 1;
240
+ stageIdle.stageLastEventMs = now;
241
+ lastRunnerEventAtMs = now;
242
+ };
243
+ const wrappedOnProgress = (event) => {
244
+ // Watchdog: fire on every activity event regardless of telemetry consumers.
245
+ // Without this, a no-consumer caller leaves lastRunnerEventAtMs frozen at
246
+ // taskStartMs and the stall watchdog fires at stallTimeoutMs regardless of
247
+ // actual LLM activity.
248
+ if (event.kind === 'turn_start' || event.kind === 'text_emission' || event.kind === 'tool_call' || event.kind === 'turn_complete') {
249
+ markRunnerEvent();
250
+ }
251
+ if (!needHeartbeat)
252
+ return;
253
+ if (event.kind === 'worker_start') {
254
+ emitTaskEvent('worker_start', {
255
+ model: event.model,
256
+ providerType: event.providerType,
257
+ tier: event.tier,
258
+ });
259
+ }
260
+ if (event.kind === 'turn_start') {
261
+ heartbeat?.markEvent('llm');
262
+ prevEventAtMs = Date.now();
263
+ if (verbose) {
264
+ emitTaskEvent('turn_start', {
265
+ turn: event.turn,
266
+ provider: event.provider,
224
267
  model: event.model,
225
- providerType: event.providerType,
226
- tier: event.tier,
227
268
  });
228
269
  }
229
- if (event.kind === 'turn_start') {
230
- heartbeat?.markEvent('llm');
231
- if (verbose)
232
- prevEventAtMs = Date.now();
233
- if (verbose) {
234
- emitTaskEvent('turn_start', {
235
- turn: event.turn,
236
- provider: event.provider,
237
- model: event.model,
238
- });
239
- }
270
+ }
271
+ if (event.kind === 'text_emission') {
272
+ prevEventAtMs = Date.now();
273
+ heartbeat?.markEvent('text');
274
+ textEmissionChars += event.chars;
275
+ if (verbose && event.chars > 0) {
276
+ const preview = event.preview.length > 60
277
+ ? event.preview.slice(0, 57) + '...'
278
+ : event.preview;
279
+ emitTaskEvent('text_emission', {
280
+ chars: event.chars,
281
+ total: textEmissionChars,
282
+ preview,
283
+ });
240
284
  }
241
- if (event.kind === 'text_emission') {
242
- heartbeat?.markEvent('text');
243
- textEmissionChars += event.chars;
244
- if (verbose && event.chars > 0) {
245
- const preview = event.preview.length > 60
246
- ? event.preview.slice(0, 57) + '...'
247
- : event.preview;
248
- emitTaskEvent('text_emission', {
249
- chars: event.chars,
250
- total: textEmissionChars,
251
- preview,
252
- });
253
- }
285
+ }
286
+ if (event.kind === 'tool_call') {
287
+ heartbeat?.markEvent('tool');
288
+ progressCounters.toolCalls++;
289
+ const name = event.toolSummary.split('(')[0];
290
+ if (name === 'readFile' || name === 'grep' || name === 'glob' || name === 'listFiles') {
291
+ progressCounters.filesRead++;
254
292
  }
255
- if (event.kind === 'tool_call') {
256
- heartbeat?.markEvent('tool');
257
- progressCounters.toolCalls++;
258
- const name = event.toolSummary.split('(')[0];
259
- if (name === 'readFile' || name === 'grep' || name === 'glob' || name === 'listFiles') {
260
- progressCounters.filesRead++;
261
- }
262
- else if (name === 'writeFile' || name === 'editFile') {
263
- progressCounters.filesWritten++;
264
- }
265
- heartbeat?.updateProgress(progressCounters.filesRead, progressCounters.filesWritten, progressCounters.toolCalls);
266
- const now = verbose ? Date.now() : 0;
267
- const sincePrevMs = verbose ? now - prevEventAtMs : 0;
268
- if (verbose)
269
- prevEventAtMs = now;
270
- if (verbose) {
271
- emitTaskEvent('tool_call', {
272
- tool: event.toolSummary,
273
- duration_ms: sincePrevMs,
274
- });
275
- }
293
+ else if (name === 'writeFile' || name === 'editFile') {
294
+ progressCounters.filesWritten++;
276
295
  }
277
- if (event.kind === 'turn_complete') {
278
- heartbeat?.markEvent('llm');
279
- const costUSD = computeCostUSD(event.cumulativeInputTokens, event.cumulativeOutputTokens, resolved.provider.config);
280
- const savedCostUSD = computeSavedCostUSD(costUSD, event.cumulativeInputTokens, event.cumulativeOutputTokens, task.parentModel);
281
- heartbeat?.updateCost(costUSD, savedCostUSD);
282
- const nowTurn = verbose ? Date.now() : 0;
283
- const turnDurMs = verbose ? nowTurn - prevEventAtMs : 0;
284
- if (verbose)
285
- prevEventAtMs = nowTurn;
286
- if (verbose) {
287
- emitTaskEvent('turn_complete', {
288
- input_tokens: event.cumulativeInputTokens,
289
- output_tokens: event.cumulativeOutputTokens,
290
- cost: costUSD,
291
- duration_ms: turnDurMs,
292
- provider: resolved.provider.config.model,
293
- });
294
- }
296
+ heartbeat?.updateProgress(progressCounters.filesRead, progressCounters.filesWritten, progressCounters.toolCalls);
297
+ const now = Date.now();
298
+ const sincePrevMs = now - prevEventAtMs;
299
+ prevEventAtMs = now;
300
+ if (verbose) {
301
+ emitTaskEvent('tool_call', {
302
+ tool: event.toolSummary,
303
+ duration_ms: sincePrevMs,
304
+ });
295
305
  }
296
306
  }
297
- : undefined;
307
+ if (event.kind === 'turn_complete') {
308
+ heartbeat?.markEvent('llm');
309
+ const costUSD = computeCostUSD(event.cumulativeInputTokens, event.cumulativeOutputTokens, resolved.provider.config);
310
+ const savedCostUSD = computeSavedCostUSD(costUSD, event.cumulativeInputTokens, event.cumulativeOutputTokens, task.parentModel);
311
+ heartbeat?.updateCost(costUSD, savedCostUSD);
312
+ const nowTurn = Date.now();
313
+ const turnDurMs = nowTurn - prevEventAtMs;
314
+ prevEventAtMs = nowTurn;
315
+ if (verbose) {
316
+ emitTaskEvent('turn_complete', {
317
+ input_tokens: event.cumulativeInputTokens,
318
+ output_tokens: event.cumulativeOutputTokens,
319
+ cost: costUSD,
320
+ duration_ms: turnDurMs,
321
+ provider: resolved.provider.config.model,
322
+ });
323
+ }
324
+ }
325
+ };
298
326
  const cwd = task.cwd ?? process.cwd();
299
327
  const taskStartMs = Date.now();
300
328
  // Hard task-level wall-clock cap. Once Date.now() crosses this, no new
@@ -302,16 +330,31 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
302
330
  // any in-flight call gets a per-call timeoutMs clamped to remaining
303
331
  // budget so it returns its salvage promptly. The user gets *something*
304
332
  // back instead of an open-ended retry storm.
305
- const taskTimeoutMs = task.timeoutMs ?? config.defaults.timeoutMs ?? 1_800_000;
333
+ const taskTimeoutMs = task.timeoutMs ?? config.defaults.timeoutMs ?? DEFAULT_TASK_TIMEOUT_MS;
306
334
  const taskDeadlineMs = taskStartMs + taskTimeoutMs;
307
335
  // Stall watchdog: when no LLM / tool / text event has fired for this
308
336
  // many ms, the in-flight runner is force-aborted via `stallController`.
309
337
  // Catches "model is silently thinking forever" and "transport hung" —
310
338
  // both invisible to the wall-clock cap until the very end.
311
- const stallTimeoutMs = config.defaults.stallTimeoutMs ?? 600_000;
339
+ const stallTimeoutMs = config.defaults.stallTimeoutMs ?? DEFAULT_STALL_TIMEOUT_MS;
312
340
  const stallController = new AbortController();
313
341
  let lastRunnerEventAtMs = taskStartMs;
342
+ let stageIdle = newStageIdleTracker(taskStartMs);
343
+ let taskMaxIdleMs = 0;
314
344
  let stallFired = false;
345
+ // Track the current stage so the terminal transition can pass an accurate
346
+ // `from`. Initialized to 'implementing' (matching HeartbeatTimer.start's
347
+ // initial stage). Updated on every transitionStage call.
348
+ let currentStage = 'implementing';
349
+ function transitionStage(from, to, heartbeatPayload, jsonlPayload) {
350
+ if (heartbeatPayload !== null)
351
+ heartbeat?.transition(heartbeatPayload);
352
+ if (jsonlPayload !== null) {
353
+ emitTaskEvent('stage_change', { from, to, ...jsonlPayload });
354
+ }
355
+ stageIdle = newStageIdleTracker(Date.now());
356
+ currentStage = to;
357
+ }
315
358
  const commits = [];
316
359
  let commitError;
317
360
  let specAttemptIndex = 0;
@@ -410,18 +453,17 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
410
453
  const defaultVerification = { status: 'skipped', steps: [], totalDurationMs: 0, skipReason: 'no_command' };
411
454
  let latestVerification = defaultVerification;
412
455
  async function runVerificationStage() {
413
- emitTaskEvent('stage_change', { from: 'implementing', to: 'verifying' });
414
- heartbeat?.setStage('verifying', 4);
456
+ transitionStage('implementing', 'verifying', { stage: 'verifying', stageIndex: 4 }, {});
415
457
  const overallVerificationStart = Date.now();
416
458
  const verifyCostStart = runningCostUSD();
417
459
  const verification = await runVerifyStage({
418
460
  cwd,
419
461
  verifyCommand: task.verifyCommand,
420
- taskTimeoutMs: task.timeoutMs ?? config.defaults.timeoutMs ?? 1_800_000,
462
+ taskTimeoutMs: task.timeoutMs ?? config.defaults.timeoutMs ?? DEFAULT_TASK_TIMEOUT_MS,
421
463
  taskStartMs,
422
464
  });
423
465
  latestVerification = verification;
424
- endVerifyStage(stats, overallVerificationStart, verifyCostStart, implementerAgentInfo, runningCostUSD(), verification.status === 'passed' ? 'passed'
466
+ endVerifyStage(stats, overallVerificationStart, verifyCostStart, implementerAgentInfo, runningCostUSD(), snapshotIdle(stageIdle), verification.status === 'passed' ? 'passed'
425
467
  : verification.status === 'failed' ? 'failed'
426
468
  : verification.status === 'skipped' ? 'skipped'
427
469
  : 'not_applicable', verification.skipReason ?? null);
@@ -642,12 +684,12 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
642
684
  const validCommit = implReport?.commit ?? await repairCommitMetadata(implReport?.commitDiagnostic ?? 'no commit block emitted');
643
685
  if (!validCommit)
644
686
  return;
645
- heartbeat?.setStage('committing', 7);
687
+ transitionStage('verifying', 'committing', { stage: 'committing', stageIndex: 7 }, null);
646
688
  const commitT0 = Date.now();
647
689
  const commitC0 = runningCostUSD();
648
690
  const c = await runCommitStage({ cwd, filesWritten: implResult.filesWritten, commit: validCommit });
649
691
  commits.push(c);
650
- endBaseStage(stats, 'committing', commitT0, commitC0, implementerAgentInfo, runningCostUSD());
692
+ endBaseStage(stats, 'committing', commitT0, commitC0, implementerAgentInfo, runningCostUSD(), snapshotIdle(stageIdle));
651
693
  }
652
694
  }
653
695
  // Tracks the final RunResult across every exit path so the `finally` block
@@ -657,10 +699,16 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
657
699
  // catch path. Without this, the recorder only fires on 2 of ~5 exit paths.
658
700
  let __finalRunResult;
659
701
  const __recordOnce = (r) => {
660
- // Stamp stallTriggered on every exit path. The watchdog flag is owned
661
- // by this scope; surfacing it on the RunResult lets the caller (and
662
- // telemetry) distinguish "no progress" aborts from cap exhaustion.
663
- const stamped = stallFired ? { ...r, stallTriggered: true } : r;
702
+ // Stamp stallTriggered and taskMaxIdleMs on every exit path.
703
+ // The watchdog flag is owned by this scope; surfacing it on the
704
+ // RunResult lets the caller (and telemetry) distinguish "no progress"
705
+ // aborts from cap exhaustion. taskMaxIdleMs is always populated so the
706
+ // task_completed JSONL event has it regardless of early return.
707
+ const stamped = {
708
+ ...r,
709
+ ...(stallFired ? { stallTriggered: true } : {}),
710
+ taskMaxIdleMs,
711
+ };
664
712
  if (__finalRunResult === undefined)
665
713
  __finalRunResult = stamped;
666
714
  return stamped;
@@ -745,7 +793,7 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
745
793
  latestAttemptedImpl = { tier: initialImpl.usedTier, result: implResult };
746
794
  lastNonRejectedImpl = { tier: initialImpl.usedTier, result: implResult };
747
795
  implementerHistory.push(initialImpl.usedTier);
748
- endBaseStage(stats, 'implementing', implT0, implC0, implementerAgentInfo, runningCostUSD());
796
+ endBaseStage(stats, 'implementing', implT0, implC0, implementerAgentInfo, runningCostUSD(), snapshotIdle(stageIdle));
749
797
  specAttemptIndex = 1;
750
798
  const implReport = implResult.status === 'ok' ? parseStructuredReport(implResult.output) : undefined;
751
799
  const workerStatus = extractWorkerStatus(implReport);
@@ -762,7 +810,7 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
762
810
  const filePathsSkipped = !filePathsInteracted;
763
811
  if (implResult.filesWritten.length === 0 && reviewPolicy !== 'quality_only') {
764
812
  if (reviewPolicy === 'off') {
765
- emitTaskEvent('stage_change', { from: 'verifying', to: 'terminal' });
813
+ transitionStage('verifying', 'terminal', null, {});
766
814
  const terminal = resolveOffTerminal({
767
815
  ...implResult,
768
816
  workerStatus,
@@ -841,7 +889,7 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
841
889
  };
842
890
  }
843
891
  if (reviewPolicy === 'off') {
844
- emitTaskEvent('stage_change', { from: 'verifying', to: 'terminal' });
892
+ transitionStage('verifying', 'terminal', null, {});
845
893
  const terminal = resolveOffTerminal({
846
894
  ...implResult,
847
895
  workerStatus,
@@ -874,10 +922,9 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
874
922
  if (reviewPolicy === 'diff_only') {
875
923
  const diffUnavailable = new Map();
876
924
  const diffReviewerTier = pickReviewer({ loop: 'spec', attemptIndex: 0, baseTier: resolved.slot });
877
- emitTaskEvent('stage_change', { from: 'verifying', to: 'diff_review' });
925
+ transitionStage('verifying', 'diff_review', { stage: 'diff_review', stageIndex: 2, reviewRound: 1, attemptCap: 1 }, {});
878
926
  const diffReviewT0 = Date.now();
879
927
  const diffReviewC0 = runningCostUSD();
880
- heartbeat?.transition({ stage: 'diff_review', stageIndex: 2, reviewRound: 1, attemptCap: 1 });
881
928
  const diffReviewT0_commit = Date.now();
882
929
  const diffReviewC0_commit = runningCostUSD();
883
930
  const diffCall = await runWithFallback({
@@ -887,7 +934,7 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
887
934
  isTransportFailure: (r) => isReviewTransportFailure(r),
888
935
  getStatus: (r) => r.status,
889
936
  makeSyntheticFailure: () => makeSkippedReviewResult('all_tiers_unavailable'),
890
- call: (provider) => runDiffReview({ cwd, diff: evidence.fullDiff, diffTruncated: evidence.diffTruncated, verification, worker: { call: (prompt) => provider.run(prompt) } }),
937
+ call: (provider) => runDiffReview({ cwd, diff: evidence.fullDiff, diffTruncated: evidence.diffTruncated, verification, worker: { call: (prompt, opts) => provider.run(prompt, { abortSignal: opts?.abortSignal, timeoutMs: opts?.timeoutMs }) }, taskDeadlineMs, abortSignal: stallController.signal }),
891
938
  });
892
939
  if (diffCall.fallbackFired) {
893
940
  emitFallback({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'diff', attempt: 0, role: 'diffReviewer', assignedTier: diffReviewerTier, usedTier: diffCall.usedTier, reason: diffCall.fallbackReason, triggeringStatus: diffCall.fallbackTriggeringStatus, violatesSeparation: diffCall.usedTier === implementerHistory[implementerHistory.length - 1] });
@@ -897,8 +944,17 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
897
944
  emitFallbackUnavailable({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'diff', attempt: 0, role: 'diffReviewer', assignedTier: diffReviewerTier, reason: diffCall.unavailableReason });
898
945
  }
899
946
  const verdict = diffCall.bothUnavailable || isReviewTransportFailure(diffCall.result) ? makeSkippedReviewResult('all_tiers_unavailable') : diffCall.result;
900
- emitTaskEvent('review_decision', { stage: 'diff_review', verdict: 'kind' in verdict ? verdict.kind : 'skipped', round: 1 });
901
- endReviewStage(stats, 'diff_review', diffReviewT0_commit, diffReviewC0_commit, implementerAgentInfo, runningCostUSD(),
947
+ emitTaskEvent('review_decision', {
948
+ stage: 'diff_review',
949
+ verdict: 'kind' in verdict
950
+ ? (verdict.kind === 'approve' ? 'approved'
951
+ : verdict.kind === 'concerns' ? 'concerns'
952
+ : verdict.kind === 'reject' ? 'changes_required'
953
+ : 'error') // verdict.kind === 'transport_failure'
954
+ : 'skipped',
955
+ round: 1,
956
+ });
957
+ endReviewStage(stats, 'diff_review', diffReviewT0_commit, diffReviewC0_commit, implementerAgentInfo, runningCostUSD(), snapshotIdle(stageIdle),
902
958
  // Diff review uses 'approve' | 'concerns' | 'reject' | 'transport_failure' (DiffReviewVerdict),
903
959
  // distinct from spec/quality verdicts. Map to the telemetry verdict enum here.
904
960
  'kind' in verdict
@@ -929,7 +985,7 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
929
985
  let specReviewT0 = 0;
930
986
  let specReviewC0 = null;
931
987
  if (reviewPolicy !== 'quality_only') {
932
- heartbeat?.transition({ stage: 'spec_review', stageIndex: 2, reviewRound: 1, attemptCap: maxSpecRows });
988
+ transitionStage('verifying', 'spec_review', { stage: 'spec_review', stageIndex: 2, reviewRound: 1, attemptCap: maxSpecRows }, null);
933
989
  const initialReviewerTier = pickReviewer({ loop: 'spec', attemptIndex: 0, baseTier: resolved.slot });
934
990
  specReviewT0 = Date.now();
935
991
  specReviewC0 = runningCostUSD();
@@ -940,7 +996,7 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
940
996
  isTransportFailure: (r) => isReviewTransportFailure(r),
941
997
  getStatus: (r) => r.status,
942
998
  makeSyntheticFailure: () => makeSkippedReviewResult('all_tiers_unavailable'),
943
- call: (provider) => runSpecReview(provider, packet, effectiveImplReport, fileContents, implResult.toolCalls, task.planContext, evidence.block),
999
+ call: (provider) => runSpecReview(provider, packet, effectiveImplReport, fileContents, implResult.toolCalls, task.planContext, evidence.block, taskDeadlineMs, stallController.signal, wrappedOnProgress),
944
1000
  });
945
1001
  if (initialSpecReview.bothUnavailable) {
946
1002
  emitFallbackUnavailable({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'spec', attempt: 0, role: 'specReviewer', assignedTier: initialReviewerTier, reason: initialSpecReview.unavailableReason });
@@ -972,8 +1028,7 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
972
1028
  const decision = pickEscalation({ loop: 'spec', attemptIndex: specAttemptIndex, baseTier: resolved.slot });
973
1029
  if (decision.isEscalated)
974
1030
  emitEscalationEvent('spec', specAttemptIndex, decision);
975
- emitTaskEvent('stage_change', { from: 'spec_review', to: 'spec_rework', attempt: specAttemptIndex, attemptCap: maxSpecRows, implTier: decision.impl, reviewerTier: decision.reviewer, escalated: decision.isEscalated });
976
- heartbeat?.transition({ stage: 'spec_rework', stageIndex: 3, reviewRound: specAttemptIndex, attemptCap: maxSpecRows });
1031
+ transitionStage('spec_review', 'spec_rework', { stage: 'spec_rework', stageIndex: 3, reviewRound: specAttemptIndex, attemptCap: maxSpecRows }, { attempt: specAttemptIndex, attemptCap: maxSpecRows, implTier: decision.impl, reviewerTier: decision.reviewer, escalated: decision.isEscalated });
977
1032
  const feedback = specResult.findings.length > 0 ? `\n\n## Spec Review Feedback (round ${specAttemptIndex}):\n${specResult.findings.map(f => `- ${f}`).join('\n')}` : '';
978
1033
  const reworkTask = withDoneCondition({ ...task, prompt: `${task.prompt}${feedback}` });
979
1034
  const reworkCall = await runWithFallback({ assigned: decision.impl, providerFor, unavailableTiers: specUnavailable, isTransportFailure: (r) => TRANSPORT_FAILURES.has(r.status) && r.capExhausted === undefined, getStatus: (r) => r.status, makeSyntheticFailure: (assigned) => makeSyntheticRunResult(assigned, 'all_tiers_unavailable'), call: (provider) => delegateWithEscalation(reworkTask, [provider], { explicitlyPinned: true, onProgress: wrappedOnProgress, taskDeadlineMs, abortSignal: stallController.signal, assignedTier: decision.impl }) });
@@ -996,8 +1051,8 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
996
1051
  const reworkReport = parseStructuredReport(finalImplResult.output);
997
1052
  finalImplReport = reworkReport.summary ? reworkReport : buildFallbackImplReport(finalImplResult);
998
1053
  fileContents = await readImplementerFileContents(finalImplResult.filesWritten, task.cwd);
999
- heartbeat?.transition({ stage: 'spec_review', stageIndex: 2, reviewRound: specAttemptIndex + 1, attemptCap: maxSpecRows });
1000
- const reviewCall = await runWithFallback({ assigned: decision.reviewer, providerFor, unavailableTiers: specUnavailable, isTransportFailure: (r) => isReviewTransportFailure(r), getStatus: (r) => r.status, makeSyntheticFailure: () => makeSkippedReviewResult('all_tiers_unavailable'), call: (provider) => runSpecReview(provider, packet, finalImplReport, fileContents, finalImplResult.toolCalls, task.planContext, evidence.block) });
1054
+ transitionStage('spec_rework', 'spec_review', { stage: 'spec_review', stageIndex: 2, reviewRound: specAttemptIndex + 1, attemptCap: maxSpecRows }, null);
1055
+ const reviewCall = await runWithFallback({ assigned: decision.reviewer, providerFor, unavailableTiers: specUnavailable, isTransportFailure: (r) => isReviewTransportFailure(r), getStatus: (r) => r.status, makeSyntheticFailure: () => makeSkippedReviewResult('all_tiers_unavailable'), call: (provider) => runSpecReview(provider, packet, finalImplReport, fileContents, finalImplResult.toolCalls, task.planContext, evidence.block, taskDeadlineMs, stallController.signal, wrappedOnProgress) });
1001
1056
  if (reviewCall.bothUnavailable) {
1002
1057
  emitFallbackUnavailable({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'spec', attempt: specAttemptIndex, role: 'specReviewer', assignedTier: decision.reviewer, reason: reviewCall.unavailableReason });
1003
1058
  fallbackOverrides.push({ role: 'specReviewer', loop: 'spec', attempt: specAttemptIndex, assigned: decision.reviewer, used: reviewCall.usedTier, reason: reviewCall.unavailableReason, triggeringStatus: reviewCall.fallbackTriggeringStatus, bothUnavailable: true });
@@ -1042,10 +1097,10 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
1042
1097
  if (reviewPolicy === 'full' || reviewPolicy === 'quality_only') {
1043
1098
  qualityUnavailable = new Map();
1044
1099
  const qualityReviewerTier = pickReviewer({ loop: 'quality', attemptIndex: 0, baseTier: resolved.slot });
1045
- heartbeat?.transition({ stage: 'quality_review', stageIndex: 4, reviewRound: 1, attemptCap: maxQualityRows });
1100
+ transitionStage(currentStage, 'quality_review', { stage: 'quality_review', stageIndex: 4, reviewRound: 1, attemptCap: maxQualityRows }, null);
1046
1101
  qualityReviewT0 = Date.now();
1047
1102
  qualityReviewC0 = runningCostUSD();
1048
- const initialQuality = await runWithFallback({ assigned: qualityReviewerTier, providerFor, unavailableTiers: qualityUnavailable, isTransportFailure: (r) => isReviewTransportFailure(r), getStatus: (r) => r.status, makeSyntheticFailure: () => makeSkippedReviewResult('all_tiers_unavailable'), call: (provider) => runQualityReview(provider, packet, specReport ?? finalImplReport, fileContents, finalImplResult.toolCalls, finalImplResult.filesWritten, evidence.block, qualityReviewPromptBuilder, finalImplResult.output) });
1103
+ const initialQuality = await runWithFallback({ assigned: qualityReviewerTier, providerFor, unavailableTiers: qualityUnavailable, isTransportFailure: (r) => isReviewTransportFailure(r), getStatus: (r) => r.status, makeSyntheticFailure: () => makeSkippedReviewResult('all_tiers_unavailable'), call: (provider) => runQualityReview(provider, packet, specReport ?? finalImplReport, fileContents, finalImplResult.toolCalls, finalImplResult.filesWritten, evidence.block, qualityReviewPromptBuilder, finalImplResult.output, taskDeadlineMs, stallController.signal, wrappedOnProgress) });
1049
1104
  if (initialQuality.bothUnavailable) {
1050
1105
  emitFallbackUnavailable({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'quality', attempt: 0, role: 'qualityReviewer', assignedTier: qualityReviewerTier, reason: initialQuality.unavailableReason });
1051
1106
  fallbackOverrides.push({ role: 'qualityReviewer', loop: 'quality', attempt: 0, assigned: qualityReviewerTier, used: initialQuality.usedTier, reason: initialQuality.unavailableReason, triggeringStatus: initialQuality.fallbackTriggeringStatus, bothUnavailable: true });
@@ -1059,102 +1114,91 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
1059
1114
  }
1060
1115
  }
1061
1116
  qualityResult = initialQuality.result;
1117
+ qualityAttemptIndex = 1;
1118
+ if (reviewDidNotReject(qualityResult.status))
1119
+ lastNonRejectedImpl = { tier: implementerHistory[implementerHistory.length - 1], result: finalImplResult };
1062
1120
  if (reviewPolicy === 'quality_only') {
1121
+ // Annotation model: emit one quality event per pass with severity-correction
1122
+ // and mean-confidence summary fields. Then we are done — no rework loop.
1123
+ const annotated = qualityResult.annotatedFindings ?? [];
1124
+ const severityCorrections = annotated.filter(f => f.reviewerSeverity !== undefined).length;
1125
+ const meanConfidence = annotated.length > 0
1126
+ ? Math.round((annotated.reduce((s, f) => s + f.reviewerConfidence, 0) / annotated.length) * 100) / 100
1127
+ : null;
1063
1128
  emitTaskEvent('read_only_review.quality', {
1064
1129
  route: routeKey,
1065
- verdict: qualityResult.status === 'approved' ? 'approved'
1066
- : qualityResult.status === 'changes_required' ? 'changes_required'
1067
- : qualityResult.status === 'skipped' ? 'skipped'
1068
- : 'error',
1130
+ verdict: qualityResult.status === 'annotated' ? 'annotated'
1131
+ : qualityResult.status === 'skipped' ? 'skipped'
1132
+ : 'error',
1069
1133
  iterationIndex: 1,
1070
- findingsReviewed: qualityResult.findings?.length ?? 0,
1071
- findingsFlagged: qualityResult.status === 'changes_required' ? (qualityResult.findings?.length ?? 0) : 0,
1134
+ findingsReviewed: annotated.length,
1135
+ findingsFlagged: severityCorrections,
1136
+ severityCorrections,
1137
+ meanConfidence,
1072
1138
  durationMs: Date.now() - qualityReviewT0,
1073
1139
  costUSD: runningCostUSD() !== null && qualityReviewC0 !== null ? runningCostUSD() - qualityReviewC0 : null,
1074
1140
  });
1075
1141
  }
1076
- let prevQualityFindings = [...(qualityResult.findings ?? [])];
1077
- qualityAttemptIndex = 1;
1078
- while (qualityResult.status === 'changes_required') {
1079
- if (qualityAttemptIndex >= maxQualityRows)
1080
- return abortReviewLoop(finalImplResult, 'round_cap', 'review round cap reached before quality rework', 'quality');
1081
- const currentCostUSD = taskCostUSD();
1082
- if (currentCostUSD !== null && maxCostUSD !== undefined && currentCostUSD >= 0.8 * maxCostUSD) {
1083
- emitTaskEvent('cost_check', { stage: 'quality_rework', tripped: true, cost_used_usd: currentCostUSD, cost_cap_usd: maxCostUSD, cost_available: true });
1084
- return abortReviewLoop(finalImplResult, 'cost_ceiling', 'cost ceiling reached before quality rework', 'quality');
1085
- }
1086
- const decision = pickEscalation({ loop: 'quality', attemptIndex: qualityAttemptIndex, baseTier: resolved.slot });
1087
- if (decision.isEscalated)
1088
- emitEscalationEvent('quality', qualityAttemptIndex, decision);
1089
- emitTaskEvent('stage_change', { from: 'quality_review', to: 'quality_rework', attempt: qualityAttemptIndex, attemptCap: maxQualityRows, implTier: decision.impl, reviewerTier: decision.reviewer, escalated: decision.isEscalated });
1090
- if (reviewPolicy === 'quality_only') {
1091
- emitTaskEvent('read_only_review.rework', {
1092
- route: routeKey,
1093
- iterationIndex: qualityAttemptIndex,
1094
- triggeringIssues: qualityResult.findings?.length ?? 0,
1095
- });
1096
- }
1097
- heartbeat?.transition({ stage: 'quality_rework', stageIndex: 5, reviewRound: qualityAttemptIndex, attemptCap: maxQualityRows });
1098
- const feedback = qualityResult.findings.length > 0 ? `\n\n## Quality Review Feedback (round ${qualityAttemptIndex}):\n${qualityResult.findings.map(f => `- ${f}`).join('\n')}` : '';
1099
- const reworkTask = withDoneCondition({ ...task, prompt: `${task.prompt}${feedback}` });
1100
- const reworkCall = await runWithFallback({ assigned: decision.impl, providerFor, unavailableTiers: qualityUnavailable, isTransportFailure: (r) => TRANSPORT_FAILURES.has(r.status) && r.capExhausted === undefined, getStatus: (r) => r.status, makeSyntheticFailure: (assigned) => makeSyntheticRunResult(assigned, 'all_tiers_unavailable'), call: (provider) => delegateWithEscalation(reworkTask, [provider], { explicitlyPinned: true, onProgress: wrappedOnProgress, taskDeadlineMs, abortSignal: stallController.signal, assignedTier: decision.impl }) });
1101
- if (reworkCall.fallbackFired || reworkCall.bothUnavailable)
1102
- fallbackOverrides.push({ role: 'implementer', loop: 'quality', attempt: qualityAttemptIndex, assigned: decision.impl, used: reworkCall.usedTier, reason: (reworkCall.fallbackReason ?? reworkCall.unavailableReason), triggeringStatus: reworkCall.fallbackTriggeringStatus, bothUnavailable: reworkCall.bothUnavailable });
1103
- if (reworkCall.fallbackFired)
1104
- emitFallback({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'quality', attempt: qualityAttemptIndex, role: 'implementer', assignedTier: decision.impl, usedTier: reworkCall.usedTier, reason: reworkCall.fallbackReason, triggeringStatus: reworkCall.fallbackTriggeringStatus, violatesSeparation: false });
1105
- if (reworkCall.bothUnavailable) {
1106
- emitFallbackUnavailable({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'quality', attempt: qualityAttemptIndex, role: 'implementer', assignedTier: decision.impl, reason: reworkCall.unavailableReason });
1142
+ else {
1143
+ // Artifact-route gating model — keep the rework loop.
1144
+ let prevQualityFindings = [...(qualityResult.findings ?? [])];
1145
+ while (qualityResult.status === 'changes_required') {
1146
+ if (qualityAttemptIndex >= maxQualityRows)
1147
+ return abortReviewLoop(finalImplResult, 'round_cap', 'review round cap reached before quality rework', 'quality');
1148
+ const currentCostUSD = taskCostUSD();
1149
+ if (currentCostUSD !== null && maxCostUSD !== undefined && currentCostUSD >= 0.8 * maxCostUSD) {
1150
+ emitTaskEvent('cost_check', { stage: 'quality_rework', tripped: true, cost_used_usd: currentCostUSD, cost_cap_usd: maxCostUSD, cost_available: true });
1151
+ return abortReviewLoop(finalImplResult, 'cost_ceiling', 'cost ceiling reached before quality rework', 'quality');
1152
+ }
1153
+ const decision = pickEscalation({ loop: 'quality', attemptIndex: qualityAttemptIndex, baseTier: resolved.slot });
1107
1154
  if (decision.isEscalated)
1108
- emitEscalationUnavailable({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'quality', attempt: qualityAttemptIndex, role: 'implementer', wantedTier: decision.impl, reason: reworkCall.unavailableReason });
1109
- return __recordOnce(adaptForAllTiersUnavailable(reworkCall.result, 'quality', qualityAttemptIndex));
1110
- }
1111
- finalImplResult = reworkCall.result;
1112
- latestAttemptedImpl = { tier: reworkCall.usedTier, result: finalImplResult };
1113
- implementerHistory.push(reworkCall.usedTier);
1114
- const reworkReport = parseStructuredReport(finalImplResult.output);
1115
- finalImplReport = reworkReport.summary ? reworkReport : buildFallbackImplReport(finalImplResult);
1116
- fileContents = await readImplementerFileContents(finalImplResult.filesWritten, task.cwd);
1117
- heartbeat?.transition({ stage: 'quality_review', stageIndex: 4, reviewRound: qualityAttemptIndex + 1, attemptCap: maxQualityRows });
1118
- const reworkQualityT0 = Date.now();
1119
- const reworkQualityC0 = runningCostUSD();
1120
- const reviewCall = await runWithFallback({ assigned: decision.reviewer, providerFor, unavailableTiers: qualityUnavailable, isTransportFailure: (r) => isReviewTransportFailure(r), getStatus: (r) => r.status, makeSyntheticFailure: () => makeSkippedReviewResult('all_tiers_unavailable'), call: (provider) => runQualityReview(provider, packet, finalImplReport, fileContents, finalImplResult.toolCalls, finalImplResult.filesWritten, evidence.block, qualityReviewPromptBuilder, finalImplResult.output) });
1121
- if (reviewCall.bothUnavailable) {
1122
- emitFallbackUnavailable({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'quality', attempt: qualityAttemptIndex, role: 'qualityReviewer', assignedTier: decision.reviewer, reason: reviewCall.unavailableReason });
1123
- fallbackOverrides.push({ role: 'qualityReviewer', loop: 'quality', attempt: qualityAttemptIndex, assigned: decision.reviewer, used: reviewCall.usedTier, reason: reviewCall.unavailableReason, triggeringStatus: reviewCall.fallbackTriggeringStatus, bothUnavailable: true });
1124
- qualityReviewerHistory.push('skipped');
1125
- }
1126
- else {
1127
- qualityReviewerHistory.push(reviewCall.usedTier);
1128
- if (reviewCall.fallbackFired) {
1129
- emitFallback({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'quality', attempt: qualityAttemptIndex, role: 'qualityReviewer', assignedTier: decision.reviewer, usedTier: reviewCall.usedTier, reason: reviewCall.fallbackReason, triggeringStatus: reviewCall.fallbackTriggeringStatus, violatesSeparation: reviewCall.usedTier === implementerHistory[implementerHistory.length - 1] });
1130
- fallbackOverrides.push({ role: 'qualityReviewer', loop: 'quality', attempt: qualityAttemptIndex, assigned: decision.reviewer, used: reviewCall.usedTier, reason: reviewCall.fallbackReason, triggeringStatus: reviewCall.fallbackTriggeringStatus, bothUnavailable: false });
1155
+ emitEscalationEvent('quality', qualityAttemptIndex, decision);
1156
+ transitionStage('quality_review', 'quality_rework', { stage: 'quality_rework', stageIndex: 5, reviewRound: qualityAttemptIndex, attemptCap: maxQualityRows }, { attempt: qualityAttemptIndex, attemptCap: maxQualityRows, implTier: decision.impl, reviewerTier: decision.reviewer, escalated: decision.isEscalated });
1157
+ const feedback = qualityResult.findings.length > 0 ? `\n\n## Quality Review Feedback (round ${qualityAttemptIndex}):\n${qualityResult.findings.map(f => `- ${f}`).join('\n')}` : '';
1158
+ const reworkTask = withDoneCondition({ ...task, prompt: `${task.prompt}${feedback}` });
1159
+ const reworkCall = await runWithFallback({ assigned: decision.impl, providerFor, unavailableTiers: qualityUnavailable, isTransportFailure: (r) => TRANSPORT_FAILURES.has(r.status) && r.capExhausted === undefined, getStatus: (r) => r.status, makeSyntheticFailure: (assigned) => makeSyntheticRunResult(assigned, 'all_tiers_unavailable'), call: (provider) => delegateWithEscalation(reworkTask, [provider], { explicitlyPinned: true, onProgress: wrappedOnProgress, taskDeadlineMs, abortSignal: stallController.signal, assignedTier: decision.impl }) });
1160
+ if (reworkCall.fallbackFired || reworkCall.bothUnavailable)
1161
+ fallbackOverrides.push({ role: 'implementer', loop: 'quality', attempt: qualityAttemptIndex, assigned: decision.impl, used: reworkCall.usedTier, reason: (reworkCall.fallbackReason ?? reworkCall.unavailableReason), triggeringStatus: reworkCall.fallbackTriggeringStatus, bothUnavailable: reworkCall.bothUnavailable });
1162
+ if (reworkCall.fallbackFired)
1163
+ emitFallback({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'quality', attempt: qualityAttemptIndex, role: 'implementer', assignedTier: decision.impl, usedTier: reworkCall.usedTier, reason: reworkCall.fallbackReason, triggeringStatus: reworkCall.fallbackTriggeringStatus, violatesSeparation: false });
1164
+ if (reworkCall.bothUnavailable) {
1165
+ emitFallbackUnavailable({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'quality', attempt: qualityAttemptIndex, role: 'implementer', assignedTier: decision.impl, reason: reworkCall.unavailableReason });
1166
+ if (decision.isEscalated)
1167
+ emitEscalationUnavailable({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'quality', attempt: qualityAttemptIndex, role: 'implementer', wantedTier: decision.impl, reason: reworkCall.unavailableReason });
1168
+ return __recordOnce(adaptForAllTiersUnavailable(reworkCall.result, 'quality', qualityAttemptIndex));
1131
1169
  }
1170
+ finalImplResult = reworkCall.result;
1171
+ latestAttemptedImpl = { tier: reworkCall.usedTier, result: finalImplResult };
1172
+ implementerHistory.push(reworkCall.usedTier);
1173
+ const reworkReport = parseStructuredReport(finalImplResult.output);
1174
+ finalImplReport = reworkReport.summary ? reworkReport : buildFallbackImplReport(finalImplResult);
1175
+ fileContents = await readImplementerFileContents(finalImplResult.filesWritten, task.cwd);
1176
+ transitionStage('quality_rework', 'quality_review', { stage: 'quality_review', stageIndex: 4, reviewRound: qualityAttemptIndex + 1, attemptCap: maxQualityRows }, null);
1177
+ const reviewCall = await runWithFallback({ assigned: decision.reviewer, providerFor, unavailableTiers: qualityUnavailable, isTransportFailure: (r) => isReviewTransportFailure(r), getStatus: (r) => r.status, makeSyntheticFailure: () => makeSkippedReviewResult('all_tiers_unavailable'), call: (provider) => runQualityReview(provider, packet, finalImplReport, fileContents, finalImplResult.toolCalls, finalImplResult.filesWritten, evidence.block, qualityReviewPromptBuilder, finalImplResult.output, taskDeadlineMs, stallController.signal, wrappedOnProgress) });
1178
+ if (reviewCall.bothUnavailable) {
1179
+ emitFallbackUnavailable({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'quality', attempt: qualityAttemptIndex, role: 'qualityReviewer', assignedTier: decision.reviewer, reason: reviewCall.unavailableReason });
1180
+ fallbackOverrides.push({ role: 'qualityReviewer', loop: 'quality', attempt: qualityAttemptIndex, assigned: decision.reviewer, used: reviewCall.usedTier, reason: reviewCall.unavailableReason, triggeringStatus: reviewCall.fallbackTriggeringStatus, bothUnavailable: true });
1181
+ qualityReviewerHistory.push('skipped');
1182
+ }
1183
+ else {
1184
+ qualityReviewerHistory.push(reviewCall.usedTier);
1185
+ if (reviewCall.fallbackFired) {
1186
+ emitFallback({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'quality', attempt: qualityAttemptIndex, role: 'qualityReviewer', assignedTier: decision.reviewer, usedTier: reviewCall.usedTier, reason: reviewCall.fallbackReason, triggeringStatus: reviewCall.fallbackTriggeringStatus, violatesSeparation: reviewCall.usedTier === implementerHistory[implementerHistory.length - 1] });
1187
+ fallbackOverrides.push({ role: 'qualityReviewer', loop: 'quality', attempt: qualityAttemptIndex, assigned: decision.reviewer, used: reviewCall.usedTier, reason: reviewCall.fallbackReason, triggeringStatus: reviewCall.fallbackTriggeringStatus, bothUnavailable: false });
1188
+ }
1189
+ }
1190
+ qualityResult = reviewCall.result;
1191
+ if (reviewDidNotReject(qualityResult.status))
1192
+ lastNonRejectedImpl = { tier: implementerHistory[implementerHistory.length - 1], result: finalImplResult };
1193
+ qualityAttemptIndex++;
1194
+ if (qualityResult.status === 'approved' || qualityResult.status === 'skipped')
1195
+ break;
1196
+ const currentFindings = [...(qualityResult.findings ?? [])].sort().join('\0');
1197
+ const prevFindings = [...prevQualityFindings].sort().join('\0');
1198
+ if (currentFindings === prevFindings && currentFindings !== '')
1199
+ break;
1200
+ prevQualityFindings = [...(qualityResult.findings ?? [])];
1132
1201
  }
1133
- qualityResult = reviewCall.result;
1134
- if (reviewPolicy === 'quality_only') {
1135
- emitTaskEvent('read_only_review.quality', {
1136
- route: routeKey,
1137
- verdict: qualityResult.status === 'approved' ? 'approved'
1138
- : qualityResult.status === 'changes_required' ? 'changes_required'
1139
- : qualityResult.status === 'skipped' ? 'skipped'
1140
- : 'error',
1141
- iterationIndex: qualityAttemptIndex + 1,
1142
- findingsReviewed: qualityResult.findings?.length ?? 0,
1143
- findingsFlagged: qualityResult.status === 'changes_required' ? (qualityResult.findings?.length ?? 0) : 0,
1144
- durationMs: Date.now() - reworkQualityT0,
1145
- costUSD: runningCostUSD() !== null && reworkQualityC0 !== null ? runningCostUSD() - reworkQualityC0 : null,
1146
- });
1147
- }
1148
- if (reviewDidNotReject(qualityResult.status))
1149
- lastNonRejectedImpl = { tier: implementerHistory[implementerHistory.length - 1], result: finalImplResult };
1150
- qualityAttemptIndex++;
1151
- if (qualityResult.status === 'approved' || qualityResult.status === 'skipped')
1152
- break;
1153
- const currentFindings = [...(qualityResult.findings ?? [])].sort().join('\0');
1154
- const prevFindings = [...prevQualityFindings].sort().join('\0');
1155
- if (currentFindings === prevFindings && currentFindings !== '')
1156
- break;
1157
- prevQualityFindings = [...(qualityResult.findings ?? [])];
1158
1202
  }
1159
1203
  }
1160
1204
  const finalReport = specReport ?? finalImplReport;
@@ -1180,17 +1224,18 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
1180
1224
  ? 'skipped'
1181
1225
  : (['approved', 'changes_required', 'skipped', 'error', 'api_error', 'network_error', 'timeout'].includes(specStatus) ? specStatus : 'error');
1182
1226
  if (reviewPolicy !== 'quality_only') {
1183
- endReviewStage(stats, 'spec_review', specReviewT0, specReviewC0, implementerAgentInfo, runningCostUSD(), specStatus === 'approved' ? 'approved'
1227
+ endReviewStage(stats, 'spec_review', specReviewT0, specReviewC0, implementerAgentInfo, runningCostUSD(), snapshotIdle(stageIdle), specStatus === 'approved' ? 'approved'
1184
1228
  : specStatus === 'changes_required' ? 'changes_required'
1185
1229
  : specStatus === 'skipped' ? 'skipped'
1186
1230
  : specStatus === 'not_applicable' ? 'not_applicable'
1187
1231
  : 'error', specAttemptIndex - 1);
1188
1232
  }
1189
1233
  const qualityAggregateStatus = qualityResult.status;
1190
- endReviewStage(stats, 'quality_review', qualityReviewT0, qualityReviewC0, implementerAgentInfo, runningCostUSD(), qualityResult.status === 'approved' ? 'approved'
1234
+ endReviewStage(stats, 'quality_review', qualityReviewT0, qualityReviewC0, implementerAgentInfo, runningCostUSD(), snapshotIdle(stageIdle), qualityResult.status === 'approved' ? 'approved'
1191
1235
  : qualityResult.status === 'changes_required' ? 'changes_required'
1192
- : qualityResult.status === 'skipped' ? 'skipped'
1193
- : 'error', qualityAttemptIndex - 1);
1236
+ : qualityResult.status === 'annotated' ? 'annotated'
1237
+ : qualityResult.status === 'skipped' ? 'skipped'
1238
+ : 'error', qualityAttemptIndex - 1);
1194
1239
  const aggregated = aggregateResult(finalReport, specReport, qualityResult.report, specAggregateStatus, qualityAggregateStatus);
1195
1240
  // File artifact verification: check whether output targets exist on disk after all work.
1196
1241
  // Only applies when status is ok; non-ok statuses skip verification entirely.
@@ -1205,8 +1250,8 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
1205
1250
  : finalImplResult.status === 'ok' && fileArtifactsMissing
1206
1251
  ? 'incomplete'
1207
1252
  : finalImplResult.status;
1208
- const specEnvelopeStatus = (specStatus === 'api_error' || specStatus === 'network_error' || specStatus === 'timeout' ? 'error' : specStatus);
1209
- const qualityEnvelopeStatus = qualityResult.status === 'api_error' || qualityResult.status === 'network_error' || qualityResult.status === 'timeout' ? 'error' : qualityResult.status;
1253
+ const specEnvelopeStatus = (specStatus === 'api_error' || specStatus === 'network_error' || specStatus === 'timeout' || specStatus === 'api_aborted' ? 'error' : specStatus);
1254
+ const qualityEnvelopeStatus = qualityResult.status === 'api_error' || qualityResult.status === 'network_error' || qualityResult.status === 'timeout' || qualityResult.status === 'api_aborted' ? 'error' : qualityResult.status;
1210
1255
  const runResult = {
1211
1256
  ...finalImplResult,
1212
1257
  status: finalStatus,
@@ -1238,10 +1283,9 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
1238
1283
  emitTaskEvent('read_only_review.terminal', {
1239
1284
  route: routeKey,
1240
1285
  roundsUsed: qualityAttemptIndex,
1241
- finalQualityVerdict: qualityResult.status === 'approved' ? 'approved'
1242
- : qualityResult.status === 'changes_required' ? 'changes_required'
1243
- : qualityResult.status === 'skipped' ? 'skipped'
1244
- : 'error',
1286
+ finalQualityVerdict: qualityResult.status === 'annotated' ? 'annotated'
1287
+ : qualityResult.status === 'skipped' ? 'skipped'
1288
+ : 'error',
1245
1289
  costUSD: taskCostUSD(),
1246
1290
  durationMs: Date.now() - taskStartMs,
1247
1291
  });
@@ -1276,8 +1320,30 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
1276
1320
  });
1277
1321
  }
1278
1322
  catch { /* silent */ }
1323
+ // NEW in v3.9.0: local JSONL emit. Distinct from cloud — local is
1324
+ // for verbose/observability consumers; cloud is for telemetry sink.
1325
+ try {
1326
+ const r = __finalRunResult;
1327
+ emitTaskEvent('task_completed', {
1328
+ status: r.status,
1329
+ workerStatus: r.workerStatus ?? null,
1330
+ turns: r.turns,
1331
+ durationMs: r.durationMs ?? null,
1332
+ filesRead: r.filesRead?.length ?? 0,
1333
+ filesWritten: r.filesWritten?.length ?? 0,
1334
+ toolCalls: r.toolCalls?.length ?? 0,
1335
+ inputTokens: r.usage.inputTokens,
1336
+ outputTokens: r.usage.outputTokens,
1337
+ costUSD: r.usage.costUSD,
1338
+ taskMaxIdleMs: r.taskMaxIdleMs ?? null,
1339
+ stallTriggered: r.stallTriggered ?? false,
1340
+ // JSON-stringify so verbose-stream primitives check passes
1341
+ stages: JSON.stringify(r.stageStats ?? emptyStats()),
1342
+ });
1343
+ }
1344
+ catch { /* silent — never break the user task */ }
1279
1345
  }
1280
- heartbeat?.setStage('terminal', 8);
1346
+ transitionStage(currentStage, 'terminal', { stage: 'terminal', stageIndex: 8 }, null);
1281
1347
  heartbeat?.stop();
1282
1348
  clearInterval(stallWatchdogInterval);
1283
1349
  }