@zhixuan92/multi-model-agent-core 3.3.0 → 3.5.0

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
Files changed (115)
  1. package/README.md +4 -3
  2. package/dist/delegate-with-escalation.d.ts +0 -1
  3. package/dist/delegate-with-escalation.d.ts.map +1 -1
  4. package/dist/delegate-with-escalation.js +7 -52
  5. package/dist/delegate-with-escalation.js.map +1 -1
  6. package/dist/diagnostics/disconnect-log.d.ts +56 -27
  7. package/dist/diagnostics/disconnect-log.d.ts.map +1 -1
  8. package/dist/diagnostics/disconnect-log.js +34 -49
  9. package/dist/diagnostics/disconnect-log.js.map +1 -1
  10. package/dist/escalation/fallback.d.ts +59 -0
  11. package/dist/escalation/fallback.d.ts.map +1 -0
  12. package/dist/escalation/fallback.js +172 -0
  13. package/dist/escalation/fallback.js.map +1 -0
  14. package/dist/escalation/policy.d.ts +37 -0
  15. package/dist/escalation/policy.d.ts.map +1 -0
  16. package/dist/escalation/policy.js +67 -0
  17. package/dist/escalation/policy.js.map +1 -0
  18. package/dist/executors/debug.d.ts.map +1 -1
  19. package/dist/executors/debug.js +0 -1
  20. package/dist/executors/debug.js.map +1 -1
  21. package/dist/executors/execute-plan.js +1 -1
  22. package/dist/executors/execute-plan.js.map +1 -1
  23. package/dist/executors/investigate.d.ts +11 -0
  24. package/dist/executors/investigate.d.ts.map +1 -0
  25. package/dist/executors/investigate.js +101 -0
  26. package/dist/executors/investigate.js.map +1 -0
  27. package/dist/heartbeat.d.ts +4 -4
  28. package/dist/heartbeat.d.ts.map +1 -1
  29. package/dist/heartbeat.js +17 -17
  30. package/dist/heartbeat.js.map +1 -1
  31. package/dist/intake/compilers/execute-plan.d.ts.map +1 -1
  32. package/dist/intake/compilers/execute-plan.js +1 -0
  33. package/dist/intake/compilers/execute-plan.js.map +1 -1
  34. package/dist/intake/compilers/investigate.d.ts +12 -0
  35. package/dist/intake/compilers/investigate.d.ts.map +1 -0
  36. package/dist/intake/compilers/investigate.js +36 -0
  37. package/dist/intake/compilers/investigate.js.map +1 -0
  38. package/dist/intake/resolve.d.ts.map +1 -1
  39. package/dist/intake/resolve.js +3 -2
  40. package/dist/intake/resolve.js.map +1 -1
  41. package/dist/intake/types.d.ts +8 -2
  42. package/dist/intake/types.d.ts.map +1 -1
  43. package/dist/model-profiles.json +10 -6
  44. package/dist/reporting/compose-investigate-headline.d.ts +11 -0
  45. package/dist/reporting/compose-investigate-headline.d.ts.map +1 -0
  46. package/dist/reporting/compose-investigate-headline.js +29 -0
  47. package/dist/reporting/compose-investigate-headline.js.map +1 -0
  48. package/dist/reporting/compose-terminal-headline.d.ts +5 -0
  49. package/dist/reporting/compose-terminal-headline.d.ts.map +1 -1
  50. package/dist/reporting/compose-terminal-headline.js +23 -6
  51. package/dist/reporting/compose-terminal-headline.js.map +1 -1
  52. package/dist/reporting/derive-investigate-status.d.ts +17 -0
  53. package/dist/reporting/derive-investigate-status.d.ts.map +1 -0
  54. package/dist/reporting/derive-investigate-status.js +30 -0
  55. package/dist/reporting/derive-investigate-status.js.map +1 -0
  56. package/dist/reporting/parse-investigation-report.d.ts +39 -0
  57. package/dist/reporting/parse-investigation-report.d.ts.map +1 -0
  58. package/dist/reporting/parse-investigation-report.js +150 -0
  59. package/dist/reporting/parse-investigation-report.js.map +1 -0
  60. package/dist/reporting/structured-report.d.ts +1 -0
  61. package/dist/reporting/structured-report.d.ts.map +1 -1
  62. package/dist/reporting/structured-report.js +26 -2
  63. package/dist/reporting/structured-report.js.map +1 -1
  64. package/dist/review/aggregate-result.d.ts +3 -1
  65. package/dist/review/aggregate-result.d.ts.map +1 -1
  66. package/dist/review/aggregate-result.js +5 -0
  67. package/dist/review/aggregate-result.js.map +1 -1
  68. package/dist/review/diff-review.d.ts +11 -0
  69. package/dist/review/diff-review.d.ts.map +1 -1
  70. package/dist/review/diff-review.js +5 -2
  71. package/dist/review/diff-review.js.map +1 -1
  72. package/dist/review/quality-reviewer.d.ts +11 -2
  73. package/dist/review/quality-reviewer.d.ts.map +1 -1
  74. package/dist/review/quality-reviewer.js +3 -0
  75. package/dist/review/quality-reviewer.js.map +1 -1
  76. package/dist/review/skipped-result.d.ts +8 -0
  77. package/dist/review/skipped-result.d.ts.map +1 -0
  78. package/dist/review/skipped-result.js +4 -0
  79. package/dist/review/skipped-result.js.map +1 -0
  80. package/dist/review/spec-reviewer.d.ts +4 -1
  81. package/dist/review/spec-reviewer.d.ts.map +1 -1
  82. package/dist/review/spec-reviewer.js +3 -0
  83. package/dist/review/spec-reviewer.js.map +1 -1
  84. package/dist/run-tasks/commit-stage.d.ts.map +1 -1
  85. package/dist/run-tasks/commit-stage.js +17 -8
  86. package/dist/run-tasks/commit-stage.js.map +1 -1
  87. package/dist/run-tasks/fallback-report.d.ts.map +1 -1
  88. package/dist/run-tasks/fallback-report.js +1 -0
  89. package/dist/run-tasks/fallback-report.js.map +1 -1
  90. package/dist/run-tasks/metadata-repair.d.ts.map +1 -1
  91. package/dist/run-tasks/metadata-repair.js +0 -1
  92. package/dist/run-tasks/metadata-repair.js.map +1 -1
  93. package/dist/run-tasks/reviewed-lifecycle.d.ts.map +1 -1
  94. package/dist/run-tasks/reviewed-lifecycle.js +460 -284
  95. package/dist/run-tasks/reviewed-lifecycle.js.map +1 -1
  96. package/dist/runners/base/result-builders.d.ts +1 -1
  97. package/dist/runners/base/result-builders.d.ts.map +1 -1
  98. package/dist/runners/types.d.ts +2 -2
  99. package/dist/runners/types.d.ts.map +1 -1
  100. package/dist/tool-schemas/delegate.d.ts +0 -1
  101. package/dist/tool-schemas/delegate.d.ts.map +1 -1
  102. package/dist/tool-schemas/delegate.js +0 -1
  103. package/dist/tool-schemas/delegate.js.map +1 -1
  104. package/dist/tool-schemas/execute-plan.d.ts +0 -5
  105. package/dist/tool-schemas/execute-plan.d.ts.map +1 -1
  106. package/dist/tool-schemas/execute-plan.js +0 -4
  107. package/dist/tool-schemas/execute-plan.js.map +1 -1
  108. package/dist/tool-schemas/investigate.d.ts +48 -0
  109. package/dist/tool-schemas/investigate.d.ts.map +1 -0
  110. package/dist/tool-schemas/investigate.js +13 -0
  111. package/dist/tool-schemas/investigate.js.map +1 -0
  112. package/dist/types.d.ts +18 -2
  113. package/dist/types.d.ts.map +1 -1
  114. package/dist/types.js.map +1 -1
  115. package/package.json +9 -1
@@ -3,8 +3,11 @@ import { promisify } from 'node:util';
3
3
  import { computeCostUSD, computeSavedCostUSD } from '../types.js';
4
4
  import { createProvider } from '../provider.js';
5
5
  import { delegateWithEscalation } from '../delegate-with-escalation.js';
6
+ import { pickEscalation, pickReviewer, maxRowsFor, } from '../escalation/policy.js';
7
+ import { runWithFallback, makeSyntheticRunResult, TRANSPORT_FAILURES, isReviewTransportFailure, } from '../escalation/fallback.js';
6
8
  import { HeartbeatTimer } from '../heartbeat.js';
7
9
  import { runSpecReview } from '../review/spec-reviewer.js';
10
+ import { makeSkippedReviewResult } from '../review/skipped-result.js';
8
11
  import { runQualityReview } from '../review/quality-reviewer.js';
9
12
  import { runDiffReview } from '../review/diff-review.js';
10
13
  import { aggregateResult } from '../review/aggregate-result.js';
@@ -22,16 +25,24 @@ const exec = promisify(execFile);
22
25
  export async function executeReviewedLifecycle(task, resolved, config, taskIndex, onProgress, heartbeatWiring, diagnostics) {
23
26
  const reviewPolicy = task.reviewPolicy ?? 'full';
24
27
  const otherSlot = resolved.slot === 'standard' ? 'complex' : 'standard';
25
- // Partition filePaths into output targets before the worker runs.
26
- // Output targets are paths that do not yet exist on disk.
27
- const { outputTargets } = partitionFilePaths(task.filePaths, task.cwd ?? process.cwd());
28
28
  let escalationProvider;
29
29
  try {
30
30
  escalationProvider = createProvider(otherSlot, config);
31
31
  }
32
32
  catch {
33
- // Other slot not configured — auto-escalation not available
33
+ escalationProvider = undefined;
34
+ }
35
+ const providers = {
36
+ [resolved.slot]: resolved.provider,
37
+ };
38
+ if (escalationProvider)
39
+ providers[otherSlot] = escalationProvider;
40
+ function providerFor(tier) {
41
+ return providers[tier];
34
42
  }
43
+ // Partition filePaths into output targets before the worker runs.
44
+ // Output targets are paths that do not yet exist on disk.
45
+ const { outputTargets } = partitionFilePaths(task.filePaths, task.cwd ?? process.cwd());
35
46
  const stageCount = reviewPolicy === 'off' ? 1 :
36
47
  reviewPolicy === 'spec_only' ? 3 :
37
48
  5;
@@ -42,6 +53,20 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
42
53
  : undefined;
43
54
  const verboseBatchIdEarly = heartbeatWiring?.batchId;
44
55
  const shortBatchEarly = verboseBatchIdEarly ? verboseBatchIdEarly.slice(0, 8) : '????????';
56
+ const taskEventLogger = diagnostics?.logger;
57
+ const emitTaskEvent = (event, fields) => {
58
+ if (taskEventLogger && verboseBatchIdEarly !== undefined) {
59
+ const cleaned = {};
60
+ for (const [key, value] of Object.entries(fields)) {
61
+ if (value !== undefined)
62
+ cleaned[key] = value;
63
+ }
64
+ taskEventLogger.emit({ event, batchId: verboseBatchIdEarly, taskIndex, ...cleaned });
65
+ }
66
+ if (verboseStreamRaw) {
67
+ verboseStreamRaw(composeVerboseLine({ event, ts: new Date().toISOString(), batch: shortBatchEarly, task: taskIndex, ...fields }));
68
+ }
69
+ };
45
70
  // Start the heartbeat whenever there's a downstream consumer:
46
71
  // - onProgress (external progress callback from the runTasks caller)
47
72
  // - verbose (stderr stream needs the heartbeat's tool_call / turn_complete relay)
@@ -58,41 +83,30 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
58
83
  const synthOnProgress = onProgress ?? (() => { });
59
84
  const heartbeat = needHeartbeat
60
85
  ? new HeartbeatTimer((event) => {
61
- if (verboseStreamRaw && event.kind === 'heartbeat') {
86
+ if (event.kind === 'heartbeat') {
62
87
  // Emit on every heartbeat tick so the operator can confirm
63
88
  // the timer is actually firing. Stage-change lines are richer
64
89
  // but fire only on transitions; plain ticks let you see
65
90
  // per-5s progress inside a long-running stage.
66
91
  if (event.stage !== lastStageSeen) {
67
92
  if (lastStageSeen !== undefined) {
68
- verboseStreamRaw(composeVerboseLine({
69
- event: 'stage_change',
70
- ts: new Date().toISOString(),
71
- batch: shortBatchEarly,
72
- task: taskIndex,
73
- from: lastStageSeen,
74
- to: event.stage,
75
- }));
93
+ emitTaskEvent('stage_change', { from: lastStageSeen, to: event.stage });
76
94
  }
77
95
  lastStageSeen = event.stage;
78
96
  }
79
97
  const sinceLastMs = Date.now() - prevEventAtMs;
80
- verboseStreamRaw(composeVerboseLine({
81
- event: 'heartbeat',
82
- ts: new Date().toISOString(),
83
- batch: shortBatchEarly,
84
- task: taskIndex,
98
+ emitTaskEvent('heartbeat', {
85
99
  elapsed: event.elapsed,
86
100
  stage: event.stage,
87
101
  round: event.reviewRound,
88
- cap: event.maxReviewRounds,
102
+ cap: event.attemptCap,
89
103
  tools: event.progress.toolCalls,
90
104
  read: event.progress.filesRead,
91
105
  wrote: event.progress.filesWritten,
92
106
  text: textEmissionChars,
93
107
  cost: event.costUSD,
94
108
  idle_ms: sinceLastMs,
95
- }));
109
+ });
96
110
  }
97
111
  synthOnProgress(taskIndex, event);
98
112
  }, {
@@ -103,33 +117,16 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
103
117
  })
104
118
  : undefined;
105
119
  heartbeat?.start(stageCount);
106
- if (verboseStreamRaw) {
107
- verboseStreamRaw(composeVerboseLine({
108
- event: 'heartbeat_timer',
109
- ts: new Date().toISOString(),
110
- batch: shortBatchEarly,
111
- task: taskIndex,
112
- state: heartbeat ? 'started' : 'disabled',
113
- stage_count: stageCount,
114
- tick_ms: heartbeat ? 5000 : undefined,
115
- reason: heartbeat ? undefined : 'no_consumer',
116
- }));
117
- }
120
+ emitTaskEvent('heartbeat_timer', {
121
+ state: heartbeat ? 'started' : 'disabled',
122
+ stage_count: stageCount,
123
+ tick_ms: heartbeat ? 5000 : undefined,
124
+ reason: heartbeat ? undefined : 'no_consumer',
125
+ });
118
126
  const implModel = resolved.provider.config.model;
119
127
  const progressCounters = { filesRead: 0, filesWritten: 0, toolCalls: 0 };
120
- const verboseLogger = verbose && diagnostics?.logger ? diagnostics.logger : undefined;
121
- const verboseBatchId = verboseBatchIdEarly;
122
128
  const verboseStream = verboseStreamRaw;
123
- const shortBatch = shortBatchEarly;
124
- if (verboseStream) {
125
- verboseStream(composeVerboseLine({
126
- event: 'worker_start',
127
- ts: new Date().toISOString(),
128
- batch: shortBatch,
129
- task: taskIndex,
130
- worker: resolved.provider.config.model,
131
- }));
132
- }
129
+ emitTaskEvent('worker_start', { worker: resolved.provider.config.model });
133
130
  let prevEventAtMs = verbose ? Date.now() : 0;
134
131
  // Wrap whenever we have ANY consumer for InternalRunnerEvent (heartbeat,
135
132
  // verbose stream, or verbose logger). Previously this only wrapped when
@@ -142,33 +139,25 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
142
139
  heartbeat?.markEvent('llm');
143
140
  if (verbose)
144
141
  prevEventAtMs = Date.now();
145
- if (verboseStream) {
146
- verboseStream(composeVerboseLine({
147
- event: 'turn_start',
148
- ts: new Date().toISOString(),
149
- batch: shortBatch,
150
- task: taskIndex,
142
+ if (verbose) {
143
+ emitTaskEvent('turn_start', {
151
144
  turn: event.turn,
152
145
  provider: event.provider,
153
- }));
146
+ });
154
147
  }
155
148
  }
156
149
  if (event.kind === 'text_emission') {
157
150
  heartbeat?.markEvent('text');
158
151
  textEmissionChars += event.chars;
159
- if (verboseStream && event.chars > 0) {
152
+ if (verbose && event.chars > 0) {
160
153
  const preview = event.preview.length > 60
161
154
  ? event.preview.slice(0, 57) + '...'
162
155
  : event.preview;
163
- verboseStream(composeVerboseLine({
164
- event: 'text_emission',
165
- ts: new Date().toISOString(),
166
- batch: shortBatch,
167
- task: taskIndex,
156
+ emitTaskEvent('text_emission', {
168
157
  chars: event.chars,
169
158
  total: textEmissionChars,
170
159
  preview,
171
- }));
160
+ });
172
161
  }
173
162
  }
174
163
  if (event.kind === 'tool_call') {
@@ -186,23 +175,11 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
186
175
  const sincePrevMs = verbose ? now - prevEventAtMs : 0;
187
176
  if (verbose)
188
177
  prevEventAtMs = now;
189
- if (verboseLogger && verboseBatchId) {
190
- verboseLogger.toolCall({
191
- batchId: verboseBatchId,
192
- taskIndex,
193
- tool: event.toolSummary,
194
- durationMs: sincePrevMs,
195
- });
196
- }
197
- if (verboseStream) {
198
- verboseStream(composeVerboseLine({
199
- event: 'tool_call',
200
- ts: new Date().toISOString(),
201
- batch: shortBatch,
202
- task: taskIndex,
178
+ if (verbose) {
179
+ emitTaskEvent('tool_call', {
203
180
  tool: event.toolSummary,
204
181
  duration_ms: sincePrevMs,
205
- }));
182
+ });
206
183
  }
207
184
  }
208
185
  if (event.kind === 'turn_complete') {
@@ -214,29 +191,14 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
214
191
  const turnDurMs = verbose ? nowTurn - prevEventAtMs : 0;
215
192
  if (verbose)
216
193
  prevEventAtMs = nowTurn;
217
- if (verboseLogger && verboseBatchId) {
218
- verboseLogger.llmTurn({
219
- batchId: verboseBatchId,
220
- taskIndex,
221
- turnIndex: progressCounters.toolCalls,
222
- provider: resolved.provider.config.model,
223
- inputTokens: event.cumulativeInputTokens,
224
- outputTokens: event.cumulativeOutputTokens,
225
- costUSD,
226
- });
227
- }
228
- if (verboseStream) {
229
- verboseStream(composeVerboseLine({
230
- event: 'turn_complete',
231
- ts: new Date().toISOString(),
232
- batch: shortBatch,
233
- task: taskIndex,
194
+ if (verbose) {
195
+ emitTaskEvent('turn_complete', {
234
196
  input_tokens: event.cumulativeInputTokens,
235
197
  output_tokens: event.cumulativeOutputTokens,
236
198
  cost: costUSD,
237
199
  duration_ms: turnDurMs,
238
200
  provider: resolved.provider.config.model,
239
- }));
201
+ });
240
202
  }
241
203
  }
242
204
  }
@@ -245,17 +207,76 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
245
207
  const taskStartMs = Date.now();
246
208
  const commits = [];
247
209
  let commitError;
248
- let specRework = 0;
249
- let qualityRework = 0;
210
+ let specAttemptIndex = 0;
211
+ let qualityAttemptIndex = 0;
212
+ const maxSpecRows = maxRowsFor('spec');
213
+ const maxQualityRows = maxRowsFor('quality');
214
+ const specUnavailable = new Map();
215
+ let qualityUnavailable = new Map();
250
216
  let metadataRepair = 0;
251
- const maxReviewRounds = task.maxReviewRounds ?? 3;
252
217
  const maxCostUSD = task.maxCostUSD;
253
- const reviewRounds = () => ({ spec: specRework, quality: qualityRework, metadata: metadataRepair, cap: maxReviewRounds });
218
+ const implementerHistory = [];
219
+ const specReviewerHistory = [];
220
+ const qualityReviewerHistory = [];
221
+ const fallbackOverrides = [];
222
+ let latestAttemptedImpl;
223
+ let lastNonRejectedImpl;
224
+ const reviewRounds = () => ({ spec: specAttemptIndex, quality: qualityAttemptIndex, metadata: metadataRepair, cap: Math.max(maxSpecRows, maxQualityRows) });
254
225
  const taskCostUSD = () => (heartbeat ? heartbeat.getHeartbeatTickInfo().costUSD : null);
226
+ const policyEscalated = { spec: false, quality: false, diff: false };
227
+ const emitFallback = (p) => {
228
+ diagnostics?.logger?.fallback(p);
229
+ emitTaskEvent('fallback', p);
230
+ };
231
+ const emitFallbackUnavailable = (p) => {
232
+ diagnostics?.logger?.fallbackUnavailable(p);
233
+ emitTaskEvent('fallback_unavailable', p);
234
+ };
235
+ const emitEscalationEvent = (loop, attempt, decision) => {
236
+ const p = {
237
+ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop, attempt,
238
+ baseTier: resolved.slot, implTier: decision.impl, reviewerTier: decision.reviewer,
239
+ };
240
+ diagnostics?.logger?.escalation(p);
241
+ emitTaskEvent('escalation', p);
242
+ policyEscalated[loop] = true;
243
+ };
244
+ const emitEscalationUnavailable = (p) => {
245
+ diagnostics?.logger?.escalationUnavailable(p);
246
+ emitTaskEvent('escalation_unavailable', p);
247
+ };
255
248
  // When the review loop aborts mid-flight, preserve any review-status info already set
256
249
  // on the base result (set by callers via abortReviewLoop({ ...res, specReviewStatus, ... })).
257
250
  // Defaults to 'changes_required' for whichever loop tripped — that's the only state the
258
251
  // loop ever fires from, by construction.
252
+ function adaptForAllTiersUnavailable(base, loop, attempt) {
253
+ const ship = lastNonRejectedImpl?.result ?? base;
254
+ return {
255
+ ...ship,
256
+ status: 'incomplete',
257
+ workerStatus: 'blocked',
258
+ terminationReason: 'all_tiers_unavailable',
259
+ reviewRounds: reviewRounds(),
260
+ error: `runWithFallback: both tiers unavailable (loop=${loop}, attempt=${attempt}, role=implementer)`,
261
+ agents: agentEnvelope(specReviewerHistory[specReviewerHistory.length - 1] ?? 'not_applicable', qualityReviewerHistory[qualityReviewerHistory.length - 1] ?? (reviewPolicy === 'full' ? 'not_applicable' : 'skipped')),
262
+ };
263
+ }
264
+ function reviewDidNotReject(status) {
265
+ return status === 'approved' || status === 'skipped';
266
+ }
267
+ const agentEnvelope = (specReviewer, qualityReviewer) => {
268
+ const selectedImpl = latestAttemptedImpl ?? lastNonRejectedImpl;
269
+ const implementer = selectedImpl?.tier ?? resolved.slot;
270
+ return {
271
+ implementer,
272
+ ...(implementerHistory.length > 1 || implementerHistory.some(t => t !== implementer) ? { implementerHistory } : {}),
273
+ specReviewer,
274
+ ...(specReviewerHistory.length > 0 && (specReviewerHistory.length > 1 || specReviewerHistory.some(t => t === 'skipped')) ? { specReviewerHistory } : {}),
275
+ qualityReviewer,
276
+ ...(qualityReviewerHistory.length > 0 && (qualityReviewerHistory.length > 1 || qualityReviewerHistory.some(t => t === 'skipped')) ? { qualityReviewerHistory } : {}),
277
+ ...(fallbackOverrides.length > 0 ? { fallbackOverrides } : {}),
278
+ };
279
+ };
259
280
  const abortReviewLoop = (base, terminationReason, message, aborting) => ({
260
281
  ...base,
261
282
  status: 'incomplete',
@@ -265,27 +286,16 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
265
286
  error: message,
266
287
  specReviewStatus: aborting === 'spec' ? 'changes_required' : (base.specReviewStatus ?? 'approved'),
267
288
  qualityReviewStatus: aborting === 'quality' ? 'changes_required' : (base.qualityReviewStatus ?? 'skipped'),
289
+ agents: agentEnvelope(specReviewerHistory[specReviewerHistory.length - 1] ?? 'not_applicable', qualityReviewerHistory[qualityReviewerHistory.length - 1] ?? (reviewPolicy === 'full' ? 'not_applicable' : 'skipped')),
268
290
  });
269
291
  const defaultVerification = { status: 'skipped', steps: [], totalDurationMs: 0, skipReason: 'no_command' };
270
292
  let latestVerification = defaultVerification;
271
- const emitVerbose = (event, fields) => {
272
- if (!verboseStream)
273
- return;
274
- verboseStream(composeVerboseLine({
275
- event,
276
- ts: new Date().toISOString(),
277
- batch: shortBatch,
278
- task: taskIndex,
279
- ...fields,
280
- }));
281
- };
282
293
  async function runVerificationStage() {
283
- emitVerbose('stage_change', { from: 'committing', to: 'verifying' });
294
+ emitTaskEvent('stage_change', { from: 'committing', to: 'verifying' });
284
295
  heartbeat?.transition({
285
296
  stage: 'verifying',
286
297
  stageIndex: 4,
287
298
  reviewRound: undefined,
288
- maxReviewRounds: task.maxReviewRounds ?? 5,
289
299
  });
290
300
  const verification = await runVerifyStage({
291
301
  cwd,
@@ -295,7 +305,7 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
295
305
  });
296
306
  latestVerification = verification;
297
307
  for (const step of verification.steps) {
298
- emitVerbose('verify_step', {
308
+ emitTaskEvent('verify_step', {
299
309
  command: step.command,
300
310
  status: step.status,
301
311
  exit_code: step.exitCode,
@@ -305,12 +315,46 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
305
315
  });
306
316
  }
307
317
  if (verification.status === 'skipped') {
308
- emitVerbose('verify_skipped', { reason: verification.skipReason ?? 'no_command', stage: 'verifying' });
318
+ emitTaskEvent('verify_skipped', { reason: verification.skipReason ?? 'no_command', stage: 'verifying' });
309
319
  }
310
320
  return verification;
311
321
  }
322
+ function signalize(result) {
323
+ const cause = typeof result.terminationReason === 'object' ? result.terminationReason.cause : result.terminationReason;
324
+ const capExhausted = result.capExhausted
325
+ ?? (result.status === 'cost_exceeded' || cause === 'cost_exceeded' || cause === 'cost_ceiling' ? 'cost'
326
+ : result.status === 'timeout' || cause === 'timeout' ? 'wall_clock'
327
+ : result.status === 'incomplete' && result.turns > 1 ? 'turn'
328
+ : undefined);
329
+ const lifecycleClarificationRequested = result.lifecycleClarificationRequested
330
+ ?? (result.status === 'brief_too_vague' || cause === 'brief_too_vague' ? true : undefined);
331
+ return {
332
+ ...result,
333
+ ...(capExhausted !== undefined && { capExhausted }),
334
+ ...(lifecycleClarificationRequested !== undefined && { lifecycleClarificationRequested }),
335
+ };
336
+ }
337
+ function workerErrorResult(err) {
338
+ const workerError = err instanceof Error ? err : new Error(String(err));
339
+ return signalize({
340
+ output: '',
341
+ status: 'error',
342
+ usage: { inputTokens: 0, outputTokens: 0, totalTokens: 0, costUSD: null },
343
+ turns: 0,
344
+ filesRead: [],
345
+ filesWritten: [],
346
+ toolCalls: [],
347
+ outputIsDiagnostic: true,
348
+ escalationLog: [],
349
+ error: workerError.message,
350
+ errorCode: 'runner_crash',
351
+ structuredError: { code: 'runner_crash', message: workerError.message },
352
+ workerStatus: 'failed',
353
+ workerError,
354
+ });
355
+ }
312
356
  function withVerification(result, verification = latestVerification) {
313
- return { ...result, verification };
357
+ return signalize({ ...result, verification });
314
358
  }
315
359
  function verificationErrorResult(base, verification) {
316
360
  if (verification.status !== 'error')
@@ -365,6 +409,15 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
365
409
  }
366
410
  function resolveDiffOnlyTerminal(base, verdict, verification, diffTruncated) {
367
411
  const concerns = [...(base.concerns ?? [])];
412
+ if ('status' in verdict && verdict.status === 'skipped') {
413
+ return withVerification({
414
+ ...base,
415
+ workerStatus: workerStatusForTerminal(base.workerStatus),
416
+ commits,
417
+ commitError,
418
+ verification,
419
+ }, verification);
420
+ }
368
421
  if (verdict.kind === 'reject') {
369
422
  return withVerification({
370
423
  ...base,
@@ -382,6 +435,18 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
382
435
  verification,
383
436
  }, verification);
384
437
  }
438
+ if (verdict.kind === 'transport_failure') {
439
+ return withVerification({
440
+ ...base,
441
+ status: verdict.status,
442
+ workerStatus: 'failed',
443
+ error: verdict.reason ?? `diff review transport failure: ${verdict.status}`,
444
+ concerns: [...concerns, ...verdict.concerns],
445
+ commits,
446
+ commitError,
447
+ verification,
448
+ }, verification);
449
+ }
385
450
  concerns.push(...verdict.concerns);
386
451
  if (verification.status === 'failed') {
387
452
  concerns.push({
@@ -487,7 +552,57 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
487
552
  });
488
553
  }
489
554
  }
490
- const implResult = await delegateWithEscalation(withDoneCondition(task), [resolved.provider], { explicitlyPinned: false, escalateToProvider: escalationProvider, onProgress: wrappedOnProgress });
555
+ const initialDecision = pickEscalation({
556
+ loop: 'spec',
557
+ attemptIndex: 0,
558
+ baseTier: resolved.slot,
559
+ });
560
+ const initialImpl = await runWithFallback({
561
+ assigned: initialDecision.impl,
562
+ providerFor,
563
+ unavailableTiers: specUnavailable,
564
+ isTransportFailure: (r) => TRANSPORT_FAILURES.has(r.status) && r.capExhausted === undefined,
565
+ getStatus: (r) => r.status,
566
+ makeSyntheticFailure: (assigned) => makeSyntheticRunResult(assigned, 'all_tiers_unavailable'),
567
+ call: (provider) => delegateWithEscalation(withDoneCondition(task), [provider], { explicitlyPinned: false, onProgress: wrappedOnProgress }),
568
+ });
569
+ if (initialImpl.fallbackFired || initialImpl.bothUnavailable) {
570
+ fallbackOverrides.push({
571
+ role: 'implementer',
572
+ loop: 'spec',
573
+ attempt: 0,
574
+ assigned: initialDecision.impl,
575
+ used: initialImpl.usedTier,
576
+ reason: (initialImpl.fallbackReason ?? initialImpl.unavailableReason),
577
+ triggeringStatus: initialImpl.fallbackTriggeringStatus,
578
+ bothUnavailable: initialImpl.bothUnavailable,
579
+ });
580
+ }
581
+ if (initialImpl.fallbackFired) {
582
+ emitFallback({
583
+ batchId: heartbeatWiring?.batchId ?? '', taskIndex,
584
+ loop: 'spec', attempt: 0, role: 'implementer',
585
+ assignedTier: initialDecision.impl,
586
+ usedTier: initialImpl.usedTier,
587
+ reason: initialImpl.fallbackReason,
588
+ triggeringStatus: initialImpl.fallbackTriggeringStatus,
589
+ violatesSeparation: false,
590
+ });
591
+ }
592
+ if (initialImpl.bothUnavailable) {
593
+ emitFallbackUnavailable({
594
+ batchId: heartbeatWiring?.batchId ?? '', taskIndex,
595
+ loop: 'spec', attempt: 0, role: 'implementer',
596
+ assignedTier: initialDecision.impl,
597
+ reason: initialImpl.unavailableReason,
598
+ });
599
+ return adaptForAllTiersUnavailable(initialImpl.result, 'spec', 0);
600
+ }
601
+ const implResult = initialImpl.result;
602
+ latestAttemptedImpl = { tier: initialImpl.usedTier, result: implResult };
603
+ lastNonRejectedImpl = { tier: initialImpl.usedTier, result: implResult };
604
+ implementerHistory.push(initialImpl.usedTier);
605
+ specAttemptIndex = 1;
491
606
  const implReport = implResult.status === 'ok' ? parseStructuredReport(implResult.output) : undefined;
492
607
  const workerStatus = extractWorkerStatus(implReport);
493
608
  if (implResult.status === 'ok' && isArtifactProducing) {
@@ -503,6 +618,28 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
503
618
  const filePathsSkipped = !filePathsInteracted;
504
619
  if (implResult.filesWritten.length === 0) {
505
620
  heartbeat?.updateStageCount(1);
621
+ if (reviewPolicy === 'off') {
622
+ emitTaskEvent('stage_change', { from: 'verifying', to: 'terminal' });
623
+ const terminal = resolveOffTerminal({
624
+ ...implResult,
625
+ workerStatus,
626
+ specReviewStatus: 'skipped',
627
+ qualityReviewStatus: 'skipped',
628
+ specReviewReason: 'skipped: reviewPolicy is off',
629
+ qualityReviewReason: 'skipped: reviewPolicy is off',
630
+ agents: agentEnvelope('skipped', 'skipped'),
631
+ models: {
632
+ implementer: implModel,
633
+ specReviewer: null,
634
+ qualityReviewer: null,
635
+ },
636
+ implementationReport: implReport,
637
+ structuredReport: implReport,
638
+ filePathsSkipped,
639
+ fileArtifactsMissing: implResult.status === 'ok' ? checkOutputTargets(outputTargets) : undefined,
640
+ }, verification);
641
+ return terminal;
642
+ }
506
643
  const effectiveImplReport = implReport ?? buildFallbackImplReport(implResult);
507
644
  const earlyFileArtifactsMissing = implResult.status === 'ok' ? checkOutputTargets(outputTargets) : undefined;
508
645
  const earlyStatus = implResult.status === 'ok' && earlyFileArtifactsMissing
@@ -523,13 +660,10 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
523
660
  validationsRun: effectiveImplReport.validationsRun,
524
661
  deviationsFromBrief: effectiveImplReport.deviationsFromBrief,
525
662
  unresolved: effectiveImplReport.unresolved,
663
+ extraSections: effectiveImplReport.extraSections ?? {},
526
664
  },
527
665
  filePathsSkipped,
528
- agents: {
529
- implementer: resolved.slot,
530
- specReviewer: 'not_applicable',
531
- qualityReviewer: 'not_applicable',
532
- },
666
+ agents: agentEnvelope('not_applicable', 'not_applicable'),
533
667
  models: {
534
668
  implementer: implModel,
535
669
  specReviewer: null,
@@ -549,11 +683,7 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
549
683
  qualityReviewStatus: 'skipped',
550
684
  specReviewReason: 'skipped: worker reported ' + workerStatus,
551
685
  qualityReviewReason: 'skipped: worker reported ' + workerStatus,
552
- agents: {
553
- implementer: resolved.slot,
554
- specReviewer: 'skipped',
555
- qualityReviewer: 'skipped',
556
- },
686
+ agents: agentEnvelope('skipped', 'skipped'),
557
687
  models: {
558
688
  implementer: implModel,
559
689
  specReviewer: null,
@@ -566,7 +696,7 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
566
696
  };
567
697
  }
568
698
  if (reviewPolicy === 'off') {
569
- emitVerbose('stage_change', { from: 'verifying', to: 'terminal' });
699
+ emitTaskEvent('stage_change', { from: 'verifying', to: 'terminal' });
570
700
  const terminal = resolveOffTerminal({
571
701
  ...implResult,
572
702
  workerStatus,
@@ -574,11 +704,7 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
574
704
  qualityReviewStatus: 'skipped',
575
705
  specReviewReason: 'skipped: reviewPolicy is off',
576
706
  qualityReviewReason: 'skipped: reviewPolicy is off',
577
- agents: {
578
- implementer: resolved.slot,
579
- specReviewer: 'skipped',
580
- qualityReviewer: 'skipped',
581
- },
707
+ agents: agentEnvelope('skipped', 'skipped'),
582
708
  models: {
583
709
  implementer: implModel,
584
710
  specReviewer: null,
@@ -589,35 +715,7 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
589
715
  }, verification);
590
716
  return terminal;
591
717
  }
592
- let otherProvider;
593
- try {
594
- otherProvider = createProvider(otherSlot, config);
595
- }
596
- catch {
597
- return {
598
- ...implResult,
599
- workerStatus,
600
- specReviewStatus: 'skipped',
601
- qualityReviewStatus: 'skipped',
602
- specReviewReason: 'skipped: no review agent configured',
603
- qualityReviewReason: 'skipped: no review agent configured',
604
- agents: {
605
- implementer: resolved.slot,
606
- specReviewer: 'skipped',
607
- qualityReviewer: 'skipped',
608
- },
609
- models: {
610
- implementer: implModel,
611
- specReviewer: null,
612
- qualityReviewer: null,
613
- },
614
- fileArtifactsMissing: implResult.status === 'ok' ? checkOutputTargets(outputTargets) : undefined,
615
- commits,
616
- commitError,
617
- verification,
618
- };
619
- }
620
- const reviewModel = otherProvider.config.model;
718
+ const reviewModel = providerFor(pickReviewer({ loop: 'spec', attemptIndex: 0, baseTier: resolved.slot }))?.config.model ?? null;
621
719
  const packet = {
622
720
  prompt: task.prompt,
623
721
  scope: task.filePaths ?? [],
@@ -629,21 +727,28 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
629
727
  ? await buildEvidence({ cwd, baselineHead, commits, verification, reviewPolicy })
630
728
  : { block: '', diffTruncated: false, fullDiff: '' };
631
729
  if (reviewPolicy === 'diff_only') {
632
- emitVerbose('stage_change', { from: 'verifying', to: 'diff_review' });
633
- heartbeat?.transition({
634
- stage: 'diff_review',
635
- stageIndex: 2,
636
- reviewRound: 1,
637
- maxReviewRounds,
638
- });
639
- const verdict = await runDiffReview({
640
- cwd,
641
- diff: evidence.fullDiff,
642
- diffTruncated: evidence.diffTruncated,
643
- verification,
644
- worker: { call: (prompt) => otherProvider.run(prompt) },
730
+ const diffUnavailable = new Map();
731
+ const diffReviewerTier = pickReviewer({ loop: 'spec', attemptIndex: 0, baseTier: resolved.slot });
732
+ emitTaskEvent('stage_change', { from: 'verifying', to: 'diff_review' });
733
+ heartbeat?.transition({ stage: 'diff_review', stageIndex: 2, reviewRound: 1, attemptCap: 1 });
734
+ const diffCall = await runWithFallback({
735
+ assigned: diffReviewerTier,
736
+ providerFor,
737
+ unavailableTiers: diffUnavailable,
738
+ isTransportFailure: (r) => isReviewTransportFailure(r),
739
+ getStatus: (r) => r.status,
740
+ makeSyntheticFailure: () => makeSkippedReviewResult('all_tiers_unavailable'),
741
+ call: (provider) => runDiffReview({ cwd, diff: evidence.fullDiff, diffTruncated: evidence.diffTruncated, verification, worker: { call: (prompt) => provider.run(prompt) } }),
645
742
  });
646
- emitVerbose('review_decision', { stage: 'diff_review', verdict: verdict.kind, round: 1 });
743
+ if (diffCall.fallbackFired) {
744
+ emitFallback({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'diff', attempt: 0, role: 'diffReviewer', assignedTier: diffReviewerTier, usedTier: diffCall.usedTier, reason: diffCall.fallbackReason, triggeringStatus: diffCall.fallbackTriggeringStatus, violatesSeparation: diffCall.usedTier === implementerHistory[implementerHistory.length - 1] });
745
+ fallbackOverrides.push({ role: 'diffReviewer', loop: 'diff', attempt: 0, assigned: diffReviewerTier, used: diffCall.usedTier, reason: diffCall.fallbackReason, triggeringStatus: diffCall.fallbackTriggeringStatus, bothUnavailable: diffCall.bothUnavailable });
746
+ }
747
+ if (diffCall.bothUnavailable) {
748
+ emitFallbackUnavailable({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'diff', attempt: 0, role: 'diffReviewer', assignedTier: diffReviewerTier, reason: diffCall.unavailableReason });
749
+ }
750
+ const verdict = diffCall.bothUnavailable || isReviewTransportFailure(diffCall.result) ? makeSkippedReviewResult('all_tiers_unavailable') : diffCall.result;
751
+ emitTaskEvent('review_decision', { stage: 'diff_review', verdict: 'kind' in verdict ? verdict.kind : 'skipped', round: 1 });
647
752
  return resolveDiffOnlyTerminal({
648
753
  ...implResult,
649
754
  workerStatus,
@@ -653,120 +758,188 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
653
758
  qualityReviewReason: 'skipped: reviewPolicy is diff_only',
654
759
  implementationReport: effectiveImplReport,
655
760
  fileArtifactsMissing: implResult.status === 'ok' ? checkOutputTargets(outputTargets) : undefined,
656
- agents: {
657
- implementer: resolved.slot,
658
- specReviewer: 'skipped',
659
- qualityReviewer: 'skipped',
660
- },
661
- models: {
662
- implementer: implModel,
663
- specReviewer: reviewModel,
664
- qualityReviewer: null,
665
- },
761
+ agents: agentEnvelope('skipped', 'skipped'),
762
+ models: { implementer: implModel, specReviewer: reviewModel, qualityReviewer: null },
666
763
  }, verdict, verification, evidence.diffTruncated);
667
764
  }
668
- heartbeat?.transition({
669
- stage: 'spec_review', stageIndex: 2,
670
- reviewRound: 1, maxReviewRounds: task.maxReviewRounds ?? 5,
671
- });
672
- let specResult = await runSpecReview(otherProvider, packet, effectiveImplReport, fileContents, implResult.toolCalls, task.planContext, evidence.block);
673
765
  let finalImplResult = implResult;
674
766
  let finalImplReport = effectiveImplReport;
675
- let specStatus = specResult.status;
676
- let specReport = specResult.report;
677
- if (specStatus === 'changes_required') {
678
- let prevSpecFindings = [];
679
- while (true) {
680
- if (specRework + qualityRework >= maxReviewRounds) {
681
- return abortReviewLoop(finalImplResult, 'round_cap', 'review round cap reached before spec rework', 'spec');
767
+ let specResult;
768
+ let specStatus;
769
+ let specReport;
770
+ let specReviewReason;
771
+ heartbeat?.transition({ stage: 'spec_review', stageIndex: 2, reviewRound: 1, attemptCap: maxSpecRows });
772
+ const initialReviewerTier = pickReviewer({ loop: 'spec', attemptIndex: 0, baseTier: resolved.slot });
773
+ const initialSpecReview = await runWithFallback({
774
+ assigned: initialReviewerTier,
775
+ providerFor,
776
+ unavailableTiers: specUnavailable,
777
+ isTransportFailure: (r) => isReviewTransportFailure(r),
778
+ getStatus: (r) => r.status,
779
+ makeSyntheticFailure: () => makeSkippedReviewResult('all_tiers_unavailable'),
780
+ call: (provider) => runSpecReview(provider, packet, effectiveImplReport, fileContents, implResult.toolCalls, task.planContext, evidence.block),
781
+ });
782
+ if (initialSpecReview.bothUnavailable) {
783
+ emitFallbackUnavailable({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'spec', attempt: 0, role: 'specReviewer', assignedTier: initialReviewerTier, reason: initialSpecReview.unavailableReason });
784
+ fallbackOverrides.push({ role: 'specReviewer', loop: 'spec', attempt: 0, assigned: initialReviewerTier, used: initialSpecReview.usedTier, reason: initialSpecReview.unavailableReason, triggeringStatus: initialSpecReview.fallbackTriggeringStatus, bothUnavailable: true });
785
+ specReviewerHistory.push('skipped');
786
+ }
787
+ else {
788
+ specReviewerHistory.push(initialSpecReview.usedTier);
789
+ if (initialSpecReview.fallbackFired) {
790
+ emitFallback({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'spec', attempt: 0, role: 'specReviewer', assignedTier: initialReviewerTier, usedTier: initialSpecReview.usedTier, reason: initialSpecReview.fallbackReason, triggeringStatus: initialSpecReview.fallbackTriggeringStatus, violatesSeparation: initialSpecReview.usedTier === implementerHistory[implementerHistory.length - 1] });
791
+ fallbackOverrides.push({ role: 'specReviewer', loop: 'spec', attempt: 0, assigned: initialReviewerTier, used: initialSpecReview.usedTier, reason: initialSpecReview.fallbackReason, triggeringStatus: initialSpecReview.fallbackTriggeringStatus, bothUnavailable: false });
792
+ }
793
+ }
794
+ specResult = initialSpecReview.bothUnavailable
795
+ ? makeSkippedReviewResult('all_tiers_unavailable')
796
+ : initialSpecReview.result;
797
+ specStatus = specResult.status;
798
+ specReport = 'report' in specResult ? specResult.report : undefined;
799
+ specReviewReason = specStatus === 'skipped' ? 'all_tiers_unavailable' : ('errorReason' in specResult ? specResult.errorReason : undefined);
800
+ let prevSpecFindings = [...(specResult.findings ?? [])];
801
+ while (specStatus === 'changes_required') {
802
+ if (specAttemptIndex >= maxSpecRows)
803
+ return abortReviewLoop(finalImplResult, 'round_cap', 'review round cap reached before spec rework', 'spec');
804
+ const currentCostUSD = taskCostUSD();
805
+ if (currentCostUSD !== null && maxCostUSD !== undefined && currentCostUSD >= 0.8 * maxCostUSD) {
806
+ emitTaskEvent('cost_check', { stage: 'spec_rework', tripped: true, cost_used_usd: currentCostUSD, cost_cap_usd: maxCostUSD, cost_available: true });
807
+ return abortReviewLoop(finalImplResult, 'cost_ceiling', 'cost ceiling reached before spec rework', 'spec');
808
+ }
809
+ const decision = pickEscalation({ loop: 'spec', attemptIndex: specAttemptIndex, baseTier: resolved.slot });
810
+ if (decision.isEscalated)
811
+ emitEscalationEvent('spec', specAttemptIndex, decision);
812
+ emitTaskEvent('stage_change', { from: 'spec_review', to: 'spec_rework', attempt: specAttemptIndex, attemptCap: maxSpecRows, implTier: decision.impl, reviewerTier: decision.reviewer, escalated: decision.isEscalated });
813
+ heartbeat?.transition({ stage: 'spec_rework', stageIndex: 3, reviewRound: specAttemptIndex, attemptCap: maxSpecRows });
814
+ const feedback = specResult.findings.length > 0 ? `\n\n## Spec Review Feedback (round ${specAttemptIndex}):\n${specResult.findings.map(f => `- ${f}`).join('\n')}` : '';
815
+ const reworkTask = withDoneCondition({ ...task, prompt: `${task.prompt}${feedback}` });
816
+ const reworkCall = await runWithFallback({ assigned: decision.impl, providerFor, unavailableTiers: specUnavailable, isTransportFailure: (r) => TRANSPORT_FAILURES.has(r.status) && r.capExhausted === undefined, getStatus: (r) => r.status, makeSyntheticFailure: (assigned) => makeSyntheticRunResult(assigned, 'all_tiers_unavailable'), call: (provider) => delegateWithEscalation(reworkTask, [provider], { explicitlyPinned: true, onProgress: wrappedOnProgress }) });
817
+ if (reworkCall.fallbackFired || reworkCall.bothUnavailable)
818
+ fallbackOverrides.push({ role: 'implementer', loop: 'spec', attempt: specAttemptIndex, assigned: decision.impl, used: reworkCall.usedTier, reason: (reworkCall.fallbackReason ?? reworkCall.unavailableReason), triggeringStatus: reworkCall.fallbackTriggeringStatus, bothUnavailable: reworkCall.bothUnavailable });
819
+ if (reworkCall.fallbackFired) {
820
+ emitFallback({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'spec', attempt: specAttemptIndex, role: 'implementer', assignedTier: decision.impl, usedTier: reworkCall.usedTier, reason: reworkCall.fallbackReason, triggeringStatus: reworkCall.fallbackTriggeringStatus, violatesSeparation: false });
821
+ if (decision.isEscalated && reworkCall.fallbackReason === 'not_configured')
822
+ emitEscalationUnavailable({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'spec', attempt: specAttemptIndex, role: 'implementer', wantedTier: decision.impl, reason: reworkCall.fallbackReason });
823
+ }
824
+ if (reworkCall.bothUnavailable) {
825
+ emitFallbackUnavailable({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'spec', attempt: specAttemptIndex, role: 'implementer', assignedTier: decision.impl, reason: reworkCall.unavailableReason });
826
+ if (decision.isEscalated)
827
+ emitEscalationUnavailable({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'spec', attempt: specAttemptIndex, role: 'implementer', wantedTier: decision.impl, reason: reworkCall.unavailableReason });
828
+ return adaptForAllTiersUnavailable(reworkCall.result, 'spec', specAttemptIndex);
829
+ }
830
+ finalImplResult = reworkCall.result;
831
+ latestAttemptedImpl = { tier: reworkCall.usedTier, result: finalImplResult };
832
+ implementerHistory.push(reworkCall.usedTier);
833
+ const reworkReport = parseStructuredReport(finalImplResult.output);
834
+ finalImplReport = reworkReport.summary ? reworkReport : buildFallbackImplReport(finalImplResult);
835
+ fileContents = await readImplementerFileContents(finalImplResult.filesWritten, task.cwd);
836
+ heartbeat?.transition({ stage: 'spec_review', stageIndex: 2, reviewRound: specAttemptIndex + 1, attemptCap: maxSpecRows });
837
+ const reviewCall = await runWithFallback({ assigned: decision.reviewer, providerFor, unavailableTiers: specUnavailable, isTransportFailure: (r) => isReviewTransportFailure(r), getStatus: (r) => r.status, makeSyntheticFailure: () => makeSkippedReviewResult('all_tiers_unavailable'), call: (provider) => runSpecReview(provider, packet, finalImplReport, fileContents, finalImplResult.toolCalls, task.planContext, evidence.block) });
838
+ if (reviewCall.bothUnavailable) {
839
+ emitFallbackUnavailable({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'spec', attempt: specAttemptIndex, role: 'specReviewer', assignedTier: decision.reviewer, reason: reviewCall.unavailableReason });
840
+ fallbackOverrides.push({ role: 'specReviewer', loop: 'spec', attempt: specAttemptIndex, assigned: decision.reviewer, used: reviewCall.usedTier, reason: reviewCall.unavailableReason, triggeringStatus: reviewCall.fallbackTriggeringStatus, bothUnavailable: true });
841
+ specReviewerHistory.push('skipped');
842
+ }
843
+ else {
844
+ specReviewerHistory.push(reviewCall.usedTier);
845
+ if (reviewCall.fallbackFired) {
846
+ emitFallback({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'spec', attempt: specAttemptIndex, role: 'specReviewer', assignedTier: decision.reviewer, usedTier: reviewCall.usedTier, reason: reviewCall.fallbackReason, triggeringStatus: reviewCall.fallbackTriggeringStatus, violatesSeparation: reviewCall.usedTier === implementerHistory[implementerHistory.length - 1] });
847
+ fallbackOverrides.push({ role: 'specReviewer', loop: 'spec', attempt: specAttemptIndex, assigned: decision.reviewer, used: reviewCall.usedTier, reason: reviewCall.fallbackReason, triggeringStatus: reviewCall.fallbackTriggeringStatus, bothUnavailable: false });
848
+ }
849
+ }
850
+ specResult = reviewCall.result;
851
+ specStatus = specResult.status;
852
+ specReport = 'report' in specResult ? specResult.report : undefined;
853
+ specReviewReason = specStatus === 'skipped' ? 'all_tiers_unavailable' : ('errorReason' in specResult ? specResult.errorReason : undefined);
854
+ if (reviewDidNotReject(specStatus))
855
+ lastNonRejectedImpl = { tier: implementerHistory[implementerHistory.length - 1], result: finalImplResult };
856
+ specAttemptIndex++;
857
+ if (specStatus === 'approved' || specStatus === 'skipped')
858
+ break;
859
+ const currentFindings = [...(specResult.findings ?? [])].sort().join('\0');
860
+ const prevFindings = [...prevSpecFindings].sort().join('\0');
861
+ if (currentFindings === prevFindings && currentFindings !== '')
862
+ break;
863
+ prevSpecFindings = [...(specResult.findings ?? [])];
864
+ }
865
+ let qualityResult = { status: 'skipped', report: undefined, findings: [], errorReason: reviewPolicy === 'full' ? 'all_tiers_unavailable' : 'skipped: reviewPolicy is spec_only' };
866
+ if (reviewPolicy === 'full') {
867
+ qualityUnavailable = new Map();
868
+ const qualityReviewerTier = pickReviewer({ loop: 'quality', attemptIndex: 0, baseTier: resolved.slot });
869
+ heartbeat?.transition({ stage: 'quality_review', stageIndex: 4, reviewRound: 1, attemptCap: maxQualityRows });
870
+ const initialQuality = await runWithFallback({ assigned: qualityReviewerTier, providerFor, unavailableTiers: qualityUnavailable, isTransportFailure: (r) => isReviewTransportFailure(r), getStatus: (r) => r.status, makeSyntheticFailure: () => makeSkippedReviewResult('all_tiers_unavailable'), call: (provider) => runQualityReview(provider, packet, specReport ?? finalImplReport, fileContents, finalImplResult.toolCalls, finalImplResult.filesWritten, evidence.block) });
871
+ if (initialQuality.bothUnavailable) {
872
+ emitFallbackUnavailable({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'quality', attempt: 0, role: 'qualityReviewer', assignedTier: qualityReviewerTier, reason: initialQuality.unavailableReason });
873
+ fallbackOverrides.push({ role: 'qualityReviewer', loop: 'quality', attempt: 0, assigned: qualityReviewerTier, used: initialQuality.usedTier, reason: initialQuality.unavailableReason, triggeringStatus: initialQuality.fallbackTriggeringStatus, bothUnavailable: true });
874
+ qualityReviewerHistory.push('skipped');
875
+ }
876
+ else {
877
+ qualityReviewerHistory.push(initialQuality.usedTier);
878
+ if (initialQuality.fallbackFired) {
879
+ emitFallback({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'quality', attempt: 0, role: 'qualityReviewer', assignedTier: qualityReviewerTier, usedTier: initialQuality.usedTier, reason: initialQuality.fallbackReason, triggeringStatus: initialQuality.fallbackTriggeringStatus, violatesSeparation: initialQuality.usedTier === implementerHistory[implementerHistory.length - 1] });
880
+ fallbackOverrides.push({ role: 'qualityReviewer', loop: 'quality', attempt: 0, assigned: qualityReviewerTier, used: initialQuality.usedTier, reason: initialQuality.fallbackReason, triggeringStatus: initialQuality.fallbackTriggeringStatus, bothUnavailable: false });
682
881
  }
882
+ }
883
+ qualityResult = initialQuality.result;
884
+ let prevQualityFindings = [...(qualityResult.findings ?? [])];
885
+ qualityAttemptIndex = 1;
886
+ while (qualityResult.status === 'changes_required') {
887
+ if (qualityAttemptIndex >= maxQualityRows)
888
+ return abortReviewLoop(finalImplResult, 'round_cap', 'review round cap reached before quality rework', 'quality');
683
889
  const currentCostUSD = taskCostUSD();
684
890
  if (currentCostUSD !== null && maxCostUSD !== undefined && currentCostUSD >= 0.8 * maxCostUSD) {
685
- emitVerbose('cost_check', { stage: 'spec_rework', tripped: true, cost_used_usd: currentCostUSD, cost_cap_usd: maxCostUSD, cost_available: true });
686
- return abortReviewLoop(finalImplResult, 'cost_ceiling', 'cost ceiling reached before spec rework', 'spec');
891
+ emitTaskEvent('cost_check', { stage: 'quality_rework', tripped: true, cost_used_usd: currentCostUSD, cost_cap_usd: maxCostUSD, cost_available: true });
892
+ return abortReviewLoop(finalImplResult, 'cost_ceiling', 'cost ceiling reached before quality rework', 'quality');
687
893
  }
688
- emitVerbose('stage_change', { from: 'spec_review', to: 'spec_rework', round: specRework + 1, cap: maxReviewRounds });
689
- specRework++;
690
- const round = specRework;
691
- heartbeat?.transition({
692
- stage: 'spec_rework', stageIndex: 3,
693
- reviewRound: round, maxReviewRounds,
694
- });
695
- const feedback = specResult.findings.length > 0
696
- ? `\n\n## Spec Review Feedback (round ${round}):\n${specResult.findings.map(f => `- ${f}`).join('\n')}`
697
- : '';
698
- const reworkPrompt = `${task.prompt}${feedback}`;
699
- const reworkTask = withDoneCondition({ ...task, prompt: reworkPrompt });
700
- const reworkResult = await delegateWithEscalation(reworkTask, [resolved.provider], { explicitlyPinned: true, onProgress: wrappedOnProgress });
701
- finalImplResult = reworkResult;
702
- const reworkReport = parseStructuredReport(reworkResult.output);
703
- finalImplReport = reworkReport.summary ? reworkReport : buildFallbackImplReport(reworkResult);
704
- const reworkContents = await readImplementerFileContents(reworkResult.filesWritten, task.cwd);
705
- fileContents = reworkContents;
706
- heartbeat?.transition({
707
- stage: 'spec_review', stageIndex: 2,
708
- reviewRound: round + 1, maxReviewRounds,
709
- });
710
- specResult = await runSpecReview(otherProvider, packet, finalImplReport, reworkContents, reworkResult.toolCalls, task.planContext, evidence.block);
711
- specStatus = specResult.status;
712
- specReport = specResult.report;
713
- if (specStatus === 'approved')
894
+ const decision = pickEscalation({ loop: 'quality', attemptIndex: qualityAttemptIndex, baseTier: resolved.slot });
895
+ if (decision.isEscalated)
896
+ emitEscalationEvent('quality', qualityAttemptIndex, decision);
897
+ emitTaskEvent('stage_change', { from: 'quality_review', to: 'quality_rework', attempt: qualityAttemptIndex, attemptCap: maxQualityRows, implTier: decision.impl, reviewerTier: decision.reviewer, escalated: decision.isEscalated });
898
+ heartbeat?.transition({ stage: 'quality_rework', stageIndex: 5, reviewRound: qualityAttemptIndex, attemptCap: maxQualityRows });
899
+ const feedback = qualityResult.findings.length > 0 ? `\n\n## Quality Review Feedback (round ${qualityAttemptIndex}):\n${qualityResult.findings.map(f => `- ${f}`).join('\n')}` : '';
900
+ const reworkTask = withDoneCondition({ ...task, prompt: `${task.prompt}${feedback}` });
901
+ const reworkCall = await runWithFallback({ assigned: decision.impl, providerFor, unavailableTiers: qualityUnavailable, isTransportFailure: (r) => TRANSPORT_FAILURES.has(r.status) && r.capExhausted === undefined, getStatus: (r) => r.status, makeSyntheticFailure: (assigned) => makeSyntheticRunResult(assigned, 'all_tiers_unavailable'), call: (provider) => delegateWithEscalation(reworkTask, [provider], { explicitlyPinned: true, onProgress: wrappedOnProgress }) });
902
+ if (reworkCall.fallbackFired || reworkCall.bothUnavailable)
903
+ fallbackOverrides.push({ role: 'implementer', loop: 'quality', attempt: qualityAttemptIndex, assigned: decision.impl, used: reworkCall.usedTier, reason: (reworkCall.fallbackReason ?? reworkCall.unavailableReason), triggeringStatus: reworkCall.fallbackTriggeringStatus, bothUnavailable: reworkCall.bothUnavailable });
904
+ if (reworkCall.fallbackFired)
905
+ emitFallback({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'quality', attempt: qualityAttemptIndex, role: 'implementer', assignedTier: decision.impl, usedTier: reworkCall.usedTier, reason: reworkCall.fallbackReason, triggeringStatus: reworkCall.fallbackTriggeringStatus, violatesSeparation: false });
906
+ if (reworkCall.bothUnavailable) {
907
+ emitFallbackUnavailable({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'quality', attempt: qualityAttemptIndex, role: 'implementer', assignedTier: decision.impl, reason: reworkCall.unavailableReason });
908
+ if (decision.isEscalated)
909
+ emitEscalationUnavailable({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'quality', attempt: qualityAttemptIndex, role: 'implementer', wantedTier: decision.impl, reason: reworkCall.unavailableReason });
910
+ return adaptForAllTiersUnavailable(reworkCall.result, 'quality', qualityAttemptIndex);
911
+ }
912
+ finalImplResult = reworkCall.result;
913
+ latestAttemptedImpl = { tier: reworkCall.usedTier, result: finalImplResult };
914
+ implementerHistory.push(reworkCall.usedTier);
915
+ const reworkReport = parseStructuredReport(finalImplResult.output);
916
+ finalImplReport = reworkReport.summary ? reworkReport : buildFallbackImplReport(finalImplResult);
917
+ fileContents = await readImplementerFileContents(finalImplResult.filesWritten, task.cwd);
918
+ heartbeat?.transition({ stage: 'quality_review', stageIndex: 4, reviewRound: qualityAttemptIndex + 1, attemptCap: maxQualityRows });
919
+ const reviewCall = await runWithFallback({ assigned: decision.reviewer, providerFor, unavailableTiers: qualityUnavailable, isTransportFailure: (r) => isReviewTransportFailure(r), getStatus: (r) => r.status, makeSyntheticFailure: () => makeSkippedReviewResult('all_tiers_unavailable'), call: (provider) => runQualityReview(provider, packet, finalImplReport, fileContents, finalImplResult.toolCalls, finalImplResult.filesWritten, evidence.block) });
920
+ if (reviewCall.bothUnavailable) {
921
+ emitFallbackUnavailable({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'quality', attempt: qualityAttemptIndex, role: 'qualityReviewer', assignedTier: decision.reviewer, reason: reviewCall.unavailableReason });
922
+ fallbackOverrides.push({ role: 'qualityReviewer', loop: 'quality', attempt: qualityAttemptIndex, assigned: decision.reviewer, used: reviewCall.usedTier, reason: reviewCall.unavailableReason, triggeringStatus: reviewCall.fallbackTriggeringStatus, bothUnavailable: true });
923
+ qualityReviewerHistory.push('skipped');
924
+ }
925
+ else {
926
+ qualityReviewerHistory.push(reviewCall.usedTier);
927
+ if (reviewCall.fallbackFired) {
928
+ emitFallback({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'quality', attempt: qualityAttemptIndex, role: 'qualityReviewer', assignedTier: decision.reviewer, usedTier: reviewCall.usedTier, reason: reviewCall.fallbackReason, triggeringStatus: reviewCall.fallbackTriggeringStatus, violatesSeparation: reviewCall.usedTier === implementerHistory[implementerHistory.length - 1] });
929
+ fallbackOverrides.push({ role: 'qualityReviewer', loop: 'quality', attempt: qualityAttemptIndex, assigned: decision.reviewer, used: reviewCall.usedTier, reason: reviewCall.fallbackReason, triggeringStatus: reviewCall.fallbackTriggeringStatus, bothUnavailable: false });
930
+ }
931
+ }
932
+ qualityResult = reviewCall.result;
933
+ if (reviewDidNotReject(qualityResult.status))
934
+ lastNonRejectedImpl = { tier: implementerHistory[implementerHistory.length - 1], result: finalImplResult };
935
+ qualityAttemptIndex++;
936
+ if (qualityResult.status === 'approved' || qualityResult.status === 'skipped')
714
937
  break;
715
- const currentFindings = [...specResult.findings].sort().join('\0');
716
- const prevFindings = prevSpecFindings.sort().join('\0');
938
+ const currentFindings = [...(qualityResult.findings ?? [])].sort().join('\0');
939
+ const prevFindings = [...prevQualityFindings].sort().join('\0');
717
940
  if (currentFindings === prevFindings && currentFindings !== '')
718
941
  break;
719
- prevSpecFindings = specResult.findings;
720
- }
721
- }
722
- let qualityResult = { status: 'skipped', report: undefined, findings: [] };
723
- if (reviewPolicy === 'full') {
724
- heartbeat?.transition({
725
- stage: 'quality_review', stageIndex: 4,
726
- reviewRound: 1, maxReviewRounds,
727
- });
728
- qualityResult = await runQualityReview(otherProvider, packet, specReport ?? finalImplReport, fileContents, finalImplResult.toolCalls, finalImplResult.filesWritten, evidence.block);
729
- if (qualityResult.status === 'changes_required') {
730
- let prevQualityFindings = [];
731
- while (true) {
732
- if (specRework + qualityRework >= maxReviewRounds) {
733
- return abortReviewLoop(finalImplResult, 'round_cap', 'review round cap reached before quality rework', 'quality');
734
- }
735
- const currentCostUSD = taskCostUSD();
736
- if (currentCostUSD !== null && maxCostUSD !== undefined && currentCostUSD >= 0.8 * maxCostUSD) {
737
- emitVerbose('cost_check', { stage: 'quality_rework', tripped: true, cost_used_usd: currentCostUSD, cost_cap_usd: maxCostUSD, cost_available: true });
738
- return abortReviewLoop(finalImplResult, 'cost_ceiling', 'cost ceiling reached before quality rework', 'quality');
739
- }
740
- emitVerbose('stage_change', { from: 'quality_review', to: 'quality_rework', round: qualityRework + 1, cap: maxReviewRounds });
741
- qualityRework++;
742
- const round = qualityRework;
743
- heartbeat?.transition({
744
- stage: 'quality_rework', stageIndex: 5,
745
- reviewRound: round, maxReviewRounds,
746
- });
747
- const feedback = qualityResult.findings.length > 0
748
- ? `\n\n## Quality Review Feedback (round ${round}):\n${qualityResult.findings.map(f => `- ${f}`).join('\n')}`
749
- : '';
750
- const reworkPrompt = `${task.prompt}${feedback}`;
751
- const reworkTask = withDoneCondition({ ...task, prompt: reworkPrompt });
752
- const reworkResult = await delegateWithEscalation(reworkTask, [resolved.provider], { explicitlyPinned: true, onProgress: wrappedOnProgress });
753
- finalImplResult = reworkResult;
754
- const reworkReport = parseStructuredReport(reworkResult.output);
755
- finalImplReport = reworkReport.summary ? reworkReport : buildFallbackImplReport(reworkResult);
756
- const reworkContents = await readImplementerFileContents(reworkResult.filesWritten, task.cwd);
757
- heartbeat?.transition({
758
- stage: 'quality_review', stageIndex: 4,
759
- reviewRound: round + 1, maxReviewRounds,
760
- });
761
- qualityResult = await runQualityReview(otherProvider, packet, finalImplReport, reworkContents, reworkResult.toolCalls, reworkResult.filesWritten, evidence.block);
762
- if (qualityResult.status === 'approved')
763
- break;
764
- const currentFindings = [...qualityResult.findings].sort().join('\0');
765
- const prevFindings = prevQualityFindings.sort().join('\0');
766
- if (currentFindings === prevFindings && currentFindings !== '')
767
- break;
768
- prevQualityFindings = qualityResult.findings;
769
- }
942
+ prevQualityFindings = [...(qualityResult.findings ?? [])];
770
943
  }
771
944
  }
772
945
  const finalReport = specReport ?? finalImplReport;
@@ -788,7 +961,9 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
788
961
  message: 'Implementation diff exceeded the reviewer evidence byte cap and was truncated.',
789
962
  });
790
963
  }
791
- const aggregated = aggregateResult(finalReport, specReport, qualityResult.report, specStatus, qualityResult.status);
964
+ const specAggregateStatus = (['approved', 'changes_required', 'skipped', 'error', 'api_error', 'network_error', 'timeout'].includes(specStatus) ? specStatus : 'error');
965
+ const qualityAggregateStatus = qualityResult.status;
966
+ const aggregated = aggregateResult(finalReport, specReport, qualityResult.report, specAggregateStatus, qualityAggregateStatus);
792
967
  // File artifact verification: check whether output targets exist on disk after all work.
793
968
  // Only applies when status is ok; non-ok statuses skip verification entirely.
794
969
  const fileArtifactsMissing = finalImplResult.status === 'ok' && outputTargets.length > 0
@@ -802,25 +977,23 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
802
977
  : finalImplResult.status === 'ok' && fileArtifactsMissing
803
978
  ? 'incomplete'
804
979
  : finalImplResult.status;
980
+ const specEnvelopeStatus = (specStatus === 'api_error' || specStatus === 'network_error' || specStatus === 'timeout' ? 'error' : specStatus);
981
+ const qualityEnvelopeStatus = qualityResult.status === 'api_error' || qualityResult.status === 'network_error' || qualityResult.status === 'timeout' ? 'error' : qualityResult.status;
805
982
  return {
806
983
  ...finalImplResult,
807
984
  status: finalStatus,
808
985
  workerStatus: finalWorkerStatus,
809
986
  concerns,
810
- specReviewStatus: specStatus,
811
- qualityReviewStatus: qualityResult.status,
812
- specReviewReason: specResult.errorReason,
813
- qualityReviewReason: qualityResult.errorReason,
987
+ specReviewStatus: specEnvelopeStatus,
988
+ qualityReviewStatus: qualityEnvelopeStatus,
989
+ specReviewReason: 'errorReason' in specResult ? specResult.errorReason : undefined,
990
+ qualityReviewReason: 'errorReason' in qualityResult ? qualityResult.errorReason : undefined,
814
991
  structuredReport: aggregated,
815
992
  implementationReport: finalImplReport,
816
993
  specReviewReport: specReport,
817
994
  qualityReviewReport: qualityResult.report,
818
995
  filePathsSkipped,
819
- agents: {
820
- implementer: resolved.slot,
821
- specReviewer: otherSlot,
822
- qualityReviewer: reviewPolicy === 'full' ? otherSlot : 'skipped',
823
- },
996
+ agents: agentEnvelope(specReviewerHistory[specReviewerHistory.length - 1] ?? 'not_applicable', qualityReviewerHistory[qualityReviewerHistory.length - 1] ?? (reviewPolicy === 'full' ? 'not_applicable' : 'skipped')),
824
997
  models: {
825
998
  implementer: implModel,
826
999
  specReviewer: reviewModel,
@@ -832,6 +1005,9 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
832
1005
  verification,
833
1006
  };
834
1007
  }
1008
+ catch (err) {
1009
+ return withVerification(workerErrorResult(err));
1010
+ }
835
1011
  finally {
836
1012
  heartbeat?.stop();
837
1013
  }