@zhixuan92/multi-model-agent-core 3.2.0 → 3.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (138) hide show
  1. package/README.md +4 -3
  2. package/dist/auto-commit.d.ts +8 -1
  3. package/dist/auto-commit.d.ts.map +1 -1
  4. package/dist/auto-commit.js +6 -3
  5. package/dist/auto-commit.js.map +1 -1
  6. package/dist/batch-cache.d.ts +1 -1
  7. package/dist/batch-cache.d.ts.map +1 -1
  8. package/dist/batch-cache.js +3 -5
  9. package/dist/batch-cache.js.map +1 -1
  10. package/dist/diagnostics/disconnect-log.d.ts +8 -27
  11. package/dist/diagnostics/disconnect-log.d.ts.map +1 -1
  12. package/dist/diagnostics/disconnect-log.js +10 -49
  13. package/dist/diagnostics/disconnect-log.js.map +1 -1
  14. package/dist/diagnostics/request-spill.d.ts +16 -0
  15. package/dist/diagnostics/request-spill.d.ts.map +1 -0
  16. package/dist/diagnostics/request-spill.js +23 -0
  17. package/dist/diagnostics/request-spill.js.map +1 -0
  18. package/dist/diagnostics/verbose-line.d.ts +12 -0
  19. package/dist/diagnostics/verbose-line.d.ts.map +1 -0
  20. package/dist/diagnostics/verbose-line.js +80 -0
  21. package/dist/diagnostics/verbose-line.js.map +1 -0
  22. package/dist/executors/debug.js +1 -1
  23. package/dist/executors/debug.js.map +1 -1
  24. package/dist/executors/delegate.d.ts.map +1 -1
  25. package/dist/executors/delegate.js +6 -2
  26. package/dist/executors/delegate.js.map +1 -1
  27. package/dist/executors/execute-plan.d.ts.map +1 -1
  28. package/dist/executors/execute-plan.js +9 -2
  29. package/dist/executors/execute-plan.js.map +1 -1
  30. package/dist/executors/investigate.d.ts +11 -0
  31. package/dist/executors/investigate.d.ts.map +1 -0
  32. package/dist/executors/investigate.js +101 -0
  33. package/dist/executors/investigate.js.map +1 -0
  34. package/dist/executors/retry.d.ts.map +1 -1
  35. package/dist/executors/retry.js +4 -1
  36. package/dist/executors/retry.js.map +1 -1
  37. package/dist/heartbeat.d.ts +7 -0
  38. package/dist/heartbeat.d.ts.map +1 -1
  39. package/dist/heartbeat.js +28 -1
  40. package/dist/heartbeat.js.map +1 -1
  41. package/dist/intake/compilers/delegate.d.ts +3 -1
  42. package/dist/intake/compilers/delegate.d.ts.map +1 -1
  43. package/dist/intake/compilers/delegate.js +23 -12
  44. package/dist/intake/compilers/delegate.js.map +1 -1
  45. package/dist/intake/compilers/execute-plan.d.ts +6 -1
  46. package/dist/intake/compilers/execute-plan.d.ts.map +1 -1
  47. package/dist/intake/compilers/execute-plan.js +8 -1
  48. package/dist/intake/compilers/execute-plan.js.map +1 -1
  49. package/dist/intake/compilers/investigate.d.ts +12 -0
  50. package/dist/intake/compilers/investigate.d.ts.map +1 -0
  51. package/dist/intake/compilers/investigate.js +36 -0
  52. package/dist/intake/compilers/investigate.js.map +1 -0
  53. package/dist/intake/resolve.d.ts.map +1 -1
  54. package/dist/intake/resolve.js +3 -1
  55. package/dist/intake/resolve.js.map +1 -1
  56. package/dist/intake/types.d.ts +9 -2
  57. package/dist/intake/types.d.ts.map +1 -1
  58. package/dist/model-profiles.json +10 -6
  59. package/dist/reporting/compose-investigate-headline.d.ts +11 -0
  60. package/dist/reporting/compose-investigate-headline.d.ts.map +1 -0
  61. package/dist/reporting/compose-investigate-headline.js +29 -0
  62. package/dist/reporting/compose-investigate-headline.js.map +1 -0
  63. package/dist/reporting/derive-investigate-status.d.ts +17 -0
  64. package/dist/reporting/derive-investigate-status.d.ts.map +1 -0
  65. package/dist/reporting/derive-investigate-status.js +30 -0
  66. package/dist/reporting/derive-investigate-status.js.map +1 -0
  67. package/dist/reporting/parse-investigation-report.d.ts +39 -0
  68. package/dist/reporting/parse-investigation-report.d.ts.map +1 -0
  69. package/dist/reporting/parse-investigation-report.js +150 -0
  70. package/dist/reporting/parse-investigation-report.js.map +1 -0
  71. package/dist/reporting/structured-report.d.ts +20 -0
  72. package/dist/reporting/structured-report.d.ts.map +1 -1
  73. package/dist/reporting/structured-report.js +76 -3
  74. package/dist/reporting/structured-report.js.map +1 -1
  75. package/dist/review/aggregate-result.d.ts.map +1 -1
  76. package/dist/review/aggregate-result.js +5 -0
  77. package/dist/review/aggregate-result.js.map +1 -1
  78. package/dist/review/diff-review.d.ts +29 -0
  79. package/dist/review/diff-review.d.ts.map +1 -0
  80. package/dist/review/diff-review.js +53 -0
  81. package/dist/review/diff-review.js.map +1 -0
  82. package/dist/review/evidence.d.ts +15 -0
  83. package/dist/review/evidence.d.ts.map +1 -0
  84. package/dist/review/evidence.js +26 -0
  85. package/dist/review/evidence.js.map +1 -0
  86. package/dist/review/quality-reviewer.d.ts +1 -1
  87. package/dist/review/quality-reviewer.d.ts.map +1 -1
  88. package/dist/review/quality-reviewer.js +5 -3
  89. package/dist/review/quality-reviewer.js.map +1 -1
  90. package/dist/review/spec-reviewer.d.ts +1 -1
  91. package/dist/review/spec-reviewer.d.ts.map +1 -1
  92. package/dist/review/spec-reviewer.js +3 -2
  93. package/dist/review/spec-reviewer.js.map +1 -1
  94. package/dist/run-tasks/commit-stage.d.ts +16 -0
  95. package/dist/run-tasks/commit-stage.d.ts.map +1 -0
  96. package/dist/run-tasks/commit-stage.js +52 -0
  97. package/dist/run-tasks/commit-stage.js.map +1 -0
  98. package/dist/run-tasks/fallback-report.d.ts.map +1 -1
  99. package/dist/run-tasks/fallback-report.js +1 -0
  100. package/dist/run-tasks/fallback-report.js.map +1 -1
  101. package/dist/run-tasks/metadata-repair.d.ts +15 -0
  102. package/dist/run-tasks/metadata-repair.d.ts.map +1 -0
  103. package/dist/run-tasks/metadata-repair.js +30 -0
  104. package/dist/run-tasks/metadata-repair.js.map +1 -0
  105. package/dist/run-tasks/reviewed-lifecycle.d.ts.map +1 -1
  106. package/dist/run-tasks/reviewed-lifecycle.js +474 -95
  107. package/dist/run-tasks/reviewed-lifecycle.js.map +1 -1
  108. package/dist/run-tasks/verify-stage.d.ts +25 -0
  109. package/dist/run-tasks/verify-stage.d.ts.map +1 -0
  110. package/dist/run-tasks/verify-stage.js +168 -0
  111. package/dist/run-tasks/verify-stage.js.map +1 -0
  112. package/dist/runners/base/result-builders.d.ts +26 -1
  113. package/dist/runners/base/result-builders.d.ts.map +1 -1
  114. package/dist/runners/base/result-builders.js +5 -0
  115. package/dist/runners/base/result-builders.js.map +1 -1
  116. package/dist/runners/prevention.d.ts.map +1 -1
  117. package/dist/runners/prevention.js +18 -0
  118. package/dist/runners/prevention.js.map +1 -1
  119. package/dist/runners/types.d.ts +4 -1
  120. package/dist/runners/types.d.ts.map +1 -1
  121. package/dist/tool-schemas/audit.d.ts +2 -2
  122. package/dist/tool-schemas/delegate.d.ts +9 -0
  123. package/dist/tool-schemas/delegate.d.ts.map +1 -1
  124. package/dist/tool-schemas/delegate.js +4 -0
  125. package/dist/tool-schemas/delegate.js.map +1 -1
  126. package/dist/tool-schemas/execute-plan.d.ts +13 -2
  127. package/dist/tool-schemas/execute-plan.d.ts.map +1 -1
  128. package/dist/tool-schemas/execute-plan.js +22 -4
  129. package/dist/tool-schemas/execute-plan.js.map +1 -1
  130. package/dist/tool-schemas/investigate.d.ts +48 -0
  131. package/dist/tool-schemas/investigate.d.ts.map +1 -0
  132. package/dist/tool-schemas/investigate.js +13 -0
  133. package/dist/tool-schemas/investigate.js.map +1 -0
  134. package/dist/tool-schemas/review.d.ts +1 -1
  135. package/dist/types.d.ts +36 -4
  136. package/dist/types.d.ts.map +1 -1
  137. package/dist/types.js.map +1 -1
  138. package/package.json +37 -1
@@ -1,16 +1,24 @@
1
+ import { execFile } from 'node:child_process';
2
+ import { promisify } from 'node:util';
1
3
  import { computeCostUSD, computeSavedCostUSD } from '../types.js';
2
4
  import { createProvider } from '../provider.js';
3
5
  import { delegateWithEscalation } from '../delegate-with-escalation.js';
4
6
  import { HeartbeatTimer } from '../heartbeat.js';
5
7
  import { runSpecReview } from '../review/spec-reviewer.js';
6
8
  import { runQualityReview } from '../review/quality-reviewer.js';
9
+ import { runDiffReview } from '../review/diff-review.js';
7
10
  import { aggregateResult } from '../review/aggregate-result.js';
11
+ import { buildEvidence } from '../review/evidence.js';
8
12
  import { parseStructuredReport } from '../reporting/structured-report.js';
9
- import { autoCommitFiles } from '../auto-commit.js';
13
+ import { runCommitStage, readbackCommit } from './commit-stage.js';
14
+ import { runVerifyStage } from './verify-stage.js';
15
+ import { runMetadataRepairTurn } from './metadata-repair.js';
10
16
  import { partitionFilePaths, checkOutputTargets } from '../file-artifact-check.js';
11
17
  import { extractWorkerStatus } from './worker-status.js';
12
18
  import { buildFallbackImplReport, readImplementerFileContents } from './fallback-report.js';
19
+ import { composeVerboseLine } from '../diagnostics/verbose-line.js';
13
20
  import { withDoneCondition } from './execute-task.js';
21
+ const exec = promisify(execFile);
14
22
  export async function executeReviewedLifecycle(task, resolved, config, taskIndex, onProgress, heartbeatWiring, diagnostics) {
15
23
  const reviewPolicy = task.reviewPolicy ?? 'full';
16
24
  const otherSlot = resolved.slot === 'standard' ? 'complex' : 'standard';
@@ -34,6 +42,20 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
34
42
  : undefined;
35
43
  const verboseBatchIdEarly = heartbeatWiring?.batchId;
36
44
  const shortBatchEarly = verboseBatchIdEarly ? verboseBatchIdEarly.slice(0, 8) : '????????';
45
+ const taskEventLogger = diagnostics?.logger;
46
+ const emitTaskEvent = (event, fields) => {
47
+ if (taskEventLogger && verboseBatchIdEarly !== undefined) {
48
+ const cleaned = {};
49
+ for (const [key, value] of Object.entries(fields)) {
50
+ if (value !== undefined)
51
+ cleaned[key] = value;
52
+ }
53
+ taskEventLogger.emit({ event, batchId: verboseBatchIdEarly, taskIndex, ...cleaned });
54
+ }
55
+ if (verboseStreamRaw) {
56
+ verboseStreamRaw(composeVerboseLine({ event, ts: new Date().toISOString(), batch: shortBatchEarly, task: taskIndex, ...fields }));
57
+ }
58
+ };
37
59
  // Start the heartbeat whenever there's a downstream consumer:
38
60
  // - onProgress (external progress callback from the runTasks caller)
39
61
  // - verbose (stderr stream needs the heartbeat's tool_call / turn_complete relay)
@@ -50,23 +72,30 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
50
72
  const synthOnProgress = onProgress ?? (() => { });
51
73
  const heartbeat = needHeartbeat
52
74
  ? new HeartbeatTimer((event) => {
53
- if (verboseStreamRaw && event.kind === 'heartbeat') {
75
+ if (event.kind === 'heartbeat') {
54
76
  // Emit on every heartbeat tick so the operator can confirm
55
77
  // the timer is actually firing. Stage-change lines are richer
56
78
  // but fire only on transitions; plain ticks let you see
57
79
  // per-5s progress inside a long-running stage.
58
80
  if (event.stage !== lastStageSeen) {
59
81
  if (lastStageSeen !== undefined) {
60
- verboseStreamRaw(`[mmagent verbose] batch=${shortBatchEarly} task=${taskIndex} stage ${lastStageSeen} ${event.stage}`);
82
+ emitTaskEvent('stage_change', { from: lastStageSeen, to: event.stage });
61
83
  }
62
84
  lastStageSeen = event.stage;
63
85
  }
64
- const costStr = event.costUSD !== null ? ` cost=$${event.costUSD.toFixed(4)}` : '';
65
- const roundStr = event.reviewRound !== undefined && event.maxReviewRounds !== undefined
66
- ? ` round=${event.reviewRound}/${event.maxReviewRounds}`
67
- : '';
68
86
  const sinceLastMs = Date.now() - prevEventAtMs;
69
- verboseStreamRaw(`[mmagent verbose] batch=${shortBatchEarly} task=${taskIndex} heartbeat ${event.elapsed} stage=${event.stage}${roundStr} tools=${event.progress.toolCalls} read=${event.progress.filesRead} wrote=${event.progress.filesWritten} text=${textEmissionChars}c${costStr} idle=${sinceLastMs}ms`);
87
+ emitTaskEvent('heartbeat', {
88
+ elapsed: event.elapsed,
89
+ stage: event.stage,
90
+ round: event.reviewRound,
91
+ cap: event.maxReviewRounds,
92
+ tools: event.progress.toolCalls,
93
+ read: event.progress.filesRead,
94
+ wrote: event.progress.filesWritten,
95
+ text: textEmissionChars,
96
+ cost: event.costUSD,
97
+ idle_ms: sinceLastMs,
98
+ });
70
99
  }
71
100
  synthOnProgress(taskIndex, event);
72
101
  }, {
@@ -77,19 +106,16 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
77
106
  })
78
107
  : undefined;
79
108
  heartbeat?.start(stageCount);
80
- if (verboseStreamRaw) {
81
- verboseStreamRaw(`[mmagent verbose] batch=${shortBatchEarly} task=${taskIndex} heartbeat ` +
82
- (heartbeat ? `started (stageCount=${stageCount}, 5s tick)` : 'DISABLED (no consumer)'));
83
- }
109
+ emitTaskEvent('heartbeat_timer', {
110
+ state: heartbeat ? 'started' : 'disabled',
111
+ stage_count: stageCount,
112
+ tick_ms: heartbeat ? 5000 : undefined,
113
+ reason: heartbeat ? undefined : 'no_consumer',
114
+ });
84
115
  const implModel = resolved.provider.config.model;
85
116
  const progressCounters = { filesRead: 0, filesWritten: 0, toolCalls: 0 };
86
- const verboseLogger = verbose && diagnostics?.logger ? diagnostics.logger : undefined;
87
- const verboseBatchId = verboseBatchIdEarly;
88
117
  const verboseStream = verboseStreamRaw;
89
- const shortBatch = shortBatchEarly;
90
- if (verboseStream) {
91
- verboseStream(`[mmagent verbose] batch=${shortBatch} task=${taskIndex} start worker=${resolved.provider.config.model}`);
92
- }
118
+ emitTaskEvent('worker_start', { worker: resolved.provider.config.model });
93
119
  let prevEventAtMs = verbose ? Date.now() : 0;
94
120
  // Wrap whenever we have ANY consumer for InternalRunnerEvent (heartbeat,
95
121
  // verbose stream, or verbose logger). Previously this only wrapped when
@@ -99,22 +125,32 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
99
125
  const wrappedOnProgress = needHeartbeat
100
126
  ? (event) => {
101
127
  if (event.kind === 'turn_start') {
128
+ heartbeat?.markEvent('llm');
102
129
  if (verbose)
103
130
  prevEventAtMs = Date.now();
104
- if (verboseStream) {
105
- verboseStream(`[mmagent verbose] batch=${shortBatch} task=${taskIndex} turn_start turn=${event.turn} provider=${event.provider}`);
131
+ if (verbose) {
132
+ emitTaskEvent('turn_start', {
133
+ turn: event.turn,
134
+ provider: event.provider,
135
+ });
106
136
  }
107
137
  }
108
138
  if (event.kind === 'text_emission') {
139
+ heartbeat?.markEvent('text');
109
140
  textEmissionChars += event.chars;
110
- if (verboseStream && event.chars > 0) {
141
+ if (verbose && event.chars > 0) {
111
142
  const preview = event.preview.length > 60
112
143
  ? event.preview.slice(0, 57) + '...'
113
144
  : event.preview;
114
- verboseStream(`[mmagent verbose] batch=${shortBatch} task=${taskIndex} text +${event.chars}c (total ${textEmissionChars}) preview="${preview.replace(/\n/g, '\\n')}"`);
145
+ emitTaskEvent('text_emission', {
146
+ chars: event.chars,
147
+ total: textEmissionChars,
148
+ preview,
149
+ });
115
150
  }
116
151
  }
117
152
  if (event.kind === 'tool_call') {
153
+ heartbeat?.markEvent('tool');
118
154
  progressCounters.toolCalls++;
119
155
  const name = event.toolSummary.split('(')[0];
120
156
  if (name === 'readFile' || name === 'grep' || name === 'glob' || name === 'listFiles') {
@@ -128,19 +164,15 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
128
164
  const sincePrevMs = verbose ? now - prevEventAtMs : 0;
129
165
  if (verbose)
130
166
  prevEventAtMs = now;
131
- if (verboseLogger && verboseBatchId) {
132
- verboseLogger.toolCall({
133
- batchId: verboseBatchId,
134
- taskIndex,
167
+ if (verbose) {
168
+ emitTaskEvent('tool_call', {
135
169
  tool: event.toolSummary,
136
- durationMs: sincePrevMs,
170
+ duration_ms: sincePrevMs,
137
171
  });
138
172
  }
139
- if (verboseStream) {
140
- verboseStream(`[mmagent verbose] batch=${shortBatch} task=${taskIndex} tool=${event.toolSummary} +${sincePrevMs}ms`);
141
- }
142
173
  }
143
174
  if (event.kind === 'turn_complete') {
175
+ heartbeat?.markEvent('llm');
144
176
  const costUSD = computeCostUSD(event.cumulativeInputTokens, event.cumulativeOutputTokens, resolved.provider.config);
145
177
  const savedCostUSD = computeSavedCostUSD(costUSD, event.cumulativeInputTokens, event.cumulativeOutputTokens, task.parentModel);
146
178
  heartbeat?.updateCost(costUSD, savedCostUSD);
@@ -148,45 +180,329 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
148
180
  const turnDurMs = verbose ? nowTurn - prevEventAtMs : 0;
149
181
  if (verbose)
150
182
  prevEventAtMs = nowTurn;
151
- if (verboseLogger && verboseBatchId) {
152
- verboseLogger.llmTurn({
153
- batchId: verboseBatchId,
154
- taskIndex,
155
- turnIndex: progressCounters.toolCalls,
183
+ if (verbose) {
184
+ emitTaskEvent('turn_complete', {
185
+ input_tokens: event.cumulativeInputTokens,
186
+ output_tokens: event.cumulativeOutputTokens,
187
+ cost: costUSD,
188
+ duration_ms: turnDurMs,
156
189
  provider: resolved.provider.config.model,
157
- inputTokens: event.cumulativeInputTokens,
158
- outputTokens: event.cumulativeOutputTokens,
159
- costUSD,
160
190
  });
161
191
  }
162
- if (verboseStream) {
163
- const costStr = costUSD !== null ? ` $${costUSD.toFixed(4)}` : '';
164
- verboseStream(`[mmagent verbose] batch=${shortBatch} task=${taskIndex} ` +
165
- `turn in=${event.cumulativeInputTokens} out=${event.cumulativeOutputTokens}${costStr} ` +
166
- `+${turnDurMs}ms (${resolved.provider.config.model})`);
167
- }
168
192
  }
169
193
  }
170
194
  : undefined;
171
- // Track auto-commit state across all rounds
172
- let commitSha;
195
+ const cwd = task.cwd ?? process.cwd();
196
+ const taskStartMs = Date.now();
197
+ const commits = [];
173
198
  let commitError;
199
+ let specRework = 0;
200
+ let qualityRework = 0;
201
+ let metadataRepair = 0;
202
+ const maxReviewRounds = task.maxReviewRounds ?? 3;
203
+ const maxCostUSD = task.maxCostUSD;
204
+ const reviewRounds = () => ({ spec: specRework, quality: qualityRework, metadata: metadataRepair, cap: maxReviewRounds });
205
+ const taskCostUSD = () => (heartbeat ? heartbeat.getHeartbeatTickInfo().costUSD : null);
206
+ // When the review loop aborts mid-flight, preserve any review-status info already set
207
+ // on the base result (set by callers via abortReviewLoop({ ...res, specReviewStatus, ... })).
208
+ // Defaults to 'changes_required' for whichever loop tripped — that's the only state the
209
+ // loop ever fires from, by construction.
210
+ const abortReviewLoop = (base, terminationReason, message, aborting) => ({
211
+ ...base,
212
+ status: 'incomplete',
213
+ workerStatus: 'review_loop_aborted',
214
+ terminationReason,
215
+ reviewRounds: reviewRounds(),
216
+ error: message,
217
+ specReviewStatus: aborting === 'spec' ? 'changes_required' : (base.specReviewStatus ?? 'approved'),
218
+ qualityReviewStatus: aborting === 'quality' ? 'changes_required' : (base.qualityReviewStatus ?? 'skipped'),
219
+ });
220
+ const defaultVerification = { status: 'skipped', steps: [], totalDurationMs: 0, skipReason: 'no_command' };
221
+ let latestVerification = defaultVerification;
222
+ async function runVerificationStage() {
223
+ emitTaskEvent('stage_change', { from: 'committing', to: 'verifying' });
224
+ heartbeat?.transition({
225
+ stage: 'verifying',
226
+ stageIndex: 4,
227
+ reviewRound: undefined,
228
+ maxReviewRounds: task.maxReviewRounds ?? 5,
229
+ });
230
+ const verification = await runVerifyStage({
231
+ cwd,
232
+ verifyCommand: task.verifyCommand,
233
+ taskTimeoutMs: task.timeoutMs ?? config.defaults.timeoutMs ?? 1_800_000,
234
+ taskStartMs,
235
+ });
236
+ latestVerification = verification;
237
+ for (const step of verification.steps) {
238
+ emitTaskEvent('verify_step', {
239
+ command: step.command,
240
+ status: step.status,
241
+ exit_code: step.exitCode,
242
+ signal: step.signal,
243
+ duration_ms: step.durationMs,
244
+ error_message: step.errorMessage ?? undefined,
245
+ });
246
+ }
247
+ if (verification.status === 'skipped') {
248
+ emitTaskEvent('verify_skipped', { reason: verification.skipReason ?? 'no_command', stage: 'verifying' });
249
+ }
250
+ return verification;
251
+ }
252
+ function signalize(result) {
253
+ const cause = typeof result.terminationReason === 'object' ? result.terminationReason.cause : result.terminationReason;
254
+ const capExhausted = result.capExhausted
255
+ ?? (result.status === 'cost_exceeded' || cause === 'cost_exceeded' || cause === 'cost_ceiling' ? 'cost'
256
+ : result.status === 'timeout' || cause === 'timeout' ? 'wall_clock'
257
+ : result.status === 'incomplete' && result.turns > 1 ? 'turn'
258
+ : undefined);
259
+ const lifecycleClarificationRequested = result.lifecycleClarificationRequested
260
+ ?? (result.status === 'brief_too_vague' || cause === 'brief_too_vague' ? true : undefined);
261
+ return {
262
+ ...result,
263
+ ...(capExhausted !== undefined && { capExhausted }),
264
+ ...(lifecycleClarificationRequested !== undefined && { lifecycleClarificationRequested }),
265
+ };
266
+ }
267
+ function workerErrorResult(err) {
268
+ const workerError = err instanceof Error ? err : new Error(String(err));
269
+ return signalize({
270
+ output: '',
271
+ status: 'error',
272
+ usage: { inputTokens: 0, outputTokens: 0, totalTokens: 0, costUSD: null },
273
+ turns: 0,
274
+ filesRead: [],
275
+ filesWritten: [],
276
+ toolCalls: [],
277
+ outputIsDiagnostic: true,
278
+ escalationLog: [],
279
+ error: workerError.message,
280
+ errorCode: 'runner_crash',
281
+ structuredError: { code: 'runner_crash', message: workerError.message },
282
+ workerStatus: 'failed',
283
+ workerError,
284
+ });
285
+ }
286
+ function withVerification(result, verification = latestVerification) {
287
+ return signalize({ ...result, verification });
288
+ }
289
+ function verificationErrorResult(base, verification) {
290
+ if (verification.status !== 'error')
291
+ return null;
292
+ const failedIndex = verification.steps.findIndex((step) => step.status !== 'passed');
293
+ const failedStep = failedIndex >= 0 ? verification.steps[failedIndex] : undefined;
294
+ return withVerification({
295
+ ...base,
296
+ status: 'error',
297
+ workerStatus: 'done_with_concerns',
298
+ error: failedStep?.errorMessage ?? 'verify command error',
299
+ errorCode: 'verify_command_error',
300
+ commits,
301
+ commitError,
302
+ verification,
303
+ }, verification);
304
+ }
305
+ function resolveOffTerminal(base, verification) {
306
+ const concerns = [...(base.concerns ?? [])];
307
+ let workerStatus = workerStatusForTerminal(base.workerStatus);
308
+ if (verification.status === 'failed') {
309
+ concerns.push({
310
+ source: 'verification',
311
+ severity: 'high',
312
+ message: 'Verification failed after implementation.',
313
+ });
314
+ workerStatus = 'done_with_concerns';
315
+ }
316
+ if (verification.status === 'error') {
317
+ const failedIndex = verification.steps.findIndex((step) => step.status !== 'passed');
318
+ const failedStep = failedIndex >= 0 ? verification.steps[failedIndex] : undefined;
319
+ return withVerification({
320
+ ...base,
321
+ status: 'error',
322
+ workerStatus: 'failed',
323
+ error: failedStep?.errorMessage ?? 'verify command error',
324
+ errorCode: 'verify_command_error',
325
+ commits,
326
+ commitError,
327
+ verification,
328
+ }, verification);
329
+ }
330
+ return withVerification({
331
+ ...base,
332
+ status: base.status === 'ok' ? 'ok' : base.status,
333
+ workerStatus,
334
+ concerns,
335
+ commits,
336
+ commitError,
337
+ verification,
338
+ }, verification);
339
+ }
340
+ function resolveDiffOnlyTerminal(base, verdict, verification, diffTruncated) {
341
+ const concerns = [...(base.concerns ?? [])];
342
+ if (verdict.kind === 'reject') {
343
+ return withVerification({
344
+ ...base,
345
+ status: 'error',
346
+ workerStatus: 'failed',
347
+ error: verdict.message || 'diff review rejected implementation',
348
+ errorCode: 'diff_review_rejected',
349
+ structuredError: {
350
+ code: 'diff_review_rejected',
351
+ message: verdict.message || 'diff review rejected implementation',
352
+ },
353
+ concerns,
354
+ commits,
355
+ commitError,
356
+ verification,
357
+ }, verification);
358
+ }
359
+ concerns.push(...verdict.concerns);
360
+ if (verification.status === 'failed') {
361
+ concerns.push({
362
+ source: 'verification',
363
+ severity: 'high',
364
+ message: 'Verification failed after implementation.',
365
+ });
366
+ }
367
+ if (diffTruncated) {
368
+ concerns.push({
369
+ source: 'diff_truncated',
370
+ severity: 'medium',
371
+ message: 'Implementation diff exceeded the reviewer evidence byte cap and was truncated.',
372
+ });
373
+ }
374
+ const hasConcerns = concerns.length > 0 || verification.status === 'failed';
375
+ return withVerification({
376
+ ...base,
377
+ status: base.status === 'ok' ? 'ok' : base.status,
378
+ workerStatus: hasConcerns ? 'done_with_concerns' : workerStatusForTerminal(base.workerStatus),
379
+ concerns,
380
+ commits,
381
+ commitError,
382
+ verification,
383
+ }, verification);
384
+ }
385
+ function workerStatusForTerminal(status) {
386
+ return status === 'needs_context' || status === 'blocked' || status === 'failed' || status === 'done_with_concerns'
387
+ ? status
388
+ : 'done';
389
+ }
390
+ async function recordWorkerCommits(from, to = 'HEAD') {
391
+ const { stdout: revs } = await exec('git', ['rev-list', '--reverse', `${from}..${to}`], { cwd });
392
+ for (const sha of revs.trim().split('\n').filter(Boolean)) {
393
+ const c = await readbackCommit(sha, cwd);
394
+ commits.push(c);
395
+ }
396
+ }
397
+ async function repairCommitMetadata(initialDiagnostic) {
398
+ let metadataAttempts = 0;
399
+ let lastZodError = initialDiagnostic || 'no commit block emitted';
400
+ let validCommit = null;
401
+ while (metadataAttempts < 2 && !validCommit) {
402
+ const preStatus = (await exec('git', ['status', '--porcelain=v1', '-z'], { cwd })).stdout;
403
+ const repaired = await runMetadataRepairTurn({ task, zodError: lastZodError, cwd, providerSlot: resolved.slot, provider: resolved.provider });
404
+ const postStatus = (await exec('git', ['status', '--porcelain=v1', '-z'], { cwd })).stdout;
405
+ metadataAttempts += 1;
406
+ if (preStatus !== postStatus) {
407
+ commitError = 'commit_metadata_repair_modified_files';
408
+ return null;
409
+ }
410
+ if (repaired.commit)
411
+ validCommit = repaired.commit;
412
+ else
413
+ lastZodError = repaired.commitDiagnostic ?? 'no commit block emitted';
414
+ }
415
+ if (!validCommit)
416
+ commitError = `commit_metadata_invalid: ${lastZodError}`;
417
+ return validCommit;
418
+ }
419
+ async function captureCommitsAfterImplementation(implResult, implReport, baselineHead) {
420
+ const porcelain = (await exec('git', ['status', '--porcelain=v1'], { cwd })).stdout;
421
+ const headNow = (await exec('git', ['rev-parse', 'HEAD'], { cwd })).stdout.trim();
422
+ const headMoved = headNow !== baselineHead;
423
+ const treeDirty = porcelain.length > 0;
424
+ if (!headMoved && !treeDirty)
425
+ return;
426
+ if (headMoved)
427
+ await recordWorkerCommits(baselineHead, 'HEAD');
428
+ if (treeDirty) {
429
+ const validCommit = implReport?.commit ?? await repairCommitMetadata(implReport?.commitDiagnostic ?? 'no commit block emitted');
430
+ if (!validCommit)
431
+ return;
432
+ const c = await runCommitStage({ cwd, filesWritten: implResult.filesWritten, commit: validCommit });
433
+ commits.push(c);
434
+ }
435
+ }
174
436
  try {
437
+ // The dirty-tree precondition + git baseline only apply to artifact-producing tasks
438
+ // (those with autoCommit === true). Non-artifact presets — audit, review, verify,
439
+ // debug — neither produce commits nor read git state, so they bypass the check
440
+ // entirely. Per spec Section A: "Non-artifact tasks (audits, analyses, read-only
441
+ // investigations) skip stages 3 and 4."
442
+ const isArtifactProducing = task.autoCommit === true;
443
+ let baselineHead = '';
444
+ if (isArtifactProducing) {
445
+ baselineHead = (await exec('git', ['rev-parse', 'HEAD'], { cwd })).stdout.trim();
446
+ const baselinePorcelain = (await exec('git', ['status', '--porcelain=v1', '-z'], { cwd })).stdout;
447
+ if (baselinePorcelain.length !== 0) {
448
+ return withVerification({
449
+ output: `Sub-agent error: task.cwd ${cwd} had pre-existing modifications`,
450
+ status: 'error',
451
+ usage: { inputTokens: 0, outputTokens: 0, totalTokens: 0, costUSD: null },
452
+ turns: 0,
453
+ filesRead: [],
454
+ filesWritten: [],
455
+ toolCalls: [],
456
+ outputIsDiagnostic: true,
457
+ escalationLog: [],
458
+ error: `task.cwd ${cwd} had pre-existing modifications`,
459
+ errorCode: 'dirty_worktree',
460
+ commits,
461
+ });
462
+ }
463
+ }
175
464
  const implResult = await delegateWithEscalation(withDoneCondition(task), [resolved.provider], { explicitlyPinned: false, escalateToProvider: escalationProvider, onProgress: wrappedOnProgress });
176
465
  const implReport = implResult.status === 'ok' ? parseStructuredReport(implResult.output) : undefined;
177
466
  const workerStatus = extractWorkerStatus(implReport);
178
- // Auto-commit: commit the worker's file changes
179
- if (task.autoCommit && implResult.status === 'ok' && implResult.filesWritten.length > 0) {
180
- const commitResult = autoCommitFiles(implResult.filesWritten, implReport?.summary ?? undefined, task.cwd ?? process.cwd());
181
- commitSha = commitResult.sha;
182
- commitError = commitResult.error;
467
+ if (implResult.status === 'ok' && isArtifactProducing) {
468
+ await captureCommitsAfterImplementation(implResult, implReport, baselineHead);
183
469
  }
470
+ const verification = isArtifactProducing ? await runVerificationStage() : defaultVerification;
471
+ const verifyError = verificationErrorResult(implResult, verification);
472
+ if (verifyError)
473
+ return verifyError;
184
474
  const filePathsInteracted = task.filePaths && task.filePaths.length > 0
185
475
  ? [...(implResult.filesRead ?? []), ...implResult.filesWritten].some(f => task.filePaths.some(fp => f === fp || f.endsWith('/' + fp) || f.endsWith(fp)))
186
476
  : true;
187
477
  const filePathsSkipped = !filePathsInteracted;
188
478
  if (implResult.filesWritten.length === 0) {
189
479
  heartbeat?.updateStageCount(1);
480
+ if (reviewPolicy === 'off') {
481
+ emitTaskEvent('stage_change', { from: 'verifying', to: 'terminal' });
482
+ const terminal = resolveOffTerminal({
483
+ ...implResult,
484
+ workerStatus,
485
+ specReviewStatus: 'skipped',
486
+ qualityReviewStatus: 'skipped',
487
+ specReviewReason: 'skipped: reviewPolicy is off',
488
+ qualityReviewReason: 'skipped: reviewPolicy is off',
489
+ agents: {
490
+ implementer: resolved.slot,
491
+ specReviewer: 'skipped',
492
+ qualityReviewer: 'skipped',
493
+ },
494
+ models: {
495
+ implementer: implModel,
496
+ specReviewer: null,
497
+ qualityReviewer: null,
498
+ },
499
+ implementationReport: implReport,
500
+ structuredReport: implReport,
501
+ filePathsSkipped,
502
+ fileArtifactsMissing: implResult.status === 'ok' ? checkOutputTargets(outputTargets) : undefined,
503
+ }, verification);
504
+ return terminal;
505
+ }
190
506
  const effectiveImplReport = implReport ?? buildFallbackImplReport(implResult);
191
507
  const earlyFileArtifactsMissing = implResult.status === 'ok' ? checkOutputTargets(outputTargets) : undefined;
192
508
  const earlyStatus = implResult.status === 'ok' && earlyFileArtifactsMissing
@@ -207,6 +523,7 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
207
523
  validationsRun: effectiveImplReport.validationsRun,
208
524
  deviationsFromBrief: effectiveImplReport.deviationsFromBrief,
209
525
  unresolved: effectiveImplReport.unresolved,
526
+ extraSections: effectiveImplReport.extraSections ?? {},
210
527
  },
211
528
  filePathsSkipped,
212
529
  agents: {
@@ -220,8 +537,9 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
220
537
  qualityReviewer: null,
221
538
  },
222
539
  fileArtifactsMissing: earlyFileArtifactsMissing,
223
- commitSha,
540
+ commits,
224
541
  commitError,
542
+ verification,
225
543
  };
226
544
  }
227
545
  if (workerStatus === 'needs_context' || workerStatus === 'blocked') {
@@ -243,12 +561,14 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
243
561
  qualityReviewer: null,
244
562
  },
245
563
  fileArtifactsMissing: implResult.status === 'ok' ? checkOutputTargets(outputTargets) : undefined,
246
- commitSha,
564
+ commits,
247
565
  commitError,
566
+ verification,
248
567
  };
249
568
  }
250
569
  if (reviewPolicy === 'off') {
251
- return {
570
+ emitTaskEvent('stage_change', { from: 'verifying', to: 'terminal' });
571
+ const terminal = resolveOffTerminal({
252
572
  ...implResult,
253
573
  workerStatus,
254
574
  specReviewStatus: 'skipped',
@@ -267,9 +587,8 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
267
587
  },
268
588
  implementationReport: implReport,
269
589
  fileArtifactsMissing: implResult.status === 'ok' ? checkOutputTargets(outputTargets) : undefined,
270
- commitSha,
271
- commitError,
272
- };
590
+ }, verification);
591
+ return terminal;
273
592
  }
274
593
  let otherProvider;
275
594
  try {
@@ -294,8 +613,9 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
294
613
  qualityReviewer: null,
295
614
  },
296
615
  fileArtifactsMissing: implResult.status === 'ok' ? checkOutputTargets(outputTargets) : undefined,
297
- commitSha,
616
+ commits,
298
617
  commitError,
618
+ verification,
299
619
  };
300
620
  }
301
621
  const reviewModel = otherProvider.config.model;
@@ -306,23 +626,72 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
306
626
  };
307
627
  let fileContents = await readImplementerFileContents(implResult.filesWritten, task.cwd);
308
628
  const effectiveImplReport = implReport ?? buildFallbackImplReport(implResult);
629
+ const evidence = isArtifactProducing
630
+ ? await buildEvidence({ cwd, baselineHead, commits, verification, reviewPolicy })
631
+ : { block: '', diffTruncated: false, fullDiff: '' };
632
+ if (reviewPolicy === 'diff_only') {
633
+ emitTaskEvent('stage_change', { from: 'verifying', to: 'diff_review' });
634
+ heartbeat?.transition({
635
+ stage: 'diff_review',
636
+ stageIndex: 2,
637
+ reviewRound: 1,
638
+ maxReviewRounds,
639
+ });
640
+ const verdict = await runDiffReview({
641
+ cwd,
642
+ diff: evidence.fullDiff,
643
+ diffTruncated: evidence.diffTruncated,
644
+ verification,
645
+ worker: { call: (prompt) => otherProvider.run(prompt) },
646
+ });
647
+ emitTaskEvent('review_decision', { stage: 'diff_review', verdict: verdict.kind, round: 1 });
648
+ return resolveDiffOnlyTerminal({
649
+ ...implResult,
650
+ workerStatus,
651
+ specReviewStatus: 'skipped',
652
+ qualityReviewStatus: 'skipped',
653
+ specReviewReason: 'skipped: reviewPolicy is diff_only',
654
+ qualityReviewReason: 'skipped: reviewPolicy is diff_only',
655
+ implementationReport: effectiveImplReport,
656
+ fileArtifactsMissing: implResult.status === 'ok' ? checkOutputTargets(outputTargets) : undefined,
657
+ agents: {
658
+ implementer: resolved.slot,
659
+ specReviewer: 'skipped',
660
+ qualityReviewer: 'skipped',
661
+ },
662
+ models: {
663
+ implementer: implModel,
664
+ specReviewer: reviewModel,
665
+ qualityReviewer: null,
666
+ },
667
+ }, verdict, verification, evidence.diffTruncated);
668
+ }
309
669
  heartbeat?.transition({
310
670
  stage: 'spec_review', stageIndex: 2,
311
671
  reviewRound: 1, maxReviewRounds: task.maxReviewRounds ?? 5,
312
672
  });
313
- let specResult = await runSpecReview(otherProvider, packet, effectiveImplReport, fileContents, implResult.toolCalls, task.planContext);
673
+ let specResult = await runSpecReview(otherProvider, packet, effectiveImplReport, fileContents, implResult.toolCalls, task.planContext, evidence.block);
314
674
  let finalImplResult = implResult;
315
675
  let finalImplReport = effectiveImplReport;
316
676
  let specStatus = specResult.status;
317
677
  let specReport = specResult.report;
318
678
  if (specStatus === 'changes_required') {
319
679
  let prevSpecFindings = [];
320
- let round = 0;
321
680
  while (true) {
322
- round++;
681
+ if (specRework + qualityRework >= maxReviewRounds) {
682
+ return abortReviewLoop(finalImplResult, 'round_cap', 'review round cap reached before spec rework', 'spec');
683
+ }
684
+ const currentCostUSD = taskCostUSD();
685
+ if (currentCostUSD !== null && maxCostUSD !== undefined && currentCostUSD >= 0.8 * maxCostUSD) {
686
+ emitTaskEvent('cost_check', { stage: 'spec_rework', tripped: true, cost_used_usd: currentCostUSD, cost_cap_usd: maxCostUSD, cost_available: true });
687
+ return abortReviewLoop(finalImplResult, 'cost_ceiling', 'cost ceiling reached before spec rework', 'spec');
688
+ }
689
+ emitTaskEvent('stage_change', { from: 'spec_review', to: 'spec_rework', round: specRework + 1, cap: maxReviewRounds });
690
+ specRework++;
691
+ const round = specRework;
323
692
  heartbeat?.transition({
324
693
  stage: 'spec_rework', stageIndex: 3,
325
- reviewRound: round, maxReviewRounds: task.maxReviewRounds ?? 5,
694
+ reviewRound: round, maxReviewRounds,
326
695
  });
327
696
  const feedback = specResult.findings.length > 0
328
697
  ? `\n\n## Spec Review Feedback (round ${round}):\n${specResult.findings.map(f => `- ${f}`).join('\n')}`
@@ -330,15 +699,6 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
330
699
  const reworkPrompt = `${task.prompt}${feedback}`;
331
700
  const reworkTask = withDoneCondition({ ...task, prompt: reworkPrompt });
332
701
  const reworkResult = await delegateWithEscalation(reworkTask, [resolved.provider], { explicitlyPinned: true, onProgress: wrappedOnProgress });
333
- // Auto-commit rework changes
334
- if (task.autoCommit && reworkResult.status === 'ok' && reworkResult.filesWritten.length > 0) {
335
- const reworkReport = parseStructuredReport(reworkResult.output);
336
- const reworkCommit = autoCommitFiles(reworkResult.filesWritten, reworkReport.summary ?? undefined, task.cwd ?? process.cwd());
337
- if (reworkCommit.sha)
338
- commitSha = reworkCommit.sha;
339
- if (reworkCommit.error)
340
- commitError = reworkCommit.error;
341
- }
342
702
  finalImplResult = reworkResult;
343
703
  const reworkReport = parseStructuredReport(reworkResult.output);
344
704
  finalImplReport = reworkReport.summary ? reworkReport : buildFallbackImplReport(reworkResult);
@@ -346,9 +706,9 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
346
706
  fileContents = reworkContents;
347
707
  heartbeat?.transition({
348
708
  stage: 'spec_review', stageIndex: 2,
349
- reviewRound: round + 1, maxReviewRounds: task.maxReviewRounds ?? 5,
709
+ reviewRound: round + 1, maxReviewRounds,
350
710
  });
351
- specResult = await runSpecReview(otherProvider, packet, finalImplReport, reworkContents, reworkResult.toolCalls, task.planContext);
711
+ specResult = await runSpecReview(otherProvider, packet, finalImplReport, reworkContents, reworkResult.toolCalls, task.planContext, evidence.block);
352
712
  specStatus = specResult.status;
353
713
  specReport = specResult.report;
354
714
  if (specStatus === 'approved')
@@ -358,25 +718,32 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
358
718
  if (currentFindings === prevFindings && currentFindings !== '')
359
719
  break;
360
720
  prevSpecFindings = specResult.findings;
361
- if (round >= (task.maxReviewRounds ?? 5))
362
- break;
363
721
  }
364
722
  }
365
723
  let qualityResult = { status: 'skipped', report: undefined, findings: [] };
366
724
  if (reviewPolicy === 'full') {
367
725
  heartbeat?.transition({
368
726
  stage: 'quality_review', stageIndex: 4,
369
- reviewRound: 1, maxReviewRounds: task.maxReviewRounds ?? 5,
727
+ reviewRound: 1, maxReviewRounds,
370
728
  });
371
- qualityResult = await runQualityReview(otherProvider, packet, specReport ?? finalImplReport, fileContents, finalImplResult.toolCalls, finalImplResult.filesWritten);
729
+ qualityResult = await runQualityReview(otherProvider, packet, specReport ?? finalImplReport, fileContents, finalImplResult.toolCalls, finalImplResult.filesWritten, evidence.block);
372
730
  if (qualityResult.status === 'changes_required') {
373
731
  let prevQualityFindings = [];
374
- let round = 0;
375
732
  while (true) {
376
- round++;
733
+ if (specRework + qualityRework >= maxReviewRounds) {
734
+ return abortReviewLoop(finalImplResult, 'round_cap', 'review round cap reached before quality rework', 'quality');
735
+ }
736
+ const currentCostUSD = taskCostUSD();
737
+ if (currentCostUSD !== null && maxCostUSD !== undefined && currentCostUSD >= 0.8 * maxCostUSD) {
738
+ emitTaskEvent('cost_check', { stage: 'quality_rework', tripped: true, cost_used_usd: currentCostUSD, cost_cap_usd: maxCostUSD, cost_available: true });
739
+ return abortReviewLoop(finalImplResult, 'cost_ceiling', 'cost ceiling reached before quality rework', 'quality');
740
+ }
741
+ emitTaskEvent('stage_change', { from: 'quality_review', to: 'quality_rework', round: qualityRework + 1, cap: maxReviewRounds });
742
+ qualityRework++;
743
+ const round = qualityRework;
377
744
  heartbeat?.transition({
378
745
  stage: 'quality_rework', stageIndex: 5,
379
- reviewRound: round, maxReviewRounds: task.maxReviewRounds ?? 5,
746
+ reviewRound: round, maxReviewRounds,
380
747
  });
381
748
  const feedback = qualityResult.findings.length > 0
382
749
  ? `\n\n## Quality Review Feedback (round ${round}):\n${qualityResult.findings.map(f => `- ${f}`).join('\n')}`
@@ -384,24 +751,15 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
384
751
  const reworkPrompt = `${task.prompt}${feedback}`;
385
752
  const reworkTask = withDoneCondition({ ...task, prompt: reworkPrompt });
386
753
  const reworkResult = await delegateWithEscalation(reworkTask, [resolved.provider], { explicitlyPinned: true, onProgress: wrappedOnProgress });
387
- // Auto-commit rework changes
388
- if (task.autoCommit && reworkResult.status === 'ok' && reworkResult.filesWritten.length > 0) {
389
- const reworkReport = parseStructuredReport(reworkResult.output);
390
- const reworkCommit = autoCommitFiles(reworkResult.filesWritten, reworkReport.summary ?? undefined, task.cwd ?? process.cwd());
391
- if (reworkCommit.sha)
392
- commitSha = reworkCommit.sha;
393
- if (reworkCommit.error)
394
- commitError = reworkCommit.error;
395
- }
396
754
  finalImplResult = reworkResult;
397
755
  const reworkReport = parseStructuredReport(reworkResult.output);
398
756
  finalImplReport = reworkReport.summary ? reworkReport : buildFallbackImplReport(reworkResult);
399
757
  const reworkContents = await readImplementerFileContents(reworkResult.filesWritten, task.cwd);
400
758
  heartbeat?.transition({
401
759
  stage: 'quality_review', stageIndex: 4,
402
- reviewRound: round + 1, maxReviewRounds: task.maxReviewRounds ?? 5,
760
+ reviewRound: round + 1, maxReviewRounds,
403
761
  });
404
- qualityResult = await runQualityReview(otherProvider, packet, finalImplReport, reworkContents, reworkResult.toolCalls, reworkResult.filesWritten);
762
+ qualityResult = await runQualityReview(otherProvider, packet, finalImplReport, reworkContents, reworkResult.toolCalls, reworkResult.filesWritten, evidence.block);
405
763
  if (qualityResult.status === 'approved')
406
764
  break;
407
765
  const currentFindings = [...qualityResult.findings].sort().join('\0');
@@ -409,12 +767,28 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
409
767
  if (currentFindings === prevFindings && currentFindings !== '')
410
768
  break;
411
769
  prevQualityFindings = qualityResult.findings;
412
- if (round >= (task.maxReviewRounds ?? 5))
413
- break;
414
770
  }
415
771
  }
416
772
  }
417
773
  const finalReport = specReport ?? finalImplReport;
774
+ const concerns = [...(finalImplResult.concerns ?? [])];
775
+ let finalWorkerStatus = workerStatus;
776
+ if (verification.status === 'failed') {
777
+ concerns.push({
778
+ source: 'verification',
779
+ severity: 'high',
780
+ message: 'Verification failed after implementation.',
781
+ });
782
+ if (finalWorkerStatus === 'done')
783
+ finalWorkerStatus = 'done_with_concerns';
784
+ }
785
+ if (evidence.diffTruncated) {
786
+ concerns.push({
787
+ source: 'diff_truncated',
788
+ severity: 'medium',
789
+ message: 'Implementation diff exceeded the reviewer evidence byte cap and was truncated.',
790
+ });
791
+ }
418
792
  const aggregated = aggregateResult(finalReport, specReport, qualityResult.report, specStatus, qualityResult.status);
419
793
  // File artifact verification: check whether output targets exist on disk after all work.
420
794
  // Only applies when status is ok; non-ok statuses skip verification entirely.
@@ -432,7 +806,8 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
432
806
  return {
433
807
  ...finalImplResult,
434
808
  status: finalStatus,
435
- workerStatus,
809
+ workerStatus: finalWorkerStatus,
810
+ concerns,
436
811
  specReviewStatus: specStatus,
437
812
  qualityReviewStatus: qualityResult.status,
438
813
  specReviewReason: specResult.errorReason,
@@ -453,10 +828,14 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
453
828
  qualityReviewer: reviewPolicy === 'full' ? reviewModel : null,
454
829
  },
455
830
  fileArtifactsMissing,
456
- commitSha,
831
+ commits,
457
832
  commitError,
833
+ verification,
458
834
  };
459
835
  }
836
+ catch (err) {
837
+ return withVerification(workerErrorResult(err));
838
+ }
460
839
  finally {
461
840
  heartbeat?.stop();
462
841
  }