@zhixuan92/multi-model-agent-core 3.2.0 → 3.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/auto-commit.d.ts +8 -1
- package/dist/auto-commit.d.ts.map +1 -1
- package/dist/auto-commit.js +6 -3
- package/dist/auto-commit.js.map +1 -1
- package/dist/batch-cache.d.ts +1 -1
- package/dist/batch-cache.d.ts.map +1 -1
- package/dist/batch-cache.js +3 -5
- package/dist/batch-cache.js.map +1 -1
- package/dist/diagnostics/request-spill.d.ts +16 -0
- package/dist/diagnostics/request-spill.d.ts.map +1 -0
- package/dist/diagnostics/request-spill.js +23 -0
- package/dist/diagnostics/request-spill.js.map +1 -0
- package/dist/diagnostics/verbose-line.d.ts +12 -0
- package/dist/diagnostics/verbose-line.d.ts.map +1 -0
- package/dist/diagnostics/verbose-line.js +80 -0
- package/dist/diagnostics/verbose-line.js.map +1 -0
- package/dist/executors/debug.js +1 -1
- package/dist/executors/debug.js.map +1 -1
- package/dist/executors/delegate.d.ts.map +1 -1
- package/dist/executors/delegate.js +6 -2
- package/dist/executors/delegate.js.map +1 -1
- package/dist/executors/execute-plan.d.ts.map +1 -1
- package/dist/executors/execute-plan.js +9 -2
- package/dist/executors/execute-plan.js.map +1 -1
- package/dist/executors/retry.d.ts.map +1 -1
- package/dist/executors/retry.js +4 -1
- package/dist/executors/retry.js.map +1 -1
- package/dist/heartbeat.d.ts +7 -0
- package/dist/heartbeat.d.ts.map +1 -1
- package/dist/heartbeat.js +28 -1
- package/dist/heartbeat.js.map +1 -1
- package/dist/intake/compilers/delegate.d.ts +3 -1
- package/dist/intake/compilers/delegate.d.ts.map +1 -1
- package/dist/intake/compilers/delegate.js +23 -12
- package/dist/intake/compilers/delegate.js.map +1 -1
- package/dist/intake/compilers/execute-plan.d.ts +6 -1
- package/dist/intake/compilers/execute-plan.d.ts.map +1 -1
- package/dist/intake/compilers/execute-plan.js +8 -1
- package/dist/intake/compilers/execute-plan.js.map +1 -1
- package/dist/intake/resolve.js +1 -1
- package/dist/intake/resolve.js.map +1 -1
- package/dist/intake/types.d.ts +1 -0
- package/dist/intake/types.d.ts.map +1 -1
- package/dist/reporting/structured-report.d.ts +19 -0
- package/dist/reporting/structured-report.d.ts.map +1 -1
- package/dist/reporting/structured-report.js +50 -1
- package/dist/reporting/structured-report.js.map +1 -1
- package/dist/review/diff-review.d.ts +29 -0
- package/dist/review/diff-review.d.ts.map +1 -0
- package/dist/review/diff-review.js +53 -0
- package/dist/review/diff-review.js.map +1 -0
- package/dist/review/evidence.d.ts +15 -0
- package/dist/review/evidence.d.ts.map +1 -0
- package/dist/review/evidence.js +26 -0
- package/dist/review/evidence.js.map +1 -0
- package/dist/review/quality-reviewer.d.ts +1 -1
- package/dist/review/quality-reviewer.d.ts.map +1 -1
- package/dist/review/quality-reviewer.js +5 -3
- package/dist/review/quality-reviewer.js.map +1 -1
- package/dist/review/spec-reviewer.d.ts +1 -1
- package/dist/review/spec-reviewer.d.ts.map +1 -1
- package/dist/review/spec-reviewer.js +3 -2
- package/dist/review/spec-reviewer.js.map +1 -1
- package/dist/run-tasks/commit-stage.d.ts +16 -0
- package/dist/run-tasks/commit-stage.d.ts.map +1 -0
- package/dist/run-tasks/commit-stage.js +43 -0
- package/dist/run-tasks/commit-stage.js.map +1 -0
- package/dist/run-tasks/metadata-repair.d.ts +15 -0
- package/dist/run-tasks/metadata-repair.d.ts.map +1 -0
- package/dist/run-tasks/metadata-repair.js +30 -0
- package/dist/run-tasks/metadata-repair.js.map +1 -0
- package/dist/run-tasks/reviewed-lifecycle.d.ts.map +1 -1
- package/dist/run-tasks/reviewed-lifecycle.js +443 -68
- package/dist/run-tasks/reviewed-lifecycle.js.map +1 -1
- package/dist/run-tasks/verify-stage.d.ts +25 -0
- package/dist/run-tasks/verify-stage.d.ts.map +1 -0
- package/dist/run-tasks/verify-stage.js +168 -0
- package/dist/run-tasks/verify-stage.js.map +1 -0
- package/dist/runners/base/result-builders.d.ts +26 -1
- package/dist/runners/base/result-builders.d.ts.map +1 -1
- package/dist/runners/base/result-builders.js +5 -0
- package/dist/runners/base/result-builders.js.map +1 -1
- package/dist/runners/prevention.d.ts.map +1 -1
- package/dist/runners/prevention.js +18 -0
- package/dist/runners/prevention.js.map +1 -1
- package/dist/runners/types.d.ts +4 -1
- package/dist/runners/types.d.ts.map +1 -1
- package/dist/tool-schemas/audit.d.ts +2 -2
- package/dist/tool-schemas/delegate.d.ts +9 -0
- package/dist/tool-schemas/delegate.d.ts.map +1 -1
- package/dist/tool-schemas/delegate.js +4 -0
- package/dist/tool-schemas/delegate.js.map +1 -1
- package/dist/tool-schemas/execute-plan.d.ts +13 -2
- package/dist/tool-schemas/execute-plan.d.ts.map +1 -1
- package/dist/tool-schemas/execute-plan.js +22 -4
- package/dist/tool-schemas/execute-plan.js.map +1 -1
- package/dist/tool-schemas/review.d.ts +1 -1
- package/dist/types.d.ts +33 -4
- package/dist/types.d.ts.map +1 -1
- package/dist/types.js.map +1 -1
- package/package.json +29 -1
|
@@ -1,16 +1,24 @@
|
|
|
1
|
+
import { execFile } from 'node:child_process';
|
|
2
|
+
import { promisify } from 'node:util';
|
|
1
3
|
import { computeCostUSD, computeSavedCostUSD } from '../types.js';
|
|
2
4
|
import { createProvider } from '../provider.js';
|
|
3
5
|
import { delegateWithEscalation } from '../delegate-with-escalation.js';
|
|
4
6
|
import { HeartbeatTimer } from '../heartbeat.js';
|
|
5
7
|
import { runSpecReview } from '../review/spec-reviewer.js';
|
|
6
8
|
import { runQualityReview } from '../review/quality-reviewer.js';
|
|
9
|
+
import { runDiffReview } from '../review/diff-review.js';
|
|
7
10
|
import { aggregateResult } from '../review/aggregate-result.js';
|
|
11
|
+
import { buildEvidence } from '../review/evidence.js';
|
|
8
12
|
import { parseStructuredReport } from '../reporting/structured-report.js';
|
|
9
|
-
import {
|
|
13
|
+
import { runCommitStage, readbackCommit } from './commit-stage.js';
|
|
14
|
+
import { runVerifyStage } from './verify-stage.js';
|
|
15
|
+
import { runMetadataRepairTurn } from './metadata-repair.js';
|
|
10
16
|
import { partitionFilePaths, checkOutputTargets } from '../file-artifact-check.js';
|
|
11
17
|
import { extractWorkerStatus } from './worker-status.js';
|
|
12
18
|
import { buildFallbackImplReport, readImplementerFileContents } from './fallback-report.js';
|
|
19
|
+
import { composeVerboseLine } from '../diagnostics/verbose-line.js';
|
|
13
20
|
import { withDoneCondition } from './execute-task.js';
|
|
21
|
+
// Promise-returning form of child_process.execFile so commands can be awaited.
const exec = promisify(execFile);
|
|
14
22
|
export async function executeReviewedLifecycle(task, resolved, config, taskIndex, onProgress, heartbeatWiring, diagnostics) {
|
|
15
23
|
const reviewPolicy = task.reviewPolicy ?? 'full';
|
|
16
24
|
const otherSlot = resolved.slot === 'standard' ? 'complex' : 'standard';
|
|
@@ -57,16 +65,34 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
57
65
|
// per-5s progress inside a long-running stage.
|
|
58
66
|
if (event.stage !== lastStageSeen) {
|
|
59
67
|
if (lastStageSeen !== undefined) {
|
|
60
|
-
verboseStreamRaw(
|
|
68
|
+
verboseStreamRaw(composeVerboseLine({
|
|
69
|
+
event: 'stage_change',
|
|
70
|
+
ts: new Date().toISOString(),
|
|
71
|
+
batch: shortBatchEarly,
|
|
72
|
+
task: taskIndex,
|
|
73
|
+
from: lastStageSeen,
|
|
74
|
+
to: event.stage,
|
|
75
|
+
}));
|
|
61
76
|
}
|
|
62
77
|
lastStageSeen = event.stage;
|
|
63
78
|
}
|
|
64
|
-
const costStr = event.costUSD !== null ? ` cost=$${event.costUSD.toFixed(4)}` : '';
|
|
65
|
-
const roundStr = event.reviewRound !== undefined && event.maxReviewRounds !== undefined
|
|
66
|
-
? ` round=${event.reviewRound}/${event.maxReviewRounds}`
|
|
67
|
-
: '';
|
|
68
79
|
const sinceLastMs = Date.now() - prevEventAtMs;
|
|
69
|
-
verboseStreamRaw(
|
|
80
|
+
verboseStreamRaw(composeVerboseLine({
|
|
81
|
+
event: 'heartbeat',
|
|
82
|
+
ts: new Date().toISOString(),
|
|
83
|
+
batch: shortBatchEarly,
|
|
84
|
+
task: taskIndex,
|
|
85
|
+
elapsed: event.elapsed,
|
|
86
|
+
stage: event.stage,
|
|
87
|
+
round: event.reviewRound,
|
|
88
|
+
cap: event.maxReviewRounds,
|
|
89
|
+
tools: event.progress.toolCalls,
|
|
90
|
+
read: event.progress.filesRead,
|
|
91
|
+
wrote: event.progress.filesWritten,
|
|
92
|
+
text: textEmissionChars,
|
|
93
|
+
cost: event.costUSD,
|
|
94
|
+
idle_ms: sinceLastMs,
|
|
95
|
+
}));
|
|
70
96
|
}
|
|
71
97
|
synthOnProgress(taskIndex, event);
|
|
72
98
|
}, {
|
|
@@ -78,8 +104,16 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
78
104
|
: undefined;
|
|
79
105
|
heartbeat?.start(stageCount);
|
|
80
106
|
if (verboseStreamRaw) {
|
|
81
|
-
verboseStreamRaw(
|
|
82
|
-
|
|
107
|
+
verboseStreamRaw(composeVerboseLine({
|
|
108
|
+
event: 'heartbeat_timer',
|
|
109
|
+
ts: new Date().toISOString(),
|
|
110
|
+
batch: shortBatchEarly,
|
|
111
|
+
task: taskIndex,
|
|
112
|
+
state: heartbeat ? 'started' : 'disabled',
|
|
113
|
+
stage_count: stageCount,
|
|
114
|
+
tick_ms: heartbeat ? 5000 : undefined,
|
|
115
|
+
reason: heartbeat ? undefined : 'no_consumer',
|
|
116
|
+
}));
|
|
83
117
|
}
|
|
84
118
|
const implModel = resolved.provider.config.model;
|
|
85
119
|
const progressCounters = { filesRead: 0, filesWritten: 0, toolCalls: 0 };
|
|
@@ -88,7 +122,13 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
88
122
|
const verboseStream = verboseStreamRaw;
|
|
89
123
|
const shortBatch = shortBatchEarly;
|
|
90
124
|
if (verboseStream) {
|
|
91
|
-
verboseStream(
|
|
125
|
+
verboseStream(composeVerboseLine({
|
|
126
|
+
event: 'worker_start',
|
|
127
|
+
ts: new Date().toISOString(),
|
|
128
|
+
batch: shortBatch,
|
|
129
|
+
task: taskIndex,
|
|
130
|
+
worker: resolved.provider.config.model,
|
|
131
|
+
}));
|
|
92
132
|
}
|
|
93
133
|
let prevEventAtMs = verbose ? Date.now() : 0;
|
|
94
134
|
// Wrap whenever we have ANY consumer for InternalRunnerEvent (heartbeat,
|
|
@@ -99,22 +139,40 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
99
139
|
const wrappedOnProgress = needHeartbeat
|
|
100
140
|
? (event) => {
|
|
101
141
|
if (event.kind === 'turn_start') {
|
|
142
|
+
heartbeat?.markEvent('llm');
|
|
102
143
|
if (verbose)
|
|
103
144
|
prevEventAtMs = Date.now();
|
|
104
145
|
if (verboseStream) {
|
|
105
|
-
verboseStream(
|
|
146
|
+
verboseStream(composeVerboseLine({
|
|
147
|
+
event: 'turn_start',
|
|
148
|
+
ts: new Date().toISOString(),
|
|
149
|
+
batch: shortBatch,
|
|
150
|
+
task: taskIndex,
|
|
151
|
+
turn: event.turn,
|
|
152
|
+
provider: event.provider,
|
|
153
|
+
}));
|
|
106
154
|
}
|
|
107
155
|
}
|
|
108
156
|
if (event.kind === 'text_emission') {
|
|
157
|
+
heartbeat?.markEvent('text');
|
|
109
158
|
textEmissionChars += event.chars;
|
|
110
159
|
if (verboseStream && event.chars > 0) {
|
|
111
160
|
const preview = event.preview.length > 60
|
|
112
161
|
? event.preview.slice(0, 57) + '...'
|
|
113
162
|
: event.preview;
|
|
114
|
-
verboseStream(
|
|
163
|
+
verboseStream(composeVerboseLine({
|
|
164
|
+
event: 'text_emission',
|
|
165
|
+
ts: new Date().toISOString(),
|
|
166
|
+
batch: shortBatch,
|
|
167
|
+
task: taskIndex,
|
|
168
|
+
chars: event.chars,
|
|
169
|
+
total: textEmissionChars,
|
|
170
|
+
preview,
|
|
171
|
+
}));
|
|
115
172
|
}
|
|
116
173
|
}
|
|
117
174
|
if (event.kind === 'tool_call') {
|
|
175
|
+
heartbeat?.markEvent('tool');
|
|
118
176
|
progressCounters.toolCalls++;
|
|
119
177
|
const name = event.toolSummary.split('(')[0];
|
|
120
178
|
if (name === 'readFile' || name === 'grep' || name === 'glob' || name === 'listFiles') {
|
|
@@ -137,10 +195,18 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
137
195
|
});
|
|
138
196
|
}
|
|
139
197
|
if (verboseStream) {
|
|
140
|
-
verboseStream(
|
|
198
|
+
verboseStream(composeVerboseLine({
|
|
199
|
+
event: 'tool_call',
|
|
200
|
+
ts: new Date().toISOString(),
|
|
201
|
+
batch: shortBatch,
|
|
202
|
+
task: taskIndex,
|
|
203
|
+
tool: event.toolSummary,
|
|
204
|
+
duration_ms: sincePrevMs,
|
|
205
|
+
}));
|
|
141
206
|
}
|
|
142
207
|
}
|
|
143
208
|
if (event.kind === 'turn_complete') {
|
|
209
|
+
heartbeat?.markEvent('llm');
|
|
144
210
|
const costUSD = computeCostUSD(event.cumulativeInputTokens, event.cumulativeOutputTokens, resolved.provider.config);
|
|
145
211
|
const savedCostUSD = computeSavedCostUSD(costUSD, event.cumulativeInputTokens, event.cumulativeOutputTokens, task.parentModel);
|
|
146
212
|
heartbeat?.updateCost(costUSD, savedCostUSD);
|
|
@@ -160,27 +226,277 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
160
226
|
});
|
|
161
227
|
}
|
|
162
228
|
if (verboseStream) {
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
229
|
+
verboseStream(composeVerboseLine({
|
|
230
|
+
event: 'turn_complete',
|
|
231
|
+
ts: new Date().toISOString(),
|
|
232
|
+
batch: shortBatch,
|
|
233
|
+
task: taskIndex,
|
|
234
|
+
input_tokens: event.cumulativeInputTokens,
|
|
235
|
+
output_tokens: event.cumulativeOutputTokens,
|
|
236
|
+
cost: costUSD,
|
|
237
|
+
duration_ms: turnDurMs,
|
|
238
|
+
provider: resolved.provider.config.model,
|
|
239
|
+
}));
|
|
167
240
|
}
|
|
168
241
|
}
|
|
169
242
|
}
|
|
170
243
|
: undefined;
|
|
171
|
-
|
|
172
|
-
|
|
244
|
+
const cwd = task.cwd ?? process.cwd();
|
|
245
|
+
const taskStartMs = Date.now();
|
|
246
|
+
const commits = [];
|
|
173
247
|
let commitError;
|
|
248
|
+
let specRework = 0;
|
|
249
|
+
let qualityRework = 0;
|
|
250
|
+
let metadataRepair = 0;
|
|
251
|
+
const maxReviewRounds = task.maxReviewRounds ?? 3;
|
|
252
|
+
const maxCostUSD = task.maxCostUSD;
|
|
253
|
+
const reviewRounds = () => ({ spec: specRework, quality: qualityRework, metadata: metadataRepair, cap: maxReviewRounds });
|
|
254
|
+
const taskCostUSD = () => (heartbeat ? heartbeat.getHeartbeatTickInfo().costUSD : null);
|
|
255
|
+
// Builds the result returned when a review loop is abandoned part-way through.
// The loop named by `aborting` ('spec' or 'quality') is reported as
// 'changes_required'. The other loop keeps whatever status `base` already
// carries, falling back to 'approved' (spec) or 'skipped' (quality).
const abortReviewLoop = (base, terminationReason, message, aborting) => {
    const specReviewStatus = aborting === 'spec'
        ? 'changes_required'
        : (base.specReviewStatus ?? 'approved');
    const qualityReviewStatus = aborting === 'quality'
        ? 'changes_required'
        : (base.qualityReviewStatus ?? 'skipped');
    return {
        ...base,
        status: 'incomplete',
        workerStatus: 'review_loop_aborted',
        terminationReason,
        reviewRounds: reviewRounds(),
        error: message,
        specReviewStatus,
        qualityReviewStatus,
    };
};
|
|
269
|
+
const defaultVerification = { status: 'skipped', steps: [], totalDurationMs: 0, skipReason: 'no_command' };
|
|
270
|
+
let latestVerification = defaultVerification;
|
|
271
|
+
// Writes one diagnostic line to the verbose stream, if one is attached.
// Every line carries the event name, an ISO timestamp, the batch id and the
// task index; `fields` is spread last so it can add to or override those.
const emitVerbose = (event, fields) => {
    if (verboseStream) {
        const line = composeVerboseLine({
            event,
            ts: new Date().toISOString(),
            batch: shortBatch,
            task: taskIndex,
            ...fields,
        });
        verboseStream(line);
    }
};
|
|
282
|
+
/**
 * Runs the verify stage for the current task and reports it on the verbose
 * stream and the heartbeat.
 *
 * Emits a `stage_change` (committing -> verifying), moves the heartbeat to the
 * `verifying` stage, awaits `runVerifyStage`, stores the outcome in
 * `latestVerification`, then emits one `verify_step` line per step and a
 * `verify_skipped` line when the stage was skipped.
 *
 * @returns {Promise<object>} the verification object returned by `runVerifyStage`.
 */
async function runVerificationStage() {
    emitVerbose('stage_change', { from: 'committing', to: 'verifying' });
    heartbeat?.transition({
        stage: 'verifying',
        stageIndex: 4,
        reviewRound: undefined,
        // Report the same cap the review loop enforces (the shared
        // `maxReviewRounds` constant) instead of a separate `?? 5` default,
        // so the heartbeat never advertises more rounds than will run.
        maxReviewRounds,
    });
    // Timeout precedence: task setting, then configured default, then 30 minutes.
    const taskTimeoutMs = task.timeoutMs ?? config.defaults.timeoutMs ?? 1_800_000;
    const verification = await runVerifyStage({
        cwd,
        verifyCommand: task.verifyCommand,
        taskTimeoutMs,
        taskStartMs,
    });
    // Remembered so withVerification() can attach it to later results by default.
    latestVerification = verification;
    for (const step of verification.steps) {
        emitVerbose('verify_step', {
            command: step.command,
            status: step.status,
            exit_code: step.exitCode,
            signal: step.signal,
            duration_ms: step.durationMs,
            // Normalise null to undefined before handing it to the line composer.
            error_message: step.errorMessage ?? undefined,
        });
    }
    if (verification.status === 'skipped') {
        emitVerbose('verify_skipped', { reason: verification.skipReason ?? 'no_command', stage: 'verifying' });
    }
    return verification;
}
|
|
312
|
+
/**
 * Returns a shallow copy of `result` with its `verification` property set.
 *
 * @param {object} result - result object to copy; it is not mutated.
 * @param {object} [verification] - verification to attach; defaults to the
 *   most recently recorded `latestVerification`.
 * @returns {object} the copy carrying `verification`.
 */
function withVerification(result, verification = latestVerification) {
    const merged = { ...result };
    merged.verification = verification;
    return merged;
}
|
|
315
|
+
/**
 * Converts a verification whose status is 'error' into a terminal error result.
 *
 * @param {object} base - result to extend.
 * @param {object} verification - outcome of the verify stage.
 * @returns {object|null} `null` unless `verification.status` is 'error';
 *   otherwise `base` overlaid with status 'error', errorCode
 *   'verify_command_error', the recorded commits and the verification.
 */
function verificationErrorResult(base, verification) {
    if (verification.status !== 'error') {
        return null;
    }
    // The first step that did not pass supplies the error message, if it has one.
    const firstNonPassing = verification.steps.find((step) => step.status !== 'passed');
    const errorResult = {
        ...base,
        status: 'error',
        workerStatus: 'done_with_concerns',
        error: firstNonPassing?.errorMessage ?? 'verify command error',
        errorCode: 'verify_command_error',
        commits,
        commitError,
        verification,
    };
    return withVerification(errorResult, verification);
}
|
|
331
|
+
/**
 * Builds the terminal result for the reviewPolicy 'off' path.
 *
 * - verification 'error': returns an error result (status 'error',
 *   workerStatus 'failed', errorCode 'verify_command_error').
 * - verification 'failed': appends a high-severity verification concern and
 *   reports workerStatus 'done_with_concerns'.
 * - otherwise: keeps `base.status` and the terminal-normalised worker status.
 *
 * @param {object} base - result to extend.
 * @param {object} verification - outcome of the verify stage.
 * @returns {object} the terminal result with commits and verification attached.
 */
function resolveOffTerminal(base, verification) {
    const concerns = [...(base.concerns ?? [])];
    let workerStatus = workerStatusForTerminal(base.workerStatus);
    switch (verification.status) {
        case 'failed':
            concerns.push({
                source: 'verification',
                severity: 'high',
                message: 'Verification failed after implementation.',
            });
            workerStatus = 'done_with_concerns';
            break;
        case 'error': {
            // The first step that did not pass supplies the error message, if any.
            const firstNonPassing = verification.steps.find((step) => step.status !== 'passed');
            return withVerification({
                ...base,
                status: 'error',
                workerStatus: 'failed',
                error: firstNonPassing?.errorMessage ?? 'verify command error',
                errorCode: 'verify_command_error',
                commits,
                commitError,
                verification,
            }, verification);
        }
        default:
            break;
    }
    return withVerification({
        ...base,
        status: base.status,
        workerStatus,
        concerns,
        commits,
        commitError,
        verification,
    }, verification);
}
|
|
366
|
+
/**
 * Builds the terminal result for the reviewPolicy 'diff_only' path.
 *
 * A 'reject' verdict yields an error result with errorCode
 * 'diff_review_rejected'. Any other verdict keeps `base.status`, collects the
 * verdict's concerns plus a verification-failure concern and a diff-truncation
 * concern where they apply, and reports 'done_with_concerns' when any concern
 * exists.
 *
 * @param {object} base - result to extend.
 * @param {object} verdict - diff-review verdict (`kind`, `message`, `concerns`).
 * @param {object} verification - outcome of the verify stage.
 * @param {boolean} diffTruncated - whether the reviewed diff was truncated.
 * @returns {object} the terminal result with commits and verification attached.
 */
function resolveDiffOnlyTerminal(base, verdict, verification, diffTruncated) {
    const concerns = [...(base.concerns ?? [])];
    if (verdict.kind === 'reject') {
        const rejectionMessage = verdict.message || 'diff review rejected implementation';
        return withVerification({
            ...base,
            status: 'error',
            workerStatus: 'failed',
            error: rejectionMessage,
            errorCode: 'diff_review_rejected',
            structuredError: {
                code: 'diff_review_rejected',
                message: rejectionMessage,
            },
            concerns,
            commits,
            commitError,
            verification,
        }, verification);
    }
    for (const concern of verdict.concerns) {
        concerns.push(concern);
    }
    if (verification.status === 'failed') {
        concerns.push({
            source: 'verification',
            severity: 'high',
            message: 'Verification failed after implementation.',
        });
    }
    if (diffTruncated) {
        concerns.push({
            source: 'diff_truncated',
            severity: 'medium',
            message: 'Implementation diff exceeded the reviewer evidence byte cap and was truncated.',
        });
    }
    // A failed verification always pushed a concern above, so the list length
    // alone decides whether there is anything to flag.
    const workerStatus = concerns.length !== 0
        ? 'done_with_concerns'
        : workerStatusForTerminal(base.workerStatus);
    return withVerification({
        ...base,
        status: base.status,
        workerStatus,
        concerns,
        commits,
        commitError,
        verification,
    }, verification);
}
|
|
411
|
+
/**
 * Normalises a worker status for a terminal result.
 *
 * @param {string|undefined} status - worker status reported so far.
 * @returns {string} `status` unchanged when it is 'needs_context', 'blocked',
 *   'failed' or 'done_with_concerns'; otherwise 'done'.
 */
function workerStatusForTerminal(status) {
    const passThrough = ['needs_context', 'blocked', 'failed', 'done_with_concerns'];
    if (passThrough.includes(status)) {
        return status;
    }
    return 'done';
}
|
|
416
|
+
/**
 * Appends every commit in the range `from..to` to `commits`, in the order
 * `git rev-list --reverse` prints them.
 *
 * @param {string} from - revision that starts the range (left of `..`).
 * @param {string} [to='HEAD'] - revision that ends the range.
 * @returns {Promise<void>}
 */
async function recordWorkerCommits(from, to = 'HEAD') {
    const range = `${from}..${to}`;
    const listing = await exec('git', ['rev-list', '--reverse', range], { cwd });
    const shas = listing.stdout
        .trim()
        .split('\n')
        .filter((line) => line.length > 0);
    // Read back one commit at a time so `commits` keeps the rev-list order.
    for (const sha of shas) {
        const recorded = await readbackCommit(sha, cwd);
        commits.push(recorded);
    }
}
|
|
423
|
+
/**
 * Runs up to two metadata-repair turns to obtain usable commit metadata.
 *
 * `git status` is captured before and after each turn. If the two outputs
 * differ, `commitError` is set to 'commit_metadata_repair_modified_files' and
 * the repair stops. If no turn yields a commit, `commitError` is set to
 * `commit_metadata_invalid: <last diagnostic>`.
 *
 * @param {string} initialDiagnostic - diagnostic passed to the first turn; an
 *   empty value is replaced by 'no commit block emitted'.
 * @returns {Promise<object|null>} the repaired commit metadata, or `null`.
 */
async function repairCommitMetadata(initialDiagnostic) {
    const statusArgs = ['status', '--porcelain=v1', '-z'];
    let diagnostic = initialDiagnostic || 'no commit block emitted';
    for (let attempt = 0; attempt < 2; attempt += 1) {
        const before = (await exec('git', statusArgs, { cwd })).stdout;
        const repaired = await runMetadataRepairTurn({
            task,
            zodError: diagnostic,
            cwd,
            providerSlot: resolved.slot,
            provider: resolved.provider,
        });
        const after = (await exec('git', statusArgs, { cwd })).stdout;
        if (before !== after) {
            commitError = 'commit_metadata_repair_modified_files';
            return null;
        }
        if (repaired.commit) {
            return repaired.commit;
        }
        diagnostic = repaired.commitDiagnostic ?? 'no commit block emitted';
    }
    commitError = `commit_metadata_invalid: ${diagnostic}`;
    return null;
}
|
|
445
|
+
/**
 * Records the commits resulting from an implementation run.
 *
 * If HEAD differs from `baselineHead`, the commits in `baselineHead..HEAD` are
 * read back into `commits`. If `git status` reports any output, those changes
 * are committed through `runCommitStage`, using the commit metadata from the
 * implementer's report or, when that is missing, from a metadata-repair
 * attempt. When no metadata can be obtained, nothing further is committed.
 *
 * @param {object} implResult - implementer result; `filesWritten` is passed to the commit stage.
 * @param {object|undefined} implReport - parsed report; `commit` and `commitDiagnostic` are read.
 * @param {string} baselineHead - HEAD revision captured before the run.
 * @returns {Promise<void>}
 */
async function captureCommitsAfterImplementation(implResult, implReport, baselineHead) {
    const statusOutput = (await exec('git', ['status', '--porcelain=v1'], { cwd })).stdout;
    const currentHead = (await exec('git', ['rev-parse', 'HEAD'], { cwd })).stdout.trim();
    if (currentHead !== baselineHead) {
        await recordWorkerCommits(baselineHead, 'HEAD');
    }
    if (statusOutput.length === 0) {
        return;
    }
    const commitMetadata = implReport?.commit
        ?? await repairCommitMetadata(implReport?.commitDiagnostic ?? 'no commit block emitted');
    if (!commitMetadata) {
        return;
    }
    const created = await runCommitStage({
        cwd,
        filesWritten: implResult.filesWritten,
        commit: commitMetadata,
    });
    commits.push(created);
}
|
|
174
462
|
try {
|
|
463
|
+
// The dirty-tree precondition + git baseline only apply to artifact-producing tasks
|
|
464
|
+
// (those with autoCommit === true). Non-artifact presets — audit, review, verify,
|
|
465
|
+
// debug — neither produce commits nor read git state, so they bypass the check
|
|
466
|
+
// entirely. Per spec Section A: "Non-artifact tasks (audits, analyses, read-only
|
|
467
|
+
// investigations) skip stages 3 and 4."
|
|
468
|
+
const isArtifactProducing = task.autoCommit === true;
|
|
469
|
+
let baselineHead = '';
|
|
470
|
+
if (isArtifactProducing) {
|
|
471
|
+
baselineHead = (await exec('git', ['rev-parse', 'HEAD'], { cwd })).stdout.trim();
|
|
472
|
+
const baselinePorcelain = (await exec('git', ['status', '--porcelain=v1', '-z'], { cwd })).stdout;
|
|
473
|
+
if (baselinePorcelain.length !== 0) {
|
|
474
|
+
return withVerification({
|
|
475
|
+
output: `Sub-agent error: task.cwd ${cwd} had pre-existing modifications`,
|
|
476
|
+
status: 'error',
|
|
477
|
+
usage: { inputTokens: 0, outputTokens: 0, totalTokens: 0, costUSD: null },
|
|
478
|
+
turns: 0,
|
|
479
|
+
filesRead: [],
|
|
480
|
+
filesWritten: [],
|
|
481
|
+
toolCalls: [],
|
|
482
|
+
outputIsDiagnostic: true,
|
|
483
|
+
escalationLog: [],
|
|
484
|
+
error: `task.cwd ${cwd} had pre-existing modifications`,
|
|
485
|
+
errorCode: 'dirty_worktree',
|
|
486
|
+
commits,
|
|
487
|
+
});
|
|
488
|
+
}
|
|
489
|
+
}
|
|
175
490
|
const implResult = await delegateWithEscalation(withDoneCondition(task), [resolved.provider], { explicitlyPinned: false, escalateToProvider: escalationProvider, onProgress: wrappedOnProgress });
|
|
176
491
|
const implReport = implResult.status === 'ok' ? parseStructuredReport(implResult.output) : undefined;
|
|
177
492
|
const workerStatus = extractWorkerStatus(implReport);
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
const commitResult = autoCommitFiles(implResult.filesWritten, implReport?.summary ?? undefined, task.cwd ?? process.cwd());
|
|
181
|
-
commitSha = commitResult.sha;
|
|
182
|
-
commitError = commitResult.error;
|
|
493
|
+
if (implResult.status === 'ok' && isArtifactProducing) {
|
|
494
|
+
await captureCommitsAfterImplementation(implResult, implReport, baselineHead);
|
|
183
495
|
}
|
|
496
|
+
const verification = isArtifactProducing ? await runVerificationStage() : defaultVerification;
|
|
497
|
+
const verifyError = verificationErrorResult(implResult, verification);
|
|
498
|
+
if (verifyError)
|
|
499
|
+
return verifyError;
|
|
184
500
|
const filePathsInteracted = task.filePaths && task.filePaths.length > 0
|
|
185
501
|
? [...(implResult.filesRead ?? []), ...implResult.filesWritten].some(f => task.filePaths.some(fp => f === fp || f.endsWith('/' + fp) || f.endsWith(fp)))
|
|
186
502
|
: true;
|
|
@@ -220,8 +536,9 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
220
536
|
qualityReviewer: null,
|
|
221
537
|
},
|
|
222
538
|
fileArtifactsMissing: earlyFileArtifactsMissing,
|
|
223
|
-
|
|
539
|
+
commits,
|
|
224
540
|
commitError,
|
|
541
|
+
verification,
|
|
225
542
|
};
|
|
226
543
|
}
|
|
227
544
|
if (workerStatus === 'needs_context' || workerStatus === 'blocked') {
|
|
@@ -243,12 +560,14 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
243
560
|
qualityReviewer: null,
|
|
244
561
|
},
|
|
245
562
|
fileArtifactsMissing: implResult.status === 'ok' ? checkOutputTargets(outputTargets) : undefined,
|
|
246
|
-
|
|
563
|
+
commits,
|
|
247
564
|
commitError,
|
|
565
|
+
verification,
|
|
248
566
|
};
|
|
249
567
|
}
|
|
250
568
|
if (reviewPolicy === 'off') {
|
|
251
|
-
|
|
569
|
+
emitVerbose('stage_change', { from: 'verifying', to: 'terminal' });
|
|
570
|
+
const terminal = resolveOffTerminal({
|
|
252
571
|
...implResult,
|
|
253
572
|
workerStatus,
|
|
254
573
|
specReviewStatus: 'skipped',
|
|
@@ -267,9 +586,8 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
267
586
|
},
|
|
268
587
|
implementationReport: implReport,
|
|
269
588
|
fileArtifactsMissing: implResult.status === 'ok' ? checkOutputTargets(outputTargets) : undefined,
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
};
|
|
589
|
+
}, verification);
|
|
590
|
+
return terminal;
|
|
273
591
|
}
|
|
274
592
|
let otherProvider;
|
|
275
593
|
try {
|
|
@@ -294,8 +612,9 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
294
612
|
qualityReviewer: null,
|
|
295
613
|
},
|
|
296
614
|
fileArtifactsMissing: implResult.status === 'ok' ? checkOutputTargets(outputTargets) : undefined,
|
|
297
|
-
|
|
615
|
+
commits,
|
|
298
616
|
commitError,
|
|
617
|
+
verification,
|
|
299
618
|
};
|
|
300
619
|
}
|
|
301
620
|
const reviewModel = otherProvider.config.model;
|
|
@@ -306,23 +625,72 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
306
625
|
};
|
|
307
626
|
let fileContents = await readImplementerFileContents(implResult.filesWritten, task.cwd);
|
|
308
627
|
const effectiveImplReport = implReport ?? buildFallbackImplReport(implResult);
|
|
628
|
+
const evidence = isArtifactProducing
|
|
629
|
+
? await buildEvidence({ cwd, baselineHead, commits, verification, reviewPolicy })
|
|
630
|
+
: { block: '', diffTruncated: false, fullDiff: '' };
|
|
631
|
+
if (reviewPolicy === 'diff_only') {
|
|
632
|
+
emitVerbose('stage_change', { from: 'verifying', to: 'diff_review' });
|
|
633
|
+
heartbeat?.transition({
|
|
634
|
+
stage: 'diff_review',
|
|
635
|
+
stageIndex: 2,
|
|
636
|
+
reviewRound: 1,
|
|
637
|
+
maxReviewRounds,
|
|
638
|
+
});
|
|
639
|
+
const verdict = await runDiffReview({
|
|
640
|
+
cwd,
|
|
641
|
+
diff: evidence.fullDiff,
|
|
642
|
+
diffTruncated: evidence.diffTruncated,
|
|
643
|
+
verification,
|
|
644
|
+
worker: { call: (prompt) => otherProvider.run(prompt) },
|
|
645
|
+
});
|
|
646
|
+
emitVerbose('review_decision', { stage: 'diff_review', verdict: verdict.kind, round: 1 });
|
|
647
|
+
return resolveDiffOnlyTerminal({
|
|
648
|
+
...implResult,
|
|
649
|
+
workerStatus,
|
|
650
|
+
specReviewStatus: 'skipped',
|
|
651
|
+
qualityReviewStatus: 'skipped',
|
|
652
|
+
specReviewReason: 'skipped: reviewPolicy is diff_only',
|
|
653
|
+
qualityReviewReason: 'skipped: reviewPolicy is diff_only',
|
|
654
|
+
implementationReport: effectiveImplReport,
|
|
655
|
+
fileArtifactsMissing: implResult.status === 'ok' ? checkOutputTargets(outputTargets) : undefined,
|
|
656
|
+
agents: {
|
|
657
|
+
implementer: resolved.slot,
|
|
658
|
+
specReviewer: 'skipped',
|
|
659
|
+
qualityReviewer: 'skipped',
|
|
660
|
+
},
|
|
661
|
+
models: {
|
|
662
|
+
implementer: implModel,
|
|
663
|
+
specReviewer: reviewModel,
|
|
664
|
+
qualityReviewer: null,
|
|
665
|
+
},
|
|
666
|
+
}, verdict, verification, evidence.diffTruncated);
|
|
667
|
+
}
|
|
309
668
|
heartbeat?.transition({
|
|
310
669
|
stage: 'spec_review', stageIndex: 2,
|
|
311
670
|
reviewRound: 1, maxReviewRounds: task.maxReviewRounds ?? 5,
|
|
312
671
|
});
|
|
313
|
-
let specResult = await runSpecReview(otherProvider, packet, effectiveImplReport, fileContents, implResult.toolCalls, task.planContext);
|
|
672
|
+
let specResult = await runSpecReview(otherProvider, packet, effectiveImplReport, fileContents, implResult.toolCalls, task.planContext, evidence.block);
|
|
314
673
|
let finalImplResult = implResult;
|
|
315
674
|
let finalImplReport = effectiveImplReport;
|
|
316
675
|
let specStatus = specResult.status;
|
|
317
676
|
let specReport = specResult.report;
|
|
318
677
|
if (specStatus === 'changes_required') {
|
|
319
678
|
let prevSpecFindings = [];
|
|
320
|
-
let round = 0;
|
|
321
679
|
while (true) {
|
|
322
|
-
|
|
680
|
+
if (specRework + qualityRework >= maxReviewRounds) {
|
|
681
|
+
return abortReviewLoop(finalImplResult, 'round_cap', 'review round cap reached before spec rework', 'spec');
|
|
682
|
+
}
|
|
683
|
+
const currentCostUSD = taskCostUSD();
|
|
684
|
+
if (currentCostUSD !== null && maxCostUSD !== undefined && currentCostUSD >= 0.8 * maxCostUSD) {
|
|
685
|
+
emitVerbose('cost_check', { stage: 'spec_rework', tripped: true, cost_used_usd: currentCostUSD, cost_cap_usd: maxCostUSD, cost_available: true });
|
|
686
|
+
return abortReviewLoop(finalImplResult, 'cost_ceiling', 'cost ceiling reached before spec rework', 'spec');
|
|
687
|
+
}
|
|
688
|
+
emitVerbose('stage_change', { from: 'spec_review', to: 'spec_rework', round: specRework + 1, cap: maxReviewRounds });
|
|
689
|
+
specRework++;
|
|
690
|
+
const round = specRework;
|
|
323
691
|
heartbeat?.transition({
|
|
324
692
|
stage: 'spec_rework', stageIndex: 3,
|
|
325
|
-
reviewRound: round, maxReviewRounds
|
|
693
|
+
reviewRound: round, maxReviewRounds,
|
|
326
694
|
});
|
|
327
695
|
const feedback = specResult.findings.length > 0
|
|
328
696
|
? `\n\n## Spec Review Feedback (round ${round}):\n${specResult.findings.map(f => `- ${f}`).join('\n')}`
|
|
@@ -330,15 +698,6 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
330
698
|
const reworkPrompt = `${task.prompt}${feedback}`;
|
|
331
699
|
const reworkTask = withDoneCondition({ ...task, prompt: reworkPrompt });
|
|
332
700
|
const reworkResult = await delegateWithEscalation(reworkTask, [resolved.provider], { explicitlyPinned: true, onProgress: wrappedOnProgress });
|
|
333
|
-
// Auto-commit rework changes
|
|
334
|
-
if (task.autoCommit && reworkResult.status === 'ok' && reworkResult.filesWritten.length > 0) {
|
|
335
|
-
const reworkReport = parseStructuredReport(reworkResult.output);
|
|
336
|
-
const reworkCommit = autoCommitFiles(reworkResult.filesWritten, reworkReport.summary ?? undefined, task.cwd ?? process.cwd());
|
|
337
|
-
if (reworkCommit.sha)
|
|
338
|
-
commitSha = reworkCommit.sha;
|
|
339
|
-
if (reworkCommit.error)
|
|
340
|
-
commitError = reworkCommit.error;
|
|
341
|
-
}
|
|
342
701
|
finalImplResult = reworkResult;
|
|
343
702
|
const reworkReport = parseStructuredReport(reworkResult.output);
|
|
344
703
|
finalImplReport = reworkReport.summary ? reworkReport : buildFallbackImplReport(reworkResult);
|
|
@@ -346,9 +705,9 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
346
705
|
fileContents = reworkContents;
|
|
347
706
|
heartbeat?.transition({
|
|
348
707
|
stage: 'spec_review', stageIndex: 2,
|
|
349
|
-
reviewRound: round + 1, maxReviewRounds
|
|
708
|
+
reviewRound: round + 1, maxReviewRounds,
|
|
350
709
|
});
|
|
351
|
-
specResult = await runSpecReview(otherProvider, packet, finalImplReport, reworkContents, reworkResult.toolCalls, task.planContext);
|
|
710
|
+
specResult = await runSpecReview(otherProvider, packet, finalImplReport, reworkContents, reworkResult.toolCalls, task.planContext, evidence.block);
|
|
352
711
|
specStatus = specResult.status;
|
|
353
712
|
specReport = specResult.report;
|
|
354
713
|
if (specStatus === 'approved')
|
|
@@ -358,25 +717,32 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
358
717
|
if (currentFindings === prevFindings && currentFindings !== '')
|
|
359
718
|
break;
|
|
360
719
|
prevSpecFindings = specResult.findings;
|
|
361
|
-
if (round >= (task.maxReviewRounds ?? 5))
|
|
362
|
-
break;
|
|
363
720
|
}
|
|
364
721
|
}
|
|
365
722
|
let qualityResult = { status: 'skipped', report: undefined, findings: [] };
|
|
366
723
|
if (reviewPolicy === 'full') {
|
|
367
724
|
heartbeat?.transition({
|
|
368
725
|
stage: 'quality_review', stageIndex: 4,
|
|
369
|
-
reviewRound: 1, maxReviewRounds
|
|
726
|
+
reviewRound: 1, maxReviewRounds,
|
|
370
727
|
});
|
|
371
|
-
qualityResult = await runQualityReview(otherProvider, packet, specReport ?? finalImplReport, fileContents, finalImplResult.toolCalls, finalImplResult.filesWritten);
|
|
728
|
+
qualityResult = await runQualityReview(otherProvider, packet, specReport ?? finalImplReport, fileContents, finalImplResult.toolCalls, finalImplResult.filesWritten, evidence.block);
|
|
372
729
|
if (qualityResult.status === 'changes_required') {
|
|
373
730
|
let prevQualityFindings = [];
|
|
374
|
-
let round = 0;
|
|
375
731
|
while (true) {
|
|
376
|
-
|
|
732
|
+
if (specRework + qualityRework >= maxReviewRounds) {
|
|
733
|
+
return abortReviewLoop(finalImplResult, 'round_cap', 'review round cap reached before quality rework', 'quality');
|
|
734
|
+
}
|
|
735
|
+
const currentCostUSD = taskCostUSD();
|
|
736
|
+
if (currentCostUSD !== null && maxCostUSD !== undefined && currentCostUSD >= 0.8 * maxCostUSD) {
|
|
737
|
+
emitVerbose('cost_check', { stage: 'quality_rework', tripped: true, cost_used_usd: currentCostUSD, cost_cap_usd: maxCostUSD, cost_available: true });
|
|
738
|
+
return abortReviewLoop(finalImplResult, 'cost_ceiling', 'cost ceiling reached before quality rework', 'quality');
|
|
739
|
+
}
|
|
740
|
+
emitVerbose('stage_change', { from: 'quality_review', to: 'quality_rework', round: qualityRework + 1, cap: maxReviewRounds });
|
|
741
|
+
qualityRework++;
|
|
742
|
+
const round = qualityRework;
|
|
377
743
|
heartbeat?.transition({
|
|
378
744
|
stage: 'quality_rework', stageIndex: 5,
|
|
379
|
-
reviewRound: round, maxReviewRounds
|
|
745
|
+
reviewRound: round, maxReviewRounds,
|
|
380
746
|
});
|
|
381
747
|
const feedback = qualityResult.findings.length > 0
|
|
382
748
|
? `\n\n## Quality Review Feedback (round ${round}):\n${qualityResult.findings.map(f => `- ${f}`).join('\n')}`
|
|
@@ -384,24 +750,15 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
384
750
|
const reworkPrompt = `${task.prompt}${feedback}`;
|
|
385
751
|
const reworkTask = withDoneCondition({ ...task, prompt: reworkPrompt });
|
|
386
752
|
const reworkResult = await delegateWithEscalation(reworkTask, [resolved.provider], { explicitlyPinned: true, onProgress: wrappedOnProgress });
|
|
387
|
-
// Auto-commit rework changes
|
|
388
|
-
if (task.autoCommit && reworkResult.status === 'ok' && reworkResult.filesWritten.length > 0) {
|
|
389
|
-
const reworkReport = parseStructuredReport(reworkResult.output);
|
|
390
|
-
const reworkCommit = autoCommitFiles(reworkResult.filesWritten, reworkReport.summary ?? undefined, task.cwd ?? process.cwd());
|
|
391
|
-
if (reworkCommit.sha)
|
|
392
|
-
commitSha = reworkCommit.sha;
|
|
393
|
-
if (reworkCommit.error)
|
|
394
|
-
commitError = reworkCommit.error;
|
|
395
|
-
}
|
|
396
753
|
finalImplResult = reworkResult;
|
|
397
754
|
const reworkReport = parseStructuredReport(reworkResult.output);
|
|
398
755
|
finalImplReport = reworkReport.summary ? reworkReport : buildFallbackImplReport(reworkResult);
|
|
399
756
|
const reworkContents = await readImplementerFileContents(reworkResult.filesWritten, task.cwd);
|
|
400
757
|
heartbeat?.transition({
|
|
401
758
|
stage: 'quality_review', stageIndex: 4,
|
|
402
|
-
reviewRound: round + 1, maxReviewRounds
|
|
759
|
+
reviewRound: round + 1, maxReviewRounds,
|
|
403
760
|
});
|
|
404
|
-
qualityResult = await runQualityReview(otherProvider, packet, finalImplReport, reworkContents, reworkResult.toolCalls, reworkResult.filesWritten);
|
|
761
|
+
qualityResult = await runQualityReview(otherProvider, packet, finalImplReport, reworkContents, reworkResult.toolCalls, reworkResult.filesWritten, evidence.block);
|
|
405
762
|
if (qualityResult.status === 'approved')
|
|
406
763
|
break;
|
|
407
764
|
const currentFindings = [...qualityResult.findings].sort().join('\0');
|
|
@@ -409,12 +766,28 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
409
766
|
if (currentFindings === prevFindings && currentFindings !== '')
|
|
410
767
|
break;
|
|
411
768
|
prevQualityFindings = qualityResult.findings;
|
|
412
|
-
if (round >= (task.maxReviewRounds ?? 5))
|
|
413
|
-
break;
|
|
414
769
|
}
|
|
415
770
|
}
|
|
416
771
|
}
|
|
417
772
|
const finalReport = specReport ?? finalImplReport;
|
|
773
|
+
const concerns = [...(finalImplResult.concerns ?? [])];
|
|
774
|
+
let finalWorkerStatus = workerStatus;
|
|
775
|
+
if (verification.status === 'failed') {
|
|
776
|
+
concerns.push({
|
|
777
|
+
source: 'verification',
|
|
778
|
+
severity: 'high',
|
|
779
|
+
message: 'Verification failed after implementation.',
|
|
780
|
+
});
|
|
781
|
+
if (finalWorkerStatus === 'done')
|
|
782
|
+
finalWorkerStatus = 'done_with_concerns';
|
|
783
|
+
}
|
|
784
|
+
if (evidence.diffTruncated) {
|
|
785
|
+
concerns.push({
|
|
786
|
+
source: 'diff_truncated',
|
|
787
|
+
severity: 'medium',
|
|
788
|
+
message: 'Implementation diff exceeded the reviewer evidence byte cap and was truncated.',
|
|
789
|
+
});
|
|
790
|
+
}
|
|
418
791
|
const aggregated = aggregateResult(finalReport, specReport, qualityResult.report, specStatus, qualityResult.status);
|
|
419
792
|
// File artifact verification: check whether output targets exist on disk after all work.
|
|
420
793
|
// Only applies when status is ok; non-ok statuses skip verification entirely.
|
|
@@ -432,7 +805,8 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
432
805
|
return {
|
|
433
806
|
...finalImplResult,
|
|
434
807
|
status: finalStatus,
|
|
435
|
-
workerStatus,
|
|
808
|
+
workerStatus: finalWorkerStatus,
|
|
809
|
+
concerns,
|
|
436
810
|
specReviewStatus: specStatus,
|
|
437
811
|
qualityReviewStatus: qualityResult.status,
|
|
438
812
|
specReviewReason: specResult.errorReason,
|
|
@@ -453,8 +827,9 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
453
827
|
qualityReviewer: reviewPolicy === 'full' ? reviewModel : null,
|
|
454
828
|
},
|
|
455
829
|
fileArtifactsMissing,
|
|
456
|
-
|
|
830
|
+
commits,
|
|
457
831
|
commitError,
|
|
832
|
+
verification,
|
|
458
833
|
};
|
|
459
834
|
}
|
|
460
835
|
finally {
|