@zhixuan92/multi-model-agent-core 3.2.0 → 3.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +4 -3
- package/dist/auto-commit.d.ts +8 -1
- package/dist/auto-commit.d.ts.map +1 -1
- package/dist/auto-commit.js +6 -3
- package/dist/auto-commit.js.map +1 -1
- package/dist/batch-cache.d.ts +1 -1
- package/dist/batch-cache.d.ts.map +1 -1
- package/dist/batch-cache.js +3 -5
- package/dist/batch-cache.js.map +1 -1
- package/dist/diagnostics/disconnect-log.d.ts +8 -27
- package/dist/diagnostics/disconnect-log.d.ts.map +1 -1
- package/dist/diagnostics/disconnect-log.js +10 -49
- package/dist/diagnostics/disconnect-log.js.map +1 -1
- package/dist/diagnostics/request-spill.d.ts +16 -0
- package/dist/diagnostics/request-spill.d.ts.map +1 -0
- package/dist/diagnostics/request-spill.js +23 -0
- package/dist/diagnostics/request-spill.js.map +1 -0
- package/dist/diagnostics/verbose-line.d.ts +12 -0
- package/dist/diagnostics/verbose-line.d.ts.map +1 -0
- package/dist/diagnostics/verbose-line.js +80 -0
- package/dist/diagnostics/verbose-line.js.map +1 -0
- package/dist/executors/debug.js +1 -1
- package/dist/executors/debug.js.map +1 -1
- package/dist/executors/delegate.d.ts.map +1 -1
- package/dist/executors/delegate.js +6 -2
- package/dist/executors/delegate.js.map +1 -1
- package/dist/executors/execute-plan.d.ts.map +1 -1
- package/dist/executors/execute-plan.js +9 -2
- package/dist/executors/execute-plan.js.map +1 -1
- package/dist/executors/investigate.d.ts +11 -0
- package/dist/executors/investigate.d.ts.map +1 -0
- package/dist/executors/investigate.js +101 -0
- package/dist/executors/investigate.js.map +1 -0
- package/dist/executors/retry.d.ts.map +1 -1
- package/dist/executors/retry.js +4 -1
- package/dist/executors/retry.js.map +1 -1
- package/dist/heartbeat.d.ts +7 -0
- package/dist/heartbeat.d.ts.map +1 -1
- package/dist/heartbeat.js +28 -1
- package/dist/heartbeat.js.map +1 -1
- package/dist/intake/compilers/delegate.d.ts +3 -1
- package/dist/intake/compilers/delegate.d.ts.map +1 -1
- package/dist/intake/compilers/delegate.js +23 -12
- package/dist/intake/compilers/delegate.js.map +1 -1
- package/dist/intake/compilers/execute-plan.d.ts +6 -1
- package/dist/intake/compilers/execute-plan.d.ts.map +1 -1
- package/dist/intake/compilers/execute-plan.js +8 -1
- package/dist/intake/compilers/execute-plan.js.map +1 -1
- package/dist/intake/compilers/investigate.d.ts +12 -0
- package/dist/intake/compilers/investigate.d.ts.map +1 -0
- package/dist/intake/compilers/investigate.js +36 -0
- package/dist/intake/compilers/investigate.js.map +1 -0
- package/dist/intake/resolve.d.ts.map +1 -1
- package/dist/intake/resolve.js +3 -1
- package/dist/intake/resolve.js.map +1 -1
- package/dist/intake/types.d.ts +9 -2
- package/dist/intake/types.d.ts.map +1 -1
- package/dist/model-profiles.json +10 -6
- package/dist/reporting/compose-investigate-headline.d.ts +11 -0
- package/dist/reporting/compose-investigate-headline.d.ts.map +1 -0
- package/dist/reporting/compose-investigate-headline.js +29 -0
- package/dist/reporting/compose-investigate-headline.js.map +1 -0
- package/dist/reporting/derive-investigate-status.d.ts +17 -0
- package/dist/reporting/derive-investigate-status.d.ts.map +1 -0
- package/dist/reporting/derive-investigate-status.js +30 -0
- package/dist/reporting/derive-investigate-status.js.map +1 -0
- package/dist/reporting/parse-investigation-report.d.ts +39 -0
- package/dist/reporting/parse-investigation-report.d.ts.map +1 -0
- package/dist/reporting/parse-investigation-report.js +150 -0
- package/dist/reporting/parse-investigation-report.js.map +1 -0
- package/dist/reporting/structured-report.d.ts +20 -0
- package/dist/reporting/structured-report.d.ts.map +1 -1
- package/dist/reporting/structured-report.js +76 -3
- package/dist/reporting/structured-report.js.map +1 -1
- package/dist/review/aggregate-result.d.ts.map +1 -1
- package/dist/review/aggregate-result.js +5 -0
- package/dist/review/aggregate-result.js.map +1 -1
- package/dist/review/diff-review.d.ts +29 -0
- package/dist/review/diff-review.d.ts.map +1 -0
- package/dist/review/diff-review.js +53 -0
- package/dist/review/diff-review.js.map +1 -0
- package/dist/review/evidence.d.ts +15 -0
- package/dist/review/evidence.d.ts.map +1 -0
- package/dist/review/evidence.js +26 -0
- package/dist/review/evidence.js.map +1 -0
- package/dist/review/quality-reviewer.d.ts +1 -1
- package/dist/review/quality-reviewer.d.ts.map +1 -1
- package/dist/review/quality-reviewer.js +5 -3
- package/dist/review/quality-reviewer.js.map +1 -1
- package/dist/review/spec-reviewer.d.ts +1 -1
- package/dist/review/spec-reviewer.d.ts.map +1 -1
- package/dist/review/spec-reviewer.js +3 -2
- package/dist/review/spec-reviewer.js.map +1 -1
- package/dist/run-tasks/commit-stage.d.ts +16 -0
- package/dist/run-tasks/commit-stage.d.ts.map +1 -0
- package/dist/run-tasks/commit-stage.js +52 -0
- package/dist/run-tasks/commit-stage.js.map +1 -0
- package/dist/run-tasks/fallback-report.d.ts.map +1 -1
- package/dist/run-tasks/fallback-report.js +1 -0
- package/dist/run-tasks/fallback-report.js.map +1 -1
- package/dist/run-tasks/metadata-repair.d.ts +15 -0
- package/dist/run-tasks/metadata-repair.d.ts.map +1 -0
- package/dist/run-tasks/metadata-repair.js +30 -0
- package/dist/run-tasks/metadata-repair.js.map +1 -0
- package/dist/run-tasks/reviewed-lifecycle.d.ts.map +1 -1
- package/dist/run-tasks/reviewed-lifecycle.js +474 -95
- package/dist/run-tasks/reviewed-lifecycle.js.map +1 -1
- package/dist/run-tasks/verify-stage.d.ts +25 -0
- package/dist/run-tasks/verify-stage.d.ts.map +1 -0
- package/dist/run-tasks/verify-stage.js +168 -0
- package/dist/run-tasks/verify-stage.js.map +1 -0
- package/dist/runners/base/result-builders.d.ts +26 -1
- package/dist/runners/base/result-builders.d.ts.map +1 -1
- package/dist/runners/base/result-builders.js +5 -0
- package/dist/runners/base/result-builders.js.map +1 -1
- package/dist/runners/prevention.d.ts.map +1 -1
- package/dist/runners/prevention.js +18 -0
- package/dist/runners/prevention.js.map +1 -1
- package/dist/runners/types.d.ts +4 -1
- package/dist/runners/types.d.ts.map +1 -1
- package/dist/tool-schemas/audit.d.ts +2 -2
- package/dist/tool-schemas/delegate.d.ts +9 -0
- package/dist/tool-schemas/delegate.d.ts.map +1 -1
- package/dist/tool-schemas/delegate.js +4 -0
- package/dist/tool-schemas/delegate.js.map +1 -1
- package/dist/tool-schemas/execute-plan.d.ts +13 -2
- package/dist/tool-schemas/execute-plan.d.ts.map +1 -1
- package/dist/tool-schemas/execute-plan.js +22 -4
- package/dist/tool-schemas/execute-plan.js.map +1 -1
- package/dist/tool-schemas/investigate.d.ts +48 -0
- package/dist/tool-schemas/investigate.d.ts.map +1 -0
- package/dist/tool-schemas/investigate.js +13 -0
- package/dist/tool-schemas/investigate.js.map +1 -0
- package/dist/tool-schemas/review.d.ts +1 -1
- package/dist/types.d.ts +36 -4
- package/dist/types.d.ts.map +1 -1
- package/dist/types.js.map +1 -1
- package/package.json +37 -1
|
@@ -1,16 +1,24 @@
|
|
|
1
|
+
import { execFile } from 'node:child_process';
|
|
2
|
+
import { promisify } from 'node:util';
|
|
1
3
|
import { computeCostUSD, computeSavedCostUSD } from '../types.js';
|
|
2
4
|
import { createProvider } from '../provider.js';
|
|
3
5
|
import { delegateWithEscalation } from '../delegate-with-escalation.js';
|
|
4
6
|
import { HeartbeatTimer } from '../heartbeat.js';
|
|
5
7
|
import { runSpecReview } from '../review/spec-reviewer.js';
|
|
6
8
|
import { runQualityReview } from '../review/quality-reviewer.js';
|
|
9
|
+
import { runDiffReview } from '../review/diff-review.js';
|
|
7
10
|
import { aggregateResult } from '../review/aggregate-result.js';
|
|
11
|
+
import { buildEvidence } from '../review/evidence.js';
|
|
8
12
|
import { parseStructuredReport } from '../reporting/structured-report.js';
|
|
9
|
-
import {
|
|
13
|
+
import { runCommitStage, readbackCommit } from './commit-stage.js';
|
|
14
|
+
import { runVerifyStage } from './verify-stage.js';
|
|
15
|
+
import { runMetadataRepairTurn } from './metadata-repair.js';
|
|
10
16
|
import { partitionFilePaths, checkOutputTargets } from '../file-artifact-check.js';
|
|
11
17
|
import { extractWorkerStatus } from './worker-status.js';
|
|
12
18
|
import { buildFallbackImplReport, readImplementerFileContents } from './fallback-report.js';
|
|
19
|
+
import { composeVerboseLine } from '../diagnostics/verbose-line.js';
|
|
13
20
|
import { withDoneCondition } from './execute-task.js';
|
|
21
|
+
const exec = promisify(execFile);
|
|
14
22
|
export async function executeReviewedLifecycle(task, resolved, config, taskIndex, onProgress, heartbeatWiring, diagnostics) {
|
|
15
23
|
const reviewPolicy = task.reviewPolicy ?? 'full';
|
|
16
24
|
const otherSlot = resolved.slot === 'standard' ? 'complex' : 'standard';
|
|
@@ -34,6 +42,20 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
34
42
|
: undefined;
|
|
35
43
|
const verboseBatchIdEarly = heartbeatWiring?.batchId;
|
|
36
44
|
const shortBatchEarly = verboseBatchIdEarly ? verboseBatchIdEarly.slice(0, 8) : '????????';
|
|
45
|
+
const taskEventLogger = diagnostics?.logger;
|
|
46
|
+
/**
 * Publish one task-scoped diagnostic event to the available sinks.
 *
 * - Structured logger: used only when a logger exists AND a batch id is known;
 *   fields whose value is `undefined` are dropped before emitting.
 * - Verbose stream: used whenever `verboseStreamRaw` is set; receives the
 *   fields as passed in, formatted by `composeVerboseLine`.
 *
 * @param {string} event - Event name (e.g. 'heartbeat', 'stage_change').
 * @param {object} fields - Event payload; `undefined` values are allowed.
 */
const emitTaskEvent = (event, fields) => {
    const canLog = taskEventLogger && verboseBatchIdEarly !== undefined;
    if (canLog) {
        const defined = {};
        Object.entries(fields).forEach(([name, fieldValue]) => {
            if (fieldValue !== undefined) {
                defined[name] = fieldValue;
            }
        });
        taskEventLogger.emit({ event, batchId: verboseBatchIdEarly, taskIndex, ...defined });
    }
    if (!verboseStreamRaw) {
        return;
    }
    const line = composeVerboseLine({
        event,
        ts: new Date().toISOString(),
        batch: shortBatchEarly,
        task: taskIndex,
        ...fields,
    });
    verboseStreamRaw(line);
};
|
|
37
59
|
// Start the heartbeat whenever there's a downstream consumer:
|
|
38
60
|
// - onProgress (external progress callback from the runTasks caller)
|
|
39
61
|
// - verbose (stderr stream needs the heartbeat's tool_call / turn_complete relay)
|
|
@@ -50,23 +72,30 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
50
72
|
const synthOnProgress = onProgress ?? (() => { });
|
|
51
73
|
const heartbeat = needHeartbeat
|
|
52
74
|
? new HeartbeatTimer((event) => {
|
|
53
|
-
if (
|
|
75
|
+
if (event.kind === 'heartbeat') {
|
|
54
76
|
// Emit on every heartbeat tick so the operator can confirm
|
|
55
77
|
// the timer is actually firing. Stage-change lines are richer
|
|
56
78
|
// but fire only on transitions; plain ticks let you see
|
|
57
79
|
// per-5s progress inside a long-running stage.
|
|
58
80
|
if (event.stage !== lastStageSeen) {
|
|
59
81
|
if (lastStageSeen !== undefined) {
|
|
60
|
-
|
|
82
|
+
emitTaskEvent('stage_change', { from: lastStageSeen, to: event.stage });
|
|
61
83
|
}
|
|
62
84
|
lastStageSeen = event.stage;
|
|
63
85
|
}
|
|
64
|
-
const costStr = event.costUSD !== null ? ` cost=$${event.costUSD.toFixed(4)}` : '';
|
|
65
|
-
const roundStr = event.reviewRound !== undefined && event.maxReviewRounds !== undefined
|
|
66
|
-
? ` round=${event.reviewRound}/${event.maxReviewRounds}`
|
|
67
|
-
: '';
|
|
68
86
|
const sinceLastMs = Date.now() - prevEventAtMs;
|
|
69
|
-
|
|
87
|
+
emitTaskEvent('heartbeat', {
|
|
88
|
+
elapsed: event.elapsed,
|
|
89
|
+
stage: event.stage,
|
|
90
|
+
round: event.reviewRound,
|
|
91
|
+
cap: event.maxReviewRounds,
|
|
92
|
+
tools: event.progress.toolCalls,
|
|
93
|
+
read: event.progress.filesRead,
|
|
94
|
+
wrote: event.progress.filesWritten,
|
|
95
|
+
text: textEmissionChars,
|
|
96
|
+
cost: event.costUSD,
|
|
97
|
+
idle_ms: sinceLastMs,
|
|
98
|
+
});
|
|
70
99
|
}
|
|
71
100
|
synthOnProgress(taskIndex, event);
|
|
72
101
|
}, {
|
|
@@ -77,19 +106,16 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
77
106
|
})
|
|
78
107
|
: undefined;
|
|
79
108
|
heartbeat?.start(stageCount);
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
109
|
+
emitTaskEvent('heartbeat_timer', {
|
|
110
|
+
state: heartbeat ? 'started' : 'disabled',
|
|
111
|
+
stage_count: stageCount,
|
|
112
|
+
tick_ms: heartbeat ? 5000 : undefined,
|
|
113
|
+
reason: heartbeat ? undefined : 'no_consumer',
|
|
114
|
+
});
|
|
84
115
|
const implModel = resolved.provider.config.model;
|
|
85
116
|
const progressCounters = { filesRead: 0, filesWritten: 0, toolCalls: 0 };
|
|
86
|
-
const verboseLogger = verbose && diagnostics?.logger ? diagnostics.logger : undefined;
|
|
87
|
-
const verboseBatchId = verboseBatchIdEarly;
|
|
88
117
|
const verboseStream = verboseStreamRaw;
|
|
89
|
-
|
|
90
|
-
if (verboseStream) {
|
|
91
|
-
verboseStream(`[mmagent verbose] batch=${shortBatch} task=${taskIndex} start worker=${resolved.provider.config.model}`);
|
|
92
|
-
}
|
|
118
|
+
emitTaskEvent('worker_start', { worker: resolved.provider.config.model });
|
|
93
119
|
let prevEventAtMs = verbose ? Date.now() : 0;
|
|
94
120
|
// Wrap whenever we have ANY consumer for InternalRunnerEvent (heartbeat,
|
|
95
121
|
// verbose stream, or verbose logger). Previously this only wrapped when
|
|
@@ -99,22 +125,32 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
99
125
|
const wrappedOnProgress = needHeartbeat
|
|
100
126
|
? (event) => {
|
|
101
127
|
if (event.kind === 'turn_start') {
|
|
128
|
+
heartbeat?.markEvent('llm');
|
|
102
129
|
if (verbose)
|
|
103
130
|
prevEventAtMs = Date.now();
|
|
104
|
-
if (
|
|
105
|
-
|
|
131
|
+
if (verbose) {
|
|
132
|
+
emitTaskEvent('turn_start', {
|
|
133
|
+
turn: event.turn,
|
|
134
|
+
provider: event.provider,
|
|
135
|
+
});
|
|
106
136
|
}
|
|
107
137
|
}
|
|
108
138
|
if (event.kind === 'text_emission') {
|
|
139
|
+
heartbeat?.markEvent('text');
|
|
109
140
|
textEmissionChars += event.chars;
|
|
110
|
-
if (
|
|
141
|
+
if (verbose && event.chars > 0) {
|
|
111
142
|
const preview = event.preview.length > 60
|
|
112
143
|
? event.preview.slice(0, 57) + '...'
|
|
113
144
|
: event.preview;
|
|
114
|
-
|
|
145
|
+
emitTaskEvent('text_emission', {
|
|
146
|
+
chars: event.chars,
|
|
147
|
+
total: textEmissionChars,
|
|
148
|
+
preview,
|
|
149
|
+
});
|
|
115
150
|
}
|
|
116
151
|
}
|
|
117
152
|
if (event.kind === 'tool_call') {
|
|
153
|
+
heartbeat?.markEvent('tool');
|
|
118
154
|
progressCounters.toolCalls++;
|
|
119
155
|
const name = event.toolSummary.split('(')[0];
|
|
120
156
|
if (name === 'readFile' || name === 'grep' || name === 'glob' || name === 'listFiles') {
|
|
@@ -128,19 +164,15 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
128
164
|
const sincePrevMs = verbose ? now - prevEventAtMs : 0;
|
|
129
165
|
if (verbose)
|
|
130
166
|
prevEventAtMs = now;
|
|
131
|
-
if (
|
|
132
|
-
|
|
133
|
-
batchId: verboseBatchId,
|
|
134
|
-
taskIndex,
|
|
167
|
+
if (verbose) {
|
|
168
|
+
emitTaskEvent('tool_call', {
|
|
135
169
|
tool: event.toolSummary,
|
|
136
|
-
|
|
170
|
+
duration_ms: sincePrevMs,
|
|
137
171
|
});
|
|
138
172
|
}
|
|
139
|
-
if (verboseStream) {
|
|
140
|
-
verboseStream(`[mmagent verbose] batch=${shortBatch} task=${taskIndex} tool=${event.toolSummary} +${sincePrevMs}ms`);
|
|
141
|
-
}
|
|
142
173
|
}
|
|
143
174
|
if (event.kind === 'turn_complete') {
|
|
175
|
+
heartbeat?.markEvent('llm');
|
|
144
176
|
const costUSD = computeCostUSD(event.cumulativeInputTokens, event.cumulativeOutputTokens, resolved.provider.config);
|
|
145
177
|
const savedCostUSD = computeSavedCostUSD(costUSD, event.cumulativeInputTokens, event.cumulativeOutputTokens, task.parentModel);
|
|
146
178
|
heartbeat?.updateCost(costUSD, savedCostUSD);
|
|
@@ -148,45 +180,329 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
148
180
|
const turnDurMs = verbose ? nowTurn - prevEventAtMs : 0;
|
|
149
181
|
if (verbose)
|
|
150
182
|
prevEventAtMs = nowTurn;
|
|
151
|
-
if (
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
183
|
+
if (verbose) {
|
|
184
|
+
emitTaskEvent('turn_complete', {
|
|
185
|
+
input_tokens: event.cumulativeInputTokens,
|
|
186
|
+
output_tokens: event.cumulativeOutputTokens,
|
|
187
|
+
cost: costUSD,
|
|
188
|
+
duration_ms: turnDurMs,
|
|
156
189
|
provider: resolved.provider.config.model,
|
|
157
|
-
inputTokens: event.cumulativeInputTokens,
|
|
158
|
-
outputTokens: event.cumulativeOutputTokens,
|
|
159
|
-
costUSD,
|
|
160
190
|
});
|
|
161
191
|
}
|
|
162
|
-
if (verboseStream) {
|
|
163
|
-
const costStr = costUSD !== null ? ` $${costUSD.toFixed(4)}` : '';
|
|
164
|
-
verboseStream(`[mmagent verbose] batch=${shortBatch} task=${taskIndex} ` +
|
|
165
|
-
`turn in=${event.cumulativeInputTokens} out=${event.cumulativeOutputTokens}${costStr} ` +
|
|
166
|
-
`+${turnDurMs}ms (${resolved.provider.config.model})`);
|
|
167
|
-
}
|
|
168
192
|
}
|
|
169
193
|
}
|
|
170
194
|
: undefined;
|
|
171
|
-
|
|
172
|
-
|
|
195
|
+
const cwd = task.cwd ?? process.cwd();
|
|
196
|
+
const taskStartMs = Date.now();
|
|
197
|
+
const commits = [];
|
|
173
198
|
let commitError;
|
|
199
|
+
let specRework = 0;
|
|
200
|
+
let qualityRework = 0;
|
|
201
|
+
let metadataRepair = 0;
|
|
202
|
+
const maxReviewRounds = task.maxReviewRounds ?? 3;
|
|
203
|
+
const maxCostUSD = task.maxCostUSD;
|
|
204
|
+
const reviewRounds = () => ({ spec: specRework, quality: qualityRework, metadata: metadataRepair, cap: maxReviewRounds });
|
|
205
|
+
const taskCostUSD = () => (heartbeat ? heartbeat.getHeartbeatTickInfo().costUSD : null);
|
|
206
|
+
/**
 * Build the result returned when a review loop is abandoned mid-flight.
 *
 * Everything on `base` is kept, then the abort-specific fields are layered on
 * top. The loop named by `aborting` is reported as 'changes_required'; the
 * other loop keeps whatever status `base` already carried, falling back to
 * 'approved' (spec) or 'skipped' (quality) when none was set.
 *
 * @param {object} base - Result to extend; may already hold review statuses.
 * @param {*} terminationReason - Stored verbatim on the returned result.
 * @param {string} message - Stored as `error` on the returned result.
 * @param {string} aborting - 'spec' or 'quality': which loop tripped.
 * @returns {object} A new result with status 'incomplete'.
 */
const abortReviewLoop = (base, terminationReason, message, aborting) => {
    const specReviewStatus = aborting === 'spec'
        ? 'changes_required'
        : (base.specReviewStatus ?? 'approved');
    const qualityReviewStatus = aborting === 'quality'
        ? 'changes_required'
        : (base.qualityReviewStatus ?? 'skipped');
    return {
        ...base,
        status: 'incomplete',
        workerStatus: 'review_loop_aborted',
        terminationReason,
        reviewRounds: reviewRounds(),
        error: message,
        specReviewStatus,
        qualityReviewStatus,
    };
};
|
|
220
|
+
const defaultVerification = { status: 'skipped', steps: [], totalDurationMs: 0, skipReason: 'no_command' };
|
|
221
|
+
let latestVerification = defaultVerification;
|
|
222
|
+
/**
 * Run the verify stage and report its outcome through the task event stream.
 *
 * Announces the committing -> verifying transition, moves the heartbeat to
 * the 'verifying' stage, awaits `runVerifyStage`, stores the outcome in
 * `latestVerification`, then emits one 'verify_step' event per step and a
 * 'verify_skipped' event when the stage was skipped.
 *
 * @returns {Promise<object>} The verification outcome from `runVerifyStage`.
 */
async function runVerificationStage() {
    emitTaskEvent('stage_change', { from: 'committing', to: 'verifying' });
    heartbeat?.transition({
        stage: 'verifying',
        stageIndex: 4,
        reviewRound: undefined,
        // NOTE(review): this falls back to 5, whereas the `maxReviewRounds`
        // constant in the enclosing function falls back to 3 — confirm which
        // default is intended.
        maxReviewRounds: task.maxReviewRounds ?? 5,
    });
    const taskTimeoutMs = task.timeoutMs ?? config.defaults.timeoutMs ?? 1_800_000;
    const outcome = await runVerifyStage({
        cwd,
        verifyCommand: task.verifyCommand,
        taskTimeoutMs,
        taskStartMs,
    });
    latestVerification = outcome;
    for (const step of outcome.steps) {
        const stepFields = {
            command: step.command,
            status: step.status,
            exit_code: step.exitCode,
            signal: step.signal,
            duration_ms: step.durationMs,
            // Normalise null to undefined.
            error_message: step.errorMessage ?? undefined,
        };
        emitTaskEvent('verify_step', stepFields);
    }
    if (outcome.status === 'skipped') {
        const reason = outcome.skipReason ?? 'no_command';
        emitTaskEvent('verify_skipped', { reason, stage: 'verifying' });
    }
    return outcome;
}
|
|
252
|
+
/**
 * Attach derived `capExhausted` / `lifecycleClarificationRequested` signals
 * to a result.
 *
 * Values already present on `result` always win. Otherwise they are inferred
 * from `result.status` and the termination cause:
 *   - cost_exceeded / cost_ceiling        -> capExhausted 'cost'
 *   - timeout                             -> capExhausted 'wall_clock'
 *   - status 'incomplete' with turns > 1  -> capExhausted 'turn'
 *   - brief_too_vague                     -> lifecycleClarificationRequested true
 * A signal that cannot be inferred is left off the returned object entirely.
 *
 * @param {object} result - Result to annotate; not mutated.
 * @returns {object} Shallow copy of `result` plus any inferred signals.
 */
function signalize(result) {
    const reason = result.terminationReason;
    // `typeof null` is 'object', so null must be excluded explicitly before
    // reading `.cause`; otherwise a null terminationReason would throw.
    const cause = reason !== null && typeof reason === 'object' ? reason.cause : reason;
    const capExhausted = result.capExhausted
        ?? (result.status === 'cost_exceeded' || cause === 'cost_exceeded' || cause === 'cost_ceiling' ? 'cost'
            : result.status === 'timeout' || cause === 'timeout' ? 'wall_clock'
                : result.status === 'incomplete' && result.turns > 1 ? 'turn'
                    : undefined);
    const lifecycleClarificationRequested = result.lifecycleClarificationRequested
        ?? (result.status === 'brief_too_vague' || cause === 'brief_too_vague' ? true : undefined);
    return {
        ...result,
        // Spreading `false` adds nothing, so absent signals stay absent.
        ...(capExhausted !== undefined && { capExhausted }),
        ...(lifecycleClarificationRequested !== undefined && { lifecycleClarificationRequested }),
    };
}
|
|
267
|
+
/**
 * Convert a thrown value into a 'runner_crash' error result.
 *
 * Non-Error values are wrapped in an Error via `String(err)`. The returned
 * result reports zero usage and no file/tool activity, and is passed through
 * `signalize` before being returned.
 *
 * @param {*} err - The value that was thrown.
 * @returns {object} Error result with `errorCode: 'runner_crash'`.
 */
function workerErrorResult(err) {
    const workerError = err instanceof Error ? err : new Error(String(err));
    const { message } = workerError;
    const emptyUsage = { inputTokens: 0, outputTokens: 0, totalTokens: 0, costUSD: null };
    const crashResult = {
        output: '',
        status: 'error',
        usage: emptyUsage,
        turns: 0,
        filesRead: [],
        filesWritten: [],
        toolCalls: [],
        outputIsDiagnostic: true,
        escalationLog: [],
        error: message,
        errorCode: 'runner_crash',
        structuredError: { code: 'runner_crash', message },
        workerStatus: 'failed',
        workerError,
    };
    return signalize(crashResult);
}
|
|
286
|
+
/**
 * Stamp a verification outcome onto a result and run it through `signalize`.
 *
 * @param {object} result - Result to extend; not mutated.
 * @param {object} [verification=latestVerification] - Outcome to attach;
 *   defaults to the most recently recorded verification.
 * @returns {object} The signalized result carrying `verification`.
 */
function withVerification(result, verification = latestVerification) {
    const stamped = { ...result, verification };
    return signalize(stamped);
}
|
|
289
|
+
/**
 * Produce the terminal error result for a verification that itself errored.
 *
 * @param {object} base - Result to extend.
 * @param {object} verification - Verification outcome to inspect.
 * @returns {object|null} `null` unless `verification.status` is 'error';
 *   otherwise an error result with `errorCode: 'verify_command_error'` whose
 *   message comes from the first step that did not pass (or a generic
 *   fallback when that step has no message).
 */
function verificationErrorResult(base, verification) {
    if (verification.status === 'error') {
        const firstNonPassing = verification.steps.find((step) => step.status !== 'passed');
        const errored = {
            ...base,
            status: 'error',
            workerStatus: 'done_with_concerns',
            error: firstNonPassing?.errorMessage ?? 'verify command error',
            errorCode: 'verify_command_error',
            commits,
            commitError,
            verification,
        };
        return withVerification(errored, verification);
    }
    return null;
}
|
|
305
|
+
/**
 * Build the terminal result for the reviewPolicy 'off' path.
 *
 * - Verification 'error': returns a 'verify_command_error' result with
 *   workerStatus 'failed'.
 * - Verification 'failed': keeps `base.status`, appends a high-severity
 *   verification concern and reports 'done_with_concerns'.
 * - Otherwise: keeps `base.status` and normalises the worker status via
 *   `workerStatusForTerminal`.
 *
 * @param {object} base - Implementation result plus review bookkeeping.
 * @param {object} verification - Verification outcome for this task.
 * @returns {object} The terminal result (via `withVerification`).
 */
function resolveOffTerminal(base, verification) {
    const concerns = [...(base.concerns ?? [])];
    let workerStatus = workerStatusForTerminal(base.workerStatus);
    if (verification.status === 'error') {
        const firstNonPassing = verification.steps.find((step) => step.status !== 'passed');
        const errored = {
            ...base,
            status: 'error',
            workerStatus: 'failed',
            error: firstNonPassing?.errorMessage ?? 'verify command error',
            errorCode: 'verify_command_error',
            commits,
            commitError,
            verification,
        };
        return withVerification(errored, verification);
    }
    if (verification.status === 'failed') {
        concerns.push({
            source: 'verification',
            severity: 'high',
            message: 'Verification failed after implementation.',
        });
        workerStatus = 'done_with_concerns';
    }
    const terminal = {
        ...base,
        status: base.status,
        workerStatus,
        concerns,
        commits,
        commitError,
        verification,
    };
    return withVerification(terminal, verification);
}
|
|
340
|
+
/**
 * Build the terminal result for the diff-only review path.
 *
 * A 'reject' verdict yields a 'diff_review_rejected' error result carrying
 * only the concerns already on `base`. Any other verdict keeps `base.status`
 * and accumulates concerns from the verdict, from a failed verification and
 * from diff truncation; the worker status becomes 'done_with_concerns' as
 * soon as at least one concern exists.
 *
 * @param {object} base - Implementation result plus review bookkeeping.
 * @param {object} verdict - Diff-review verdict (`kind`, `message`, `concerns`).
 * @param {object} verification - Verification outcome for this task.
 * @param {boolean} diffTruncated - Whether the reviewed diff was truncated.
 * @returns {object} The terminal result (via `withVerification`).
 */
function resolveDiffOnlyTerminal(base, verdict, verification, diffTruncated) {
    const concerns = [...(base.concerns ?? [])];
    if (verdict.kind === 'reject') {
        const rejectionMessage = verdict.message || 'diff review rejected implementation';
        const rejected = {
            ...base,
            status: 'error',
            workerStatus: 'failed',
            error: rejectionMessage,
            errorCode: 'diff_review_rejected',
            structuredError: {
                code: 'diff_review_rejected',
                message: rejectionMessage,
            },
            concerns,
            commits,
            commitError,
            verification,
        };
        return withVerification(rejected, verification);
    }
    concerns.push(...verdict.concerns);
    const verificationFailed = verification.status === 'failed';
    if (verificationFailed) {
        concerns.push({
            source: 'verification',
            severity: 'high',
            message: 'Verification failed after implementation.',
        });
    }
    if (diffTruncated) {
        concerns.push({
            source: 'diff_truncated',
            severity: 'medium',
            message: 'Implementation diff exceeded the reviewer evidence byte cap and was truncated.',
        });
    }
    // A failed verification always pushed a concern above, so checking the
    // list length alone covers that case too.
    const workerStatus = concerns.length > 0
        ? 'done_with_concerns'
        : workerStatusForTerminal(base.workerStatus);
    const terminal = {
        ...base,
        status: base.status,
        workerStatus,
        concerns,
        commits,
        commitError,
        verification,
    };
    return withVerification(terminal, verification);
}
|
|
385
|
+
/**
 * Normalise a worker status for a terminal result.
 *
 * 'needs_context', 'blocked', 'failed' and 'done_with_concerns' are passed
 * through unchanged; every other value (including `undefined`) becomes 'done'.
 *
 * @param {string|undefined} status - Worker status reported so far.
 * @returns {string} The status to place on the terminal result.
 */
function workerStatusForTerminal(status) {
    // Statuses that must survive into the terminal result as-is.
    const preservedStatuses = ['needs_context', 'blocked', 'failed', 'done_with_concerns'];
    return preservedStatuses.includes(status) ? status : 'done';
}
|
|
390
|
+
/**
 * Append every commit in the range `from..to` to `commits`, in the order
 * printed by `git rev-list --reverse`.
 *
 * Each SHA is read back through `readbackCommit`, one at a time.
 *
 * @param {string} from - Exclusive start revision of the range.
 * @param {string} [to='HEAD'] - End revision of the range.
 * @returns {Promise<void>}
 */
async function recordWorkerCommits(from, to = 'HEAD') {
    const range = `${from}..${to}`;
    const { stdout } = await exec('git', ['rev-list', '--reverse', range], { cwd });
    const shas = stdout.trim().split('\n').filter(Boolean);
    for (const sha of shas) {
        const recorded = await readbackCommit(sha, cwd);
        commits.push(recorded);
    }
}
|
|
397
|
+
/**
 * Ask the worker, up to two times, to re-emit valid commit metadata.
 *
 * `git status` is captured before and after each repair turn. If the two
 * snapshots differ, the repair is abandoned, `commitError` is set to
 * 'commit_metadata_repair_modified_files' and `null` is returned. If both
 * attempts finish without a commit, `commitError` records the last
 * diagnostic.
 *
 * @param {string} initialDiagnostic - Diagnostic handed to the first repair
 *   turn; an empty value falls back to 'no commit block emitted'.
 * @returns {Promise<object|null>} The repaired commit metadata, or `null`.
 */
async function repairCommitMetadata(initialDiagnostic) {
    const maxAttempts = 2;
    const readStatus = async () => (await exec('git', ['status', '--porcelain=v1', '-z'], { cwd })).stdout;
    let diagnostic = initialDiagnostic || 'no commit block emitted';
    for (let attempt = 0; attempt < maxAttempts; attempt += 1) {
        const before = await readStatus();
        const repaired = await runMetadataRepairTurn({ task, zodError: diagnostic, cwd, providerSlot: resolved.slot, provider: resolved.provider });
        const after = await readStatus();
        if (before !== after) {
            commitError = 'commit_metadata_repair_modified_files';
            return null;
        }
        if (repaired.commit) {
            return repaired.commit;
        }
        diagnostic = repaired.commitDiagnostic ?? 'no commit block emitted';
    }
    commitError = `commit_metadata_invalid: ${diagnostic}`;
    return null;
}
|
|
419
|
+
/**
 * Record what the implementation turn left behind in git.
 *
 * - HEAD moved past `baselineHead`: every new commit is recorded through
 *   `recordWorkerCommits`.
 * - Working tree dirty: the changes are committed via `runCommitStage`, using
 *   the commit metadata from the implementation report or, when that is
 *   missing, from `repairCommitMetadata`. Nothing is committed if no valid
 *   metadata can be obtained.
 * - Neither: nothing happens.
 *
 * @param {object} implResult - Implementation result (reads `filesWritten`).
 * @param {object|undefined} implReport - Parsed structured report, if any.
 * @param {string} baselineHead - HEAD SHA captured before implementation.
 * @returns {Promise<void>}
 */
async function captureCommitsAfterImplementation(implResult, implReport, baselineHead) {
    const statusOutput = (await exec('git', ['status', '--porcelain=v1'], { cwd })).stdout;
    const currentHead = (await exec('git', ['rev-parse', 'HEAD'], { cwd })).stdout.trim();
    if (currentHead !== baselineHead) {
        await recordWorkerCommits(baselineHead, 'HEAD');
    }
    if (statusOutput.length === 0) {
        return;
    }
    const commitMetadata = implReport?.commit
        ?? await repairCommitMetadata(implReport?.commitDiagnostic ?? 'no commit block emitted');
    if (!commitMetadata) {
        return;
    }
    const created = await runCommitStage({ cwd, filesWritten: implResult.filesWritten, commit: commitMetadata });
    commits.push(created);
}
|
|
174
436
|
try {
|
|
437
|
+
// The dirty-tree precondition + git baseline only apply to artifact-producing tasks
|
|
438
|
+
// (those with autoCommit === true). Non-artifact presets — audit, review, verify,
|
|
439
|
+
// debug — neither produce commits nor read git state, so they bypass the check
|
|
440
|
+
// entirely. Per spec Section A: "Non-artifact tasks (audits, analyses, read-only
|
|
441
|
+
// investigations) skip stages 3 and 4."
|
|
442
|
+
const isArtifactProducing = task.autoCommit === true;
|
|
443
|
+
let baselineHead = '';
|
|
444
|
+
if (isArtifactProducing) {
|
|
445
|
+
baselineHead = (await exec('git', ['rev-parse', 'HEAD'], { cwd })).stdout.trim();
|
|
446
|
+
const baselinePorcelain = (await exec('git', ['status', '--porcelain=v1', '-z'], { cwd })).stdout;
|
|
447
|
+
if (baselinePorcelain.length !== 0) {
|
|
448
|
+
return withVerification({
|
|
449
|
+
output: `Sub-agent error: task.cwd ${cwd} had pre-existing modifications`,
|
|
450
|
+
status: 'error',
|
|
451
|
+
usage: { inputTokens: 0, outputTokens: 0, totalTokens: 0, costUSD: null },
|
|
452
|
+
turns: 0,
|
|
453
|
+
filesRead: [],
|
|
454
|
+
filesWritten: [],
|
|
455
|
+
toolCalls: [],
|
|
456
|
+
outputIsDiagnostic: true,
|
|
457
|
+
escalationLog: [],
|
|
458
|
+
error: `task.cwd ${cwd} had pre-existing modifications`,
|
|
459
|
+
errorCode: 'dirty_worktree',
|
|
460
|
+
commits,
|
|
461
|
+
});
|
|
462
|
+
}
|
|
463
|
+
}
|
|
175
464
|
const implResult = await delegateWithEscalation(withDoneCondition(task), [resolved.provider], { explicitlyPinned: false, escalateToProvider: escalationProvider, onProgress: wrappedOnProgress });
|
|
176
465
|
const implReport = implResult.status === 'ok' ? parseStructuredReport(implResult.output) : undefined;
|
|
177
466
|
const workerStatus = extractWorkerStatus(implReport);
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
const commitResult = autoCommitFiles(implResult.filesWritten, implReport?.summary ?? undefined, task.cwd ?? process.cwd());
|
|
181
|
-
commitSha = commitResult.sha;
|
|
182
|
-
commitError = commitResult.error;
|
|
467
|
+
if (implResult.status === 'ok' && isArtifactProducing) {
|
|
468
|
+
await captureCommitsAfterImplementation(implResult, implReport, baselineHead);
|
|
183
469
|
}
|
|
470
|
+
const verification = isArtifactProducing ? await runVerificationStage() : defaultVerification;
|
|
471
|
+
const verifyError = verificationErrorResult(implResult, verification);
|
|
472
|
+
if (verifyError)
|
|
473
|
+
return verifyError;
|
|
184
474
|
const filePathsInteracted = task.filePaths && task.filePaths.length > 0
|
|
185
475
|
? [...(implResult.filesRead ?? []), ...implResult.filesWritten].some(f => task.filePaths.some(fp => f === fp || f.endsWith('/' + fp) || f.endsWith(fp)))
|
|
186
476
|
: true;
|
|
187
477
|
const filePathsSkipped = !filePathsInteracted;
|
|
188
478
|
if (implResult.filesWritten.length === 0) {
|
|
189
479
|
heartbeat?.updateStageCount(1);
|
|
480
|
+
if (reviewPolicy === 'off') {
|
|
481
|
+
emitTaskEvent('stage_change', { from: 'verifying', to: 'terminal' });
|
|
482
|
+
const terminal = resolveOffTerminal({
|
|
483
|
+
...implResult,
|
|
484
|
+
workerStatus,
|
|
485
|
+
specReviewStatus: 'skipped',
|
|
486
|
+
qualityReviewStatus: 'skipped',
|
|
487
|
+
specReviewReason: 'skipped: reviewPolicy is off',
|
|
488
|
+
qualityReviewReason: 'skipped: reviewPolicy is off',
|
|
489
|
+
agents: {
|
|
490
|
+
implementer: resolved.slot,
|
|
491
|
+
specReviewer: 'skipped',
|
|
492
|
+
qualityReviewer: 'skipped',
|
|
493
|
+
},
|
|
494
|
+
models: {
|
|
495
|
+
implementer: implModel,
|
|
496
|
+
specReviewer: null,
|
|
497
|
+
qualityReviewer: null,
|
|
498
|
+
},
|
|
499
|
+
implementationReport: implReport,
|
|
500
|
+
structuredReport: implReport,
|
|
501
|
+
filePathsSkipped,
|
|
502
|
+
fileArtifactsMissing: implResult.status === 'ok' ? checkOutputTargets(outputTargets) : undefined,
|
|
503
|
+
}, verification);
|
|
504
|
+
return terminal;
|
|
505
|
+
}
|
|
190
506
|
const effectiveImplReport = implReport ?? buildFallbackImplReport(implResult);
|
|
191
507
|
const earlyFileArtifactsMissing = implResult.status === 'ok' ? checkOutputTargets(outputTargets) : undefined;
|
|
192
508
|
const earlyStatus = implResult.status === 'ok' && earlyFileArtifactsMissing
|
|
@@ -207,6 +523,7 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
207
523
|
validationsRun: effectiveImplReport.validationsRun,
|
|
208
524
|
deviationsFromBrief: effectiveImplReport.deviationsFromBrief,
|
|
209
525
|
unresolved: effectiveImplReport.unresolved,
|
|
526
|
+
extraSections: effectiveImplReport.extraSections ?? {},
|
|
210
527
|
},
|
|
211
528
|
filePathsSkipped,
|
|
212
529
|
agents: {
|
|
@@ -220,8 +537,9 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
220
537
|
qualityReviewer: null,
|
|
221
538
|
},
|
|
222
539
|
fileArtifactsMissing: earlyFileArtifactsMissing,
|
|
223
|
-
|
|
540
|
+
commits,
|
|
224
541
|
commitError,
|
|
542
|
+
verification,
|
|
225
543
|
};
|
|
226
544
|
}
|
|
227
545
|
if (workerStatus === 'needs_context' || workerStatus === 'blocked') {
|
|
@@ -243,12 +561,14 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
243
561
|
qualityReviewer: null,
|
|
244
562
|
},
|
|
245
563
|
fileArtifactsMissing: implResult.status === 'ok' ? checkOutputTargets(outputTargets) : undefined,
|
|
246
|
-
|
|
564
|
+
commits,
|
|
247
565
|
commitError,
|
|
566
|
+
verification,
|
|
248
567
|
};
|
|
249
568
|
}
|
|
250
569
|
if (reviewPolicy === 'off') {
|
|
251
|
-
|
|
570
|
+
emitTaskEvent('stage_change', { from: 'verifying', to: 'terminal' });
|
|
571
|
+
const terminal = resolveOffTerminal({
|
|
252
572
|
...implResult,
|
|
253
573
|
workerStatus,
|
|
254
574
|
specReviewStatus: 'skipped',
|
|
@@ -267,9 +587,8 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
267
587
|
},
|
|
268
588
|
implementationReport: implReport,
|
|
269
589
|
fileArtifactsMissing: implResult.status === 'ok' ? checkOutputTargets(outputTargets) : undefined,
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
};
|
|
590
|
+
}, verification);
|
|
591
|
+
return terminal;
|
|
273
592
|
}
|
|
274
593
|
let otherProvider;
|
|
275
594
|
try {
|
|
@@ -294,8 +613,9 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
294
613
|
qualityReviewer: null,
|
|
295
614
|
},
|
|
296
615
|
fileArtifactsMissing: implResult.status === 'ok' ? checkOutputTargets(outputTargets) : undefined,
|
|
297
|
-
|
|
616
|
+
commits,
|
|
298
617
|
commitError,
|
|
618
|
+
verification,
|
|
299
619
|
};
|
|
300
620
|
}
|
|
301
621
|
const reviewModel = otherProvider.config.model;
|
|
@@ -306,23 +626,72 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
306
626
|
};
|
|
307
627
|
let fileContents = await readImplementerFileContents(implResult.filesWritten, task.cwd);
|
|
308
628
|
const effectiveImplReport = implReport ?? buildFallbackImplReport(implResult);
|
|
629
|
+
const evidence = isArtifactProducing
|
|
630
|
+
? await buildEvidence({ cwd, baselineHead, commits, verification, reviewPolicy })
|
|
631
|
+
: { block: '', diffTruncated: false, fullDiff: '' };
|
|
632
|
+
if (reviewPolicy === 'diff_only') {
|
|
633
|
+
emitTaskEvent('stage_change', { from: 'verifying', to: 'diff_review' });
|
|
634
|
+
heartbeat?.transition({
|
|
635
|
+
stage: 'diff_review',
|
|
636
|
+
stageIndex: 2,
|
|
637
|
+
reviewRound: 1,
|
|
638
|
+
maxReviewRounds,
|
|
639
|
+
});
|
|
640
|
+
const verdict = await runDiffReview({
|
|
641
|
+
cwd,
|
|
642
|
+
diff: evidence.fullDiff,
|
|
643
|
+
diffTruncated: evidence.diffTruncated,
|
|
644
|
+
verification,
|
|
645
|
+
worker: { call: (prompt) => otherProvider.run(prompt) },
|
|
646
|
+
});
|
|
647
|
+
emitTaskEvent('review_decision', { stage: 'diff_review', verdict: verdict.kind, round: 1 });
|
|
648
|
+
return resolveDiffOnlyTerminal({
|
|
649
|
+
...implResult,
|
|
650
|
+
workerStatus,
|
|
651
|
+
specReviewStatus: 'skipped',
|
|
652
|
+
qualityReviewStatus: 'skipped',
|
|
653
|
+
specReviewReason: 'skipped: reviewPolicy is diff_only',
|
|
654
|
+
qualityReviewReason: 'skipped: reviewPolicy is diff_only',
|
|
655
|
+
implementationReport: effectiveImplReport,
|
|
656
|
+
fileArtifactsMissing: implResult.status === 'ok' ? checkOutputTargets(outputTargets) : undefined,
|
|
657
|
+
agents: {
|
|
658
|
+
implementer: resolved.slot,
|
|
659
|
+
specReviewer: 'skipped',
|
|
660
|
+
qualityReviewer: 'skipped',
|
|
661
|
+
},
|
|
662
|
+
models: {
|
|
663
|
+
implementer: implModel,
|
|
664
|
+
specReviewer: reviewModel,
|
|
665
|
+
qualityReviewer: null,
|
|
666
|
+
},
|
|
667
|
+
}, verdict, verification, evidence.diffTruncated);
|
|
668
|
+
}
|
|
309
669
|
heartbeat?.transition({
|
|
310
670
|
stage: 'spec_review', stageIndex: 2,
|
|
311
671
|
reviewRound: 1, maxReviewRounds: task.maxReviewRounds ?? 5,
|
|
312
672
|
});
|
|
313
|
-
let specResult = await runSpecReview(otherProvider, packet, effectiveImplReport, fileContents, implResult.toolCalls, task.planContext);
|
|
673
|
+
let specResult = await runSpecReview(otherProvider, packet, effectiveImplReport, fileContents, implResult.toolCalls, task.planContext, evidence.block);
|
|
314
674
|
let finalImplResult = implResult;
|
|
315
675
|
let finalImplReport = effectiveImplReport;
|
|
316
676
|
let specStatus = specResult.status;
|
|
317
677
|
let specReport = specResult.report;
|
|
318
678
|
if (specStatus === 'changes_required') {
|
|
319
679
|
let prevSpecFindings = [];
|
|
320
|
-
let round = 0;
|
|
321
680
|
while (true) {
|
|
322
|
-
|
|
681
|
+
if (specRework + qualityRework >= maxReviewRounds) {
|
|
682
|
+
return abortReviewLoop(finalImplResult, 'round_cap', 'review round cap reached before spec rework', 'spec');
|
|
683
|
+
}
|
|
684
|
+
const currentCostUSD = taskCostUSD();
|
|
685
|
+
if (currentCostUSD !== null && maxCostUSD !== undefined && currentCostUSD >= 0.8 * maxCostUSD) {
|
|
686
|
+
emitTaskEvent('cost_check', { stage: 'spec_rework', tripped: true, cost_used_usd: currentCostUSD, cost_cap_usd: maxCostUSD, cost_available: true });
|
|
687
|
+
return abortReviewLoop(finalImplResult, 'cost_ceiling', 'cost ceiling reached before spec rework', 'spec');
|
|
688
|
+
}
|
|
689
|
+
emitTaskEvent('stage_change', { from: 'spec_review', to: 'spec_rework', round: specRework + 1, cap: maxReviewRounds });
|
|
690
|
+
specRework++;
|
|
691
|
+
const round = specRework;
|
|
323
692
|
heartbeat?.transition({
|
|
324
693
|
stage: 'spec_rework', stageIndex: 3,
|
|
325
|
-
reviewRound: round, maxReviewRounds
|
|
694
|
+
reviewRound: round, maxReviewRounds,
|
|
326
695
|
});
|
|
327
696
|
const feedback = specResult.findings.length > 0
|
|
328
697
|
? `\n\n## Spec Review Feedback (round ${round}):\n${specResult.findings.map(f => `- ${f}`).join('\n')}`
|
|
@@ -330,15 +699,6 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
330
699
|
const reworkPrompt = `${task.prompt}${feedback}`;
|
|
331
700
|
const reworkTask = withDoneCondition({ ...task, prompt: reworkPrompt });
|
|
332
701
|
const reworkResult = await delegateWithEscalation(reworkTask, [resolved.provider], { explicitlyPinned: true, onProgress: wrappedOnProgress });
|
|
333
|
-
// Auto-commit rework changes
|
|
334
|
-
if (task.autoCommit && reworkResult.status === 'ok' && reworkResult.filesWritten.length > 0) {
|
|
335
|
-
const reworkReport = parseStructuredReport(reworkResult.output);
|
|
336
|
-
const reworkCommit = autoCommitFiles(reworkResult.filesWritten, reworkReport.summary ?? undefined, task.cwd ?? process.cwd());
|
|
337
|
-
if (reworkCommit.sha)
|
|
338
|
-
commitSha = reworkCommit.sha;
|
|
339
|
-
if (reworkCommit.error)
|
|
340
|
-
commitError = reworkCommit.error;
|
|
341
|
-
}
|
|
342
702
|
finalImplResult = reworkResult;
|
|
343
703
|
const reworkReport = parseStructuredReport(reworkResult.output);
|
|
344
704
|
finalImplReport = reworkReport.summary ? reworkReport : buildFallbackImplReport(reworkResult);
|
|
@@ -346,9 +706,9 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
346
706
|
fileContents = reworkContents;
|
|
347
707
|
heartbeat?.transition({
|
|
348
708
|
stage: 'spec_review', stageIndex: 2,
|
|
349
|
-
reviewRound: round + 1, maxReviewRounds
|
|
709
|
+
reviewRound: round + 1, maxReviewRounds,
|
|
350
710
|
});
|
|
351
|
-
specResult = await runSpecReview(otherProvider, packet, finalImplReport, reworkContents, reworkResult.toolCalls, task.planContext);
|
|
711
|
+
specResult = await runSpecReview(otherProvider, packet, finalImplReport, reworkContents, reworkResult.toolCalls, task.planContext, evidence.block);
|
|
352
712
|
specStatus = specResult.status;
|
|
353
713
|
specReport = specResult.report;
|
|
354
714
|
if (specStatus === 'approved')
|
|
@@ -358,25 +718,32 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
358
718
|
if (currentFindings === prevFindings && currentFindings !== '')
|
|
359
719
|
break;
|
|
360
720
|
prevSpecFindings = specResult.findings;
|
|
361
|
-
if (round >= (task.maxReviewRounds ?? 5))
|
|
362
|
-
break;
|
|
363
721
|
}
|
|
364
722
|
}
|
|
365
723
|
let qualityResult = { status: 'skipped', report: undefined, findings: [] };
|
|
366
724
|
if (reviewPolicy === 'full') {
|
|
367
725
|
heartbeat?.transition({
|
|
368
726
|
stage: 'quality_review', stageIndex: 4,
|
|
369
|
-
reviewRound: 1, maxReviewRounds
|
|
727
|
+
reviewRound: 1, maxReviewRounds,
|
|
370
728
|
});
|
|
371
|
-
qualityResult = await runQualityReview(otherProvider, packet, specReport ?? finalImplReport, fileContents, finalImplResult.toolCalls, finalImplResult.filesWritten);
|
|
729
|
+
qualityResult = await runQualityReview(otherProvider, packet, specReport ?? finalImplReport, fileContents, finalImplResult.toolCalls, finalImplResult.filesWritten, evidence.block);
|
|
372
730
|
if (qualityResult.status === 'changes_required') {
|
|
373
731
|
let prevQualityFindings = [];
|
|
374
|
-
let round = 0;
|
|
375
732
|
while (true) {
|
|
376
|
-
|
|
733
|
+
if (specRework + qualityRework >= maxReviewRounds) {
|
|
734
|
+
return abortReviewLoop(finalImplResult, 'round_cap', 'review round cap reached before quality rework', 'quality');
|
|
735
|
+
}
|
|
736
|
+
const currentCostUSD = taskCostUSD();
|
|
737
|
+
if (currentCostUSD !== null && maxCostUSD !== undefined && currentCostUSD >= 0.8 * maxCostUSD) {
|
|
738
|
+
emitTaskEvent('cost_check', { stage: 'quality_rework', tripped: true, cost_used_usd: currentCostUSD, cost_cap_usd: maxCostUSD, cost_available: true });
|
|
739
|
+
return abortReviewLoop(finalImplResult, 'cost_ceiling', 'cost ceiling reached before quality rework', 'quality');
|
|
740
|
+
}
|
|
741
|
+
emitTaskEvent('stage_change', { from: 'quality_review', to: 'quality_rework', round: qualityRework + 1, cap: maxReviewRounds });
|
|
742
|
+
qualityRework++;
|
|
743
|
+
const round = qualityRework;
|
|
377
744
|
heartbeat?.transition({
|
|
378
745
|
stage: 'quality_rework', stageIndex: 5,
|
|
379
|
-
reviewRound: round, maxReviewRounds
|
|
746
|
+
reviewRound: round, maxReviewRounds,
|
|
380
747
|
});
|
|
381
748
|
const feedback = qualityResult.findings.length > 0
|
|
382
749
|
? `\n\n## Quality Review Feedback (round ${round}):\n${qualityResult.findings.map(f => `- ${f}`).join('\n')}`
|
|
@@ -384,24 +751,15 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
384
751
|
const reworkPrompt = `${task.prompt}${feedback}`;
|
|
385
752
|
const reworkTask = withDoneCondition({ ...task, prompt: reworkPrompt });
|
|
386
753
|
const reworkResult = await delegateWithEscalation(reworkTask, [resolved.provider], { explicitlyPinned: true, onProgress: wrappedOnProgress });
|
|
387
|
-
// Auto-commit rework changes
|
|
388
|
-
if (task.autoCommit && reworkResult.status === 'ok' && reworkResult.filesWritten.length > 0) {
|
|
389
|
-
const reworkReport = parseStructuredReport(reworkResult.output);
|
|
390
|
-
const reworkCommit = autoCommitFiles(reworkResult.filesWritten, reworkReport.summary ?? undefined, task.cwd ?? process.cwd());
|
|
391
|
-
if (reworkCommit.sha)
|
|
392
|
-
commitSha = reworkCommit.sha;
|
|
393
|
-
if (reworkCommit.error)
|
|
394
|
-
commitError = reworkCommit.error;
|
|
395
|
-
}
|
|
396
754
|
finalImplResult = reworkResult;
|
|
397
755
|
const reworkReport = parseStructuredReport(reworkResult.output);
|
|
398
756
|
finalImplReport = reworkReport.summary ? reworkReport : buildFallbackImplReport(reworkResult);
|
|
399
757
|
const reworkContents = await readImplementerFileContents(reworkResult.filesWritten, task.cwd);
|
|
400
758
|
heartbeat?.transition({
|
|
401
759
|
stage: 'quality_review', stageIndex: 4,
|
|
402
|
-
reviewRound: round + 1, maxReviewRounds
|
|
760
|
+
reviewRound: round + 1, maxReviewRounds,
|
|
403
761
|
});
|
|
404
|
-
qualityResult = await runQualityReview(otherProvider, packet, finalImplReport, reworkContents, reworkResult.toolCalls, reworkResult.filesWritten);
|
|
762
|
+
qualityResult = await runQualityReview(otherProvider, packet, finalImplReport, reworkContents, reworkResult.toolCalls, reworkResult.filesWritten, evidence.block);
|
|
405
763
|
if (qualityResult.status === 'approved')
|
|
406
764
|
break;
|
|
407
765
|
const currentFindings = [...qualityResult.findings].sort().join('\0');
|
|
@@ -409,12 +767,28 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
409
767
|
if (currentFindings === prevFindings && currentFindings !== '')
|
|
410
768
|
break;
|
|
411
769
|
prevQualityFindings = qualityResult.findings;
|
|
412
|
-
if (round >= (task.maxReviewRounds ?? 5))
|
|
413
|
-
break;
|
|
414
770
|
}
|
|
415
771
|
}
|
|
416
772
|
}
|
|
417
773
|
const finalReport = specReport ?? finalImplReport;
|
|
774
|
+
const concerns = [...(finalImplResult.concerns ?? [])];
|
|
775
|
+
let finalWorkerStatus = workerStatus;
|
|
776
|
+
if (verification.status === 'failed') {
|
|
777
|
+
concerns.push({
|
|
778
|
+
source: 'verification',
|
|
779
|
+
severity: 'high',
|
|
780
|
+
message: 'Verification failed after implementation.',
|
|
781
|
+
});
|
|
782
|
+
if (finalWorkerStatus === 'done')
|
|
783
|
+
finalWorkerStatus = 'done_with_concerns';
|
|
784
|
+
}
|
|
785
|
+
if (evidence.diffTruncated) {
|
|
786
|
+
concerns.push({
|
|
787
|
+
source: 'diff_truncated',
|
|
788
|
+
severity: 'medium',
|
|
789
|
+
message: 'Implementation diff exceeded the reviewer evidence byte cap and was truncated.',
|
|
790
|
+
});
|
|
791
|
+
}
|
|
418
792
|
const aggregated = aggregateResult(finalReport, specReport, qualityResult.report, specStatus, qualityResult.status);
|
|
419
793
|
// File artifact verification: check whether output targets exist on disk after all work.
|
|
420
794
|
// Only applies when status is ok; non-ok statuses skip verification entirely.
|
|
@@ -432,7 +806,8 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
432
806
|
return {
|
|
433
807
|
...finalImplResult,
|
|
434
808
|
status: finalStatus,
|
|
435
|
-
workerStatus,
|
|
809
|
+
workerStatus: finalWorkerStatus,
|
|
810
|
+
concerns,
|
|
436
811
|
specReviewStatus: specStatus,
|
|
437
812
|
qualityReviewStatus: qualityResult.status,
|
|
438
813
|
specReviewReason: specResult.errorReason,
|
|
@@ -453,10 +828,14 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
453
828
|
qualityReviewer: reviewPolicy === 'full' ? reviewModel : null,
|
|
454
829
|
},
|
|
455
830
|
fileArtifactsMissing,
|
|
456
|
-
|
|
831
|
+
commits,
|
|
457
832
|
commitError,
|
|
833
|
+
verification,
|
|
458
834
|
};
|
|
459
835
|
}
|
|
836
|
+
catch (err) {
|
|
837
|
+
return withVerification(workerErrorResult(err));
|
|
838
|
+
}
|
|
460
839
|
finally {
|
|
461
840
|
heartbeat?.stop();
|
|
462
841
|
}
|