@zhixuan92/multi-model-agent-core 3.3.0 → 3.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +4 -3
- package/dist/delegate-with-escalation.d.ts +0 -1
- package/dist/delegate-with-escalation.d.ts.map +1 -1
- package/dist/delegate-with-escalation.js +7 -52
- package/dist/delegate-with-escalation.js.map +1 -1
- package/dist/diagnostics/disconnect-log.d.ts +56 -27
- package/dist/diagnostics/disconnect-log.d.ts.map +1 -1
- package/dist/diagnostics/disconnect-log.js +34 -49
- package/dist/diagnostics/disconnect-log.js.map +1 -1
- package/dist/escalation/fallback.d.ts +59 -0
- package/dist/escalation/fallback.d.ts.map +1 -0
- package/dist/escalation/fallback.js +172 -0
- package/dist/escalation/fallback.js.map +1 -0
- package/dist/escalation/policy.d.ts +37 -0
- package/dist/escalation/policy.d.ts.map +1 -0
- package/dist/escalation/policy.js +67 -0
- package/dist/escalation/policy.js.map +1 -0
- package/dist/executors/debug.d.ts.map +1 -1
- package/dist/executors/debug.js +0 -1
- package/dist/executors/debug.js.map +1 -1
- package/dist/executors/execute-plan.js +1 -1
- package/dist/executors/execute-plan.js.map +1 -1
- package/dist/executors/investigate.d.ts +11 -0
- package/dist/executors/investigate.d.ts.map +1 -0
- package/dist/executors/investigate.js +101 -0
- package/dist/executors/investigate.js.map +1 -0
- package/dist/heartbeat.d.ts +4 -4
- package/dist/heartbeat.d.ts.map +1 -1
- package/dist/heartbeat.js +17 -17
- package/dist/heartbeat.js.map +1 -1
- package/dist/intake/compilers/execute-plan.d.ts.map +1 -1
- package/dist/intake/compilers/execute-plan.js +1 -0
- package/dist/intake/compilers/execute-plan.js.map +1 -1
- package/dist/intake/compilers/investigate.d.ts +12 -0
- package/dist/intake/compilers/investigate.d.ts.map +1 -0
- package/dist/intake/compilers/investigate.js +36 -0
- package/dist/intake/compilers/investigate.js.map +1 -0
- package/dist/intake/resolve.d.ts.map +1 -1
- package/dist/intake/resolve.js +3 -2
- package/dist/intake/resolve.js.map +1 -1
- package/dist/intake/types.d.ts +8 -2
- package/dist/intake/types.d.ts.map +1 -1
- package/dist/model-profiles.json +10 -6
- package/dist/reporting/compose-investigate-headline.d.ts +11 -0
- package/dist/reporting/compose-investigate-headline.d.ts.map +1 -0
- package/dist/reporting/compose-investigate-headline.js +29 -0
- package/dist/reporting/compose-investigate-headline.js.map +1 -0
- package/dist/reporting/compose-terminal-headline.d.ts +5 -0
- package/dist/reporting/compose-terminal-headline.d.ts.map +1 -1
- package/dist/reporting/compose-terminal-headline.js +23 -6
- package/dist/reporting/compose-terminal-headline.js.map +1 -1
- package/dist/reporting/derive-investigate-status.d.ts +17 -0
- package/dist/reporting/derive-investigate-status.d.ts.map +1 -0
- package/dist/reporting/derive-investigate-status.js +30 -0
- package/dist/reporting/derive-investigate-status.js.map +1 -0
- package/dist/reporting/parse-investigation-report.d.ts +39 -0
- package/dist/reporting/parse-investigation-report.d.ts.map +1 -0
- package/dist/reporting/parse-investigation-report.js +150 -0
- package/dist/reporting/parse-investigation-report.js.map +1 -0
- package/dist/reporting/structured-report.d.ts +1 -0
- package/dist/reporting/structured-report.d.ts.map +1 -1
- package/dist/reporting/structured-report.js +26 -2
- package/dist/reporting/structured-report.js.map +1 -1
- package/dist/review/aggregate-result.d.ts +3 -1
- package/dist/review/aggregate-result.d.ts.map +1 -1
- package/dist/review/aggregate-result.js +5 -0
- package/dist/review/aggregate-result.js.map +1 -1
- package/dist/review/diff-review.d.ts +11 -0
- package/dist/review/diff-review.d.ts.map +1 -1
- package/dist/review/diff-review.js +5 -2
- package/dist/review/diff-review.js.map +1 -1
- package/dist/review/quality-reviewer.d.ts +11 -2
- package/dist/review/quality-reviewer.d.ts.map +1 -1
- package/dist/review/quality-reviewer.js +3 -0
- package/dist/review/quality-reviewer.js.map +1 -1
- package/dist/review/skipped-result.d.ts +8 -0
- package/dist/review/skipped-result.d.ts.map +1 -0
- package/dist/review/skipped-result.js +4 -0
- package/dist/review/skipped-result.js.map +1 -0
- package/dist/review/spec-reviewer.d.ts +4 -1
- package/dist/review/spec-reviewer.d.ts.map +1 -1
- package/dist/review/spec-reviewer.js +3 -0
- package/dist/review/spec-reviewer.js.map +1 -1
- package/dist/run-tasks/commit-stage.d.ts.map +1 -1
- package/dist/run-tasks/commit-stage.js +17 -8
- package/dist/run-tasks/commit-stage.js.map +1 -1
- package/dist/run-tasks/fallback-report.d.ts.map +1 -1
- package/dist/run-tasks/fallback-report.js +1 -0
- package/dist/run-tasks/fallback-report.js.map +1 -1
- package/dist/run-tasks/metadata-repair.d.ts.map +1 -1
- package/dist/run-tasks/metadata-repair.js +0 -1
- package/dist/run-tasks/metadata-repair.js.map +1 -1
- package/dist/run-tasks/reviewed-lifecycle.d.ts.map +1 -1
- package/dist/run-tasks/reviewed-lifecycle.js +460 -284
- package/dist/run-tasks/reviewed-lifecycle.js.map +1 -1
- package/dist/runners/base/result-builders.d.ts +1 -1
- package/dist/runners/base/result-builders.d.ts.map +1 -1
- package/dist/runners/types.d.ts +2 -2
- package/dist/runners/types.d.ts.map +1 -1
- package/dist/tool-schemas/delegate.d.ts +0 -1
- package/dist/tool-schemas/delegate.d.ts.map +1 -1
- package/dist/tool-schemas/delegate.js +0 -1
- package/dist/tool-schemas/delegate.js.map +1 -1
- package/dist/tool-schemas/execute-plan.d.ts +0 -5
- package/dist/tool-schemas/execute-plan.d.ts.map +1 -1
- package/dist/tool-schemas/execute-plan.js +0 -4
- package/dist/tool-schemas/execute-plan.js.map +1 -1
- package/dist/tool-schemas/investigate.d.ts +48 -0
- package/dist/tool-schemas/investigate.d.ts.map +1 -0
- package/dist/tool-schemas/investigate.js +13 -0
- package/dist/tool-schemas/investigate.js.map +1 -0
- package/dist/types.d.ts +18 -2
- package/dist/types.d.ts.map +1 -1
- package/dist/types.js.map +1 -1
- package/package.json +9 -1
|
@@ -3,8 +3,11 @@ import { promisify } from 'node:util';
|
|
|
3
3
|
import { computeCostUSD, computeSavedCostUSD } from '../types.js';
|
|
4
4
|
import { createProvider } from '../provider.js';
|
|
5
5
|
import { delegateWithEscalation } from '../delegate-with-escalation.js';
|
|
6
|
+
import { pickEscalation, pickReviewer, maxRowsFor, } from '../escalation/policy.js';
|
|
7
|
+
import { runWithFallback, makeSyntheticRunResult, TRANSPORT_FAILURES, isReviewTransportFailure, } from '../escalation/fallback.js';
|
|
6
8
|
import { HeartbeatTimer } from '../heartbeat.js';
|
|
7
9
|
import { runSpecReview } from '../review/spec-reviewer.js';
|
|
10
|
+
import { makeSkippedReviewResult } from '../review/skipped-result.js';
|
|
8
11
|
import { runQualityReview } from '../review/quality-reviewer.js';
|
|
9
12
|
import { runDiffReview } from '../review/diff-review.js';
|
|
10
13
|
import { aggregateResult } from '../review/aggregate-result.js';
|
|
@@ -22,16 +25,24 @@ const exec = promisify(execFile);
|
|
|
22
25
|
export async function executeReviewedLifecycle(task, resolved, config, taskIndex, onProgress, heartbeatWiring, diagnostics) {
|
|
23
26
|
const reviewPolicy = task.reviewPolicy ?? 'full';
|
|
24
27
|
const otherSlot = resolved.slot === 'standard' ? 'complex' : 'standard';
|
|
25
|
-
// Partition filePaths into output targets before the worker runs.
|
|
26
|
-
// Output targets are paths that do not yet exist on disk.
|
|
27
|
-
const { outputTargets } = partitionFilePaths(task.filePaths, task.cwd ?? process.cwd());
|
|
28
28
|
let escalationProvider;
|
|
29
29
|
try {
|
|
30
30
|
escalationProvider = createProvider(otherSlot, config);
|
|
31
31
|
}
|
|
32
32
|
catch {
|
|
33
|
-
|
|
33
|
+
escalationProvider = undefined;
|
|
34
|
+
}
|
|
35
|
+
const providers = {
|
|
36
|
+
[resolved.slot]: resolved.provider,
|
|
37
|
+
};
|
|
38
|
+
if (escalationProvider)
|
|
39
|
+
providers[otherSlot] = escalationProvider;
|
|
40
|
+
function providerFor(tier) {
|
|
41
|
+
return providers[tier];
|
|
34
42
|
}
|
|
43
|
+
// Partition filePaths into output targets before the worker runs.
|
|
44
|
+
// Output targets are paths that do not yet exist on disk.
|
|
45
|
+
const { outputTargets } = partitionFilePaths(task.filePaths, task.cwd ?? process.cwd());
|
|
35
46
|
const stageCount = reviewPolicy === 'off' ? 1 :
|
|
36
47
|
reviewPolicy === 'spec_only' ? 3 :
|
|
37
48
|
5;
|
|
@@ -42,6 +53,20 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
42
53
|
: undefined;
|
|
43
54
|
const verboseBatchIdEarly = heartbeatWiring?.batchId;
|
|
44
55
|
const shortBatchEarly = verboseBatchIdEarly ? verboseBatchIdEarly.slice(0, 8) : '????????';
|
|
56
|
+
const taskEventLogger = diagnostics?.logger;
|
|
57
|
+
const emitTaskEvent = (event, fields) => {
|
|
58
|
+
if (taskEventLogger && verboseBatchIdEarly !== undefined) {
|
|
59
|
+
const cleaned = {};
|
|
60
|
+
for (const [key, value] of Object.entries(fields)) {
|
|
61
|
+
if (value !== undefined)
|
|
62
|
+
cleaned[key] = value;
|
|
63
|
+
}
|
|
64
|
+
taskEventLogger.emit({ event, batchId: verboseBatchIdEarly, taskIndex, ...cleaned });
|
|
65
|
+
}
|
|
66
|
+
if (verboseStreamRaw) {
|
|
67
|
+
verboseStreamRaw(composeVerboseLine({ event, ts: new Date().toISOString(), batch: shortBatchEarly, task: taskIndex, ...fields }));
|
|
68
|
+
}
|
|
69
|
+
};
|
|
45
70
|
// Start the heartbeat whenever there's a downstream consumer:
|
|
46
71
|
// - onProgress (external progress callback from the runTasks caller)
|
|
47
72
|
// - verbose (stderr stream needs the heartbeat's tool_call / turn_complete relay)
|
|
@@ -58,41 +83,30 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
58
83
|
const synthOnProgress = onProgress ?? (() => { });
|
|
59
84
|
const heartbeat = needHeartbeat
|
|
60
85
|
? new HeartbeatTimer((event) => {
|
|
61
|
-
if (
|
|
86
|
+
if (event.kind === 'heartbeat') {
|
|
62
87
|
// Emit on every heartbeat tick so the operator can confirm
|
|
63
88
|
// the timer is actually firing. Stage-change lines are richer
|
|
64
89
|
// but fire only on transitions; plain ticks let you see
|
|
65
90
|
// per-5s progress inside a long-running stage.
|
|
66
91
|
if (event.stage !== lastStageSeen) {
|
|
67
92
|
if (lastStageSeen !== undefined) {
|
|
68
|
-
|
|
69
|
-
event: 'stage_change',
|
|
70
|
-
ts: new Date().toISOString(),
|
|
71
|
-
batch: shortBatchEarly,
|
|
72
|
-
task: taskIndex,
|
|
73
|
-
from: lastStageSeen,
|
|
74
|
-
to: event.stage,
|
|
75
|
-
}));
|
|
93
|
+
emitTaskEvent('stage_change', { from: lastStageSeen, to: event.stage });
|
|
76
94
|
}
|
|
77
95
|
lastStageSeen = event.stage;
|
|
78
96
|
}
|
|
79
97
|
const sinceLastMs = Date.now() - prevEventAtMs;
|
|
80
|
-
|
|
81
|
-
event: 'heartbeat',
|
|
82
|
-
ts: new Date().toISOString(),
|
|
83
|
-
batch: shortBatchEarly,
|
|
84
|
-
task: taskIndex,
|
|
98
|
+
emitTaskEvent('heartbeat', {
|
|
85
99
|
elapsed: event.elapsed,
|
|
86
100
|
stage: event.stage,
|
|
87
101
|
round: event.reviewRound,
|
|
88
|
-
cap: event.
|
|
102
|
+
cap: event.attemptCap,
|
|
89
103
|
tools: event.progress.toolCalls,
|
|
90
104
|
read: event.progress.filesRead,
|
|
91
105
|
wrote: event.progress.filesWritten,
|
|
92
106
|
text: textEmissionChars,
|
|
93
107
|
cost: event.costUSD,
|
|
94
108
|
idle_ms: sinceLastMs,
|
|
95
|
-
})
|
|
109
|
+
});
|
|
96
110
|
}
|
|
97
111
|
synthOnProgress(taskIndex, event);
|
|
98
112
|
}, {
|
|
@@ -103,33 +117,16 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
103
117
|
})
|
|
104
118
|
: undefined;
|
|
105
119
|
heartbeat?.start(stageCount);
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
state: heartbeat ? 'started' : 'disabled',
|
|
113
|
-
stage_count: stageCount,
|
|
114
|
-
tick_ms: heartbeat ? 5000 : undefined,
|
|
115
|
-
reason: heartbeat ? undefined : 'no_consumer',
|
|
116
|
-
}));
|
|
117
|
-
}
|
|
120
|
+
emitTaskEvent('heartbeat_timer', {
|
|
121
|
+
state: heartbeat ? 'started' : 'disabled',
|
|
122
|
+
stage_count: stageCount,
|
|
123
|
+
tick_ms: heartbeat ? 5000 : undefined,
|
|
124
|
+
reason: heartbeat ? undefined : 'no_consumer',
|
|
125
|
+
});
|
|
118
126
|
const implModel = resolved.provider.config.model;
|
|
119
127
|
const progressCounters = { filesRead: 0, filesWritten: 0, toolCalls: 0 };
|
|
120
|
-
const verboseLogger = verbose && diagnostics?.logger ? diagnostics.logger : undefined;
|
|
121
|
-
const verboseBatchId = verboseBatchIdEarly;
|
|
122
128
|
const verboseStream = verboseStreamRaw;
|
|
123
|
-
|
|
124
|
-
if (verboseStream) {
|
|
125
|
-
verboseStream(composeVerboseLine({
|
|
126
|
-
event: 'worker_start',
|
|
127
|
-
ts: new Date().toISOString(),
|
|
128
|
-
batch: shortBatch,
|
|
129
|
-
task: taskIndex,
|
|
130
|
-
worker: resolved.provider.config.model,
|
|
131
|
-
}));
|
|
132
|
-
}
|
|
129
|
+
emitTaskEvent('worker_start', { worker: resolved.provider.config.model });
|
|
133
130
|
let prevEventAtMs = verbose ? Date.now() : 0;
|
|
134
131
|
// Wrap whenever we have ANY consumer for InternalRunnerEvent (heartbeat,
|
|
135
132
|
// verbose stream, or verbose logger). Previously this only wrapped when
|
|
@@ -142,33 +139,25 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
142
139
|
heartbeat?.markEvent('llm');
|
|
143
140
|
if (verbose)
|
|
144
141
|
prevEventAtMs = Date.now();
|
|
145
|
-
if (
|
|
146
|
-
|
|
147
|
-
event: 'turn_start',
|
|
148
|
-
ts: new Date().toISOString(),
|
|
149
|
-
batch: shortBatch,
|
|
150
|
-
task: taskIndex,
|
|
142
|
+
if (verbose) {
|
|
143
|
+
emitTaskEvent('turn_start', {
|
|
151
144
|
turn: event.turn,
|
|
152
145
|
provider: event.provider,
|
|
153
|
-
})
|
|
146
|
+
});
|
|
154
147
|
}
|
|
155
148
|
}
|
|
156
149
|
if (event.kind === 'text_emission') {
|
|
157
150
|
heartbeat?.markEvent('text');
|
|
158
151
|
textEmissionChars += event.chars;
|
|
159
|
-
if (
|
|
152
|
+
if (verbose && event.chars > 0) {
|
|
160
153
|
const preview = event.preview.length > 60
|
|
161
154
|
? event.preview.slice(0, 57) + '...'
|
|
162
155
|
: event.preview;
|
|
163
|
-
|
|
164
|
-
event: 'text_emission',
|
|
165
|
-
ts: new Date().toISOString(),
|
|
166
|
-
batch: shortBatch,
|
|
167
|
-
task: taskIndex,
|
|
156
|
+
emitTaskEvent('text_emission', {
|
|
168
157
|
chars: event.chars,
|
|
169
158
|
total: textEmissionChars,
|
|
170
159
|
preview,
|
|
171
|
-
})
|
|
160
|
+
});
|
|
172
161
|
}
|
|
173
162
|
}
|
|
174
163
|
if (event.kind === 'tool_call') {
|
|
@@ -186,23 +175,11 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
186
175
|
const sincePrevMs = verbose ? now - prevEventAtMs : 0;
|
|
187
176
|
if (verbose)
|
|
188
177
|
prevEventAtMs = now;
|
|
189
|
-
if (
|
|
190
|
-
|
|
191
|
-
batchId: verboseBatchId,
|
|
192
|
-
taskIndex,
|
|
193
|
-
tool: event.toolSummary,
|
|
194
|
-
durationMs: sincePrevMs,
|
|
195
|
-
});
|
|
196
|
-
}
|
|
197
|
-
if (verboseStream) {
|
|
198
|
-
verboseStream(composeVerboseLine({
|
|
199
|
-
event: 'tool_call',
|
|
200
|
-
ts: new Date().toISOString(),
|
|
201
|
-
batch: shortBatch,
|
|
202
|
-
task: taskIndex,
|
|
178
|
+
if (verbose) {
|
|
179
|
+
emitTaskEvent('tool_call', {
|
|
203
180
|
tool: event.toolSummary,
|
|
204
181
|
duration_ms: sincePrevMs,
|
|
205
|
-
})
|
|
182
|
+
});
|
|
206
183
|
}
|
|
207
184
|
}
|
|
208
185
|
if (event.kind === 'turn_complete') {
|
|
@@ -214,29 +191,14 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
214
191
|
const turnDurMs = verbose ? nowTurn - prevEventAtMs : 0;
|
|
215
192
|
if (verbose)
|
|
216
193
|
prevEventAtMs = nowTurn;
|
|
217
|
-
if (
|
|
218
|
-
|
|
219
|
-
batchId: verboseBatchId,
|
|
220
|
-
taskIndex,
|
|
221
|
-
turnIndex: progressCounters.toolCalls,
|
|
222
|
-
provider: resolved.provider.config.model,
|
|
223
|
-
inputTokens: event.cumulativeInputTokens,
|
|
224
|
-
outputTokens: event.cumulativeOutputTokens,
|
|
225
|
-
costUSD,
|
|
226
|
-
});
|
|
227
|
-
}
|
|
228
|
-
if (verboseStream) {
|
|
229
|
-
verboseStream(composeVerboseLine({
|
|
230
|
-
event: 'turn_complete',
|
|
231
|
-
ts: new Date().toISOString(),
|
|
232
|
-
batch: shortBatch,
|
|
233
|
-
task: taskIndex,
|
|
194
|
+
if (verbose) {
|
|
195
|
+
emitTaskEvent('turn_complete', {
|
|
234
196
|
input_tokens: event.cumulativeInputTokens,
|
|
235
197
|
output_tokens: event.cumulativeOutputTokens,
|
|
236
198
|
cost: costUSD,
|
|
237
199
|
duration_ms: turnDurMs,
|
|
238
200
|
provider: resolved.provider.config.model,
|
|
239
|
-
})
|
|
201
|
+
});
|
|
240
202
|
}
|
|
241
203
|
}
|
|
242
204
|
}
|
|
@@ -245,17 +207,76 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
245
207
|
const taskStartMs = Date.now();
|
|
246
208
|
const commits = [];
|
|
247
209
|
let commitError;
|
|
248
|
-
let
|
|
249
|
-
let
|
|
210
|
+
let specAttemptIndex = 0;
|
|
211
|
+
let qualityAttemptIndex = 0;
|
|
212
|
+
const maxSpecRows = maxRowsFor('spec');
|
|
213
|
+
const maxQualityRows = maxRowsFor('quality');
|
|
214
|
+
const specUnavailable = new Map();
|
|
215
|
+
let qualityUnavailable = new Map();
|
|
250
216
|
let metadataRepair = 0;
|
|
251
|
-
const maxReviewRounds = task.maxReviewRounds ?? 3;
|
|
252
217
|
const maxCostUSD = task.maxCostUSD;
|
|
253
|
-
const
|
|
218
|
+
const implementerHistory = [];
|
|
219
|
+
const specReviewerHistory = [];
|
|
220
|
+
const qualityReviewerHistory = [];
|
|
221
|
+
const fallbackOverrides = [];
|
|
222
|
+
let latestAttemptedImpl;
|
|
223
|
+
let lastNonRejectedImpl;
|
|
224
|
+
const reviewRounds = () => ({ spec: specAttemptIndex, quality: qualityAttemptIndex, metadata: metadataRepair, cap: Math.max(maxSpecRows, maxQualityRows) });
|
|
254
225
|
const taskCostUSD = () => (heartbeat ? heartbeat.getHeartbeatTickInfo().costUSD : null);
|
|
226
|
+
const policyEscalated = { spec: false, quality: false, diff: false };
|
|
227
|
+
const emitFallback = (p) => {
|
|
228
|
+
diagnostics?.logger?.fallback(p);
|
|
229
|
+
emitTaskEvent('fallback', p);
|
|
230
|
+
};
|
|
231
|
+
const emitFallbackUnavailable = (p) => {
|
|
232
|
+
diagnostics?.logger?.fallbackUnavailable(p);
|
|
233
|
+
emitTaskEvent('fallback_unavailable', p);
|
|
234
|
+
};
|
|
235
|
+
const emitEscalationEvent = (loop, attempt, decision) => {
|
|
236
|
+
const p = {
|
|
237
|
+
batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop, attempt,
|
|
238
|
+
baseTier: resolved.slot, implTier: decision.impl, reviewerTier: decision.reviewer,
|
|
239
|
+
};
|
|
240
|
+
diagnostics?.logger?.escalation(p);
|
|
241
|
+
emitTaskEvent('escalation', p);
|
|
242
|
+
policyEscalated[loop] = true;
|
|
243
|
+
};
|
|
244
|
+
const emitEscalationUnavailable = (p) => {
|
|
245
|
+
diagnostics?.logger?.escalationUnavailable(p);
|
|
246
|
+
emitTaskEvent('escalation_unavailable', p);
|
|
247
|
+
};
|
|
255
248
|
// When the review loop aborts mid-flight, preserve any review-status info already set
|
|
256
249
|
// on the base result (set by callers via abortReviewLoop({ ...res, specReviewStatus, ... })).
|
|
257
250
|
// Defaults to 'changes_required' for whichever loop tripped — that's the only state the
|
|
258
251
|
// loop ever fires from, by construction.
|
|
252
|
+
function adaptForAllTiersUnavailable(base, loop, attempt) {
|
|
253
|
+
const ship = lastNonRejectedImpl?.result ?? base;
|
|
254
|
+
return {
|
|
255
|
+
...ship,
|
|
256
|
+
status: 'incomplete',
|
|
257
|
+
workerStatus: 'blocked',
|
|
258
|
+
terminationReason: 'all_tiers_unavailable',
|
|
259
|
+
reviewRounds: reviewRounds(),
|
|
260
|
+
error: `runWithFallback: both tiers unavailable (loop=${loop}, attempt=${attempt}, role=implementer)`,
|
|
261
|
+
agents: agentEnvelope(specReviewerHistory[specReviewerHistory.length - 1] ?? 'not_applicable', qualityReviewerHistory[qualityReviewerHistory.length - 1] ?? (reviewPolicy === 'full' ? 'not_applicable' : 'skipped')),
|
|
262
|
+
};
|
|
263
|
+
}
|
|
264
|
+
function reviewDidNotReject(status) {
|
|
265
|
+
return status === 'approved' || status === 'skipped';
|
|
266
|
+
}
|
|
267
|
+
const agentEnvelope = (specReviewer, qualityReviewer) => {
|
|
268
|
+
const selectedImpl = latestAttemptedImpl ?? lastNonRejectedImpl;
|
|
269
|
+
const implementer = selectedImpl?.tier ?? resolved.slot;
|
|
270
|
+
return {
|
|
271
|
+
implementer,
|
|
272
|
+
...(implementerHistory.length > 1 || implementerHistory.some(t => t !== implementer) ? { implementerHistory } : {}),
|
|
273
|
+
specReviewer,
|
|
274
|
+
...(specReviewerHistory.length > 0 && (specReviewerHistory.length > 1 || specReviewerHistory.some(t => t === 'skipped')) ? { specReviewerHistory } : {}),
|
|
275
|
+
qualityReviewer,
|
|
276
|
+
...(qualityReviewerHistory.length > 0 && (qualityReviewerHistory.length > 1 || qualityReviewerHistory.some(t => t === 'skipped')) ? { qualityReviewerHistory } : {}),
|
|
277
|
+
...(fallbackOverrides.length > 0 ? { fallbackOverrides } : {}),
|
|
278
|
+
};
|
|
279
|
+
};
|
|
259
280
|
const abortReviewLoop = (base, terminationReason, message, aborting) => ({
|
|
260
281
|
...base,
|
|
261
282
|
status: 'incomplete',
|
|
@@ -265,27 +286,16 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
265
286
|
error: message,
|
|
266
287
|
specReviewStatus: aborting === 'spec' ? 'changes_required' : (base.specReviewStatus ?? 'approved'),
|
|
267
288
|
qualityReviewStatus: aborting === 'quality' ? 'changes_required' : (base.qualityReviewStatus ?? 'skipped'),
|
|
289
|
+
agents: agentEnvelope(specReviewerHistory[specReviewerHistory.length - 1] ?? 'not_applicable', qualityReviewerHistory[qualityReviewerHistory.length - 1] ?? (reviewPolicy === 'full' ? 'not_applicable' : 'skipped')),
|
|
268
290
|
});
|
|
269
291
|
const defaultVerification = { status: 'skipped', steps: [], totalDurationMs: 0, skipReason: 'no_command' };
|
|
270
292
|
let latestVerification = defaultVerification;
|
|
271
|
-
const emitVerbose = (event, fields) => {
|
|
272
|
-
if (!verboseStream)
|
|
273
|
-
return;
|
|
274
|
-
verboseStream(composeVerboseLine({
|
|
275
|
-
event,
|
|
276
|
-
ts: new Date().toISOString(),
|
|
277
|
-
batch: shortBatch,
|
|
278
|
-
task: taskIndex,
|
|
279
|
-
...fields,
|
|
280
|
-
}));
|
|
281
|
-
};
|
|
282
293
|
async function runVerificationStage() {
|
|
283
|
-
|
|
294
|
+
emitTaskEvent('stage_change', { from: 'committing', to: 'verifying' });
|
|
284
295
|
heartbeat?.transition({
|
|
285
296
|
stage: 'verifying',
|
|
286
297
|
stageIndex: 4,
|
|
287
298
|
reviewRound: undefined,
|
|
288
|
-
maxReviewRounds: task.maxReviewRounds ?? 5,
|
|
289
299
|
});
|
|
290
300
|
const verification = await runVerifyStage({
|
|
291
301
|
cwd,
|
|
@@ -295,7 +305,7 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
295
305
|
});
|
|
296
306
|
latestVerification = verification;
|
|
297
307
|
for (const step of verification.steps) {
|
|
298
|
-
|
|
308
|
+
emitTaskEvent('verify_step', {
|
|
299
309
|
command: step.command,
|
|
300
310
|
status: step.status,
|
|
301
311
|
exit_code: step.exitCode,
|
|
@@ -305,12 +315,46 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
305
315
|
});
|
|
306
316
|
}
|
|
307
317
|
if (verification.status === 'skipped') {
|
|
308
|
-
|
|
318
|
+
emitTaskEvent('verify_skipped', { reason: verification.skipReason ?? 'no_command', stage: 'verifying' });
|
|
309
319
|
}
|
|
310
320
|
return verification;
|
|
311
321
|
}
|
|
322
|
+
function signalize(result) {
|
|
323
|
+
const cause = typeof result.terminationReason === 'object' ? result.terminationReason.cause : result.terminationReason;
|
|
324
|
+
const capExhausted = result.capExhausted
|
|
325
|
+
?? (result.status === 'cost_exceeded' || cause === 'cost_exceeded' || cause === 'cost_ceiling' ? 'cost'
|
|
326
|
+
: result.status === 'timeout' || cause === 'timeout' ? 'wall_clock'
|
|
327
|
+
: result.status === 'incomplete' && result.turns > 1 ? 'turn'
|
|
328
|
+
: undefined);
|
|
329
|
+
const lifecycleClarificationRequested = result.lifecycleClarificationRequested
|
|
330
|
+
?? (result.status === 'brief_too_vague' || cause === 'brief_too_vague' ? true : undefined);
|
|
331
|
+
return {
|
|
332
|
+
...result,
|
|
333
|
+
...(capExhausted !== undefined && { capExhausted }),
|
|
334
|
+
...(lifecycleClarificationRequested !== undefined && { lifecycleClarificationRequested }),
|
|
335
|
+
};
|
|
336
|
+
}
|
|
337
|
+
function workerErrorResult(err) {
|
|
338
|
+
const workerError = err instanceof Error ? err : new Error(String(err));
|
|
339
|
+
return signalize({
|
|
340
|
+
output: '',
|
|
341
|
+
status: 'error',
|
|
342
|
+
usage: { inputTokens: 0, outputTokens: 0, totalTokens: 0, costUSD: null },
|
|
343
|
+
turns: 0,
|
|
344
|
+
filesRead: [],
|
|
345
|
+
filesWritten: [],
|
|
346
|
+
toolCalls: [],
|
|
347
|
+
outputIsDiagnostic: true,
|
|
348
|
+
escalationLog: [],
|
|
349
|
+
error: workerError.message,
|
|
350
|
+
errorCode: 'runner_crash',
|
|
351
|
+
structuredError: { code: 'runner_crash', message: workerError.message },
|
|
352
|
+
workerStatus: 'failed',
|
|
353
|
+
workerError,
|
|
354
|
+
});
|
|
355
|
+
}
|
|
312
356
|
function withVerification(result, verification = latestVerification) {
|
|
313
|
-
return { ...result, verification };
|
|
357
|
+
return signalize({ ...result, verification });
|
|
314
358
|
}
|
|
315
359
|
function verificationErrorResult(base, verification) {
|
|
316
360
|
if (verification.status !== 'error')
|
|
@@ -365,6 +409,15 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
365
409
|
}
|
|
366
410
|
function resolveDiffOnlyTerminal(base, verdict, verification, diffTruncated) {
|
|
367
411
|
const concerns = [...(base.concerns ?? [])];
|
|
412
|
+
if ('status' in verdict && verdict.status === 'skipped') {
|
|
413
|
+
return withVerification({
|
|
414
|
+
...base,
|
|
415
|
+
workerStatus: workerStatusForTerminal(base.workerStatus),
|
|
416
|
+
commits,
|
|
417
|
+
commitError,
|
|
418
|
+
verification,
|
|
419
|
+
}, verification);
|
|
420
|
+
}
|
|
368
421
|
if (verdict.kind === 'reject') {
|
|
369
422
|
return withVerification({
|
|
370
423
|
...base,
|
|
@@ -382,6 +435,18 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
382
435
|
verification,
|
|
383
436
|
}, verification);
|
|
384
437
|
}
|
|
438
|
+
if (verdict.kind === 'transport_failure') {
|
|
439
|
+
return withVerification({
|
|
440
|
+
...base,
|
|
441
|
+
status: verdict.status,
|
|
442
|
+
workerStatus: 'failed',
|
|
443
|
+
error: verdict.reason ?? `diff review transport failure: ${verdict.status}`,
|
|
444
|
+
concerns: [...concerns, ...verdict.concerns],
|
|
445
|
+
commits,
|
|
446
|
+
commitError,
|
|
447
|
+
verification,
|
|
448
|
+
}, verification);
|
|
449
|
+
}
|
|
385
450
|
concerns.push(...verdict.concerns);
|
|
386
451
|
if (verification.status === 'failed') {
|
|
387
452
|
concerns.push({
|
|
@@ -487,7 +552,57 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
487
552
|
});
|
|
488
553
|
}
|
|
489
554
|
}
|
|
490
|
-
const
|
|
555
|
+
const initialDecision = pickEscalation({
|
|
556
|
+
loop: 'spec',
|
|
557
|
+
attemptIndex: 0,
|
|
558
|
+
baseTier: resolved.slot,
|
|
559
|
+
});
|
|
560
|
+
const initialImpl = await runWithFallback({
|
|
561
|
+
assigned: initialDecision.impl,
|
|
562
|
+
providerFor,
|
|
563
|
+
unavailableTiers: specUnavailable,
|
|
564
|
+
isTransportFailure: (r) => TRANSPORT_FAILURES.has(r.status) && r.capExhausted === undefined,
|
|
565
|
+
getStatus: (r) => r.status,
|
|
566
|
+
makeSyntheticFailure: (assigned) => makeSyntheticRunResult(assigned, 'all_tiers_unavailable'),
|
|
567
|
+
call: (provider) => delegateWithEscalation(withDoneCondition(task), [provider], { explicitlyPinned: false, onProgress: wrappedOnProgress }),
|
|
568
|
+
});
|
|
569
|
+
if (initialImpl.fallbackFired || initialImpl.bothUnavailable) {
|
|
570
|
+
fallbackOverrides.push({
|
|
571
|
+
role: 'implementer',
|
|
572
|
+
loop: 'spec',
|
|
573
|
+
attempt: 0,
|
|
574
|
+
assigned: initialDecision.impl,
|
|
575
|
+
used: initialImpl.usedTier,
|
|
576
|
+
reason: (initialImpl.fallbackReason ?? initialImpl.unavailableReason),
|
|
577
|
+
triggeringStatus: initialImpl.fallbackTriggeringStatus,
|
|
578
|
+
bothUnavailable: initialImpl.bothUnavailable,
|
|
579
|
+
});
|
|
580
|
+
}
|
|
581
|
+
if (initialImpl.fallbackFired) {
|
|
582
|
+
emitFallback({
|
|
583
|
+
batchId: heartbeatWiring?.batchId ?? '', taskIndex,
|
|
584
|
+
loop: 'spec', attempt: 0, role: 'implementer',
|
|
585
|
+
assignedTier: initialDecision.impl,
|
|
586
|
+
usedTier: initialImpl.usedTier,
|
|
587
|
+
reason: initialImpl.fallbackReason,
|
|
588
|
+
triggeringStatus: initialImpl.fallbackTriggeringStatus,
|
|
589
|
+
violatesSeparation: false,
|
|
590
|
+
});
|
|
591
|
+
}
|
|
592
|
+
if (initialImpl.bothUnavailable) {
|
|
593
|
+
emitFallbackUnavailable({
|
|
594
|
+
batchId: heartbeatWiring?.batchId ?? '', taskIndex,
|
|
595
|
+
loop: 'spec', attempt: 0, role: 'implementer',
|
|
596
|
+
assignedTier: initialDecision.impl,
|
|
597
|
+
reason: initialImpl.unavailableReason,
|
|
598
|
+
});
|
|
599
|
+
return adaptForAllTiersUnavailable(initialImpl.result, 'spec', 0);
|
|
600
|
+
}
|
|
601
|
+
const implResult = initialImpl.result;
|
|
602
|
+
latestAttemptedImpl = { tier: initialImpl.usedTier, result: implResult };
|
|
603
|
+
lastNonRejectedImpl = { tier: initialImpl.usedTier, result: implResult };
|
|
604
|
+
implementerHistory.push(initialImpl.usedTier);
|
|
605
|
+
specAttemptIndex = 1;
|
|
491
606
|
const implReport = implResult.status === 'ok' ? parseStructuredReport(implResult.output) : undefined;
|
|
492
607
|
const workerStatus = extractWorkerStatus(implReport);
|
|
493
608
|
if (implResult.status === 'ok' && isArtifactProducing) {
|
|
@@ -503,6 +618,28 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
503
618
|
const filePathsSkipped = !filePathsInteracted;
|
|
504
619
|
if (implResult.filesWritten.length === 0) {
|
|
505
620
|
heartbeat?.updateStageCount(1);
|
|
621
|
+
if (reviewPolicy === 'off') {
|
|
622
|
+
emitTaskEvent('stage_change', { from: 'verifying', to: 'terminal' });
|
|
623
|
+
const terminal = resolveOffTerminal({
|
|
624
|
+
...implResult,
|
|
625
|
+
workerStatus,
|
|
626
|
+
specReviewStatus: 'skipped',
|
|
627
|
+
qualityReviewStatus: 'skipped',
|
|
628
|
+
specReviewReason: 'skipped: reviewPolicy is off',
|
|
629
|
+
qualityReviewReason: 'skipped: reviewPolicy is off',
|
|
630
|
+
agents: agentEnvelope('skipped', 'skipped'),
|
|
631
|
+
models: {
|
|
632
|
+
implementer: implModel,
|
|
633
|
+
specReviewer: null,
|
|
634
|
+
qualityReviewer: null,
|
|
635
|
+
},
|
|
636
|
+
implementationReport: implReport,
|
|
637
|
+
structuredReport: implReport,
|
|
638
|
+
filePathsSkipped,
|
|
639
|
+
fileArtifactsMissing: implResult.status === 'ok' ? checkOutputTargets(outputTargets) : undefined,
|
|
640
|
+
}, verification);
|
|
641
|
+
return terminal;
|
|
642
|
+
}
|
|
506
643
|
const effectiveImplReport = implReport ?? buildFallbackImplReport(implResult);
|
|
507
644
|
const earlyFileArtifactsMissing = implResult.status === 'ok' ? checkOutputTargets(outputTargets) : undefined;
|
|
508
645
|
const earlyStatus = implResult.status === 'ok' && earlyFileArtifactsMissing
|
|
@@ -523,13 +660,10 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
523
660
|
validationsRun: effectiveImplReport.validationsRun,
|
|
524
661
|
deviationsFromBrief: effectiveImplReport.deviationsFromBrief,
|
|
525
662
|
unresolved: effectiveImplReport.unresolved,
|
|
663
|
+
extraSections: effectiveImplReport.extraSections ?? {},
|
|
526
664
|
},
|
|
527
665
|
filePathsSkipped,
|
|
528
|
-
agents:
|
|
529
|
-
implementer: resolved.slot,
|
|
530
|
-
specReviewer: 'not_applicable',
|
|
531
|
-
qualityReviewer: 'not_applicable',
|
|
532
|
-
},
|
|
666
|
+
agents: agentEnvelope('not_applicable', 'not_applicable'),
|
|
533
667
|
models: {
|
|
534
668
|
implementer: implModel,
|
|
535
669
|
specReviewer: null,
|
|
@@ -549,11 +683,7 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
549
683
|
qualityReviewStatus: 'skipped',
|
|
550
684
|
specReviewReason: 'skipped: worker reported ' + workerStatus,
|
|
551
685
|
qualityReviewReason: 'skipped: worker reported ' + workerStatus,
|
|
552
|
-
agents:
|
|
553
|
-
implementer: resolved.slot,
|
|
554
|
-
specReviewer: 'skipped',
|
|
555
|
-
qualityReviewer: 'skipped',
|
|
556
|
-
},
|
|
686
|
+
agents: agentEnvelope('skipped', 'skipped'),
|
|
557
687
|
models: {
|
|
558
688
|
implementer: implModel,
|
|
559
689
|
specReviewer: null,
|
|
@@ -566,7 +696,7 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
566
696
|
};
|
|
567
697
|
}
|
|
568
698
|
if (reviewPolicy === 'off') {
|
|
569
|
-
|
|
699
|
+
emitTaskEvent('stage_change', { from: 'verifying', to: 'terminal' });
|
|
570
700
|
const terminal = resolveOffTerminal({
|
|
571
701
|
...implResult,
|
|
572
702
|
workerStatus,
|
|
@@ -574,11 +704,7 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
574
704
|
qualityReviewStatus: 'skipped',
|
|
575
705
|
specReviewReason: 'skipped: reviewPolicy is off',
|
|
576
706
|
qualityReviewReason: 'skipped: reviewPolicy is off',
|
|
577
|
-
agents:
|
|
578
|
-
implementer: resolved.slot,
|
|
579
|
-
specReviewer: 'skipped',
|
|
580
|
-
qualityReviewer: 'skipped',
|
|
581
|
-
},
|
|
707
|
+
agents: agentEnvelope('skipped', 'skipped'),
|
|
582
708
|
models: {
|
|
583
709
|
implementer: implModel,
|
|
584
710
|
specReviewer: null,
|
|
@@ -589,35 +715,7 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
589
715
|
}, verification);
|
|
590
716
|
return terminal;
|
|
591
717
|
}
|
|
592
|
-
|
|
593
|
-
try {
|
|
594
|
-
otherProvider = createProvider(otherSlot, config);
|
|
595
|
-
}
|
|
596
|
-
catch {
|
|
597
|
-
return {
|
|
598
|
-
...implResult,
|
|
599
|
-
workerStatus,
|
|
600
|
-
specReviewStatus: 'skipped',
|
|
601
|
-
qualityReviewStatus: 'skipped',
|
|
602
|
-
specReviewReason: 'skipped: no review agent configured',
|
|
603
|
-
qualityReviewReason: 'skipped: no review agent configured',
|
|
604
|
-
agents: {
|
|
605
|
-
implementer: resolved.slot,
|
|
606
|
-
specReviewer: 'skipped',
|
|
607
|
-
qualityReviewer: 'skipped',
|
|
608
|
-
},
|
|
609
|
-
models: {
|
|
610
|
-
implementer: implModel,
|
|
611
|
-
specReviewer: null,
|
|
612
|
-
qualityReviewer: null,
|
|
613
|
-
},
|
|
614
|
-
fileArtifactsMissing: implResult.status === 'ok' ? checkOutputTargets(outputTargets) : undefined,
|
|
615
|
-
commits,
|
|
616
|
-
commitError,
|
|
617
|
-
verification,
|
|
618
|
-
};
|
|
619
|
-
}
|
|
620
|
-
const reviewModel = otherProvider.config.model;
|
|
718
|
+
const reviewModel = providerFor(pickReviewer({ loop: 'spec', attemptIndex: 0, baseTier: resolved.slot }))?.config.model ?? null;
|
|
621
719
|
const packet = {
|
|
622
720
|
prompt: task.prompt,
|
|
623
721
|
scope: task.filePaths ?? [],
|
|
@@ -629,21 +727,28 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
629
727
|
? await buildEvidence({ cwd, baselineHead, commits, verification, reviewPolicy })
|
|
630
728
|
: { block: '', diffTruncated: false, fullDiff: '' };
|
|
631
729
|
if (reviewPolicy === 'diff_only') {
|
|
632
|
-
|
|
633
|
-
|
|
634
|
-
|
|
635
|
-
|
|
636
|
-
|
|
637
|
-
|
|
638
|
-
|
|
639
|
-
|
|
640
|
-
|
|
641
|
-
|
|
642
|
-
|
|
643
|
-
verification,
|
|
644
|
-
worker: { call: (prompt) => otherProvider.run(prompt) },
|
|
730
|
+
const diffUnavailable = new Map();
|
|
731
|
+
const diffReviewerTier = pickReviewer({ loop: 'spec', attemptIndex: 0, baseTier: resolved.slot });
|
|
732
|
+
emitTaskEvent('stage_change', { from: 'verifying', to: 'diff_review' });
|
|
733
|
+
heartbeat?.transition({ stage: 'diff_review', stageIndex: 2, reviewRound: 1, attemptCap: 1 });
|
|
734
|
+
const diffCall = await runWithFallback({
|
|
735
|
+
assigned: diffReviewerTier,
|
|
736
|
+
providerFor,
|
|
737
|
+
unavailableTiers: diffUnavailable,
|
|
738
|
+
isTransportFailure: (r) => isReviewTransportFailure(r),
|
|
739
|
+
getStatus: (r) => r.status,
|
|
740
|
+
makeSyntheticFailure: () => makeSkippedReviewResult('all_tiers_unavailable'),
|
|
741
|
+
call: (provider) => runDiffReview({ cwd, diff: evidence.fullDiff, diffTruncated: evidence.diffTruncated, verification, worker: { call: (prompt) => provider.run(prompt) } }),
|
|
645
742
|
});
|
|
646
|
-
|
|
743
|
+
if (diffCall.fallbackFired) {
|
|
744
|
+
emitFallback({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'diff', attempt: 0, role: 'diffReviewer', assignedTier: diffReviewerTier, usedTier: diffCall.usedTier, reason: diffCall.fallbackReason, triggeringStatus: diffCall.fallbackTriggeringStatus, violatesSeparation: diffCall.usedTier === implementerHistory[implementerHistory.length - 1] });
|
|
745
|
+
fallbackOverrides.push({ role: 'diffReviewer', loop: 'diff', attempt: 0, assigned: diffReviewerTier, used: diffCall.usedTier, reason: diffCall.fallbackReason, triggeringStatus: diffCall.fallbackTriggeringStatus, bothUnavailable: diffCall.bothUnavailable });
|
|
746
|
+
}
|
|
747
|
+
if (diffCall.bothUnavailable) {
|
|
748
|
+
emitFallbackUnavailable({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'diff', attempt: 0, role: 'diffReviewer', assignedTier: diffReviewerTier, reason: diffCall.unavailableReason });
|
|
749
|
+
}
|
|
750
|
+
const verdict = diffCall.bothUnavailable || isReviewTransportFailure(diffCall.result) ? makeSkippedReviewResult('all_tiers_unavailable') : diffCall.result;
|
|
751
|
+
emitTaskEvent('review_decision', { stage: 'diff_review', verdict: 'kind' in verdict ? verdict.kind : 'skipped', round: 1 });
|
|
647
752
|
return resolveDiffOnlyTerminal({
|
|
648
753
|
...implResult,
|
|
649
754
|
workerStatus,
|
|
@@ -653,120 +758,188 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
653
758
|
qualityReviewReason: 'skipped: reviewPolicy is diff_only',
|
|
654
759
|
implementationReport: effectiveImplReport,
|
|
655
760
|
fileArtifactsMissing: implResult.status === 'ok' ? checkOutputTargets(outputTargets) : undefined,
|
|
656
|
-
agents:
|
|
657
|
-
|
|
658
|
-
specReviewer: 'skipped',
|
|
659
|
-
qualityReviewer: 'skipped',
|
|
660
|
-
},
|
|
661
|
-
models: {
|
|
662
|
-
implementer: implModel,
|
|
663
|
-
specReviewer: reviewModel,
|
|
664
|
-
qualityReviewer: null,
|
|
665
|
-
},
|
|
761
|
+
agents: agentEnvelope('skipped', 'skipped'),
|
|
762
|
+
models: { implementer: implModel, specReviewer: reviewModel, qualityReviewer: null },
|
|
666
763
|
}, verdict, verification, evidence.diffTruncated);
|
|
667
764
|
}
|
|
668
|
-
heartbeat?.transition({
|
|
669
|
-
stage: 'spec_review', stageIndex: 2,
|
|
670
|
-
reviewRound: 1, maxReviewRounds: task.maxReviewRounds ?? 5,
|
|
671
|
-
});
|
|
672
|
-
let specResult = await runSpecReview(otherProvider, packet, effectiveImplReport, fileContents, implResult.toolCalls, task.planContext, evidence.block);
|
|
673
765
|
let finalImplResult = implResult;
|
|
674
766
|
let finalImplReport = effectiveImplReport;
|
|
675
|
-
let
|
|
676
|
-
let
|
|
677
|
-
|
|
678
|
-
|
|
679
|
-
|
|
680
|
-
|
|
681
|
-
|
|
767
|
+
let specResult;
|
|
768
|
+
let specStatus;
|
|
769
|
+
let specReport;
|
|
770
|
+
let specReviewReason;
|
|
771
|
+
heartbeat?.transition({ stage: 'spec_review', stageIndex: 2, reviewRound: 1, attemptCap: maxSpecRows });
|
|
772
|
+
const initialReviewerTier = pickReviewer({ loop: 'spec', attemptIndex: 0, baseTier: resolved.slot });
|
|
773
|
+
const initialSpecReview = await runWithFallback({
|
|
774
|
+
assigned: initialReviewerTier,
|
|
775
|
+
providerFor,
|
|
776
|
+
unavailableTiers: specUnavailable,
|
|
777
|
+
isTransportFailure: (r) => isReviewTransportFailure(r),
|
|
778
|
+
getStatus: (r) => r.status,
|
|
779
|
+
makeSyntheticFailure: () => makeSkippedReviewResult('all_tiers_unavailable'),
|
|
780
|
+
call: (provider) => runSpecReview(provider, packet, effectiveImplReport, fileContents, implResult.toolCalls, task.planContext, evidence.block),
|
|
781
|
+
});
|
|
782
|
+
if (initialSpecReview.bothUnavailable) {
|
|
783
|
+
emitFallbackUnavailable({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'spec', attempt: 0, role: 'specReviewer', assignedTier: initialReviewerTier, reason: initialSpecReview.unavailableReason });
|
|
784
|
+
fallbackOverrides.push({ role: 'specReviewer', loop: 'spec', attempt: 0, assigned: initialReviewerTier, used: initialSpecReview.usedTier, reason: initialSpecReview.unavailableReason, triggeringStatus: initialSpecReview.fallbackTriggeringStatus, bothUnavailable: true });
|
|
785
|
+
specReviewerHistory.push('skipped');
|
|
786
|
+
}
|
|
787
|
+
else {
|
|
788
|
+
specReviewerHistory.push(initialSpecReview.usedTier);
|
|
789
|
+
if (initialSpecReview.fallbackFired) {
|
|
790
|
+
emitFallback({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'spec', attempt: 0, role: 'specReviewer', assignedTier: initialReviewerTier, usedTier: initialSpecReview.usedTier, reason: initialSpecReview.fallbackReason, triggeringStatus: initialSpecReview.fallbackTriggeringStatus, violatesSeparation: initialSpecReview.usedTier === implementerHistory[implementerHistory.length - 1] });
|
|
791
|
+
fallbackOverrides.push({ role: 'specReviewer', loop: 'spec', attempt: 0, assigned: initialReviewerTier, used: initialSpecReview.usedTier, reason: initialSpecReview.fallbackReason, triggeringStatus: initialSpecReview.fallbackTriggeringStatus, bothUnavailable: false });
|
|
792
|
+
}
|
|
793
|
+
}
|
|
794
|
+
specResult = initialSpecReview.bothUnavailable
|
|
795
|
+
? makeSkippedReviewResult('all_tiers_unavailable')
|
|
796
|
+
: initialSpecReview.result;
|
|
797
|
+
specStatus = specResult.status;
|
|
798
|
+
specReport = 'report' in specResult ? specResult.report : undefined;
|
|
799
|
+
specReviewReason = specStatus === 'skipped' ? 'all_tiers_unavailable' : ('errorReason' in specResult ? specResult.errorReason : undefined);
|
|
800
|
+
let prevSpecFindings = [...(specResult.findings ?? [])];
|
|
801
|
+
while (specStatus === 'changes_required') {
|
|
802
|
+
if (specAttemptIndex >= maxSpecRows)
|
|
803
|
+
return abortReviewLoop(finalImplResult, 'round_cap', 'review round cap reached before spec rework', 'spec');
|
|
804
|
+
const currentCostUSD = taskCostUSD();
|
|
805
|
+
if (currentCostUSD !== null && maxCostUSD !== undefined && currentCostUSD >= 0.8 * maxCostUSD) {
|
|
806
|
+
emitTaskEvent('cost_check', { stage: 'spec_rework', tripped: true, cost_used_usd: currentCostUSD, cost_cap_usd: maxCostUSD, cost_available: true });
|
|
807
|
+
return abortReviewLoop(finalImplResult, 'cost_ceiling', 'cost ceiling reached before spec rework', 'spec');
|
|
808
|
+
}
|
|
809
|
+
const decision = pickEscalation({ loop: 'spec', attemptIndex: specAttemptIndex, baseTier: resolved.slot });
|
|
810
|
+
if (decision.isEscalated)
|
|
811
|
+
emitEscalationEvent('spec', specAttemptIndex, decision);
|
|
812
|
+
emitTaskEvent('stage_change', { from: 'spec_review', to: 'spec_rework', attempt: specAttemptIndex, attemptCap: maxSpecRows, implTier: decision.impl, reviewerTier: decision.reviewer, escalated: decision.isEscalated });
|
|
813
|
+
heartbeat?.transition({ stage: 'spec_rework', stageIndex: 3, reviewRound: specAttemptIndex, attemptCap: maxSpecRows });
|
|
814
|
+
const feedback = specResult.findings.length > 0 ? `\n\n## Spec Review Feedback (round ${specAttemptIndex}):\n${specResult.findings.map(f => `- ${f}`).join('\n')}` : '';
|
|
815
|
+
const reworkTask = withDoneCondition({ ...task, prompt: `${task.prompt}${feedback}` });
|
|
816
|
+
const reworkCall = await runWithFallback({ assigned: decision.impl, providerFor, unavailableTiers: specUnavailable, isTransportFailure: (r) => TRANSPORT_FAILURES.has(r.status) && r.capExhausted === undefined, getStatus: (r) => r.status, makeSyntheticFailure: (assigned) => makeSyntheticRunResult(assigned, 'all_tiers_unavailable'), call: (provider) => delegateWithEscalation(reworkTask, [provider], { explicitlyPinned: true, onProgress: wrappedOnProgress }) });
|
|
817
|
+
if (reworkCall.fallbackFired || reworkCall.bothUnavailable)
|
|
818
|
+
fallbackOverrides.push({ role: 'implementer', loop: 'spec', attempt: specAttemptIndex, assigned: decision.impl, used: reworkCall.usedTier, reason: (reworkCall.fallbackReason ?? reworkCall.unavailableReason), triggeringStatus: reworkCall.fallbackTriggeringStatus, bothUnavailable: reworkCall.bothUnavailable });
|
|
819
|
+
if (reworkCall.fallbackFired) {
|
|
820
|
+
emitFallback({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'spec', attempt: specAttemptIndex, role: 'implementer', assignedTier: decision.impl, usedTier: reworkCall.usedTier, reason: reworkCall.fallbackReason, triggeringStatus: reworkCall.fallbackTriggeringStatus, violatesSeparation: false });
|
|
821
|
+
if (decision.isEscalated && reworkCall.fallbackReason === 'not_configured')
|
|
822
|
+
emitEscalationUnavailable({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'spec', attempt: specAttemptIndex, role: 'implementer', wantedTier: decision.impl, reason: reworkCall.fallbackReason });
|
|
823
|
+
}
|
|
824
|
+
if (reworkCall.bothUnavailable) {
|
|
825
|
+
emitFallbackUnavailable({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'spec', attempt: specAttemptIndex, role: 'implementer', assignedTier: decision.impl, reason: reworkCall.unavailableReason });
|
|
826
|
+
if (decision.isEscalated)
|
|
827
|
+
emitEscalationUnavailable({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'spec', attempt: specAttemptIndex, role: 'implementer', wantedTier: decision.impl, reason: reworkCall.unavailableReason });
|
|
828
|
+
return adaptForAllTiersUnavailable(reworkCall.result, 'spec', specAttemptIndex);
|
|
829
|
+
}
|
|
830
|
+
finalImplResult = reworkCall.result;
|
|
831
|
+
latestAttemptedImpl = { tier: reworkCall.usedTier, result: finalImplResult };
|
|
832
|
+
implementerHistory.push(reworkCall.usedTier);
|
|
833
|
+
const reworkReport = parseStructuredReport(finalImplResult.output);
|
|
834
|
+
finalImplReport = reworkReport.summary ? reworkReport : buildFallbackImplReport(finalImplResult);
|
|
835
|
+
fileContents = await readImplementerFileContents(finalImplResult.filesWritten, task.cwd);
|
|
836
|
+
heartbeat?.transition({ stage: 'spec_review', stageIndex: 2, reviewRound: specAttemptIndex + 1, attemptCap: maxSpecRows });
|
|
837
|
+
const reviewCall = await runWithFallback({ assigned: decision.reviewer, providerFor, unavailableTiers: specUnavailable, isTransportFailure: (r) => isReviewTransportFailure(r), getStatus: (r) => r.status, makeSyntheticFailure: () => makeSkippedReviewResult('all_tiers_unavailable'), call: (provider) => runSpecReview(provider, packet, finalImplReport, fileContents, finalImplResult.toolCalls, task.planContext, evidence.block) });
|
|
838
|
+
if (reviewCall.bothUnavailable) {
|
|
839
|
+
emitFallbackUnavailable({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'spec', attempt: specAttemptIndex, role: 'specReviewer', assignedTier: decision.reviewer, reason: reviewCall.unavailableReason });
|
|
840
|
+
fallbackOverrides.push({ role: 'specReviewer', loop: 'spec', attempt: specAttemptIndex, assigned: decision.reviewer, used: reviewCall.usedTier, reason: reviewCall.unavailableReason, triggeringStatus: reviewCall.fallbackTriggeringStatus, bothUnavailable: true });
|
|
841
|
+
specReviewerHistory.push('skipped');
|
|
842
|
+
}
|
|
843
|
+
else {
|
|
844
|
+
specReviewerHistory.push(reviewCall.usedTier);
|
|
845
|
+
if (reviewCall.fallbackFired) {
|
|
846
|
+
emitFallback({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'spec', attempt: specAttemptIndex, role: 'specReviewer', assignedTier: decision.reviewer, usedTier: reviewCall.usedTier, reason: reviewCall.fallbackReason, triggeringStatus: reviewCall.fallbackTriggeringStatus, violatesSeparation: reviewCall.usedTier === implementerHistory[implementerHistory.length - 1] });
|
|
847
|
+
fallbackOverrides.push({ role: 'specReviewer', loop: 'spec', attempt: specAttemptIndex, assigned: decision.reviewer, used: reviewCall.usedTier, reason: reviewCall.fallbackReason, triggeringStatus: reviewCall.fallbackTriggeringStatus, bothUnavailable: false });
|
|
848
|
+
}
|
|
849
|
+
}
|
|
850
|
+
specResult = reviewCall.result;
|
|
851
|
+
specStatus = specResult.status;
|
|
852
|
+
specReport = 'report' in specResult ? specResult.report : undefined;
|
|
853
|
+
specReviewReason = specStatus === 'skipped' ? 'all_tiers_unavailable' : ('errorReason' in specResult ? specResult.errorReason : undefined);
|
|
854
|
+
if (reviewDidNotReject(specStatus))
|
|
855
|
+
lastNonRejectedImpl = { tier: implementerHistory[implementerHistory.length - 1], result: finalImplResult };
|
|
856
|
+
specAttemptIndex++;
|
|
857
|
+
if (specStatus === 'approved' || specStatus === 'skipped')
|
|
858
|
+
break;
|
|
859
|
+
const currentFindings = [...(specResult.findings ?? [])].sort().join('\0');
|
|
860
|
+
const prevFindings = [...prevSpecFindings].sort().join('\0');
|
|
861
|
+
if (currentFindings === prevFindings && currentFindings !== '')
|
|
862
|
+
break;
|
|
863
|
+
prevSpecFindings = [...(specResult.findings ?? [])];
|
|
864
|
+
}
|
|
865
|
+
let qualityResult = { status: 'skipped', report: undefined, findings: [], errorReason: reviewPolicy === 'full' ? 'all_tiers_unavailable' : 'skipped: reviewPolicy is spec_only' };
|
|
866
|
+
if (reviewPolicy === 'full') {
|
|
867
|
+
qualityUnavailable = new Map();
|
|
868
|
+
const qualityReviewerTier = pickReviewer({ loop: 'quality', attemptIndex: 0, baseTier: resolved.slot });
|
|
869
|
+
heartbeat?.transition({ stage: 'quality_review', stageIndex: 4, reviewRound: 1, attemptCap: maxQualityRows });
|
|
870
|
+
const initialQuality = await runWithFallback({ assigned: qualityReviewerTier, providerFor, unavailableTiers: qualityUnavailable, isTransportFailure: (r) => isReviewTransportFailure(r), getStatus: (r) => r.status, makeSyntheticFailure: () => makeSkippedReviewResult('all_tiers_unavailable'), call: (provider) => runQualityReview(provider, packet, specReport ?? finalImplReport, fileContents, finalImplResult.toolCalls, finalImplResult.filesWritten, evidence.block) });
|
|
871
|
+
if (initialQuality.bothUnavailable) {
|
|
872
|
+
emitFallbackUnavailable({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'quality', attempt: 0, role: 'qualityReviewer', assignedTier: qualityReviewerTier, reason: initialQuality.unavailableReason });
|
|
873
|
+
fallbackOverrides.push({ role: 'qualityReviewer', loop: 'quality', attempt: 0, assigned: qualityReviewerTier, used: initialQuality.usedTier, reason: initialQuality.unavailableReason, triggeringStatus: initialQuality.fallbackTriggeringStatus, bothUnavailable: true });
|
|
874
|
+
qualityReviewerHistory.push('skipped');
|
|
875
|
+
}
|
|
876
|
+
else {
|
|
877
|
+
qualityReviewerHistory.push(initialQuality.usedTier);
|
|
878
|
+
if (initialQuality.fallbackFired) {
|
|
879
|
+
emitFallback({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'quality', attempt: 0, role: 'qualityReviewer', assignedTier: qualityReviewerTier, usedTier: initialQuality.usedTier, reason: initialQuality.fallbackReason, triggeringStatus: initialQuality.fallbackTriggeringStatus, violatesSeparation: initialQuality.usedTier === implementerHistory[implementerHistory.length - 1] });
|
|
880
|
+
fallbackOverrides.push({ role: 'qualityReviewer', loop: 'quality', attempt: 0, assigned: qualityReviewerTier, used: initialQuality.usedTier, reason: initialQuality.fallbackReason, triggeringStatus: initialQuality.fallbackTriggeringStatus, bothUnavailable: false });
|
|
682
881
|
}
|
|
882
|
+
}
|
|
883
|
+
qualityResult = initialQuality.result;
|
|
884
|
+
let prevQualityFindings = [...(qualityResult.findings ?? [])];
|
|
885
|
+
qualityAttemptIndex = 1;
|
|
886
|
+
while (qualityResult.status === 'changes_required') {
|
|
887
|
+
if (qualityAttemptIndex >= maxQualityRows)
|
|
888
|
+
return abortReviewLoop(finalImplResult, 'round_cap', 'review round cap reached before quality rework', 'quality');
|
|
683
889
|
const currentCostUSD = taskCostUSD();
|
|
684
890
|
if (currentCostUSD !== null && maxCostUSD !== undefined && currentCostUSD >= 0.8 * maxCostUSD) {
|
|
685
|
-
|
|
686
|
-
return abortReviewLoop(finalImplResult, 'cost_ceiling', 'cost ceiling reached before
|
|
891
|
+
emitTaskEvent('cost_check', { stage: 'quality_rework', tripped: true, cost_used_usd: currentCostUSD, cost_cap_usd: maxCostUSD, cost_available: true });
|
|
892
|
+
return abortReviewLoop(finalImplResult, 'cost_ceiling', 'cost ceiling reached before quality rework', 'quality');
|
|
687
893
|
}
|
|
688
|
-
|
|
689
|
-
|
|
690
|
-
|
|
691
|
-
|
|
692
|
-
|
|
693
|
-
|
|
694
|
-
});
|
|
695
|
-
const
|
|
696
|
-
|
|
697
|
-
: '';
|
|
698
|
-
|
|
699
|
-
|
|
700
|
-
|
|
701
|
-
|
|
702
|
-
|
|
703
|
-
|
|
704
|
-
|
|
705
|
-
|
|
706
|
-
|
|
707
|
-
|
|
708
|
-
|
|
709
|
-
|
|
710
|
-
|
|
711
|
-
|
|
712
|
-
|
|
713
|
-
|
|
894
|
+
const decision = pickEscalation({ loop: 'quality', attemptIndex: qualityAttemptIndex, baseTier: resolved.slot });
|
|
895
|
+
if (decision.isEscalated)
|
|
896
|
+
emitEscalationEvent('quality', qualityAttemptIndex, decision);
|
|
897
|
+
emitTaskEvent('stage_change', { from: 'quality_review', to: 'quality_rework', attempt: qualityAttemptIndex, attemptCap: maxQualityRows, implTier: decision.impl, reviewerTier: decision.reviewer, escalated: decision.isEscalated });
|
|
898
|
+
heartbeat?.transition({ stage: 'quality_rework', stageIndex: 5, reviewRound: qualityAttemptIndex, attemptCap: maxQualityRows });
|
|
899
|
+
const feedback = qualityResult.findings.length > 0 ? `\n\n## Quality Review Feedback (round ${qualityAttemptIndex}):\n${qualityResult.findings.map(f => `- ${f}`).join('\n')}` : '';
|
|
900
|
+
const reworkTask = withDoneCondition({ ...task, prompt: `${task.prompt}${feedback}` });
|
|
901
|
+
const reworkCall = await runWithFallback({ assigned: decision.impl, providerFor, unavailableTiers: qualityUnavailable, isTransportFailure: (r) => TRANSPORT_FAILURES.has(r.status) && r.capExhausted === undefined, getStatus: (r) => r.status, makeSyntheticFailure: (assigned) => makeSyntheticRunResult(assigned, 'all_tiers_unavailable'), call: (provider) => delegateWithEscalation(reworkTask, [provider], { explicitlyPinned: true, onProgress: wrappedOnProgress }) });
|
|
902
|
+
if (reworkCall.fallbackFired || reworkCall.bothUnavailable)
|
|
903
|
+
fallbackOverrides.push({ role: 'implementer', loop: 'quality', attempt: qualityAttemptIndex, assigned: decision.impl, used: reworkCall.usedTier, reason: (reworkCall.fallbackReason ?? reworkCall.unavailableReason), triggeringStatus: reworkCall.fallbackTriggeringStatus, bothUnavailable: reworkCall.bothUnavailable });
|
|
904
|
+
if (reworkCall.fallbackFired)
|
|
905
|
+
emitFallback({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'quality', attempt: qualityAttemptIndex, role: 'implementer', assignedTier: decision.impl, usedTier: reworkCall.usedTier, reason: reworkCall.fallbackReason, triggeringStatus: reworkCall.fallbackTriggeringStatus, violatesSeparation: false });
|
|
906
|
+
if (reworkCall.bothUnavailable) {
|
|
907
|
+
emitFallbackUnavailable({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'quality', attempt: qualityAttemptIndex, role: 'implementer', assignedTier: decision.impl, reason: reworkCall.unavailableReason });
|
|
908
|
+
if (decision.isEscalated)
|
|
909
|
+
emitEscalationUnavailable({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'quality', attempt: qualityAttemptIndex, role: 'implementer', wantedTier: decision.impl, reason: reworkCall.unavailableReason });
|
|
910
|
+
return adaptForAllTiersUnavailable(reworkCall.result, 'quality', qualityAttemptIndex);
|
|
911
|
+
}
|
|
912
|
+
finalImplResult = reworkCall.result;
|
|
913
|
+
latestAttemptedImpl = { tier: reworkCall.usedTier, result: finalImplResult };
|
|
914
|
+
implementerHistory.push(reworkCall.usedTier);
|
|
915
|
+
const reworkReport = parseStructuredReport(finalImplResult.output);
|
|
916
|
+
finalImplReport = reworkReport.summary ? reworkReport : buildFallbackImplReport(finalImplResult);
|
|
917
|
+
fileContents = await readImplementerFileContents(finalImplResult.filesWritten, task.cwd);
|
|
918
|
+
heartbeat?.transition({ stage: 'quality_review', stageIndex: 4, reviewRound: qualityAttemptIndex + 1, attemptCap: maxQualityRows });
|
|
919
|
+
const reviewCall = await runWithFallback({ assigned: decision.reviewer, providerFor, unavailableTiers: qualityUnavailable, isTransportFailure: (r) => isReviewTransportFailure(r), getStatus: (r) => r.status, makeSyntheticFailure: () => makeSkippedReviewResult('all_tiers_unavailable'), call: (provider) => runQualityReview(provider, packet, finalImplReport, fileContents, finalImplResult.toolCalls, finalImplResult.filesWritten, evidence.block) });
|
|
920
|
+
if (reviewCall.bothUnavailable) {
|
|
921
|
+
emitFallbackUnavailable({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'quality', attempt: qualityAttemptIndex, role: 'qualityReviewer', assignedTier: decision.reviewer, reason: reviewCall.unavailableReason });
|
|
922
|
+
fallbackOverrides.push({ role: 'qualityReviewer', loop: 'quality', attempt: qualityAttemptIndex, assigned: decision.reviewer, used: reviewCall.usedTier, reason: reviewCall.unavailableReason, triggeringStatus: reviewCall.fallbackTriggeringStatus, bothUnavailable: true });
|
|
923
|
+
qualityReviewerHistory.push('skipped');
|
|
924
|
+
}
|
|
925
|
+
else {
|
|
926
|
+
qualityReviewerHistory.push(reviewCall.usedTier);
|
|
927
|
+
if (reviewCall.fallbackFired) {
|
|
928
|
+
emitFallback({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'quality', attempt: qualityAttemptIndex, role: 'qualityReviewer', assignedTier: decision.reviewer, usedTier: reviewCall.usedTier, reason: reviewCall.fallbackReason, triggeringStatus: reviewCall.fallbackTriggeringStatus, violatesSeparation: reviewCall.usedTier === implementerHistory[implementerHistory.length - 1] });
|
|
929
|
+
fallbackOverrides.push({ role: 'qualityReviewer', loop: 'quality', attempt: qualityAttemptIndex, assigned: decision.reviewer, used: reviewCall.usedTier, reason: reviewCall.fallbackReason, triggeringStatus: reviewCall.fallbackTriggeringStatus, bothUnavailable: false });
|
|
930
|
+
}
|
|
931
|
+
}
|
|
932
|
+
qualityResult = reviewCall.result;
|
|
933
|
+
if (reviewDidNotReject(qualityResult.status))
|
|
934
|
+
lastNonRejectedImpl = { tier: implementerHistory[implementerHistory.length - 1], result: finalImplResult };
|
|
935
|
+
qualityAttemptIndex++;
|
|
936
|
+
if (qualityResult.status === 'approved' || qualityResult.status === 'skipped')
|
|
714
937
|
break;
|
|
715
|
-
const currentFindings = [...
|
|
716
|
-
const prevFindings =
|
|
938
|
+
const currentFindings = [...(qualityResult.findings ?? [])].sort().join('\0');
|
|
939
|
+
const prevFindings = [...prevQualityFindings].sort().join('\0');
|
|
717
940
|
if (currentFindings === prevFindings && currentFindings !== '')
|
|
718
941
|
break;
|
|
719
|
-
|
|
720
|
-
}
|
|
721
|
-
}
|
|
722
|
-
let qualityResult = { status: 'skipped', report: undefined, findings: [] };
|
|
723
|
-
if (reviewPolicy === 'full') {
|
|
724
|
-
heartbeat?.transition({
|
|
725
|
-
stage: 'quality_review', stageIndex: 4,
|
|
726
|
-
reviewRound: 1, maxReviewRounds,
|
|
727
|
-
});
|
|
728
|
-
qualityResult = await runQualityReview(otherProvider, packet, specReport ?? finalImplReport, fileContents, finalImplResult.toolCalls, finalImplResult.filesWritten, evidence.block);
|
|
729
|
-
if (qualityResult.status === 'changes_required') {
|
|
730
|
-
let prevQualityFindings = [];
|
|
731
|
-
while (true) {
|
|
732
|
-
if (specRework + qualityRework >= maxReviewRounds) {
|
|
733
|
-
return abortReviewLoop(finalImplResult, 'round_cap', 'review round cap reached before quality rework', 'quality');
|
|
734
|
-
}
|
|
735
|
-
const currentCostUSD = taskCostUSD();
|
|
736
|
-
if (currentCostUSD !== null && maxCostUSD !== undefined && currentCostUSD >= 0.8 * maxCostUSD) {
|
|
737
|
-
emitVerbose('cost_check', { stage: 'quality_rework', tripped: true, cost_used_usd: currentCostUSD, cost_cap_usd: maxCostUSD, cost_available: true });
|
|
738
|
-
return abortReviewLoop(finalImplResult, 'cost_ceiling', 'cost ceiling reached before quality rework', 'quality');
|
|
739
|
-
}
|
|
740
|
-
emitVerbose('stage_change', { from: 'quality_review', to: 'quality_rework', round: qualityRework + 1, cap: maxReviewRounds });
|
|
741
|
-
qualityRework++;
|
|
742
|
-
const round = qualityRework;
|
|
743
|
-
heartbeat?.transition({
|
|
744
|
-
stage: 'quality_rework', stageIndex: 5,
|
|
745
|
-
reviewRound: round, maxReviewRounds,
|
|
746
|
-
});
|
|
747
|
-
const feedback = qualityResult.findings.length > 0
|
|
748
|
-
? `\n\n## Quality Review Feedback (round ${round}):\n${qualityResult.findings.map(f => `- ${f}`).join('\n')}`
|
|
749
|
-
: '';
|
|
750
|
-
const reworkPrompt = `${task.prompt}${feedback}`;
|
|
751
|
-
const reworkTask = withDoneCondition({ ...task, prompt: reworkPrompt });
|
|
752
|
-
const reworkResult = await delegateWithEscalation(reworkTask, [resolved.provider], { explicitlyPinned: true, onProgress: wrappedOnProgress });
|
|
753
|
-
finalImplResult = reworkResult;
|
|
754
|
-
const reworkReport = parseStructuredReport(reworkResult.output);
|
|
755
|
-
finalImplReport = reworkReport.summary ? reworkReport : buildFallbackImplReport(reworkResult);
|
|
756
|
-
const reworkContents = await readImplementerFileContents(reworkResult.filesWritten, task.cwd);
|
|
757
|
-
heartbeat?.transition({
|
|
758
|
-
stage: 'quality_review', stageIndex: 4,
|
|
759
|
-
reviewRound: round + 1, maxReviewRounds,
|
|
760
|
-
});
|
|
761
|
-
qualityResult = await runQualityReview(otherProvider, packet, finalImplReport, reworkContents, reworkResult.toolCalls, reworkResult.filesWritten, evidence.block);
|
|
762
|
-
if (qualityResult.status === 'approved')
|
|
763
|
-
break;
|
|
764
|
-
const currentFindings = [...qualityResult.findings].sort().join('\0');
|
|
765
|
-
const prevFindings = prevQualityFindings.sort().join('\0');
|
|
766
|
-
if (currentFindings === prevFindings && currentFindings !== '')
|
|
767
|
-
break;
|
|
768
|
-
prevQualityFindings = qualityResult.findings;
|
|
769
|
-
}
|
|
942
|
+
prevQualityFindings = [...(qualityResult.findings ?? [])];
|
|
770
943
|
}
|
|
771
944
|
}
|
|
772
945
|
const finalReport = specReport ?? finalImplReport;
|
|
@@ -788,7 +961,9 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
788
961
|
message: 'Implementation diff exceeded the reviewer evidence byte cap and was truncated.',
|
|
789
962
|
});
|
|
790
963
|
}
|
|
791
|
-
const
|
|
964
|
+
const specAggregateStatus = (['approved', 'changes_required', 'skipped', 'error', 'api_error', 'network_error', 'timeout'].includes(specStatus) ? specStatus : 'error');
|
|
965
|
+
const qualityAggregateStatus = qualityResult.status;
|
|
966
|
+
const aggregated = aggregateResult(finalReport, specReport, qualityResult.report, specAggregateStatus, qualityAggregateStatus);
|
|
792
967
|
// File artifact verification: check whether output targets exist on disk after all work.
|
|
793
968
|
// Only applies when status is ok; non-ok statuses skip verification entirely.
|
|
794
969
|
const fileArtifactsMissing = finalImplResult.status === 'ok' && outputTargets.length > 0
|
|
@@ -802,25 +977,23 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
802
977
|
: finalImplResult.status === 'ok' && fileArtifactsMissing
|
|
803
978
|
? 'incomplete'
|
|
804
979
|
: finalImplResult.status;
|
|
980
|
+
const specEnvelopeStatus = (specStatus === 'api_error' || specStatus === 'network_error' || specStatus === 'timeout' ? 'error' : specStatus);
|
|
981
|
+
const qualityEnvelopeStatus = qualityResult.status === 'api_error' || qualityResult.status === 'network_error' || qualityResult.status === 'timeout' ? 'error' : qualityResult.status;
|
|
805
982
|
return {
|
|
806
983
|
...finalImplResult,
|
|
807
984
|
status: finalStatus,
|
|
808
985
|
workerStatus: finalWorkerStatus,
|
|
809
986
|
concerns,
|
|
810
|
-
specReviewStatus:
|
|
811
|
-
qualityReviewStatus:
|
|
812
|
-
specReviewReason: specResult.errorReason,
|
|
813
|
-
qualityReviewReason: qualityResult.errorReason,
|
|
987
|
+
specReviewStatus: specEnvelopeStatus,
|
|
988
|
+
qualityReviewStatus: qualityEnvelopeStatus,
|
|
989
|
+
specReviewReason: 'errorReason' in specResult ? specResult.errorReason : undefined,
|
|
990
|
+
qualityReviewReason: 'errorReason' in qualityResult ? qualityResult.errorReason : undefined,
|
|
814
991
|
structuredReport: aggregated,
|
|
815
992
|
implementationReport: finalImplReport,
|
|
816
993
|
specReviewReport: specReport,
|
|
817
994
|
qualityReviewReport: qualityResult.report,
|
|
818
995
|
filePathsSkipped,
|
|
819
|
-
agents:
|
|
820
|
-
implementer: resolved.slot,
|
|
821
|
-
specReviewer: otherSlot,
|
|
822
|
-
qualityReviewer: reviewPolicy === 'full' ? otherSlot : 'skipped',
|
|
823
|
-
},
|
|
996
|
+
agents: agentEnvelope(specReviewerHistory[specReviewerHistory.length - 1] ?? 'not_applicable', qualityReviewerHistory[qualityReviewerHistory.length - 1] ?? (reviewPolicy === 'full' ? 'not_applicable' : 'skipped')),
|
|
824
997
|
models: {
|
|
825
998
|
implementer: implModel,
|
|
826
999
|
specReviewer: reviewModel,
|
|
@@ -832,6 +1005,9 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
832
1005
|
verification,
|
|
833
1006
|
};
|
|
834
1007
|
}
|
|
1008
|
+
catch (err) {
|
|
1009
|
+
return withVerification(workerErrorResult(err));
|
|
1010
|
+
}
|
|
835
1011
|
finally {
|
|
836
1012
|
heartbeat?.stop();
|
|
837
1013
|
}
|