@zhixuan92/multi-model-agent-core 3.4.0 → 3.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/delegate-with-escalation.d.ts +0 -1
- package/dist/delegate-with-escalation.d.ts.map +1 -1
- package/dist/delegate-with-escalation.js +7 -52
- package/dist/delegate-with-escalation.js.map +1 -1
- package/dist/diagnostics/disconnect-log.d.ts +48 -0
- package/dist/diagnostics/disconnect-log.d.ts.map +1 -1
- package/dist/diagnostics/disconnect-log.js +24 -0
- package/dist/diagnostics/disconnect-log.js.map +1 -1
- package/dist/diagnostics/verbose-line.d.ts +1 -0
- package/dist/diagnostics/verbose-line.d.ts.map +1 -1
- package/dist/diagnostics/verbose-line.js +19 -0
- package/dist/diagnostics/verbose-line.js.map +1 -1
- package/dist/escalation/fallback.d.ts +65 -0
- package/dist/escalation/fallback.d.ts.map +1 -0
- package/dist/escalation/fallback.js +195 -0
- package/dist/escalation/fallback.js.map +1 -0
- package/dist/escalation/policy.d.ts +37 -0
- package/dist/escalation/policy.d.ts.map +1 -0
- package/dist/escalation/policy.js +67 -0
- package/dist/escalation/policy.js.map +1 -0
- package/dist/executors/debug.d.ts.map +1 -1
- package/dist/executors/debug.js +0 -1
- package/dist/executors/debug.js.map +1 -1
- package/dist/executors/execute-plan.js +1 -1
- package/dist/executors/execute-plan.js.map +1 -1
- package/dist/heartbeat.d.ts +4 -4
- package/dist/heartbeat.d.ts.map +1 -1
- package/dist/heartbeat.js +17 -17
- package/dist/heartbeat.js.map +1 -1
- package/dist/intake/compilers/execute-plan.d.ts.map +1 -1
- package/dist/intake/compilers/execute-plan.js +1 -0
- package/dist/intake/compilers/execute-plan.js.map +1 -1
- package/dist/intake/resolve.d.ts.map +1 -1
- package/dist/intake/resolve.js +1 -2
- package/dist/intake/resolve.js.map +1 -1
- package/dist/reporting/compose-terminal-headline.d.ts +5 -0
- package/dist/reporting/compose-terminal-headline.d.ts.map +1 -1
- package/dist/reporting/compose-terminal-headline.js +23 -6
- package/dist/reporting/compose-terminal-headline.js.map +1 -1
- package/dist/review/aggregate-result.d.ts +3 -1
- package/dist/review/aggregate-result.d.ts.map +1 -1
- package/dist/review/aggregate-result.js.map +1 -1
- package/dist/review/diff-review.d.ts +11 -0
- package/dist/review/diff-review.d.ts.map +1 -1
- package/dist/review/diff-review.js +5 -2
- package/dist/review/diff-review.js.map +1 -1
- package/dist/review/quality-reviewer.d.ts +11 -2
- package/dist/review/quality-reviewer.d.ts.map +1 -1
- package/dist/review/quality-reviewer.js +3 -0
- package/dist/review/quality-reviewer.js.map +1 -1
- package/dist/review/skipped-result.d.ts +8 -0
- package/dist/review/skipped-result.d.ts.map +1 -0
- package/dist/review/skipped-result.js +4 -0
- package/dist/review/skipped-result.js.map +1 -0
- package/dist/review/spec-reviewer.d.ts +4 -1
- package/dist/review/spec-reviewer.d.ts.map +1 -1
- package/dist/review/spec-reviewer.js +3 -0
- package/dist/review/spec-reviewer.js.map +1 -1
- package/dist/run-tasks/metadata-repair.d.ts.map +1 -1
- package/dist/run-tasks/metadata-repair.js +0 -1
- package/dist/run-tasks/metadata-repair.js.map +1 -1
- package/dist/run-tasks/reviewed-lifecycle.d.ts.map +1 -1
- package/dist/run-tasks/reviewed-lifecycle.js +360 -188
- package/dist/run-tasks/reviewed-lifecycle.js.map +1 -1
- package/dist/runners/base/result-builders.d.ts +1 -1
- package/dist/runners/base/result-builders.d.ts.map +1 -1
- package/dist/runners/types.d.ts +2 -2
- package/dist/runners/types.d.ts.map +1 -1
- package/dist/tool-schemas/delegate.d.ts +0 -1
- package/dist/tool-schemas/delegate.d.ts.map +1 -1
- package/dist/tool-schemas/delegate.js +0 -1
- package/dist/tool-schemas/delegate.js.map +1 -1
- package/dist/tool-schemas/execute-plan.d.ts +0 -5
- package/dist/tool-schemas/execute-plan.d.ts.map +1 -1
- package/dist/tool-schemas/execute-plan.js +0 -4
- package/dist/tool-schemas/execute-plan.js.map +1 -1
- package/dist/types.d.ts +15 -2
- package/dist/types.d.ts.map +1 -1
- package/dist/types.js.map +1 -1
- package/package.json +1 -1
|
@@ -3,8 +3,11 @@ import { promisify } from 'node:util';
|
|
|
3
3
|
import { computeCostUSD, computeSavedCostUSD } from '../types.js';
|
|
4
4
|
import { createProvider } from '../provider.js';
|
|
5
5
|
import { delegateWithEscalation } from '../delegate-with-escalation.js';
|
|
6
|
+
import { pickEscalation, pickReviewer, maxRowsFor, } from '../escalation/policy.js';
|
|
7
|
+
import { runWithFallback, makeSyntheticRunResult, TRANSPORT_FAILURES, isReviewTransportFailure, } from '../escalation/fallback.js';
|
|
6
8
|
import { HeartbeatTimer } from '../heartbeat.js';
|
|
7
9
|
import { runSpecReview } from '../review/spec-reviewer.js';
|
|
10
|
+
import { makeSkippedReviewResult } from '../review/skipped-result.js';
|
|
8
11
|
import { runQualityReview } from '../review/quality-reviewer.js';
|
|
9
12
|
import { runDiffReview } from '../review/diff-review.js';
|
|
10
13
|
import { aggregateResult } from '../review/aggregate-result.js';
|
|
@@ -16,22 +19,30 @@ import { runMetadataRepairTurn } from './metadata-repair.js';
|
|
|
16
19
|
import { partitionFilePaths, checkOutputTargets } from '../file-artifact-check.js';
|
|
17
20
|
import { extractWorkerStatus } from './worker-status.js';
|
|
18
21
|
import { buildFallbackImplReport, readImplementerFileContents } from './fallback-report.js';
|
|
19
|
-
import { composeVerboseLine } from '../diagnostics/verbose-line.js';
|
|
22
|
+
import { composeVerboseLine, toVerboseFields } from '../diagnostics/verbose-line.js';
|
|
20
23
|
import { withDoneCondition } from './execute-task.js';
|
|
21
24
|
const exec = promisify(execFile);
|
|
22
25
|
export async function executeReviewedLifecycle(task, resolved, config, taskIndex, onProgress, heartbeatWiring, diagnostics) {
|
|
23
26
|
const reviewPolicy = task.reviewPolicy ?? 'full';
|
|
24
27
|
const otherSlot = resolved.slot === 'standard' ? 'complex' : 'standard';
|
|
25
|
-
// Partition filePaths into output targets before the worker runs.
|
|
26
|
-
// Output targets are paths that do not yet exist on disk.
|
|
27
|
-
const { outputTargets } = partitionFilePaths(task.filePaths, task.cwd ?? process.cwd());
|
|
28
28
|
let escalationProvider;
|
|
29
29
|
try {
|
|
30
30
|
escalationProvider = createProvider(otherSlot, config);
|
|
31
31
|
}
|
|
32
32
|
catch {
|
|
33
|
-
|
|
33
|
+
escalationProvider = undefined;
|
|
34
34
|
}
|
|
35
|
+
const providers = {
|
|
36
|
+
[resolved.slot]: resolved.provider,
|
|
37
|
+
};
|
|
38
|
+
if (escalationProvider)
|
|
39
|
+
providers[otherSlot] = escalationProvider;
|
|
40
|
+
function providerFor(tier) {
|
|
41
|
+
return providers[tier];
|
|
42
|
+
}
|
|
43
|
+
// Partition filePaths into output targets before the worker runs.
|
|
44
|
+
// Output targets are paths that do not yet exist on disk.
|
|
45
|
+
const { outputTargets } = partitionFilePaths(task.filePaths, task.cwd ?? process.cwd());
|
|
35
46
|
const stageCount = reviewPolicy === 'off' ? 1 :
|
|
36
47
|
reviewPolicy === 'spec_only' ? 3 :
|
|
37
48
|
5;
|
|
@@ -53,7 +64,7 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
53
64
|
taskEventLogger.emit({ event, batchId: verboseBatchIdEarly, taskIndex, ...cleaned });
|
|
54
65
|
}
|
|
55
66
|
if (verboseStreamRaw) {
|
|
56
|
-
verboseStreamRaw(composeVerboseLine({ event, ts: new Date().toISOString(), batch: shortBatchEarly, task: taskIndex, ...fields }));
|
|
67
|
+
verboseStreamRaw(composeVerboseLine({ event, ts: new Date().toISOString(), batch: shortBatchEarly, task: taskIndex, ...toVerboseFields(fields) }));
|
|
57
68
|
}
|
|
58
69
|
};
|
|
59
70
|
// Start the heartbeat whenever there's a downstream consumer:
|
|
@@ -88,7 +99,7 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
88
99
|
elapsed: event.elapsed,
|
|
89
100
|
stage: event.stage,
|
|
90
101
|
round: event.reviewRound,
|
|
91
|
-
cap: event.
|
|
102
|
+
cap: event.attemptCap,
|
|
92
103
|
tools: event.progress.toolCalls,
|
|
93
104
|
read: event.progress.filesRead,
|
|
94
105
|
wrote: event.progress.filesWritten,
|
|
@@ -196,17 +207,76 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
196
207
|
const taskStartMs = Date.now();
|
|
197
208
|
const commits = [];
|
|
198
209
|
let commitError;
|
|
199
|
-
let
|
|
200
|
-
let
|
|
210
|
+
let specAttemptIndex = 0;
|
|
211
|
+
let qualityAttemptIndex = 0;
|
|
212
|
+
const maxSpecRows = maxRowsFor('spec');
|
|
213
|
+
const maxQualityRows = maxRowsFor('quality');
|
|
214
|
+
const specUnavailable = new Map();
|
|
215
|
+
let qualityUnavailable = new Map();
|
|
201
216
|
let metadataRepair = 0;
|
|
202
|
-
const maxReviewRounds = task.maxReviewRounds ?? 3;
|
|
203
217
|
const maxCostUSD = task.maxCostUSD;
|
|
204
|
-
const
|
|
218
|
+
const implementerHistory = [];
|
|
219
|
+
const specReviewerHistory = [];
|
|
220
|
+
const qualityReviewerHistory = [];
|
|
221
|
+
const fallbackOverrides = [];
|
|
222
|
+
let latestAttemptedImpl;
|
|
223
|
+
let lastNonRejectedImpl;
|
|
224
|
+
const reviewRounds = () => ({ spec: specAttemptIndex, quality: qualityAttemptIndex, metadata: metadataRepair, cap: Math.max(maxSpecRows, maxQualityRows) });
|
|
205
225
|
const taskCostUSD = () => (heartbeat ? heartbeat.getHeartbeatTickInfo().costUSD : null);
|
|
226
|
+
const policyEscalated = { spec: false, quality: false, diff: false };
|
|
227
|
+
const emitFallback = (p) => {
|
|
228
|
+
diagnostics?.logger?.fallback(p);
|
|
229
|
+
emitTaskEvent('fallback', p);
|
|
230
|
+
};
|
|
231
|
+
const emitFallbackUnavailable = (p) => {
|
|
232
|
+
diagnostics?.logger?.fallbackUnavailable(p);
|
|
233
|
+
emitTaskEvent('fallback_unavailable', p);
|
|
234
|
+
};
|
|
235
|
+
const emitEscalationEvent = (loop, attempt, decision) => {
|
|
236
|
+
const p = {
|
|
237
|
+
batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop, attempt,
|
|
238
|
+
baseTier: resolved.slot, implTier: decision.impl, reviewerTier: decision.reviewer,
|
|
239
|
+
};
|
|
240
|
+
diagnostics?.logger?.escalation(p);
|
|
241
|
+
emitTaskEvent('escalation', p);
|
|
242
|
+
policyEscalated[loop] = true;
|
|
243
|
+
};
|
|
244
|
+
const emitEscalationUnavailable = (p) => {
|
|
245
|
+
diagnostics?.logger?.escalationUnavailable(p);
|
|
246
|
+
emitTaskEvent('escalation_unavailable', p);
|
|
247
|
+
};
|
|
206
248
|
// When the review loop aborts mid-flight, preserve any review-status info already set
|
|
207
249
|
// on the base result (set by callers via abortReviewLoop({ ...res, specReviewStatus, ... })).
|
|
208
250
|
// Defaults to 'changes_required' for whichever loop tripped — that's the only state the
|
|
209
251
|
// loop ever fires from, by construction.
|
|
252
|
+
function adaptForAllTiersUnavailable(base, loop, attempt) {
|
|
253
|
+
const ship = lastNonRejectedImpl?.result ?? base;
|
|
254
|
+
return {
|
|
255
|
+
...ship,
|
|
256
|
+
status: 'incomplete',
|
|
257
|
+
workerStatus: 'blocked',
|
|
258
|
+
terminationReason: 'all_tiers_unavailable',
|
|
259
|
+
reviewRounds: reviewRounds(),
|
|
260
|
+
error: `runWithFallback: both tiers unavailable (loop=${loop}, attempt=${attempt}, role=implementer)`,
|
|
261
|
+
agents: agentEnvelope(specReviewerHistory[specReviewerHistory.length - 1] ?? 'not_applicable', qualityReviewerHistory[qualityReviewerHistory.length - 1] ?? (reviewPolicy === 'full' ? 'not_applicable' : 'skipped')),
|
|
262
|
+
};
|
|
263
|
+
}
|
|
264
|
+
function reviewDidNotReject(status) {
|
|
265
|
+
return status === 'approved' || status === 'skipped';
|
|
266
|
+
}
|
|
267
|
+
const agentEnvelope = (specReviewer, qualityReviewer) => {
|
|
268
|
+
const selectedImpl = latestAttemptedImpl ?? lastNonRejectedImpl;
|
|
269
|
+
const implementer = selectedImpl?.tier ?? resolved.slot;
|
|
270
|
+
return {
|
|
271
|
+
implementer,
|
|
272
|
+
...(implementerHistory.length > 1 || implementerHistory.some(t => t !== implementer) ? { implementerHistory } : {}),
|
|
273
|
+
specReviewer,
|
|
274
|
+
...(specReviewerHistory.length > 0 && (specReviewerHistory.length > 1 || specReviewerHistory.some(t => t === 'skipped')) ? { specReviewerHistory } : {}),
|
|
275
|
+
qualityReviewer,
|
|
276
|
+
...(qualityReviewerHistory.length > 0 && (qualityReviewerHistory.length > 1 || qualityReviewerHistory.some(t => t === 'skipped')) ? { qualityReviewerHistory } : {}),
|
|
277
|
+
...(fallbackOverrides.length > 0 ? { fallbackOverrides } : {}),
|
|
278
|
+
};
|
|
279
|
+
};
|
|
210
280
|
const abortReviewLoop = (base, terminationReason, message, aborting) => ({
|
|
211
281
|
...base,
|
|
212
282
|
status: 'incomplete',
|
|
@@ -216,6 +286,7 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
216
286
|
error: message,
|
|
217
287
|
specReviewStatus: aborting === 'spec' ? 'changes_required' : (base.specReviewStatus ?? 'approved'),
|
|
218
288
|
qualityReviewStatus: aborting === 'quality' ? 'changes_required' : (base.qualityReviewStatus ?? 'skipped'),
|
|
289
|
+
agents: agentEnvelope(specReviewerHistory[specReviewerHistory.length - 1] ?? 'not_applicable', qualityReviewerHistory[qualityReviewerHistory.length - 1] ?? (reviewPolicy === 'full' ? 'not_applicable' : 'skipped')),
|
|
219
290
|
});
|
|
220
291
|
const defaultVerification = { status: 'skipped', steps: [], totalDurationMs: 0, skipReason: 'no_command' };
|
|
221
292
|
let latestVerification = defaultVerification;
|
|
@@ -225,7 +296,6 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
225
296
|
stage: 'verifying',
|
|
226
297
|
stageIndex: 4,
|
|
227
298
|
reviewRound: undefined,
|
|
228
|
-
maxReviewRounds: task.maxReviewRounds ?? 5,
|
|
229
299
|
});
|
|
230
300
|
const verification = await runVerifyStage({
|
|
231
301
|
cwd,
|
|
@@ -339,6 +409,15 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
339
409
|
}
|
|
340
410
|
function resolveDiffOnlyTerminal(base, verdict, verification, diffTruncated) {
|
|
341
411
|
const concerns = [...(base.concerns ?? [])];
|
|
412
|
+
if ('status' in verdict && verdict.status === 'skipped') {
|
|
413
|
+
return withVerification({
|
|
414
|
+
...base,
|
|
415
|
+
workerStatus: workerStatusForTerminal(base.workerStatus),
|
|
416
|
+
commits,
|
|
417
|
+
commitError,
|
|
418
|
+
verification,
|
|
419
|
+
}, verification);
|
|
420
|
+
}
|
|
342
421
|
if (verdict.kind === 'reject') {
|
|
343
422
|
return withVerification({
|
|
344
423
|
...base,
|
|
@@ -356,6 +435,18 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
356
435
|
verification,
|
|
357
436
|
}, verification);
|
|
358
437
|
}
|
|
438
|
+
if (verdict.kind === 'transport_failure') {
|
|
439
|
+
return withVerification({
|
|
440
|
+
...base,
|
|
441
|
+
status: verdict.status,
|
|
442
|
+
workerStatus: 'failed',
|
|
443
|
+
error: verdict.reason ?? `diff review transport failure: ${verdict.status}`,
|
|
444
|
+
concerns: [...concerns, ...verdict.concerns],
|
|
445
|
+
commits,
|
|
446
|
+
commitError,
|
|
447
|
+
verification,
|
|
448
|
+
}, verification);
|
|
449
|
+
}
|
|
359
450
|
concerns.push(...verdict.concerns);
|
|
360
451
|
if (verification.status === 'failed') {
|
|
361
452
|
concerns.push({
|
|
@@ -461,7 +552,57 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
461
552
|
});
|
|
462
553
|
}
|
|
463
554
|
}
|
|
464
|
-
const
|
|
555
|
+
const initialDecision = pickEscalation({
|
|
556
|
+
loop: 'spec',
|
|
557
|
+
attemptIndex: 0,
|
|
558
|
+
baseTier: resolved.slot,
|
|
559
|
+
});
|
|
560
|
+
const initialImpl = await runWithFallback({
|
|
561
|
+
assigned: initialDecision.impl,
|
|
562
|
+
providerFor,
|
|
563
|
+
unavailableTiers: specUnavailable,
|
|
564
|
+
isTransportFailure: (r) => TRANSPORT_FAILURES.has(r.status) && r.capExhausted === undefined,
|
|
565
|
+
getStatus: (r) => r.status,
|
|
566
|
+
makeSyntheticFailure: (assigned) => makeSyntheticRunResult(assigned, 'all_tiers_unavailable'),
|
|
567
|
+
call: (provider) => delegateWithEscalation(withDoneCondition(task), [provider], { explicitlyPinned: false, onProgress: wrappedOnProgress }),
|
|
568
|
+
});
|
|
569
|
+
if (initialImpl.fallbackFired || initialImpl.bothUnavailable) {
|
|
570
|
+
fallbackOverrides.push({
|
|
571
|
+
role: 'implementer',
|
|
572
|
+
loop: 'spec',
|
|
573
|
+
attempt: 0,
|
|
574
|
+
assigned: initialDecision.impl,
|
|
575
|
+
used: initialImpl.usedTier,
|
|
576
|
+
reason: (initialImpl.fallbackReason ?? initialImpl.unavailableReason),
|
|
577
|
+
triggeringStatus: initialImpl.fallbackTriggeringStatus,
|
|
578
|
+
bothUnavailable: initialImpl.bothUnavailable,
|
|
579
|
+
});
|
|
580
|
+
}
|
|
581
|
+
if (initialImpl.fallbackFired) {
|
|
582
|
+
emitFallback({
|
|
583
|
+
batchId: heartbeatWiring?.batchId ?? '', taskIndex,
|
|
584
|
+
loop: 'spec', attempt: 0, role: 'implementer',
|
|
585
|
+
assignedTier: initialDecision.impl,
|
|
586
|
+
usedTier: initialImpl.usedTier,
|
|
587
|
+
reason: initialImpl.fallbackReason,
|
|
588
|
+
triggeringStatus: initialImpl.fallbackTriggeringStatus,
|
|
589
|
+
violatesSeparation: false,
|
|
590
|
+
});
|
|
591
|
+
}
|
|
592
|
+
if (initialImpl.bothUnavailable) {
|
|
593
|
+
emitFallbackUnavailable({
|
|
594
|
+
batchId: heartbeatWiring?.batchId ?? '', taskIndex,
|
|
595
|
+
loop: 'spec', attempt: 0, role: 'implementer',
|
|
596
|
+
assignedTier: initialDecision.impl,
|
|
597
|
+
reason: initialImpl.unavailableReason,
|
|
598
|
+
});
|
|
599
|
+
return adaptForAllTiersUnavailable(initialImpl.result, 'spec', 0);
|
|
600
|
+
}
|
|
601
|
+
const implResult = initialImpl.result;
|
|
602
|
+
latestAttemptedImpl = { tier: initialImpl.usedTier, result: implResult };
|
|
603
|
+
lastNonRejectedImpl = { tier: initialImpl.usedTier, result: implResult };
|
|
604
|
+
implementerHistory.push(initialImpl.usedTier);
|
|
605
|
+
specAttemptIndex = 1;
|
|
465
606
|
const implReport = implResult.status === 'ok' ? parseStructuredReport(implResult.output) : undefined;
|
|
466
607
|
const workerStatus = extractWorkerStatus(implReport);
|
|
467
608
|
if (implResult.status === 'ok' && isArtifactProducing) {
|
|
@@ -486,11 +627,7 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
486
627
|
qualityReviewStatus: 'skipped',
|
|
487
628
|
specReviewReason: 'skipped: reviewPolicy is off',
|
|
488
629
|
qualityReviewReason: 'skipped: reviewPolicy is off',
|
|
489
|
-
agents:
|
|
490
|
-
implementer: resolved.slot,
|
|
491
|
-
specReviewer: 'skipped',
|
|
492
|
-
qualityReviewer: 'skipped',
|
|
493
|
-
},
|
|
630
|
+
agents: agentEnvelope('skipped', 'skipped'),
|
|
494
631
|
models: {
|
|
495
632
|
implementer: implModel,
|
|
496
633
|
specReviewer: null,
|
|
@@ -526,11 +663,7 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
526
663
|
extraSections: effectiveImplReport.extraSections ?? {},
|
|
527
664
|
},
|
|
528
665
|
filePathsSkipped,
|
|
529
|
-
agents:
|
|
530
|
-
implementer: resolved.slot,
|
|
531
|
-
specReviewer: 'not_applicable',
|
|
532
|
-
qualityReviewer: 'not_applicable',
|
|
533
|
-
},
|
|
666
|
+
agents: agentEnvelope('not_applicable', 'not_applicable'),
|
|
534
667
|
models: {
|
|
535
668
|
implementer: implModel,
|
|
536
669
|
specReviewer: null,
|
|
@@ -550,11 +683,7 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
550
683
|
qualityReviewStatus: 'skipped',
|
|
551
684
|
specReviewReason: 'skipped: worker reported ' + workerStatus,
|
|
552
685
|
qualityReviewReason: 'skipped: worker reported ' + workerStatus,
|
|
553
|
-
agents:
|
|
554
|
-
implementer: resolved.slot,
|
|
555
|
-
specReviewer: 'skipped',
|
|
556
|
-
qualityReviewer: 'skipped',
|
|
557
|
-
},
|
|
686
|
+
agents: agentEnvelope('skipped', 'skipped'),
|
|
558
687
|
models: {
|
|
559
688
|
implementer: implModel,
|
|
560
689
|
specReviewer: null,
|
|
@@ -575,11 +704,7 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
575
704
|
qualityReviewStatus: 'skipped',
|
|
576
705
|
specReviewReason: 'skipped: reviewPolicy is off',
|
|
577
706
|
qualityReviewReason: 'skipped: reviewPolicy is off',
|
|
578
|
-
agents:
|
|
579
|
-
implementer: resolved.slot,
|
|
580
|
-
specReviewer: 'skipped',
|
|
581
|
-
qualityReviewer: 'skipped',
|
|
582
|
-
},
|
|
707
|
+
agents: agentEnvelope('skipped', 'skipped'),
|
|
583
708
|
models: {
|
|
584
709
|
implementer: implModel,
|
|
585
710
|
specReviewer: null,
|
|
@@ -590,35 +715,7 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
590
715
|
}, verification);
|
|
591
716
|
return terminal;
|
|
592
717
|
}
|
|
593
|
-
|
|
594
|
-
try {
|
|
595
|
-
otherProvider = createProvider(otherSlot, config);
|
|
596
|
-
}
|
|
597
|
-
catch {
|
|
598
|
-
return {
|
|
599
|
-
...implResult,
|
|
600
|
-
workerStatus,
|
|
601
|
-
specReviewStatus: 'skipped',
|
|
602
|
-
qualityReviewStatus: 'skipped',
|
|
603
|
-
specReviewReason: 'skipped: no review agent configured',
|
|
604
|
-
qualityReviewReason: 'skipped: no review agent configured',
|
|
605
|
-
agents: {
|
|
606
|
-
implementer: resolved.slot,
|
|
607
|
-
specReviewer: 'skipped',
|
|
608
|
-
qualityReviewer: 'skipped',
|
|
609
|
-
},
|
|
610
|
-
models: {
|
|
611
|
-
implementer: implModel,
|
|
612
|
-
specReviewer: null,
|
|
613
|
-
qualityReviewer: null,
|
|
614
|
-
},
|
|
615
|
-
fileArtifactsMissing: implResult.status === 'ok' ? checkOutputTargets(outputTargets) : undefined,
|
|
616
|
-
commits,
|
|
617
|
-
commitError,
|
|
618
|
-
verification,
|
|
619
|
-
};
|
|
620
|
-
}
|
|
621
|
-
const reviewModel = otherProvider.config.model;
|
|
718
|
+
const reviewModel = providerFor(pickReviewer({ loop: 'spec', attemptIndex: 0, baseTier: resolved.slot }))?.config.model ?? null;
|
|
622
719
|
const packet = {
|
|
623
720
|
prompt: task.prompt,
|
|
624
721
|
scope: task.filePaths ?? [],
|
|
@@ -630,21 +727,28 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
630
727
|
? await buildEvidence({ cwd, baselineHead, commits, verification, reviewPolicy })
|
|
631
728
|
: { block: '', diffTruncated: false, fullDiff: '' };
|
|
632
729
|
if (reviewPolicy === 'diff_only') {
|
|
730
|
+
const diffUnavailable = new Map();
|
|
731
|
+
const diffReviewerTier = pickReviewer({ loop: 'spec', attemptIndex: 0, baseTier: resolved.slot });
|
|
633
732
|
emitTaskEvent('stage_change', { from: 'verifying', to: 'diff_review' });
|
|
634
|
-
heartbeat?.transition({
|
|
635
|
-
|
|
636
|
-
|
|
637
|
-
|
|
638
|
-
|
|
639
|
-
|
|
640
|
-
|
|
641
|
-
|
|
642
|
-
diff: evidence.fullDiff,
|
|
643
|
-
diffTruncated: evidence.diffTruncated,
|
|
644
|
-
verification,
|
|
645
|
-
worker: { call: (prompt) => otherProvider.run(prompt) },
|
|
733
|
+
heartbeat?.transition({ stage: 'diff_review', stageIndex: 2, reviewRound: 1, attemptCap: 1 });
|
|
734
|
+
const diffCall = await runWithFallback({
|
|
735
|
+
assigned: diffReviewerTier,
|
|
736
|
+
providerFor,
|
|
737
|
+
unavailableTiers: diffUnavailable,
|
|
738
|
+
isTransportFailure: (r) => isReviewTransportFailure(r),
|
|
739
|
+
getStatus: (r) => r.status,
|
|
740
|
+
makeSyntheticFailure: () => makeSkippedReviewResult('all_tiers_unavailable'),
|
|
741
|
+
call: (provider) => runDiffReview({ cwd, diff: evidence.fullDiff, diffTruncated: evidence.diffTruncated, verification, worker: { call: (prompt) => provider.run(prompt) } }),
|
|
646
742
|
});
|
|
647
|
-
|
|
743
|
+
if (diffCall.fallbackFired) {
|
|
744
|
+
emitFallback({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'diff', attempt: 0, role: 'diffReviewer', assignedTier: diffReviewerTier, usedTier: diffCall.usedTier, reason: diffCall.fallbackReason, triggeringStatus: diffCall.fallbackTriggeringStatus, violatesSeparation: diffCall.usedTier === implementerHistory[implementerHistory.length - 1] });
|
|
745
|
+
fallbackOverrides.push({ role: 'diffReviewer', loop: 'diff', attempt: 0, assigned: diffReviewerTier, used: diffCall.usedTier, reason: diffCall.fallbackReason, triggeringStatus: diffCall.fallbackTriggeringStatus, bothUnavailable: diffCall.bothUnavailable });
|
|
746
|
+
}
|
|
747
|
+
if (diffCall.bothUnavailable) {
|
|
748
|
+
emitFallbackUnavailable({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'diff', attempt: 0, role: 'diffReviewer', assignedTier: diffReviewerTier, reason: diffCall.unavailableReason });
|
|
749
|
+
}
|
|
750
|
+
const verdict = diffCall.bothUnavailable || isReviewTransportFailure(diffCall.result) ? makeSkippedReviewResult('all_tiers_unavailable') : diffCall.result;
|
|
751
|
+
emitTaskEvent('review_decision', { stage: 'diff_review', verdict: 'kind' in verdict ? verdict.kind : 'skipped', round: 1 });
|
|
648
752
|
return resolveDiffOnlyTerminal({
|
|
649
753
|
...implResult,
|
|
650
754
|
workerStatus,
|
|
@@ -654,120 +758,188 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
654
758
|
qualityReviewReason: 'skipped: reviewPolicy is diff_only',
|
|
655
759
|
implementationReport: effectiveImplReport,
|
|
656
760
|
fileArtifactsMissing: implResult.status === 'ok' ? checkOutputTargets(outputTargets) : undefined,
|
|
657
|
-
agents:
|
|
658
|
-
|
|
659
|
-
specReviewer: 'skipped',
|
|
660
|
-
qualityReviewer: 'skipped',
|
|
661
|
-
},
|
|
662
|
-
models: {
|
|
663
|
-
implementer: implModel,
|
|
664
|
-
specReviewer: reviewModel,
|
|
665
|
-
qualityReviewer: null,
|
|
666
|
-
},
|
|
761
|
+
agents: agentEnvelope('skipped', 'skipped'),
|
|
762
|
+
models: { implementer: implModel, specReviewer: reviewModel, qualityReviewer: null },
|
|
667
763
|
}, verdict, verification, evidence.diffTruncated);
|
|
668
764
|
}
|
|
669
|
-
heartbeat?.transition({
|
|
670
|
-
stage: 'spec_review', stageIndex: 2,
|
|
671
|
-
reviewRound: 1, maxReviewRounds: task.maxReviewRounds ?? 5,
|
|
672
|
-
});
|
|
673
|
-
let specResult = await runSpecReview(otherProvider, packet, effectiveImplReport, fileContents, implResult.toolCalls, task.planContext, evidence.block);
|
|
674
765
|
let finalImplResult = implResult;
|
|
675
766
|
let finalImplReport = effectiveImplReport;
|
|
676
|
-
let
|
|
677
|
-
let
|
|
678
|
-
|
|
679
|
-
|
|
680
|
-
|
|
681
|
-
|
|
682
|
-
|
|
767
|
+
let specResult;
|
|
768
|
+
let specStatus;
|
|
769
|
+
let specReport;
|
|
770
|
+
let specReviewReason;
|
|
771
|
+
heartbeat?.transition({ stage: 'spec_review', stageIndex: 2, reviewRound: 1, attemptCap: maxSpecRows });
|
|
772
|
+
const initialReviewerTier = pickReviewer({ loop: 'spec', attemptIndex: 0, baseTier: resolved.slot });
|
|
773
|
+
const initialSpecReview = await runWithFallback({
|
|
774
|
+
assigned: initialReviewerTier,
|
|
775
|
+
providerFor,
|
|
776
|
+
unavailableTiers: specUnavailable,
|
|
777
|
+
isTransportFailure: (r) => isReviewTransportFailure(r),
|
|
778
|
+
getStatus: (r) => r.status,
|
|
779
|
+
makeSyntheticFailure: () => makeSkippedReviewResult('all_tiers_unavailable'),
|
|
780
|
+
call: (provider) => runSpecReview(provider, packet, effectiveImplReport, fileContents, implResult.toolCalls, task.planContext, evidence.block),
|
|
781
|
+
});
|
|
782
|
+
if (initialSpecReview.bothUnavailable) {
|
|
783
|
+
emitFallbackUnavailable({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'spec', attempt: 0, role: 'specReviewer', assignedTier: initialReviewerTier, reason: initialSpecReview.unavailableReason });
|
|
784
|
+
fallbackOverrides.push({ role: 'specReviewer', loop: 'spec', attempt: 0, assigned: initialReviewerTier, used: initialSpecReview.usedTier, reason: initialSpecReview.unavailableReason, triggeringStatus: initialSpecReview.fallbackTriggeringStatus, bothUnavailable: true });
|
|
785
|
+
specReviewerHistory.push('skipped');
|
|
786
|
+
}
|
|
787
|
+
else {
|
|
788
|
+
specReviewerHistory.push(initialSpecReview.usedTier);
|
|
789
|
+
if (initialSpecReview.fallbackFired) {
|
|
790
|
+
emitFallback({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'spec', attempt: 0, role: 'specReviewer', assignedTier: initialReviewerTier, usedTier: initialSpecReview.usedTier, reason: initialSpecReview.fallbackReason, triggeringStatus: initialSpecReview.fallbackTriggeringStatus, violatesSeparation: initialSpecReview.usedTier === implementerHistory[implementerHistory.length - 1] });
|
|
791
|
+
fallbackOverrides.push({ role: 'specReviewer', loop: 'spec', attempt: 0, assigned: initialReviewerTier, used: initialSpecReview.usedTier, reason: initialSpecReview.fallbackReason, triggeringStatus: initialSpecReview.fallbackTriggeringStatus, bothUnavailable: false });
|
|
792
|
+
}
|
|
793
|
+
}
|
|
794
|
+
specResult = initialSpecReview.bothUnavailable
|
|
795
|
+
? makeSkippedReviewResult('all_tiers_unavailable')
|
|
796
|
+
: initialSpecReview.result;
|
|
797
|
+
specStatus = specResult.status;
|
|
798
|
+
specReport = 'report' in specResult ? specResult.report : undefined;
|
|
799
|
+
specReviewReason = specStatus === 'skipped' ? 'all_tiers_unavailable' : ('errorReason' in specResult ? specResult.errorReason : undefined);
|
|
800
|
+
let prevSpecFindings = [...(specResult.findings ?? [])];
|
|
801
|
+
while (specStatus === 'changes_required') {
|
|
802
|
+
if (specAttemptIndex >= maxSpecRows)
|
|
803
|
+
return abortReviewLoop(finalImplResult, 'round_cap', 'review round cap reached before spec rework', 'spec');
|
|
804
|
+
const currentCostUSD = taskCostUSD();
|
|
805
|
+
if (currentCostUSD !== null && maxCostUSD !== undefined && currentCostUSD >= 0.8 * maxCostUSD) {
|
|
806
|
+
emitTaskEvent('cost_check', { stage: 'spec_rework', tripped: true, cost_used_usd: currentCostUSD, cost_cap_usd: maxCostUSD, cost_available: true });
|
|
807
|
+
return abortReviewLoop(finalImplResult, 'cost_ceiling', 'cost ceiling reached before spec rework', 'spec');
|
|
808
|
+
}
|
|
809
|
+
const decision = pickEscalation({ loop: 'spec', attemptIndex: specAttemptIndex, baseTier: resolved.slot });
|
|
810
|
+
if (decision.isEscalated)
|
|
811
|
+
emitEscalationEvent('spec', specAttemptIndex, decision);
|
|
812
|
+
emitTaskEvent('stage_change', { from: 'spec_review', to: 'spec_rework', attempt: specAttemptIndex, attemptCap: maxSpecRows, implTier: decision.impl, reviewerTier: decision.reviewer, escalated: decision.isEscalated });
|
|
813
|
+
heartbeat?.transition({ stage: 'spec_rework', stageIndex: 3, reviewRound: specAttemptIndex, attemptCap: maxSpecRows });
|
|
814
|
+
const feedback = specResult.findings.length > 0 ? `\n\n## Spec Review Feedback (round ${specAttemptIndex}):\n${specResult.findings.map(f => `- ${f}`).join('\n')}` : '';
|
|
815
|
+
const reworkTask = withDoneCondition({ ...task, prompt: `${task.prompt}${feedback}` });
|
|
816
|
+
const reworkCall = await runWithFallback({ assigned: decision.impl, providerFor, unavailableTiers: specUnavailable, isTransportFailure: (r) => TRANSPORT_FAILURES.has(r.status) && r.capExhausted === undefined, getStatus: (r) => r.status, makeSyntheticFailure: (assigned) => makeSyntheticRunResult(assigned, 'all_tiers_unavailable'), call: (provider) => delegateWithEscalation(reworkTask, [provider], { explicitlyPinned: true, onProgress: wrappedOnProgress }) });
|
|
817
|
+
if (reworkCall.fallbackFired || reworkCall.bothUnavailable)
|
|
818
|
+
fallbackOverrides.push({ role: 'implementer', loop: 'spec', attempt: specAttemptIndex, assigned: decision.impl, used: reworkCall.usedTier, reason: (reworkCall.fallbackReason ?? reworkCall.unavailableReason), triggeringStatus: reworkCall.fallbackTriggeringStatus, bothUnavailable: reworkCall.bothUnavailable });
|
|
819
|
+
if (reworkCall.fallbackFired) {
|
|
820
|
+
emitFallback({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'spec', attempt: specAttemptIndex, role: 'implementer', assignedTier: decision.impl, usedTier: reworkCall.usedTier, reason: reworkCall.fallbackReason, triggeringStatus: reworkCall.fallbackTriggeringStatus, violatesSeparation: false });
|
|
821
|
+
if (decision.isEscalated && reworkCall.fallbackReason === 'not_configured')
|
|
822
|
+
emitEscalationUnavailable({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'spec', attempt: specAttemptIndex, role: 'implementer', wantedTier: decision.impl, reason: reworkCall.fallbackReason });
|
|
823
|
+
}
|
|
824
|
+
if (reworkCall.bothUnavailable) {
|
|
825
|
+
emitFallbackUnavailable({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'spec', attempt: specAttemptIndex, role: 'implementer', assignedTier: decision.impl, reason: reworkCall.unavailableReason });
|
|
826
|
+
if (decision.isEscalated)
|
|
827
|
+
emitEscalationUnavailable({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'spec', attempt: specAttemptIndex, role: 'implementer', wantedTier: decision.impl, reason: reworkCall.unavailableReason });
|
|
828
|
+
return adaptForAllTiersUnavailable(reworkCall.result, 'spec', specAttemptIndex);
|
|
829
|
+
}
|
|
830
|
+
finalImplResult = reworkCall.result;
|
|
831
|
+
latestAttemptedImpl = { tier: reworkCall.usedTier, result: finalImplResult };
|
|
832
|
+
implementerHistory.push(reworkCall.usedTier);
|
|
833
|
+
const reworkReport = parseStructuredReport(finalImplResult.output);
|
|
834
|
+
finalImplReport = reworkReport.summary ? reworkReport : buildFallbackImplReport(finalImplResult);
|
|
835
|
+
fileContents = await readImplementerFileContents(finalImplResult.filesWritten, task.cwd);
|
|
836
|
+
heartbeat?.transition({ stage: 'spec_review', stageIndex: 2, reviewRound: specAttemptIndex + 1, attemptCap: maxSpecRows });
|
|
837
|
+
const reviewCall = await runWithFallback({ assigned: decision.reviewer, providerFor, unavailableTiers: specUnavailable, isTransportFailure: (r) => isReviewTransportFailure(r), getStatus: (r) => r.status, makeSyntheticFailure: () => makeSkippedReviewResult('all_tiers_unavailable'), call: (provider) => runSpecReview(provider, packet, finalImplReport, fileContents, finalImplResult.toolCalls, task.planContext, evidence.block) });
|
|
838
|
+
if (reviewCall.bothUnavailable) {
|
|
839
|
+
emitFallbackUnavailable({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'spec', attempt: specAttemptIndex, role: 'specReviewer', assignedTier: decision.reviewer, reason: reviewCall.unavailableReason });
|
|
840
|
+
fallbackOverrides.push({ role: 'specReviewer', loop: 'spec', attempt: specAttemptIndex, assigned: decision.reviewer, used: reviewCall.usedTier, reason: reviewCall.unavailableReason, triggeringStatus: reviewCall.fallbackTriggeringStatus, bothUnavailable: true });
|
|
841
|
+
specReviewerHistory.push('skipped');
|
|
842
|
+
}
|
|
843
|
+
else {
|
|
844
|
+
specReviewerHistory.push(reviewCall.usedTier);
|
|
845
|
+
if (reviewCall.fallbackFired) {
|
|
846
|
+
emitFallback({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'spec', attempt: specAttemptIndex, role: 'specReviewer', assignedTier: decision.reviewer, usedTier: reviewCall.usedTier, reason: reviewCall.fallbackReason, triggeringStatus: reviewCall.fallbackTriggeringStatus, violatesSeparation: reviewCall.usedTier === implementerHistory[implementerHistory.length - 1] });
|
|
847
|
+
fallbackOverrides.push({ role: 'specReviewer', loop: 'spec', attempt: specAttemptIndex, assigned: decision.reviewer, used: reviewCall.usedTier, reason: reviewCall.fallbackReason, triggeringStatus: reviewCall.fallbackTriggeringStatus, bothUnavailable: false });
|
|
683
848
|
}
|
|
849
|
+
}
|
|
850
|
+
specResult = reviewCall.result;
|
|
851
|
+
specStatus = specResult.status;
|
|
852
|
+
specReport = 'report' in specResult ? specResult.report : undefined;
|
|
853
|
+
specReviewReason = specStatus === 'skipped' ? 'all_tiers_unavailable' : ('errorReason' in specResult ? specResult.errorReason : undefined);
|
|
854
|
+
if (reviewDidNotReject(specStatus))
|
|
855
|
+
lastNonRejectedImpl = { tier: implementerHistory[implementerHistory.length - 1], result: finalImplResult };
|
|
856
|
+
specAttemptIndex++;
|
|
857
|
+
if (specStatus === 'approved' || specStatus === 'skipped')
|
|
858
|
+
break;
|
|
859
|
+
const currentFindings = [...(specResult.findings ?? [])].sort().join('\0');
|
|
860
|
+
const prevFindings = [...prevSpecFindings].sort().join('\0');
|
|
861
|
+
if (currentFindings === prevFindings && currentFindings !== '')
|
|
862
|
+
break;
|
|
863
|
+
prevSpecFindings = [...(specResult.findings ?? [])];
|
|
864
|
+
}
|
|
865
|
+
let qualityResult = { status: 'skipped', report: undefined, findings: [], errorReason: reviewPolicy === 'full' ? 'all_tiers_unavailable' : 'skipped: reviewPolicy is spec_only' };
|
|
866
|
+
if (reviewPolicy === 'full') {
|
|
867
|
+
qualityUnavailable = new Map();
|
|
868
|
+
const qualityReviewerTier = pickReviewer({ loop: 'quality', attemptIndex: 0, baseTier: resolved.slot });
|
|
869
|
+
heartbeat?.transition({ stage: 'quality_review', stageIndex: 4, reviewRound: 1, attemptCap: maxQualityRows });
|
|
870
|
+
const initialQuality = await runWithFallback({ assigned: qualityReviewerTier, providerFor, unavailableTiers: qualityUnavailable, isTransportFailure: (r) => isReviewTransportFailure(r), getStatus: (r) => r.status, makeSyntheticFailure: () => makeSkippedReviewResult('all_tiers_unavailable'), call: (provider) => runQualityReview(provider, packet, specReport ?? finalImplReport, fileContents, finalImplResult.toolCalls, finalImplResult.filesWritten, evidence.block) });
|
|
871
|
+
if (initialQuality.bothUnavailable) {
|
|
872
|
+
emitFallbackUnavailable({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'quality', attempt: 0, role: 'qualityReviewer', assignedTier: qualityReviewerTier, reason: initialQuality.unavailableReason });
|
|
873
|
+
fallbackOverrides.push({ role: 'qualityReviewer', loop: 'quality', attempt: 0, assigned: qualityReviewerTier, used: initialQuality.usedTier, reason: initialQuality.unavailableReason, triggeringStatus: initialQuality.fallbackTriggeringStatus, bothUnavailable: true });
|
|
874
|
+
qualityReviewerHistory.push('skipped');
|
|
875
|
+
}
|
|
876
|
+
else {
|
|
877
|
+
qualityReviewerHistory.push(initialQuality.usedTier);
|
|
878
|
+
if (initialQuality.fallbackFired) {
|
|
879
|
+
emitFallback({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'quality', attempt: 0, role: 'qualityReviewer', assignedTier: qualityReviewerTier, usedTier: initialQuality.usedTier, reason: initialQuality.fallbackReason, triggeringStatus: initialQuality.fallbackTriggeringStatus, violatesSeparation: initialQuality.usedTier === implementerHistory[implementerHistory.length - 1] });
|
|
880
|
+
fallbackOverrides.push({ role: 'qualityReviewer', loop: 'quality', attempt: 0, assigned: qualityReviewerTier, used: initialQuality.usedTier, reason: initialQuality.fallbackReason, triggeringStatus: initialQuality.fallbackTriggeringStatus, bothUnavailable: false });
|
|
881
|
+
}
|
|
882
|
+
}
|
|
883
|
+
qualityResult = initialQuality.result;
|
|
884
|
+
let prevQualityFindings = [...(qualityResult.findings ?? [])];
|
|
885
|
+
qualityAttemptIndex = 1;
|
|
886
|
+
while (qualityResult.status === 'changes_required') {
|
|
887
|
+
if (qualityAttemptIndex >= maxQualityRows)
|
|
888
|
+
return abortReviewLoop(finalImplResult, 'round_cap', 'review round cap reached before quality rework', 'quality');
|
|
684
889
|
const currentCostUSD = taskCostUSD();
|
|
685
890
|
if (currentCostUSD !== null && maxCostUSD !== undefined && currentCostUSD >= 0.8 * maxCostUSD) {
|
|
686
|
-
emitTaskEvent('cost_check', { stage: '
|
|
687
|
-
return abortReviewLoop(finalImplResult, 'cost_ceiling', 'cost ceiling reached before
|
|
891
|
+
emitTaskEvent('cost_check', { stage: 'quality_rework', tripped: true, cost_used_usd: currentCostUSD, cost_cap_usd: maxCostUSD, cost_available: true });
|
|
892
|
+
return abortReviewLoop(finalImplResult, 'cost_ceiling', 'cost ceiling reached before quality rework', 'quality');
|
|
688
893
|
}
|
|
689
|
-
|
|
690
|
-
|
|
691
|
-
|
|
692
|
-
|
|
693
|
-
|
|
694
|
-
|
|
695
|
-
});
|
|
696
|
-
const
|
|
697
|
-
|
|
698
|
-
: '';
|
|
699
|
-
|
|
700
|
-
|
|
701
|
-
|
|
702
|
-
|
|
703
|
-
|
|
704
|
-
|
|
705
|
-
|
|
706
|
-
|
|
707
|
-
|
|
708
|
-
|
|
709
|
-
|
|
710
|
-
|
|
711
|
-
|
|
712
|
-
|
|
713
|
-
|
|
714
|
-
|
|
894
|
+
const decision = pickEscalation({ loop: 'quality', attemptIndex: qualityAttemptIndex, baseTier: resolved.slot });
|
|
895
|
+
if (decision.isEscalated)
|
|
896
|
+
emitEscalationEvent('quality', qualityAttemptIndex, decision);
|
|
897
|
+
emitTaskEvent('stage_change', { from: 'quality_review', to: 'quality_rework', attempt: qualityAttemptIndex, attemptCap: maxQualityRows, implTier: decision.impl, reviewerTier: decision.reviewer, escalated: decision.isEscalated });
|
|
898
|
+
heartbeat?.transition({ stage: 'quality_rework', stageIndex: 5, reviewRound: qualityAttemptIndex, attemptCap: maxQualityRows });
|
|
899
|
+
const feedback = qualityResult.findings.length > 0 ? `\n\n## Quality Review Feedback (round ${qualityAttemptIndex}):\n${qualityResult.findings.map(f => `- ${f}`).join('\n')}` : '';
|
|
900
|
+
const reworkTask = withDoneCondition({ ...task, prompt: `${task.prompt}${feedback}` });
|
|
901
|
+
const reworkCall = await runWithFallback({ assigned: decision.impl, providerFor, unavailableTiers: qualityUnavailable, isTransportFailure: (r) => TRANSPORT_FAILURES.has(r.status) && r.capExhausted === undefined, getStatus: (r) => r.status, makeSyntheticFailure: (assigned) => makeSyntheticRunResult(assigned, 'all_tiers_unavailable'), call: (provider) => delegateWithEscalation(reworkTask, [provider], { explicitlyPinned: true, onProgress: wrappedOnProgress }) });
|
|
902
|
+
if (reworkCall.fallbackFired || reworkCall.bothUnavailable)
|
|
903
|
+
fallbackOverrides.push({ role: 'implementer', loop: 'quality', attempt: qualityAttemptIndex, assigned: decision.impl, used: reworkCall.usedTier, reason: (reworkCall.fallbackReason ?? reworkCall.unavailableReason), triggeringStatus: reworkCall.fallbackTriggeringStatus, bothUnavailable: reworkCall.bothUnavailable });
|
|
904
|
+
if (reworkCall.fallbackFired)
|
|
905
|
+
emitFallback({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'quality', attempt: qualityAttemptIndex, role: 'implementer', assignedTier: decision.impl, usedTier: reworkCall.usedTier, reason: reworkCall.fallbackReason, triggeringStatus: reworkCall.fallbackTriggeringStatus, violatesSeparation: false });
|
|
906
|
+
if (reworkCall.bothUnavailable) {
|
|
907
|
+
emitFallbackUnavailable({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'quality', attempt: qualityAttemptIndex, role: 'implementer', assignedTier: decision.impl, reason: reworkCall.unavailableReason });
|
|
908
|
+
if (decision.isEscalated)
|
|
909
|
+
emitEscalationUnavailable({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'quality', attempt: qualityAttemptIndex, role: 'implementer', wantedTier: decision.impl, reason: reworkCall.unavailableReason });
|
|
910
|
+
return adaptForAllTiersUnavailable(reworkCall.result, 'quality', qualityAttemptIndex);
|
|
911
|
+
}
|
|
912
|
+
finalImplResult = reworkCall.result;
|
|
913
|
+
latestAttemptedImpl = { tier: reworkCall.usedTier, result: finalImplResult };
|
|
914
|
+
implementerHistory.push(reworkCall.usedTier);
|
|
915
|
+
const reworkReport = parseStructuredReport(finalImplResult.output);
|
|
916
|
+
finalImplReport = reworkReport.summary ? reworkReport : buildFallbackImplReport(finalImplResult);
|
|
917
|
+
fileContents = await readImplementerFileContents(finalImplResult.filesWritten, task.cwd);
|
|
918
|
+
heartbeat?.transition({ stage: 'quality_review', stageIndex: 4, reviewRound: qualityAttemptIndex + 1, attemptCap: maxQualityRows });
|
|
919
|
+
const reviewCall = await runWithFallback({ assigned: decision.reviewer, providerFor, unavailableTiers: qualityUnavailable, isTransportFailure: (r) => isReviewTransportFailure(r), getStatus: (r) => r.status, makeSyntheticFailure: () => makeSkippedReviewResult('all_tiers_unavailable'), call: (provider) => runQualityReview(provider, packet, finalImplReport, fileContents, finalImplResult.toolCalls, finalImplResult.filesWritten, evidence.block) });
|
|
920
|
+
if (reviewCall.bothUnavailable) {
|
|
921
|
+
emitFallbackUnavailable({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'quality', attempt: qualityAttemptIndex, role: 'qualityReviewer', assignedTier: decision.reviewer, reason: reviewCall.unavailableReason });
|
|
922
|
+
fallbackOverrides.push({ role: 'qualityReviewer', loop: 'quality', attempt: qualityAttemptIndex, assigned: decision.reviewer, used: reviewCall.usedTier, reason: reviewCall.unavailableReason, triggeringStatus: reviewCall.fallbackTriggeringStatus, bothUnavailable: true });
|
|
923
|
+
qualityReviewerHistory.push('skipped');
|
|
924
|
+
}
|
|
925
|
+
else {
|
|
926
|
+
qualityReviewerHistory.push(reviewCall.usedTier);
|
|
927
|
+
if (reviewCall.fallbackFired) {
|
|
928
|
+
emitFallback({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'quality', attempt: qualityAttemptIndex, role: 'qualityReviewer', assignedTier: decision.reviewer, usedTier: reviewCall.usedTier, reason: reviewCall.fallbackReason, triggeringStatus: reviewCall.fallbackTriggeringStatus, violatesSeparation: reviewCall.usedTier === implementerHistory[implementerHistory.length - 1] });
|
|
929
|
+
fallbackOverrides.push({ role: 'qualityReviewer', loop: 'quality', attempt: qualityAttemptIndex, assigned: decision.reviewer, used: reviewCall.usedTier, reason: reviewCall.fallbackReason, triggeringStatus: reviewCall.fallbackTriggeringStatus, bothUnavailable: false });
|
|
930
|
+
}
|
|
931
|
+
}
|
|
932
|
+
qualityResult = reviewCall.result;
|
|
933
|
+
if (reviewDidNotReject(qualityResult.status))
|
|
934
|
+
lastNonRejectedImpl = { tier: implementerHistory[implementerHistory.length - 1], result: finalImplResult };
|
|
935
|
+
qualityAttemptIndex++;
|
|
936
|
+
if (qualityResult.status === 'approved' || qualityResult.status === 'skipped')
|
|
715
937
|
break;
|
|
716
|
-
const currentFindings = [...
|
|
717
|
-
const prevFindings =
|
|
938
|
+
const currentFindings = [...(qualityResult.findings ?? [])].sort().join('\0');
|
|
939
|
+
const prevFindings = [...prevQualityFindings].sort().join('\0');
|
|
718
940
|
if (currentFindings === prevFindings && currentFindings !== '')
|
|
719
941
|
break;
|
|
720
|
-
|
|
721
|
-
}
|
|
722
|
-
}
|
|
723
|
-
let qualityResult = { status: 'skipped', report: undefined, findings: [] };
|
|
724
|
-
if (reviewPolicy === 'full') {
|
|
725
|
-
heartbeat?.transition({
|
|
726
|
-
stage: 'quality_review', stageIndex: 4,
|
|
727
|
-
reviewRound: 1, maxReviewRounds,
|
|
728
|
-
});
|
|
729
|
-
qualityResult = await runQualityReview(otherProvider, packet, specReport ?? finalImplReport, fileContents, finalImplResult.toolCalls, finalImplResult.filesWritten, evidence.block);
|
|
730
|
-
if (qualityResult.status === 'changes_required') {
|
|
731
|
-
let prevQualityFindings = [];
|
|
732
|
-
while (true) {
|
|
733
|
-
if (specRework + qualityRework >= maxReviewRounds) {
|
|
734
|
-
return abortReviewLoop(finalImplResult, 'round_cap', 'review round cap reached before quality rework', 'quality');
|
|
735
|
-
}
|
|
736
|
-
const currentCostUSD = taskCostUSD();
|
|
737
|
-
if (currentCostUSD !== null && maxCostUSD !== undefined && currentCostUSD >= 0.8 * maxCostUSD) {
|
|
738
|
-
emitTaskEvent('cost_check', { stage: 'quality_rework', tripped: true, cost_used_usd: currentCostUSD, cost_cap_usd: maxCostUSD, cost_available: true });
|
|
739
|
-
return abortReviewLoop(finalImplResult, 'cost_ceiling', 'cost ceiling reached before quality rework', 'quality');
|
|
740
|
-
}
|
|
741
|
-
emitTaskEvent('stage_change', { from: 'quality_review', to: 'quality_rework', round: qualityRework + 1, cap: maxReviewRounds });
|
|
742
|
-
qualityRework++;
|
|
743
|
-
const round = qualityRework;
|
|
744
|
-
heartbeat?.transition({
|
|
745
|
-
stage: 'quality_rework', stageIndex: 5,
|
|
746
|
-
reviewRound: round, maxReviewRounds,
|
|
747
|
-
});
|
|
748
|
-
const feedback = qualityResult.findings.length > 0
|
|
749
|
-
? `\n\n## Quality Review Feedback (round ${round}):\n${qualityResult.findings.map(f => `- ${f}`).join('\n')}`
|
|
750
|
-
: '';
|
|
751
|
-
const reworkPrompt = `${task.prompt}${feedback}`;
|
|
752
|
-
const reworkTask = withDoneCondition({ ...task, prompt: reworkPrompt });
|
|
753
|
-
const reworkResult = await delegateWithEscalation(reworkTask, [resolved.provider], { explicitlyPinned: true, onProgress: wrappedOnProgress });
|
|
754
|
-
finalImplResult = reworkResult;
|
|
755
|
-
const reworkReport = parseStructuredReport(reworkResult.output);
|
|
756
|
-
finalImplReport = reworkReport.summary ? reworkReport : buildFallbackImplReport(reworkResult);
|
|
757
|
-
const reworkContents = await readImplementerFileContents(reworkResult.filesWritten, task.cwd);
|
|
758
|
-
heartbeat?.transition({
|
|
759
|
-
stage: 'quality_review', stageIndex: 4,
|
|
760
|
-
reviewRound: round + 1, maxReviewRounds,
|
|
761
|
-
});
|
|
762
|
-
qualityResult = await runQualityReview(otherProvider, packet, finalImplReport, reworkContents, reworkResult.toolCalls, reworkResult.filesWritten, evidence.block);
|
|
763
|
-
if (qualityResult.status === 'approved')
|
|
764
|
-
break;
|
|
765
|
-
const currentFindings = [...qualityResult.findings].sort().join('\0');
|
|
766
|
-
const prevFindings = prevQualityFindings.sort().join('\0');
|
|
767
|
-
if (currentFindings === prevFindings && currentFindings !== '')
|
|
768
|
-
break;
|
|
769
|
-
prevQualityFindings = qualityResult.findings;
|
|
770
|
-
}
|
|
942
|
+
prevQualityFindings = [...(qualityResult.findings ?? [])];
|
|
771
943
|
}
|
|
772
944
|
}
|
|
773
945
|
const finalReport = specReport ?? finalImplReport;
|
|
@@ -789,7 +961,9 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
789
961
|
message: 'Implementation diff exceeded the reviewer evidence byte cap and was truncated.',
|
|
790
962
|
});
|
|
791
963
|
}
|
|
792
|
-
const
|
|
964
|
+
const specAggregateStatus = (['approved', 'changes_required', 'skipped', 'error', 'api_error', 'network_error', 'timeout'].includes(specStatus) ? specStatus : 'error');
|
|
965
|
+
const qualityAggregateStatus = qualityResult.status;
|
|
966
|
+
const aggregated = aggregateResult(finalReport, specReport, qualityResult.report, specAggregateStatus, qualityAggregateStatus);
|
|
793
967
|
// File artifact verification: check whether output targets exist on disk after all work.
|
|
794
968
|
// Only applies when status is ok; non-ok statuses skip verification entirely.
|
|
795
969
|
const fileArtifactsMissing = finalImplResult.status === 'ok' && outputTargets.length > 0
|
|
@@ -803,25 +977,23 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
803
977
|
: finalImplResult.status === 'ok' && fileArtifactsMissing
|
|
804
978
|
? 'incomplete'
|
|
805
979
|
: finalImplResult.status;
|
|
980
|
+
const specEnvelopeStatus = (specStatus === 'api_error' || specStatus === 'network_error' || specStatus === 'timeout' ? 'error' : specStatus);
|
|
981
|
+
const qualityEnvelopeStatus = qualityResult.status === 'api_error' || qualityResult.status === 'network_error' || qualityResult.status === 'timeout' ? 'error' : qualityResult.status;
|
|
806
982
|
return {
|
|
807
983
|
...finalImplResult,
|
|
808
984
|
status: finalStatus,
|
|
809
985
|
workerStatus: finalWorkerStatus,
|
|
810
986
|
concerns,
|
|
811
|
-
specReviewStatus:
|
|
812
|
-
qualityReviewStatus:
|
|
813
|
-
specReviewReason: specResult.errorReason,
|
|
814
|
-
qualityReviewReason: qualityResult.errorReason,
|
|
987
|
+
specReviewStatus: specEnvelopeStatus,
|
|
988
|
+
qualityReviewStatus: qualityEnvelopeStatus,
|
|
989
|
+
specReviewReason: 'errorReason' in specResult ? specResult.errorReason : undefined,
|
|
990
|
+
qualityReviewReason: 'errorReason' in qualityResult ? qualityResult.errorReason : undefined,
|
|
815
991
|
structuredReport: aggregated,
|
|
816
992
|
implementationReport: finalImplReport,
|
|
817
993
|
specReviewReport: specReport,
|
|
818
994
|
qualityReviewReport: qualityResult.report,
|
|
819
995
|
filePathsSkipped,
|
|
820
|
-
agents:
|
|
821
|
-
implementer: resolved.slot,
|
|
822
|
-
specReviewer: otherSlot,
|
|
823
|
-
qualityReviewer: reviewPolicy === 'full' ? otherSlot : 'skipped',
|
|
824
|
-
},
|
|
996
|
+
agents: agentEnvelope(specReviewerHistory[specReviewerHistory.length - 1] ?? 'not_applicable', qualityReviewerHistory[qualityReviewerHistory.length - 1] ?? (reviewPolicy === 'full' ? 'not_applicable' : 'skipped')),
|
|
825
997
|
models: {
|
|
826
998
|
implementer: implModel,
|
|
827
999
|
specReviewer: reviewModel,
|