@zhixuan92/multi-model-agent-core 3.6.7 → 3.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +4 -0
- package/dist/config/read-only-review-flag.d.ts +8 -0
- package/dist/config/read-only-review-flag.d.ts.map +1 -0
- package/dist/config/read-only-review-flag.js +13 -0
- package/dist/config/read-only-review-flag.js.map +1 -0
- package/dist/delegate-with-escalation.d.ts +7 -1
- package/dist/delegate-with-escalation.d.ts.map +1 -1
- package/dist/delegate-with-escalation.js +22 -4
- package/dist/delegate-with-escalation.js.map +1 -1
- package/dist/diagnostics/http-server-log.d.ts +63 -0
- package/dist/diagnostics/http-server-log.d.ts.map +1 -0
- package/dist/diagnostics/http-server-log.js +188 -0
- package/dist/diagnostics/http-server-log.js.map +1 -0
- package/dist/diagnostics/jsonl-writer.d.ts +24 -0
- package/dist/diagnostics/jsonl-writer.d.ts.map +1 -0
- package/dist/diagnostics/jsonl-writer.js +57 -0
- package/dist/diagnostics/jsonl-writer.js.map +1 -0
- package/dist/diagnostics/types.d.ts +54 -0
- package/dist/diagnostics/types.d.ts.map +1 -0
- package/dist/diagnostics/types.js +2 -0
- package/dist/diagnostics/types.js.map +1 -0
- package/dist/executors/_shared/findings-schema.d.ts +40 -0
- package/dist/executors/_shared/findings-schema.d.ts.map +1 -0
- package/dist/executors/_shared/findings-schema.js +23 -0
- package/dist/executors/_shared/findings-schema.js.map +1 -0
- package/dist/executors/_shared/review-verdict-mapping.d.ts +16 -0
- package/dist/executors/_shared/review-verdict-mapping.d.ts.map +1 -0
- package/dist/executors/_shared/review-verdict-mapping.js +24 -0
- package/dist/executors/_shared/review-verdict-mapping.js.map +1 -0
- package/dist/executors/audit.d.ts.map +1 -1
- package/dist/executors/audit.js +21 -5
- package/dist/executors/audit.js.map +1 -1
- package/dist/executors/debug.d.ts.map +1 -1
- package/dist/executors/debug.js +11 -2
- package/dist/executors/debug.js.map +1 -1
- package/dist/executors/execution-context.d.ts.map +1 -1
- package/dist/executors/execution-context.js +1 -0
- package/dist/executors/execution-context.js.map +1 -1
- package/dist/executors/investigate.d.ts.map +1 -1
- package/dist/executors/investigate.js +22 -17
- package/dist/executors/investigate.js.map +1 -1
- package/dist/executors/review.d.ts.map +1 -1
- package/dist/executors/review.js +48 -48
- package/dist/executors/review.js.map +1 -1
- package/dist/executors/types.d.ts +11 -4
- package/dist/executors/types.d.ts.map +1 -1
- package/dist/executors/verify.d.ts.map +1 -1
- package/dist/executors/verify.js +43 -8
- package/dist/executors/verify.js.map +1 -1
- package/dist/heartbeat.d.ts.map +1 -1
- package/dist/heartbeat.js +5 -0
- package/dist/heartbeat.js.map +1 -1
- package/dist/index.d.ts +11 -2
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +7 -1
- package/dist/index.js.map +1 -1
- package/dist/intake/compilers/audit.d.ts.map +1 -1
- package/dist/intake/compilers/audit.js +2 -0
- package/dist/intake/compilers/audit.js.map +1 -1
- package/dist/intake/compilers/debug.d.ts.map +1 -1
- package/dist/intake/compilers/debug.js +1 -0
- package/dist/intake/compilers/debug.js.map +1 -1
- package/dist/intake/compilers/investigate.d.ts.map +1 -1
- package/dist/intake/compilers/investigate.js +3 -4
- package/dist/intake/compilers/investigate.js.map +1 -1
- package/dist/intake/compilers/review.d.ts.map +1 -1
- package/dist/intake/compilers/review.js +2 -0
- package/dist/intake/compilers/review.js.map +1 -1
- package/dist/intake/resolve.d.ts.map +1 -1
- package/dist/intake/resolve.js +11 -10
- package/dist/intake/resolve.js.map +1 -1
- package/dist/intake/types.d.ts +2 -1
- package/dist/intake/types.d.ts.map +1 -1
- package/dist/observability/buckets.d.ts +3 -0
- package/dist/observability/buckets.d.ts.map +1 -0
- package/dist/observability/buckets.js +21 -0
- package/dist/observability/buckets.js.map +1 -0
- package/dist/observability/bus.d.ts +11 -0
- package/dist/observability/bus.d.ts.map +1 -0
- package/dist/observability/bus.js +17 -0
- package/dist/observability/bus.js.map +1 -0
- package/dist/observability/events.d.ts +932 -0
- package/dist/observability/events.d.ts.map +1 -0
- package/dist/observability/events.js +313 -0
- package/dist/observability/events.js.map +1 -0
- package/dist/observability/local-log-sink.d.ts +10 -0
- package/dist/observability/local-log-sink.d.ts.map +1 -0
- package/dist/observability/local-log-sink.js +11 -0
- package/dist/observability/local-log-sink.js.map +1 -0
- package/dist/observability/telemetry-sink.d.ts +12 -0
- package/dist/observability/telemetry-sink.d.ts.map +1 -0
- package/dist/observability/telemetry-sink.js +24 -0
- package/dist/observability/telemetry-sink.js.map +1 -0
- package/dist/review/quality-only-prompts.d.ts +20 -0
- package/dist/review/quality-only-prompts.d.ts.map +1 -0
- package/dist/review/quality-only-prompts.js +108 -0
- package/dist/review/quality-only-prompts.js.map +1 -0
- package/dist/review/quality-reviewer.d.ts +4 -1
- package/dist/review/quality-reviewer.d.ts.map +1 -1
- package/dist/review/quality-reviewer.js +6 -4
- package/dist/review/quality-reviewer.js.map +1 -1
- package/dist/run-tasks/index.d.ts +11 -3
- package/dist/run-tasks/index.d.ts.map +1 -1
- package/dist/run-tasks/index.js +1 -1
- package/dist/run-tasks/index.js.map +1 -1
- package/dist/run-tasks/reviewed-lifecycle.d.ts +5 -2
- package/dist/run-tasks/reviewed-lifecycle.d.ts.map +1 -1
- package/dist/run-tasks/reviewed-lifecycle.js +211 -130
- package/dist/run-tasks/reviewed-lifecycle.js.map +1 -1
- package/dist/runners/types.d.ts +6 -1
- package/dist/runners/types.d.ts.map +1 -1
- package/dist/telemetry/bucketing.d.ts +2 -0
- package/dist/telemetry/bucketing.d.ts.map +1 -1
- package/dist/telemetry/bucketing.js +9 -0
- package/dist/telemetry/bucketing.js.map +1 -1
- package/dist/telemetry/event-builder.d.ts.map +1 -1
- package/dist/telemetry/event-builder.js +19 -1
- package/dist/telemetry/event-builder.js.map +1 -1
- package/dist/telemetry/types.d.ts +131 -5
- package/dist/telemetry/types.d.ts.map +1 -1
- package/dist/telemetry/types.js +13 -1
- package/dist/telemetry/types.js.map +1 -1
- package/dist/tool-schemas/audit.d.ts +17 -0
- package/dist/tool-schemas/audit.d.ts.map +1 -1
- package/dist/tool-schemas/debug.d.ts +17 -0
- package/dist/tool-schemas/debug.d.ts.map +1 -1
- package/dist/tool-schemas/delegate.d.ts +17 -0
- package/dist/tool-schemas/delegate.d.ts.map +1 -1
- package/dist/tool-schemas/execute-plan.d.ts +17 -0
- package/dist/tool-schemas/execute-plan.d.ts.map +1 -1
- package/dist/tool-schemas/investigate.d.ts +17 -4
- package/dist/tool-schemas/investigate.d.ts.map +1 -1
- package/dist/tool-schemas/investigate.js +0 -1
- package/dist/tool-schemas/investigate.js.map +1 -1
- package/dist/tool-schemas/retry.d.ts +17 -0
- package/dist/tool-schemas/retry.d.ts.map +1 -1
- package/dist/tool-schemas/review.d.ts +17 -0
- package/dist/tool-schemas/review.d.ts.map +1 -1
- package/dist/tool-schemas/shared-output.d.ts +17 -0
- package/dist/tool-schemas/shared-output.d.ts.map +1 -1
- package/dist/tool-schemas/shared-output.js +6 -0
- package/dist/tool-schemas/shared-output.js.map +1 -1
- package/dist/tool-schemas/verify.d.ts +17 -0
- package/dist/tool-schemas/verify.d.ts.map +1 -1
- package/dist/types.d.ts +5 -1
- package/dist/types.d.ts.map +1 -1
- package/dist/types.js.map +1 -1
- package/package.json +25 -5
- package/dist/diagnostics/disconnect-log.d.ts +0 -143
- package/dist/diagnostics/disconnect-log.d.ts.map +0 -1
- package/dist/diagnostics/disconnect-log.js +0 -374
- package/dist/diagnostics/disconnect-log.js.map +0 -1
|
@@ -23,6 +23,9 @@ import { buildFallbackImplReport, readImplementerFileContents } from './fallback
|
|
|
23
23
|
import { composeVerboseLine, toVerboseFields } from '../diagnostics/verbose-line.js';
|
|
24
24
|
import { withDoneCondition } from './execute-task.js';
|
|
25
25
|
const exec = promisify(execFile);
|
|
26
|
+
const READ_ONLY_TOOL_NAMES = new Set([
|
|
27
|
+
'audit', 'review', 'verify', 'investigate', 'debug',
|
|
28
|
+
]);
|
|
26
29
|
export function emptyStats() {
|
|
27
30
|
return {
|
|
28
31
|
implementing: { stage: 'implementing', entered: false, durationMs: null, costUSD: null, agentTier: null, modelFamily: null, model: null },
|
|
@@ -87,8 +90,13 @@ export function endVerifyStage(stats, t0, c0, agent, finalCostUSD, outcome, skip
|
|
|
87
90
|
skipReason,
|
|
88
91
|
};
|
|
89
92
|
}
|
|
90
|
-
export async function executeReviewedLifecycle(task, resolved, config, taskIndex, onProgress, heartbeatWiring, diagnostics, recorder, _route, _client, _triggeringSkill) {
|
|
93
|
+
export async function executeReviewedLifecycle(task, resolved, config, taskIndex, onProgress, heartbeatWiring, diagnostics, recorder, _route, _client, _triggeringSkill, bus, qualityReviewPromptBuilder) {
|
|
91
94
|
const reviewPolicy = task.reviewPolicy ?? 'full';
|
|
95
|
+
const routeKey = _route ?? '';
|
|
96
|
+
if (reviewPolicy === 'quality_only' && !READ_ONLY_TOOL_NAMES.has(routeKey)) {
|
|
97
|
+
throw new Error(`reviewPolicy 'quality_only' is only valid for read-only routes; received '${routeKey}'. ` +
|
|
98
|
+
`Use 'full', 'spec_only', 'diff_only', or 'off' for artifact-producing routes.`);
|
|
99
|
+
}
|
|
92
100
|
const otherSlot = resolved.slot === 'standard' ? 'complex' : 'standard';
|
|
93
101
|
let escalationProvider;
|
|
94
102
|
try {
|
|
@@ -110,23 +118,22 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
110
118
|
const { outputTargets } = partitionFilePaths(task.filePaths, task.cwd ?? process.cwd());
|
|
111
119
|
const stageCount = reviewPolicy === 'off' ? 1 :
|
|
112
120
|
reviewPolicy === 'spec_only' ? 3 :
|
|
113
|
-
|
|
121
|
+
reviewPolicy === 'quality_only' ? 3 :
|
|
122
|
+
5;
|
|
114
123
|
const verbose = diagnostics?.verbose ?? false;
|
|
115
|
-
let lastStageSeen;
|
|
116
124
|
const verboseStreamRaw = verbose
|
|
117
125
|
? (diagnostics?.verboseStream ?? ((line) => { process.stderr.write(line + '\n'); }))
|
|
118
126
|
: undefined;
|
|
119
127
|
const verboseBatchIdEarly = heartbeatWiring?.batchId;
|
|
120
128
|
const shortBatchEarly = verboseBatchIdEarly ? verboseBatchIdEarly.slice(0, 8) : '????????';
|
|
121
|
-
const taskEventLogger = diagnostics?.logger;
|
|
122
129
|
const emitTaskEvent = (event, fields) => {
|
|
123
|
-
if (
|
|
130
|
+
if (bus && verboseBatchIdEarly !== undefined) {
|
|
124
131
|
const cleaned = {};
|
|
125
132
|
for (const [key, value] of Object.entries(fields)) {
|
|
126
133
|
if (value !== undefined)
|
|
127
134
|
cleaned[key] = value;
|
|
128
135
|
}
|
|
129
|
-
|
|
136
|
+
bus.emit({ event, ts: new Date().toISOString(), batchId: verboseBatchIdEarly, taskIndex, ...cleaned });
|
|
130
137
|
}
|
|
131
138
|
if (verboseStreamRaw) {
|
|
132
139
|
verboseStreamRaw(composeVerboseLine({ event, ts: new Date().toISOString(), batch: shortBatchEarly, task: taskIndex, ...toVerboseFields(fields) }));
|
|
@@ -141,7 +148,8 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
141
148
|
const needHeartbeat = onProgress !== undefined ||
|
|
142
149
|
verbose ||
|
|
143
150
|
heartbeatWiring?.recordHeartbeat !== undefined ||
|
|
144
|
-
diagnostics?.logger !== undefined
|
|
151
|
+
diagnostics?.logger !== undefined ||
|
|
152
|
+
bus !== undefined;
|
|
145
153
|
// Synthesize an onProgress sink when the caller didn't pass one — the
|
|
146
154
|
// heartbeat needs a place to emit heartbeat events so the stage-change
|
|
147
155
|
// detector below fires. Discards events if there is no external consumer.
|
|
@@ -149,16 +157,10 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
149
157
|
const heartbeat = needHeartbeat
|
|
150
158
|
? new HeartbeatTimer((event) => {
|
|
151
159
|
if (event.kind === 'heartbeat') {
|
|
152
|
-
// Emit on every heartbeat tick so the operator can confirm
|
|
153
|
-
//
|
|
154
|
-
//
|
|
155
|
-
//
|
|
156
|
-
if (event.stage !== lastStageSeen) {
|
|
157
|
-
if (lastStageSeen !== undefined) {
|
|
158
|
-
emitTaskEvent('stage_change', { from: lastStageSeen, to: event.stage });
|
|
159
|
-
}
|
|
160
|
-
lastStageSeen = event.stage;
|
|
161
|
-
}
|
|
160
|
+
// Emit on every heartbeat tick so the operator can confirm the
|
|
161
|
+
// timer is actually firing. Stage transitions are authoritative
|
|
162
|
+
// only via explicit emit calls at lifecycle points; the
|
|
163
|
+
// heartbeat tick no longer infers transitions (P5).
|
|
162
164
|
const sinceLastMs = Date.now() - prevEventAtMs;
|
|
163
165
|
emitTaskEvent('heartbeat', {
|
|
164
166
|
elapsed: event.elapsed,
|
|
@@ -205,7 +207,6 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
205
207
|
const implModel = resolved.provider.config.model;
|
|
206
208
|
const progressCounters = { filesRead: 0, filesWritten: 0, toolCalls: 0 };
|
|
207
209
|
const verboseStream = verboseStreamRaw;
|
|
208
|
-
emitTaskEvent('worker_start', { worker: resolved.provider.config.model });
|
|
209
210
|
let prevEventAtMs = verbose ? Date.now() : 0;
|
|
210
211
|
// Wrap whenever we have ANY consumer for InternalRunnerEvent (heartbeat,
|
|
211
212
|
// verbose stream, or verbose logger). Previously this only wrapped when
|
|
@@ -218,6 +219,13 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
218
219
|
if (event.kind === 'turn_start' || event.kind === 'text_emission' || event.kind === 'tool_call' || event.kind === 'turn_complete') {
|
|
219
220
|
markRunnerEvent();
|
|
220
221
|
}
|
|
222
|
+
if (event.kind === 'worker_start') {
|
|
223
|
+
emitTaskEvent('worker_start', {
|
|
224
|
+
model: event.model,
|
|
225
|
+
providerType: event.providerType,
|
|
226
|
+
tier: event.tier,
|
|
227
|
+
});
|
|
228
|
+
}
|
|
221
229
|
if (event.kind === 'turn_start') {
|
|
222
230
|
heartbeat?.markEvent('llm');
|
|
223
231
|
if (verbose)
|
|
@@ -333,11 +341,9 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
333
341
|
const runningCostUSD = () => taskCostUSD();
|
|
334
342
|
const policyEscalated = { spec: false, quality: false, diff: false };
|
|
335
343
|
const emitFallback = (p) => {
|
|
336
|
-
diagnostics?.logger?.fallback(p);
|
|
337
344
|
emitTaskEvent('fallback', p);
|
|
338
345
|
};
|
|
339
346
|
const emitFallbackUnavailable = (p) => {
|
|
340
|
-
diagnostics?.logger?.fallbackUnavailable(p);
|
|
341
347
|
emitTaskEvent('fallback_unavailable', p);
|
|
342
348
|
};
|
|
343
349
|
const emitEscalationEvent = (loop, attempt, decision) => {
|
|
@@ -345,12 +351,10 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
345
351
|
batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop, attempt,
|
|
346
352
|
baseTier: resolved.slot, implTier: decision.impl, reviewerTier: decision.reviewer,
|
|
347
353
|
};
|
|
348
|
-
diagnostics?.logger?.escalation(p);
|
|
349
354
|
emitTaskEvent('escalation', p);
|
|
350
355
|
policyEscalated[loop] = true;
|
|
351
356
|
};
|
|
352
357
|
const emitEscalationUnavailable = (p) => {
|
|
353
|
-
diagnostics?.logger?.escalationUnavailable(p);
|
|
354
358
|
emitTaskEvent('escalation_unavailable', p);
|
|
355
359
|
};
|
|
356
360
|
// When the review loop aborts mid-flight, preserve any review-status info already set
|
|
@@ -366,7 +370,7 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
366
370
|
terminationReason: 'all_tiers_unavailable',
|
|
367
371
|
reviewRounds: reviewRounds(),
|
|
368
372
|
error: `runWithFallback: both tiers unavailable (loop=${loop}, attempt=${attempt}, role=implementer)`,
|
|
369
|
-
agents: agentEnvelope(specReviewerHistory[specReviewerHistory.length - 1] ?? 'not_applicable', qualityReviewerHistory[qualityReviewerHistory.length - 1] ?? (reviewPolicy === 'full' ? 'not_applicable' : 'skipped')),
|
|
373
|
+
agents: agentEnvelope(specReviewerHistory[specReviewerHistory.length - 1] ?? 'not_applicable', qualityReviewerHistory[qualityReviewerHistory.length - 1] ?? ((reviewPolicy === 'full' || reviewPolicy === 'quality_only') ? 'not_applicable' : 'skipped')),
|
|
370
374
|
stageStats: stats,
|
|
371
375
|
};
|
|
372
376
|
}
|
|
@@ -400,7 +404,7 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
400
404
|
error: message,
|
|
401
405
|
specReviewStatus: aborting === 'spec' ? 'changes_required' : (base.specReviewStatus ?? 'approved'),
|
|
402
406
|
qualityReviewStatus: aborting === 'quality' ? 'changes_required' : (base.qualityReviewStatus ?? 'skipped'),
|
|
403
|
-
agents: agentEnvelope(specReviewerHistory[specReviewerHistory.length - 1] ?? 'not_applicable', qualityReviewerHistory[qualityReviewerHistory.length - 1] ?? (reviewPolicy === 'full' ? 'not_applicable' : 'skipped')),
|
|
407
|
+
agents: agentEnvelope(specReviewerHistory[specReviewerHistory.length - 1] ?? 'not_applicable', qualityReviewerHistory[qualityReviewerHistory.length - 1] ?? ((reviewPolicy === 'full' || reviewPolicy === 'quality_only') ? 'not_applicable' : 'skipped')),
|
|
404
408
|
stageStats: stats,
|
|
405
409
|
});
|
|
406
410
|
const defaultVerification = { status: 'skipped', steps: [], totalDurationMs: 0, skipReason: 'no_command' };
|
|
@@ -653,9 +657,13 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
653
657
|
// catch path. Without this, the recorder only fires on 2 of ~5 exit paths.
|
|
654
658
|
let __finalRunResult;
|
|
655
659
|
const __recordOnce = (r) => {
|
|
660
|
+
// Stamp stallTriggered on every exit path. The watchdog flag is owned
|
|
661
|
+
// by this scope; surfacing it on the RunResult lets the caller (and
|
|
662
|
+
// telemetry) distinguish "no progress" aborts from cap exhaustion.
|
|
663
|
+
const stamped = stallFired ? { ...r, stallTriggered: true } : r;
|
|
656
664
|
if (__finalRunResult === undefined)
|
|
657
|
-
__finalRunResult =
|
|
658
|
-
return
|
|
665
|
+
__finalRunResult = stamped;
|
|
666
|
+
return stamped;
|
|
659
667
|
};
|
|
660
668
|
try {
|
|
661
669
|
// The dirty-tree precondition + git baseline only apply to artifact-producing tasks
|
|
@@ -699,7 +707,7 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
699
707
|
isTransportFailure: (r) => TRANSPORT_FAILURES.has(r.status) && r.capExhausted === undefined,
|
|
700
708
|
getStatus: (r) => r.status,
|
|
701
709
|
makeSyntheticFailure: (assigned) => makeSyntheticRunResult(assigned, 'all_tiers_unavailable'),
|
|
702
|
-
call: (provider) => delegateWithEscalation(withDoneCondition(task), [provider], { explicitlyPinned: false, onProgress: wrappedOnProgress, taskDeadlineMs, abortSignal: stallController.signal }),
|
|
710
|
+
call: (provider) => delegateWithEscalation(withDoneCondition(task), [provider], { explicitlyPinned: false, onProgress: wrappedOnProgress, taskDeadlineMs, abortSignal: stallController.signal, assignedTier: initialDecision.impl }),
|
|
703
711
|
});
|
|
704
712
|
if (initialImpl.fallbackFired || initialImpl.bothUnavailable) {
|
|
705
713
|
fallbackOverrides.push({
|
|
@@ -752,7 +760,7 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
752
760
|
? [...(implResult.filesRead ?? []), ...implResult.filesWritten].some(f => task.filePaths.some(fp => f === fp || f.endsWith('/' + fp) || f.endsWith(fp)))
|
|
753
761
|
: true;
|
|
754
762
|
const filePathsSkipped = !filePathsInteracted;
|
|
755
|
-
if (implResult.filesWritten.length === 0) {
|
|
763
|
+
if (implResult.filesWritten.length === 0 && reviewPolicy !== 'quality_only') {
|
|
756
764
|
if (reviewPolicy === 'off') {
|
|
757
765
|
emitTaskEvent('stage_change', { from: 'verifying', to: 'terminal' });
|
|
758
766
|
const terminal = resolveOffTerminal({
|
|
@@ -860,7 +868,7 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
860
868
|
};
|
|
861
869
|
let fileContents = await readImplementerFileContents(implResult.filesWritten, task.cwd);
|
|
862
870
|
const effectiveImplReport = implReport ?? buildFallbackImplReport(implResult);
|
|
863
|
-
const evidence = isArtifactProducing
|
|
871
|
+
const evidence = (isArtifactProducing && reviewPolicy !== 'quality_only')
|
|
864
872
|
? await buildEvidence({ cwd, baselineHead, commits, verification, reviewPolicy })
|
|
865
873
|
: { block: '', diffTruncated: false, fullDiff: '' };
|
|
866
874
|
if (reviewPolicy === 'diff_only') {
|
|
@@ -918,116 +926,126 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
918
926
|
let specStatus;
|
|
919
927
|
let specReport;
|
|
920
928
|
let specReviewReason;
|
|
921
|
-
|
|
922
|
-
|
|
923
|
-
|
|
924
|
-
|
|
925
|
-
|
|
926
|
-
|
|
927
|
-
|
|
928
|
-
|
|
929
|
-
|
|
930
|
-
|
|
931
|
-
|
|
932
|
-
|
|
933
|
-
|
|
934
|
-
|
|
935
|
-
|
|
936
|
-
|
|
937
|
-
|
|
938
|
-
|
|
939
|
-
|
|
940
|
-
specReviewerHistory.push(initialSpecReview.usedTier);
|
|
941
|
-
if (initialSpecReview.fallbackFired) {
|
|
942
|
-
emitFallback({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'spec', attempt: 0, role: 'specReviewer', assignedTier: initialReviewerTier, usedTier: initialSpecReview.usedTier, reason: initialSpecReview.fallbackReason, triggeringStatus: initialSpecReview.fallbackTriggeringStatus, violatesSeparation: initialSpecReview.usedTier === implementerHistory[implementerHistory.length - 1] });
|
|
943
|
-
fallbackOverrides.push({ role: 'specReviewer', loop: 'spec', attempt: 0, assigned: initialReviewerTier, used: initialSpecReview.usedTier, reason: initialSpecReview.fallbackReason, triggeringStatus: initialSpecReview.fallbackTriggeringStatus, bothUnavailable: false });
|
|
944
|
-
}
|
|
945
|
-
}
|
|
946
|
-
specResult = initialSpecReview.bothUnavailable
|
|
947
|
-
? makeSkippedReviewResult('all_tiers_unavailable')
|
|
948
|
-
: initialSpecReview.result;
|
|
949
|
-
specStatus = specResult.status;
|
|
950
|
-
specReport = 'report' in specResult ? specResult.report : undefined;
|
|
951
|
-
specReviewReason = specStatus === 'skipped' ? 'all_tiers_unavailable' : ('errorReason' in specResult ? specResult.errorReason : undefined);
|
|
952
|
-
let prevSpecFindings = [...(specResult.findings ?? [])];
|
|
953
|
-
while (specStatus === 'changes_required') {
|
|
954
|
-
if (specAttemptIndex >= maxSpecRows)
|
|
955
|
-
return abortReviewLoop(finalImplResult, 'round_cap', 'review round cap reached before spec rework', 'spec');
|
|
956
|
-
const currentCostUSD = taskCostUSD();
|
|
957
|
-
if (currentCostUSD !== null && maxCostUSD !== undefined && currentCostUSD >= 0.8 * maxCostUSD) {
|
|
958
|
-
emitTaskEvent('cost_check', { stage: 'spec_rework', tripped: true, cost_used_usd: currentCostUSD, cost_cap_usd: maxCostUSD, cost_available: true });
|
|
959
|
-
return abortReviewLoop(finalImplResult, 'cost_ceiling', 'cost ceiling reached before spec rework', 'spec');
|
|
960
|
-
}
|
|
961
|
-
const decision = pickEscalation({ loop: 'spec', attemptIndex: specAttemptIndex, baseTier: resolved.slot });
|
|
962
|
-
if (decision.isEscalated)
|
|
963
|
-
emitEscalationEvent('spec', specAttemptIndex, decision);
|
|
964
|
-
emitTaskEvent('stage_change', { from: 'spec_review', to: 'spec_rework', attempt: specAttemptIndex, attemptCap: maxSpecRows, implTier: decision.impl, reviewerTier: decision.reviewer, escalated: decision.isEscalated });
|
|
965
|
-
heartbeat?.transition({ stage: 'spec_rework', stageIndex: 3, reviewRound: specAttemptIndex, attemptCap: maxSpecRows });
|
|
966
|
-
const feedback = specResult.findings.length > 0 ? `\n\n## Spec Review Feedback (round ${specAttemptIndex}):\n${specResult.findings.map(f => `- ${f}`).join('\n')}` : '';
|
|
967
|
-
const reworkTask = withDoneCondition({ ...task, prompt: `${task.prompt}${feedback}` });
|
|
968
|
-
const reworkCall = await runWithFallback({ assigned: decision.impl, providerFor, unavailableTiers: specUnavailable, isTransportFailure: (r) => TRANSPORT_FAILURES.has(r.status) && r.capExhausted === undefined, getStatus: (r) => r.status, makeSyntheticFailure: (assigned) => makeSyntheticRunResult(assigned, 'all_tiers_unavailable'), call: (provider) => delegateWithEscalation(reworkTask, [provider], { explicitlyPinned: true, onProgress: wrappedOnProgress, taskDeadlineMs, abortSignal: stallController.signal }) });
|
|
969
|
-
if (reworkCall.fallbackFired || reworkCall.bothUnavailable)
|
|
970
|
-
fallbackOverrides.push({ role: 'implementer', loop: 'spec', attempt: specAttemptIndex, assigned: decision.impl, used: reworkCall.usedTier, reason: (reworkCall.fallbackReason ?? reworkCall.unavailableReason), triggeringStatus: reworkCall.fallbackTriggeringStatus, bothUnavailable: reworkCall.bothUnavailable });
|
|
971
|
-
if (reworkCall.fallbackFired) {
|
|
972
|
-
emitFallback({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'spec', attempt: specAttemptIndex, role: 'implementer', assignedTier: decision.impl, usedTier: reworkCall.usedTier, reason: reworkCall.fallbackReason, triggeringStatus: reworkCall.fallbackTriggeringStatus, violatesSeparation: false });
|
|
973
|
-
if (decision.isEscalated && reworkCall.fallbackReason === 'not_configured')
|
|
974
|
-
emitEscalationUnavailable({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'spec', attempt: specAttemptIndex, role: 'implementer', wantedTier: decision.impl, reason: reworkCall.fallbackReason });
|
|
975
|
-
}
|
|
976
|
-
if (reworkCall.bothUnavailable) {
|
|
977
|
-
emitFallbackUnavailable({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'spec', attempt: specAttemptIndex, role: 'implementer', assignedTier: decision.impl, reason: reworkCall.unavailableReason });
|
|
978
|
-
if (decision.isEscalated)
|
|
979
|
-
emitEscalationUnavailable({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'spec', attempt: specAttemptIndex, role: 'implementer', wantedTier: decision.impl, reason: reworkCall.unavailableReason });
|
|
980
|
-
return __recordOnce(adaptForAllTiersUnavailable(reworkCall.result, 'spec', specAttemptIndex));
|
|
981
|
-
}
|
|
982
|
-
finalImplResult = reworkCall.result;
|
|
983
|
-
latestAttemptedImpl = { tier: reworkCall.usedTier, result: finalImplResult };
|
|
984
|
-
implementerHistory.push(reworkCall.usedTier);
|
|
985
|
-
const reworkReport = parseStructuredReport(finalImplResult.output);
|
|
986
|
-
finalImplReport = reworkReport.summary ? reworkReport : buildFallbackImplReport(finalImplResult);
|
|
987
|
-
fileContents = await readImplementerFileContents(finalImplResult.filesWritten, task.cwd);
|
|
988
|
-
heartbeat?.transition({ stage: 'spec_review', stageIndex: 2, reviewRound: specAttemptIndex + 1, attemptCap: maxSpecRows });
|
|
989
|
-
const reviewCall = await runWithFallback({ assigned: decision.reviewer, providerFor, unavailableTiers: specUnavailable, isTransportFailure: (r) => isReviewTransportFailure(r), getStatus: (r) => r.status, makeSyntheticFailure: () => makeSkippedReviewResult('all_tiers_unavailable'), call: (provider) => runSpecReview(provider, packet, finalImplReport, fileContents, finalImplResult.toolCalls, task.planContext, evidence.block) });
|
|
990
|
-
if (reviewCall.bothUnavailable) {
|
|
991
|
-
emitFallbackUnavailable({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'spec', attempt: specAttemptIndex, role: 'specReviewer', assignedTier: decision.reviewer, reason: reviewCall.unavailableReason });
|
|
992
|
-
fallbackOverrides.push({ role: 'specReviewer', loop: 'spec', attempt: specAttemptIndex, assigned: decision.reviewer, used: reviewCall.usedTier, reason: reviewCall.unavailableReason, triggeringStatus: reviewCall.fallbackTriggeringStatus, bothUnavailable: true });
|
|
929
|
+
let specReviewT0 = 0;
|
|
930
|
+
let specReviewC0 = null;
|
|
931
|
+
if (reviewPolicy !== 'quality_only') {
|
|
932
|
+
heartbeat?.transition({ stage: 'spec_review', stageIndex: 2, reviewRound: 1, attemptCap: maxSpecRows });
|
|
933
|
+
const initialReviewerTier = pickReviewer({ loop: 'spec', attemptIndex: 0, baseTier: resolved.slot });
|
|
934
|
+
specReviewT0 = Date.now();
|
|
935
|
+
specReviewC0 = runningCostUSD();
|
|
936
|
+
const initialSpecReview = await runWithFallback({
|
|
937
|
+
assigned: initialReviewerTier,
|
|
938
|
+
providerFor,
|
|
939
|
+
unavailableTiers: specUnavailable,
|
|
940
|
+
isTransportFailure: (r) => isReviewTransportFailure(r),
|
|
941
|
+
getStatus: (r) => r.status,
|
|
942
|
+
makeSyntheticFailure: () => makeSkippedReviewResult('all_tiers_unavailable'),
|
|
943
|
+
call: (provider) => runSpecReview(provider, packet, effectiveImplReport, fileContents, implResult.toolCalls, task.planContext, evidence.block),
|
|
944
|
+
});
|
|
945
|
+
if (initialSpecReview.bothUnavailable) {
|
|
946
|
+
emitFallbackUnavailable({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'spec', attempt: 0, role: 'specReviewer', assignedTier: initialReviewerTier, reason: initialSpecReview.unavailableReason });
|
|
947
|
+
fallbackOverrides.push({ role: 'specReviewer', loop: 'spec', attempt: 0, assigned: initialReviewerTier, used: initialSpecReview.usedTier, reason: initialSpecReview.unavailableReason, triggeringStatus: initialSpecReview.fallbackTriggeringStatus, bothUnavailable: true });
|
|
993
948
|
specReviewerHistory.push('skipped');
|
|
994
949
|
}
|
|
995
950
|
else {
|
|
996
|
-
specReviewerHistory.push(
|
|
997
|
-
if (
|
|
998
|
-
emitFallback({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'spec', attempt:
|
|
999
|
-
fallbackOverrides.push({ role: 'specReviewer', loop: 'spec', attempt:
|
|
951
|
+
specReviewerHistory.push(initialSpecReview.usedTier);
|
|
952
|
+
if (initialSpecReview.fallbackFired) {
|
|
953
|
+
emitFallback({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'spec', attempt: 0, role: 'specReviewer', assignedTier: initialReviewerTier, usedTier: initialSpecReview.usedTier, reason: initialSpecReview.fallbackReason, triggeringStatus: initialSpecReview.fallbackTriggeringStatus, violatesSeparation: initialSpecReview.usedTier === implementerHistory[implementerHistory.length - 1] });
|
|
954
|
+
fallbackOverrides.push({ role: 'specReviewer', loop: 'spec', attempt: 0, assigned: initialReviewerTier, used: initialSpecReview.usedTier, reason: initialSpecReview.fallbackReason, triggeringStatus: initialSpecReview.fallbackTriggeringStatus, bothUnavailable: false });
|
|
1000
955
|
}
|
|
1001
956
|
}
|
|
1002
|
-
specResult =
|
|
957
|
+
specResult = initialSpecReview.bothUnavailable
|
|
958
|
+
? makeSkippedReviewResult('all_tiers_unavailable')
|
|
959
|
+
: initialSpecReview.result;
|
|
1003
960
|
specStatus = specResult.status;
|
|
1004
961
|
specReport = 'report' in specResult ? specResult.report : undefined;
|
|
1005
962
|
specReviewReason = specStatus === 'skipped' ? 'all_tiers_unavailable' : ('errorReason' in specResult ? specResult.errorReason : undefined);
|
|
1006
|
-
|
|
1007
|
-
|
|
1008
|
-
|
|
1009
|
-
|
|
1010
|
-
|
|
1011
|
-
|
|
1012
|
-
|
|
1013
|
-
|
|
1014
|
-
|
|
1015
|
-
|
|
963
|
+
let prevSpecFindings = [...(specResult.findings ?? [])];
|
|
964
|
+
while (specStatus === 'changes_required') {
|
|
965
|
+
if (specAttemptIndex >= maxSpecRows)
|
|
966
|
+
return abortReviewLoop(finalImplResult, 'round_cap', 'review round cap reached before spec rework', 'spec');
|
|
967
|
+
const currentCostUSD = taskCostUSD();
|
|
968
|
+
if (currentCostUSD !== null && maxCostUSD !== undefined && currentCostUSD >= 0.8 * maxCostUSD) {
|
|
969
|
+
emitTaskEvent('cost_check', { stage: 'spec_rework', tripped: true, cost_used_usd: currentCostUSD, cost_cap_usd: maxCostUSD, cost_available: true });
|
|
970
|
+
return abortReviewLoop(finalImplResult, 'cost_ceiling', 'cost ceiling reached before spec rework', 'spec');
|
|
971
|
+
}
|
|
972
|
+
const decision = pickEscalation({ loop: 'spec', attemptIndex: specAttemptIndex, baseTier: resolved.slot });
|
|
973
|
+
if (decision.isEscalated)
|
|
974
|
+
emitEscalationEvent('spec', specAttemptIndex, decision);
|
|
975
|
+
emitTaskEvent('stage_change', { from: 'spec_review', to: 'spec_rework', attempt: specAttemptIndex, attemptCap: maxSpecRows, implTier: decision.impl, reviewerTier: decision.reviewer, escalated: decision.isEscalated });
|
|
976
|
+
heartbeat?.transition({ stage: 'spec_rework', stageIndex: 3, reviewRound: specAttemptIndex, attemptCap: maxSpecRows });
|
|
977
|
+
const feedback = specResult.findings.length > 0 ? `\n\n## Spec Review Feedback (round ${specAttemptIndex}):\n${specResult.findings.map(f => `- ${f}`).join('\n')}` : '';
|
|
978
|
+
const reworkTask = withDoneCondition({ ...task, prompt: `${task.prompt}${feedback}` });
|
|
979
|
+
const reworkCall = await runWithFallback({ assigned: decision.impl, providerFor, unavailableTiers: specUnavailable, isTransportFailure: (r) => TRANSPORT_FAILURES.has(r.status) && r.capExhausted === undefined, getStatus: (r) => r.status, makeSyntheticFailure: (assigned) => makeSyntheticRunResult(assigned, 'all_tiers_unavailable'), call: (provider) => delegateWithEscalation(reworkTask, [provider], { explicitlyPinned: true, onProgress: wrappedOnProgress, taskDeadlineMs, abortSignal: stallController.signal, assignedTier: decision.impl }) });
|
|
980
|
+
if (reworkCall.fallbackFired || reworkCall.bothUnavailable)
|
|
981
|
+
fallbackOverrides.push({ role: 'implementer', loop: 'spec', attempt: specAttemptIndex, assigned: decision.impl, used: reworkCall.usedTier, reason: (reworkCall.fallbackReason ?? reworkCall.unavailableReason), triggeringStatus: reworkCall.fallbackTriggeringStatus, bothUnavailable: reworkCall.bothUnavailable });
|
|
982
|
+
if (reworkCall.fallbackFired) {
|
|
983
|
+
emitFallback({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'spec', attempt: specAttemptIndex, role: 'implementer', assignedTier: decision.impl, usedTier: reworkCall.usedTier, reason: reworkCall.fallbackReason, triggeringStatus: reworkCall.fallbackTriggeringStatus, violatesSeparation: false });
|
|
984
|
+
if (decision.isEscalated && reworkCall.fallbackReason === 'not_configured')
|
|
985
|
+
emitEscalationUnavailable({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'spec', attempt: specAttemptIndex, role: 'implementer', wantedTier: decision.impl, reason: reworkCall.fallbackReason });
|
|
986
|
+
}
|
|
987
|
+
if (reworkCall.bothUnavailable) {
|
|
988
|
+
emitFallbackUnavailable({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'spec', attempt: specAttemptIndex, role: 'implementer', assignedTier: decision.impl, reason: reworkCall.unavailableReason });
|
|
989
|
+
if (decision.isEscalated)
|
|
990
|
+
emitEscalationUnavailable({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'spec', attempt: specAttemptIndex, role: 'implementer', wantedTier: decision.impl, reason: reworkCall.unavailableReason });
|
|
991
|
+
return __recordOnce(adaptForAllTiersUnavailable(reworkCall.result, 'spec', specAttemptIndex));
|
|
992
|
+
}
|
|
993
|
+
finalImplResult = reworkCall.result;
|
|
994
|
+
latestAttemptedImpl = { tier: reworkCall.usedTier, result: finalImplResult };
|
|
995
|
+
implementerHistory.push(reworkCall.usedTier);
|
|
996
|
+
const reworkReport = parseStructuredReport(finalImplResult.output);
|
|
997
|
+
finalImplReport = reworkReport.summary ? reworkReport : buildFallbackImplReport(finalImplResult);
|
|
998
|
+
fileContents = await readImplementerFileContents(finalImplResult.filesWritten, task.cwd);
|
|
999
|
+
heartbeat?.transition({ stage: 'spec_review', stageIndex: 2, reviewRound: specAttemptIndex + 1, attemptCap: maxSpecRows });
|
|
1000
|
+
const reviewCall = await runWithFallback({ assigned: decision.reviewer, providerFor, unavailableTiers: specUnavailable, isTransportFailure: (r) => isReviewTransportFailure(r), getStatus: (r) => r.status, makeSyntheticFailure: () => makeSkippedReviewResult('all_tiers_unavailable'), call: (provider) => runSpecReview(provider, packet, finalImplReport, fileContents, finalImplResult.toolCalls, task.planContext, evidence.block) });
|
|
1001
|
+
if (reviewCall.bothUnavailable) {
|
|
1002
|
+
emitFallbackUnavailable({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'spec', attempt: specAttemptIndex, role: 'specReviewer', assignedTier: decision.reviewer, reason: reviewCall.unavailableReason });
|
|
1003
|
+
fallbackOverrides.push({ role: 'specReviewer', loop: 'spec', attempt: specAttemptIndex, assigned: decision.reviewer, used: reviewCall.usedTier, reason: reviewCall.unavailableReason, triggeringStatus: reviewCall.fallbackTriggeringStatus, bothUnavailable: true });
|
|
1004
|
+
specReviewerHistory.push('skipped');
|
|
1005
|
+
}
|
|
1006
|
+
else {
|
|
1007
|
+
specReviewerHistory.push(reviewCall.usedTier);
|
|
1008
|
+
if (reviewCall.fallbackFired) {
|
|
1009
|
+
emitFallback({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'spec', attempt: specAttemptIndex, role: 'specReviewer', assignedTier: decision.reviewer, usedTier: reviewCall.usedTier, reason: reviewCall.fallbackReason, triggeringStatus: reviewCall.fallbackTriggeringStatus, violatesSeparation: reviewCall.usedTier === implementerHistory[implementerHistory.length - 1] });
|
|
1010
|
+
fallbackOverrides.push({ role: 'specReviewer', loop: 'spec', attempt: specAttemptIndex, assigned: decision.reviewer, used: reviewCall.usedTier, reason: reviewCall.fallbackReason, triggeringStatus: reviewCall.fallbackTriggeringStatus, bothUnavailable: false });
|
|
1011
|
+
}
|
|
1012
|
+
}
|
|
1013
|
+
specResult = reviewCall.result;
|
|
1014
|
+
specStatus = specResult.status;
|
|
1015
|
+
specReport = 'report' in specResult ? specResult.report : undefined;
|
|
1016
|
+
specReviewReason = specStatus === 'skipped' ? 'all_tiers_unavailable' : ('errorReason' in specResult ? specResult.errorReason : undefined);
|
|
1017
|
+
if (reviewDidNotReject(specStatus))
|
|
1018
|
+
lastNonRejectedImpl = { tier: implementerHistory[implementerHistory.length - 1], result: finalImplResult };
|
|
1019
|
+
specAttemptIndex++;
|
|
1020
|
+
if (specStatus === 'approved' || specStatus === 'skipped')
|
|
1021
|
+
break;
|
|
1022
|
+
const currentFindings = [...(specResult.findings ?? [])].sort().join('\0');
|
|
1023
|
+
const prevFindings = [...prevSpecFindings].sort().join('\0');
|
|
1024
|
+
if (currentFindings === prevFindings && currentFindings !== '')
|
|
1025
|
+
break;
|
|
1026
|
+
prevSpecFindings = [...(specResult.findings ?? [])];
|
|
1027
|
+
}
|
|
1028
|
+
}
|
|
1029
|
+
else {
|
|
1030
|
+
specResult = { status: 'skipped', report: undefined, findings: [], reason: 'all_tiers_unavailable' };
|
|
1031
|
+
specStatus = 'not_applicable';
|
|
1032
|
+
specReport = undefined;
|
|
1033
|
+
specReviewReason = 'skipped: reviewPolicy is quality_only';
|
|
1016
1034
|
}
|
|
1017
|
-
let qualityResult = { status: 'skipped', report: undefined, findings: [], errorReason: reviewPolicy === 'full' ? 'all_tiers_unavailable' : 'skipped: reviewPolicy is spec_only' };
|
|
1035
|
+
let qualityResult = { status: 'skipped', report: undefined, findings: [], errorReason: (reviewPolicy === 'full' || reviewPolicy === 'quality_only') ? 'all_tiers_unavailable' : 'skipped: reviewPolicy is spec_only' };
|
|
1018
1036
|
// Hoisted so endReviewStage (called after this block) can read them on the
|
|
1019
1037
|
// success path. When the quality review is skipped (`reviewPolicy` is neither 'full' nor 'quality_only'),
|
|
1020
1038
|
// the values stay at 0/null and the corresponding stage entry remains in its
|
|
1021
1039
|
// `entered: false` default — endReviewStage is never called.
|
|
1022
1040
|
let qualityReviewT0 = 0;
|
|
1023
1041
|
let qualityReviewC0 = null;
|
|
1024
|
-
if (reviewPolicy === 'full') {
|
|
1042
|
+
if (reviewPolicy === 'full' || reviewPolicy === 'quality_only') {
|
|
1025
1043
|
qualityUnavailable = new Map();
|
|
1026
1044
|
const qualityReviewerTier = pickReviewer({ loop: 'quality', attemptIndex: 0, baseTier: resolved.slot });
|
|
1027
1045
|
heartbeat?.transition({ stage: 'quality_review', stageIndex: 4, reviewRound: 1, attemptCap: maxQualityRows });
|
|
1028
1046
|
qualityReviewT0 = Date.now();
|
|
1029
1047
|
qualityReviewC0 = runningCostUSD();
|
|
1030
|
-
const initialQuality = await runWithFallback({ assigned: qualityReviewerTier, providerFor, unavailableTiers: qualityUnavailable, isTransportFailure: (r) => isReviewTransportFailure(r), getStatus: (r) => r.status, makeSyntheticFailure: () => makeSkippedReviewResult('all_tiers_unavailable'), call: (provider) => runQualityReview(provider, packet, specReport ?? finalImplReport, fileContents, finalImplResult.toolCalls, finalImplResult.filesWritten, evidence.block) });
|
|
1048
|
+
const initialQuality = await runWithFallback({ assigned: qualityReviewerTier, providerFor, unavailableTiers: qualityUnavailable, isTransportFailure: (r) => isReviewTransportFailure(r), getStatus: (r) => r.status, makeSyntheticFailure: () => makeSkippedReviewResult('all_tiers_unavailable'), call: (provider) => runQualityReview(provider, packet, specReport ?? finalImplReport, fileContents, finalImplResult.toolCalls, finalImplResult.filesWritten, evidence.block, qualityReviewPromptBuilder, finalImplResult.output) });
|
|
1031
1049
|
if (initialQuality.bothUnavailable) {
|
|
1032
1050
|
emitFallbackUnavailable({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'quality', attempt: 0, role: 'qualityReviewer', assignedTier: qualityReviewerTier, reason: initialQuality.unavailableReason });
|
|
1033
1051
|
fallbackOverrides.push({ role: 'qualityReviewer', loop: 'quality', attempt: 0, assigned: qualityReviewerTier, used: initialQuality.usedTier, reason: initialQuality.unavailableReason, triggeringStatus: initialQuality.fallbackTriggeringStatus, bothUnavailable: true });
|
|
@@ -1041,6 +1059,20 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
1041
1059
|
}
|
|
1042
1060
|
}
|
|
1043
1061
|
qualityResult = initialQuality.result;
|
|
1062
|
+
if (reviewPolicy === 'quality_only') {
|
|
1063
|
+
emitTaskEvent('read_only_review.quality', {
|
|
1064
|
+
route: routeKey,
|
|
1065
|
+
verdict: qualityResult.status === 'approved' ? 'approved'
|
|
1066
|
+
: qualityResult.status === 'changes_required' ? 'changes_required'
|
|
1067
|
+
: qualityResult.status === 'skipped' ? 'skipped'
|
|
1068
|
+
: 'error',
|
|
1069
|
+
iterationIndex: 1,
|
|
1070
|
+
findingsReviewed: qualityResult.findings?.length ?? 0,
|
|
1071
|
+
findingsFlagged: qualityResult.status === 'changes_required' ? (qualityResult.findings?.length ?? 0) : 0,
|
|
1072
|
+
durationMs: Date.now() - qualityReviewT0,
|
|
1073
|
+
costUSD: runningCostUSD() !== null && qualityReviewC0 !== null ? runningCostUSD() - qualityReviewC0 : null,
|
|
1074
|
+
});
|
|
1075
|
+
}
|
|
1044
1076
|
let prevQualityFindings = [...(qualityResult.findings ?? [])];
|
|
1045
1077
|
qualityAttemptIndex = 1;
|
|
1046
1078
|
while (qualityResult.status === 'changes_required') {
|
|
@@ -1055,10 +1087,17 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
1055
1087
|
if (decision.isEscalated)
|
|
1056
1088
|
emitEscalationEvent('quality', qualityAttemptIndex, decision);
|
|
1057
1089
|
emitTaskEvent('stage_change', { from: 'quality_review', to: 'quality_rework', attempt: qualityAttemptIndex, attemptCap: maxQualityRows, implTier: decision.impl, reviewerTier: decision.reviewer, escalated: decision.isEscalated });
|
|
1090
|
+
if (reviewPolicy === 'quality_only') {
|
|
1091
|
+
emitTaskEvent('read_only_review.rework', {
|
|
1092
|
+
route: routeKey,
|
|
1093
|
+
iterationIndex: qualityAttemptIndex,
|
|
1094
|
+
triggeringIssues: qualityResult.findings?.length ?? 0,
|
|
1095
|
+
});
|
|
1096
|
+
}
|
|
1058
1097
|
heartbeat?.transition({ stage: 'quality_rework', stageIndex: 5, reviewRound: qualityAttemptIndex, attemptCap: maxQualityRows });
|
|
1059
1098
|
const feedback = qualityResult.findings.length > 0 ? `\n\n## Quality Review Feedback (round ${qualityAttemptIndex}):\n${qualityResult.findings.map(f => `- ${f}`).join('\n')}` : '';
|
|
1060
1099
|
const reworkTask = withDoneCondition({ ...task, prompt: `${task.prompt}${feedback}` });
|
|
1061
|
-
const reworkCall = await runWithFallback({ assigned: decision.impl, providerFor, unavailableTiers: qualityUnavailable, isTransportFailure: (r) => TRANSPORT_FAILURES.has(r.status) && r.capExhausted === undefined, getStatus: (r) => r.status, makeSyntheticFailure: (assigned) => makeSyntheticRunResult(assigned, 'all_tiers_unavailable'), call: (provider) => delegateWithEscalation(reworkTask, [provider], { explicitlyPinned: true, onProgress: wrappedOnProgress, taskDeadlineMs, abortSignal: stallController.signal }) });
|
|
1100
|
+
const reworkCall = await runWithFallback({ assigned: decision.impl, providerFor, unavailableTiers: qualityUnavailable, isTransportFailure: (r) => TRANSPORT_FAILURES.has(r.status) && r.capExhausted === undefined, getStatus: (r) => r.status, makeSyntheticFailure: (assigned) => makeSyntheticRunResult(assigned, 'all_tiers_unavailable'), call: (provider) => delegateWithEscalation(reworkTask, [provider], { explicitlyPinned: true, onProgress: wrappedOnProgress, taskDeadlineMs, abortSignal: stallController.signal, assignedTier: decision.impl }) });
|
|
1062
1101
|
if (reworkCall.fallbackFired || reworkCall.bothUnavailable)
|
|
1063
1102
|
fallbackOverrides.push({ role: 'implementer', loop: 'quality', attempt: qualityAttemptIndex, assigned: decision.impl, used: reworkCall.usedTier, reason: (reworkCall.fallbackReason ?? reworkCall.unavailableReason), triggeringStatus: reworkCall.fallbackTriggeringStatus, bothUnavailable: reworkCall.bothUnavailable });
|
|
1064
1103
|
if (reworkCall.fallbackFired)
|
|
@@ -1076,7 +1115,9 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
1076
1115
|
finalImplReport = reworkReport.summary ? reworkReport : buildFallbackImplReport(finalImplResult);
|
|
1077
1116
|
fileContents = await readImplementerFileContents(finalImplResult.filesWritten, task.cwd);
|
|
1078
1117
|
heartbeat?.transition({ stage: 'quality_review', stageIndex: 4, reviewRound: qualityAttemptIndex + 1, attemptCap: maxQualityRows });
|
|
1079
|
-
const
|
|
1118
|
+
const reworkQualityT0 = Date.now();
|
|
1119
|
+
const reworkQualityC0 = runningCostUSD();
|
|
1120
|
+
const reviewCall = await runWithFallback({ assigned: decision.reviewer, providerFor, unavailableTiers: qualityUnavailable, isTransportFailure: (r) => isReviewTransportFailure(r), getStatus: (r) => r.status, makeSyntheticFailure: () => makeSkippedReviewResult('all_tiers_unavailable'), call: (provider) => runQualityReview(provider, packet, finalImplReport, fileContents, finalImplResult.toolCalls, finalImplResult.filesWritten, evidence.block, qualityReviewPromptBuilder, finalImplResult.output) });
|
|
1080
1121
|
if (reviewCall.bothUnavailable) {
|
|
1081
1122
|
emitFallbackUnavailable({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'quality', attempt: qualityAttemptIndex, role: 'qualityReviewer', assignedTier: decision.reviewer, reason: reviewCall.unavailableReason });
|
|
1082
1123
|
fallbackOverrides.push({ role: 'qualityReviewer', loop: 'quality', attempt: qualityAttemptIndex, assigned: decision.reviewer, used: reviewCall.usedTier, reason: reviewCall.unavailableReason, triggeringStatus: reviewCall.fallbackTriggeringStatus, bothUnavailable: true });
|
|
@@ -1090,6 +1131,20 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
1090
1131
|
}
|
|
1091
1132
|
}
|
|
1092
1133
|
qualityResult = reviewCall.result;
|
|
1134
|
+
if (reviewPolicy === 'quality_only') {
|
|
1135
|
+
emitTaskEvent('read_only_review.quality', {
|
|
1136
|
+
route: routeKey,
|
|
1137
|
+
verdict: qualityResult.status === 'approved' ? 'approved'
|
|
1138
|
+
: qualityResult.status === 'changes_required' ? 'changes_required'
|
|
1139
|
+
: qualityResult.status === 'skipped' ? 'skipped'
|
|
1140
|
+
: 'error',
|
|
1141
|
+
iterationIndex: qualityAttemptIndex + 1,
|
|
1142
|
+
findingsReviewed: qualityResult.findings?.length ?? 0,
|
|
1143
|
+
findingsFlagged: qualityResult.status === 'changes_required' ? (qualityResult.findings?.length ?? 0) : 0,
|
|
1144
|
+
durationMs: Date.now() - reworkQualityT0,
|
|
1145
|
+
costUSD: runningCostUSD() !== null && reworkQualityC0 !== null ? runningCostUSD() - reworkQualityC0 : null,
|
|
1146
|
+
});
|
|
1147
|
+
}
|
|
1093
1148
|
if (reviewDidNotReject(qualityResult.status))
|
|
1094
1149
|
lastNonRejectedImpl = { tier: implementerHistory[implementerHistory.length - 1], result: finalImplResult };
|
|
1095
1150
|
qualityAttemptIndex++;
|
|
@@ -1121,12 +1176,16 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
1121
1176
|
message: 'Implementation diff exceeded the reviewer evidence byte cap and was truncated.',
|
|
1122
1177
|
});
|
|
1123
1178
|
}
|
|
1124
|
-
const specAggregateStatus =
|
|
1125
|
-
|
|
1126
|
-
:
|
|
1127
|
-
|
|
1128
|
-
|
|
1129
|
-
|
|
1179
|
+
const specAggregateStatus = reviewPolicy === 'quality_only'
|
|
1180
|
+
? 'skipped'
|
|
1181
|
+
: (['approved', 'changes_required', 'skipped', 'error', 'api_error', 'network_error', 'timeout'].includes(specStatus) ? specStatus : 'error');
|
|
1182
|
+
if (reviewPolicy !== 'quality_only') {
|
|
1183
|
+
endReviewStage(stats, 'spec_review', specReviewT0, specReviewC0, implementerAgentInfo, runningCostUSD(), specStatus === 'approved' ? 'approved'
|
|
1184
|
+
: specStatus === 'changes_required' ? 'changes_required'
|
|
1185
|
+
: specStatus === 'skipped' ? 'skipped'
|
|
1186
|
+
: specStatus === 'not_applicable' ? 'not_applicable'
|
|
1187
|
+
: 'error', specAttemptIndex - 1);
|
|
1188
|
+
}
|
|
1130
1189
|
const qualityAggregateStatus = qualityResult.status;
|
|
1131
1190
|
endReviewStage(stats, 'quality_review', qualityReviewT0, qualityReviewC0, implementerAgentInfo, runningCostUSD(), qualityResult.status === 'approved' ? 'approved'
|
|
1132
1191
|
: qualityResult.status === 'changes_required' ? 'changes_required'
|
|
@@ -1156,6 +1215,7 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
1156
1215
|
specReviewStatus: specEnvelopeStatus,
|
|
1157
1216
|
qualityReviewStatus: qualityEnvelopeStatus,
|
|
1158
1217
|
stageStats: stats,
|
|
1218
|
+
reviewRounds: reviewRounds(),
|
|
1159
1219
|
specReviewReason: 'errorReason' in specResult ? specResult.errorReason : undefined,
|
|
1160
1220
|
qualityReviewReason: 'errorReason' in qualityResult ? qualityResult.errorReason : undefined,
|
|
1161
1221
|
structuredReport: aggregated,
|
|
@@ -1163,21 +1223,42 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
1163
1223
|
specReviewReport: specReport,
|
|
1164
1224
|
qualityReviewReport: qualityResult.report,
|
|
1165
1225
|
filePathsSkipped,
|
|
1166
|
-
agents: agentEnvelope(specReviewerHistory[specReviewerHistory.length - 1] ?? 'not_applicable', qualityReviewerHistory[qualityReviewerHistory.length - 1] ?? (reviewPolicy === 'full' ? 'not_applicable' : 'skipped')),
|
|
1226
|
+
agents: agentEnvelope(specReviewerHistory[specReviewerHistory.length - 1] ?? 'not_applicable', qualityReviewerHistory[qualityReviewerHistory.length - 1] ?? ((reviewPolicy === 'full' || reviewPolicy === 'quality_only') ? 'not_applicable' : 'skipped')),
|
|
1167
1227
|
models: {
|
|
1168
1228
|
implementer: implModel,
|
|
1169
|
-
specReviewer: reviewModel,
|
|
1170
|
-
qualityReviewer: reviewPolicy === 'full' ? reviewModel : null,
|
|
1229
|
+
specReviewer: reviewPolicy !== 'quality_only' ? reviewModel : null,
|
|
1230
|
+
qualityReviewer: (reviewPolicy === 'full' || reviewPolicy === 'quality_only') ? reviewModel : null,
|
|
1171
1231
|
},
|
|
1172
1232
|
fileArtifactsMissing,
|
|
1173
1233
|
commits,
|
|
1174
1234
|
commitError,
|
|
1175
1235
|
verification,
|
|
1176
1236
|
};
|
|
1237
|
+
if (reviewPolicy === 'quality_only') {
|
|
1238
|
+
emitTaskEvent('read_only_review.terminal', {
|
|
1239
|
+
route: routeKey,
|
|
1240
|
+
roundsUsed: qualityAttemptIndex,
|
|
1241
|
+
finalQualityVerdict: qualityResult.status === 'approved' ? 'approved'
|
|
1242
|
+
: qualityResult.status === 'changes_required' ? 'changes_required'
|
|
1243
|
+
: qualityResult.status === 'skipped' ? 'skipped'
|
|
1244
|
+
: 'error',
|
|
1245
|
+
costUSD: taskCostUSD(),
|
|
1246
|
+
durationMs: Date.now() - taskStartMs,
|
|
1247
|
+
});
|
|
1248
|
+
}
|
|
1177
1249
|
return __recordOnce(runResult);
|
|
1178
1250
|
}
|
|
1179
1251
|
catch (err) {
|
|
1180
1252
|
const errorRunResult = withVerification(workerErrorResult(err));
|
|
1253
|
+
if (reviewPolicy === 'quality_only') {
|
|
1254
|
+
emitTaskEvent('read_only_review.terminal', {
|
|
1255
|
+
route: routeKey,
|
|
1256
|
+
roundsUsed: qualityAttemptIndex,
|
|
1257
|
+
finalQualityVerdict: 'error',
|
|
1258
|
+
costUSD: taskCostUSD(),
|
|
1259
|
+
durationMs: Date.now() - taskStartMs,
|
|
1260
|
+
});
|
|
1261
|
+
}
|
|
1181
1262
|
return __recordOnce(errorRunResult);
|
|
1182
1263
|
}
|
|
1183
1264
|
finally {
|