@zhixuan92/multi-model-agent-core 3.7.0 → 3.8.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/dist/config/read-only-review-flag.d.ts +8 -0
- package/dist/config/read-only-review-flag.d.ts.map +1 -0
- package/dist/config/read-only-review-flag.js +13 -0
- package/dist/config/read-only-review-flag.js.map +1 -0
- package/dist/executors/_shared/findings-schema.d.ts +80 -0
- package/dist/executors/_shared/findings-schema.d.ts.map +1 -0
- package/dist/executors/_shared/findings-schema.js +35 -0
- package/dist/executors/_shared/findings-schema.js.map +1 -0
- package/dist/executors/_shared/review-verdict-mapping.d.ts +16 -0
- package/dist/executors/_shared/review-verdict-mapping.d.ts.map +1 -0
- package/dist/executors/_shared/review-verdict-mapping.js +24 -0
- package/dist/executors/_shared/review-verdict-mapping.js.map +1 -0
- package/dist/executors/audit.d.ts.map +1 -1
- package/dist/executors/audit.js +21 -5
- package/dist/executors/audit.js.map +1 -1
- package/dist/executors/debug.d.ts.map +1 -1
- package/dist/executors/debug.js +11 -2
- package/dist/executors/debug.js.map +1 -1
- package/dist/executors/investigate.d.ts.map +1 -1
- package/dist/executors/investigate.js +23 -17
- package/dist/executors/investigate.js.map +1 -1
- package/dist/executors/review.d.ts.map +1 -1
- package/dist/executors/review.js +48 -48
- package/dist/executors/review.js.map +1 -1
- package/dist/executors/types.d.ts +4 -1
- package/dist/executors/types.d.ts.map +1 -1
- package/dist/executors/verify.d.ts.map +1 -1
- package/dist/executors/verify.js +43 -8
- package/dist/executors/verify.js.map +1 -1
- package/dist/intake/compilers/investigate.d.ts.map +1 -1
- package/dist/intake/compilers/investigate.js +3 -4
- package/dist/intake/compilers/investigate.js.map +1 -1
- package/dist/intake/resolve.d.ts.map +1 -1
- package/dist/intake/resolve.js +24 -10
- package/dist/intake/resolve.js.map +1 -1
- package/dist/intake/types.d.ts +1 -1
- package/dist/intake/types.d.ts.map +1 -1
- package/dist/observability/events.d.ts +84 -0
- package/dist/observability/events.d.ts.map +1 -1
- package/dist/observability/events.js +23 -1
- package/dist/observability/events.js.map +1 -1
- package/dist/review/aggregate-result.d.ts +1 -1
- package/dist/review/aggregate-result.d.ts.map +1 -1
- package/dist/review/aggregate-result.js.map +1 -1
- package/dist/review/quality-only-prompts.d.ts +26 -0
- package/dist/review/quality-only-prompts.d.ts.map +1 -0
- package/dist/review/quality-only-prompts.js +155 -0
- package/dist/review/quality-only-prompts.js.map +1 -0
- package/dist/review/quality-reviewer.d.ts +54 -10
- package/dist/review/quality-reviewer.d.ts.map +1 -1
- package/dist/review/quality-reviewer.js +158 -17
- package/dist/review/quality-reviewer.js.map +1 -1
- package/dist/run-tasks/index.d.ts +6 -0
- package/dist/run-tasks/index.d.ts.map +1 -1
- package/dist/run-tasks/index.js +1 -1
- package/dist/run-tasks/index.js.map +1 -1
- package/dist/run-tasks/reviewed-lifecycle.d.ts +5 -1
- package/dist/run-tasks/reviewed-lifecycle.d.ts.map +1 -1
- package/dist/run-tasks/reviewed-lifecycle.js +234 -162
- package/dist/run-tasks/reviewed-lifecycle.js.map +1 -1
- package/dist/tool-schemas/audit.d.ts +19 -0
- package/dist/tool-schemas/audit.d.ts.map +1 -1
- package/dist/tool-schemas/debug.d.ts +19 -0
- package/dist/tool-schemas/debug.d.ts.map +1 -1
- package/dist/tool-schemas/delegate.d.ts +19 -0
- package/dist/tool-schemas/delegate.d.ts.map +1 -1
- package/dist/tool-schemas/execute-plan.d.ts +19 -0
- package/dist/tool-schemas/execute-plan.d.ts.map +1 -1
- package/dist/tool-schemas/investigate.d.ts +19 -4
- package/dist/tool-schemas/investigate.d.ts.map +1 -1
- package/dist/tool-schemas/investigate.js +0 -1
- package/dist/tool-schemas/investigate.js.map +1 -1
- package/dist/tool-schemas/retry.d.ts +19 -0
- package/dist/tool-schemas/retry.d.ts.map +1 -1
- package/dist/tool-schemas/review.d.ts +19 -0
- package/dist/tool-schemas/review.d.ts.map +1 -1
- package/dist/tool-schemas/shared-output.d.ts +19 -0
- package/dist/tool-schemas/shared-output.d.ts.map +1 -1
- package/dist/tool-schemas/shared-output.js +6 -0
- package/dist/tool-schemas/shared-output.js.map +1 -1
- package/dist/tool-schemas/verify.d.ts +19 -0
- package/dist/tool-schemas/verify.d.ts.map +1 -1
- package/dist/types.d.ts +3 -3
- package/dist/types.d.ts.map +1 -1
- package/package.json +5 -1
|
@@ -23,6 +23,9 @@ import { buildFallbackImplReport, readImplementerFileContents } from './fallback
|
|
|
23
23
|
import { composeVerboseLine, toVerboseFields } from '../diagnostics/verbose-line.js';
|
|
24
24
|
import { withDoneCondition } from './execute-task.js';
|
|
25
25
|
const exec = promisify(execFile);
|
|
26
|
+
const READ_ONLY_TOOL_NAMES = new Set([
|
|
27
|
+
'audit', 'review', 'verify', 'investigate', 'debug',
|
|
28
|
+
]);
|
|
26
29
|
export function emptyStats() {
|
|
27
30
|
return {
|
|
28
31
|
implementing: { stage: 'implementing', entered: false, durationMs: null, costUSD: null, agentTier: null, modelFamily: null, model: null },
|
|
@@ -87,8 +90,13 @@ export function endVerifyStage(stats, t0, c0, agent, finalCostUSD, outcome, skip
|
|
|
87
90
|
skipReason,
|
|
88
91
|
};
|
|
89
92
|
}
|
|
90
|
-
export async function executeReviewedLifecycle(task, resolved, config, taskIndex, onProgress, heartbeatWiring, diagnostics, recorder, _route, _client, _triggeringSkill, bus) {
|
|
93
|
+
export async function executeReviewedLifecycle(task, resolved, config, taskIndex, onProgress, heartbeatWiring, diagnostics, recorder, _route, _client, _triggeringSkill, bus, qualityReviewPromptBuilder) {
|
|
91
94
|
const reviewPolicy = task.reviewPolicy ?? 'full';
|
|
95
|
+
const routeKey = _route ?? '';
|
|
96
|
+
if (reviewPolicy === 'quality_only' && !READ_ONLY_TOOL_NAMES.has(routeKey)) {
|
|
97
|
+
throw new Error(`reviewPolicy 'quality_only' is only valid for read-only routes; received '${routeKey}'. ` +
|
|
98
|
+
`Use 'full', 'spec_only', 'diff_only', or 'off' for artifact-producing routes.`);
|
|
99
|
+
}
|
|
92
100
|
const otherSlot = resolved.slot === 'standard' ? 'complex' : 'standard';
|
|
93
101
|
let escalationProvider;
|
|
94
102
|
try {
|
|
@@ -110,7 +118,8 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
110
118
|
const { outputTargets } = partitionFilePaths(task.filePaths, task.cwd ?? process.cwd());
|
|
111
119
|
const stageCount = reviewPolicy === 'off' ? 1 :
|
|
112
120
|
reviewPolicy === 'spec_only' ? 3 :
|
|
113
|
-
|
|
121
|
+
reviewPolicy === 'quality_only' ? 3 :
|
|
122
|
+
5;
|
|
114
123
|
const verbose = diagnostics?.verbose ?? false;
|
|
115
124
|
const verboseStreamRaw = verbose
|
|
116
125
|
? (diagnostics?.verboseStream ?? ((line) => { process.stderr.write(line + '\n'); }))
|
|
@@ -361,7 +370,7 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
361
370
|
terminationReason: 'all_tiers_unavailable',
|
|
362
371
|
reviewRounds: reviewRounds(),
|
|
363
372
|
error: `runWithFallback: both tiers unavailable (loop=${loop}, attempt=${attempt}, role=implementer)`,
|
|
364
|
-
agents: agentEnvelope(specReviewerHistory[specReviewerHistory.length - 1] ?? 'not_applicable', qualityReviewerHistory[qualityReviewerHistory.length - 1] ?? (reviewPolicy === 'full' ? 'not_applicable' : 'skipped')),
|
|
373
|
+
agents: agentEnvelope(specReviewerHistory[specReviewerHistory.length - 1] ?? 'not_applicable', qualityReviewerHistory[qualityReviewerHistory.length - 1] ?? ((reviewPolicy === 'full' || reviewPolicy === 'quality_only') ? 'not_applicable' : 'skipped')),
|
|
365
374
|
stageStats: stats,
|
|
366
375
|
};
|
|
367
376
|
}
|
|
@@ -395,7 +404,7 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
395
404
|
error: message,
|
|
396
405
|
specReviewStatus: aborting === 'spec' ? 'changes_required' : (base.specReviewStatus ?? 'approved'),
|
|
397
406
|
qualityReviewStatus: aborting === 'quality' ? 'changes_required' : (base.qualityReviewStatus ?? 'skipped'),
|
|
398
|
-
agents: agentEnvelope(specReviewerHistory[specReviewerHistory.length - 1] ?? 'not_applicable', qualityReviewerHistory[qualityReviewerHistory.length - 1] ?? (reviewPolicy === 'full' ? 'not_applicable' : 'skipped')),
|
|
407
|
+
agents: agentEnvelope(specReviewerHistory[specReviewerHistory.length - 1] ?? 'not_applicable', qualityReviewerHistory[qualityReviewerHistory.length - 1] ?? ((reviewPolicy === 'full' || reviewPolicy === 'quality_only') ? 'not_applicable' : 'skipped')),
|
|
399
408
|
stageStats: stats,
|
|
400
409
|
});
|
|
401
410
|
const defaultVerification = { status: 'skipped', steps: [], totalDurationMs: 0, skipReason: 'no_command' };
|
|
@@ -751,7 +760,7 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
751
760
|
? [...(implResult.filesRead ?? []), ...implResult.filesWritten].some(f => task.filePaths.some(fp => f === fp || f.endsWith('/' + fp) || f.endsWith(fp)))
|
|
752
761
|
: true;
|
|
753
762
|
const filePathsSkipped = !filePathsInteracted;
|
|
754
|
-
if (implResult.filesWritten.length === 0) {
|
|
763
|
+
if (implResult.filesWritten.length === 0 && reviewPolicy !== 'quality_only') {
|
|
755
764
|
if (reviewPolicy === 'off') {
|
|
756
765
|
emitTaskEvent('stage_change', { from: 'verifying', to: 'terminal' });
|
|
757
766
|
const terminal = resolveOffTerminal({
|
|
@@ -859,7 +868,7 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
859
868
|
};
|
|
860
869
|
let fileContents = await readImplementerFileContents(implResult.filesWritten, task.cwd);
|
|
861
870
|
const effectiveImplReport = implReport ?? buildFallbackImplReport(implResult);
|
|
862
|
-
const evidence = isArtifactProducing
|
|
871
|
+
const evidence = (isArtifactProducing && reviewPolicy !== 'quality_only')
|
|
863
872
|
? await buildEvidence({ cwd, baselineHead, commits, verification, reviewPolicy })
|
|
864
873
|
: { block: '', diffTruncated: false, fullDiff: '' };
|
|
865
874
|
if (reviewPolicy === 'diff_only') {
|
|
@@ -917,116 +926,126 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
917
926
|
let specStatus;
|
|
918
927
|
let specReport;
|
|
919
928
|
let specReviewReason;
|
|
920
|
-
|
|
921
|
-
|
|
922
|
-
|
|
923
|
-
|
|
924
|
-
|
|
925
|
-
|
|
926
|
-
|
|
927
|
-
|
|
928
|
-
|
|
929
|
-
|
|
930
|
-
|
|
931
|
-
|
|
932
|
-
|
|
933
|
-
|
|
934
|
-
|
|
935
|
-
|
|
936
|
-
|
|
937
|
-
|
|
938
|
-
|
|
939
|
-
specReviewerHistory.push(initialSpecReview.usedTier);
|
|
940
|
-
if (initialSpecReview.fallbackFired) {
|
|
941
|
-
emitFallback({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'spec', attempt: 0, role: 'specReviewer', assignedTier: initialReviewerTier, usedTier: initialSpecReview.usedTier, reason: initialSpecReview.fallbackReason, triggeringStatus: initialSpecReview.fallbackTriggeringStatus, violatesSeparation: initialSpecReview.usedTier === implementerHistory[implementerHistory.length - 1] });
|
|
942
|
-
fallbackOverrides.push({ role: 'specReviewer', loop: 'spec', attempt: 0, assigned: initialReviewerTier, used: initialSpecReview.usedTier, reason: initialSpecReview.fallbackReason, triggeringStatus: initialSpecReview.fallbackTriggeringStatus, bothUnavailable: false });
|
|
943
|
-
}
|
|
944
|
-
}
|
|
945
|
-
specResult = initialSpecReview.bothUnavailable
|
|
946
|
-
? makeSkippedReviewResult('all_tiers_unavailable')
|
|
947
|
-
: initialSpecReview.result;
|
|
948
|
-
specStatus = specResult.status;
|
|
949
|
-
specReport = 'report' in specResult ? specResult.report : undefined;
|
|
950
|
-
specReviewReason = specStatus === 'skipped' ? 'all_tiers_unavailable' : ('errorReason' in specResult ? specResult.errorReason : undefined);
|
|
951
|
-
let prevSpecFindings = [...(specResult.findings ?? [])];
|
|
952
|
-
while (specStatus === 'changes_required') {
|
|
953
|
-
if (specAttemptIndex >= maxSpecRows)
|
|
954
|
-
return abortReviewLoop(finalImplResult, 'round_cap', 'review round cap reached before spec rework', 'spec');
|
|
955
|
-
const currentCostUSD = taskCostUSD();
|
|
956
|
-
if (currentCostUSD !== null && maxCostUSD !== undefined && currentCostUSD >= 0.8 * maxCostUSD) {
|
|
957
|
-
emitTaskEvent('cost_check', { stage: 'spec_rework', tripped: true, cost_used_usd: currentCostUSD, cost_cap_usd: maxCostUSD, cost_available: true });
|
|
958
|
-
return abortReviewLoop(finalImplResult, 'cost_ceiling', 'cost ceiling reached before spec rework', 'spec');
|
|
959
|
-
}
|
|
960
|
-
const decision = pickEscalation({ loop: 'spec', attemptIndex: specAttemptIndex, baseTier: resolved.slot });
|
|
961
|
-
if (decision.isEscalated)
|
|
962
|
-
emitEscalationEvent('spec', specAttemptIndex, decision);
|
|
963
|
-
emitTaskEvent('stage_change', { from: 'spec_review', to: 'spec_rework', attempt: specAttemptIndex, attemptCap: maxSpecRows, implTier: decision.impl, reviewerTier: decision.reviewer, escalated: decision.isEscalated });
|
|
964
|
-
heartbeat?.transition({ stage: 'spec_rework', stageIndex: 3, reviewRound: specAttemptIndex, attemptCap: maxSpecRows });
|
|
965
|
-
const feedback = specResult.findings.length > 0 ? `\n\n## Spec Review Feedback (round ${specAttemptIndex}):\n${specResult.findings.map(f => `- ${f}`).join('\n')}` : '';
|
|
966
|
-
const reworkTask = withDoneCondition({ ...task, prompt: `${task.prompt}${feedback}` });
|
|
967
|
-
const reworkCall = await runWithFallback({ assigned: decision.impl, providerFor, unavailableTiers: specUnavailable, isTransportFailure: (r) => TRANSPORT_FAILURES.has(r.status) && r.capExhausted === undefined, getStatus: (r) => r.status, makeSyntheticFailure: (assigned) => makeSyntheticRunResult(assigned, 'all_tiers_unavailable'), call: (provider) => delegateWithEscalation(reworkTask, [provider], { explicitlyPinned: true, onProgress: wrappedOnProgress, taskDeadlineMs, abortSignal: stallController.signal, assignedTier: decision.impl }) });
|
|
968
|
-
if (reworkCall.fallbackFired || reworkCall.bothUnavailable)
|
|
969
|
-
fallbackOverrides.push({ role: 'implementer', loop: 'spec', attempt: specAttemptIndex, assigned: decision.impl, used: reworkCall.usedTier, reason: (reworkCall.fallbackReason ?? reworkCall.unavailableReason), triggeringStatus: reworkCall.fallbackTriggeringStatus, bothUnavailable: reworkCall.bothUnavailable });
|
|
970
|
-
if (reworkCall.fallbackFired) {
|
|
971
|
-
emitFallback({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'spec', attempt: specAttemptIndex, role: 'implementer', assignedTier: decision.impl, usedTier: reworkCall.usedTier, reason: reworkCall.fallbackReason, triggeringStatus: reworkCall.fallbackTriggeringStatus, violatesSeparation: false });
|
|
972
|
-
if (decision.isEscalated && reworkCall.fallbackReason === 'not_configured')
|
|
973
|
-
emitEscalationUnavailable({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'spec', attempt: specAttemptIndex, role: 'implementer', wantedTier: decision.impl, reason: reworkCall.fallbackReason });
|
|
974
|
-
}
|
|
975
|
-
if (reworkCall.bothUnavailable) {
|
|
976
|
-
emitFallbackUnavailable({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'spec', attempt: specAttemptIndex, role: 'implementer', assignedTier: decision.impl, reason: reworkCall.unavailableReason });
|
|
977
|
-
if (decision.isEscalated)
|
|
978
|
-
emitEscalationUnavailable({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'spec', attempt: specAttemptIndex, role: 'implementer', wantedTier: decision.impl, reason: reworkCall.unavailableReason });
|
|
979
|
-
return __recordOnce(adaptForAllTiersUnavailable(reworkCall.result, 'spec', specAttemptIndex));
|
|
980
|
-
}
|
|
981
|
-
finalImplResult = reworkCall.result;
|
|
982
|
-
latestAttemptedImpl = { tier: reworkCall.usedTier, result: finalImplResult };
|
|
983
|
-
implementerHistory.push(reworkCall.usedTier);
|
|
984
|
-
const reworkReport = parseStructuredReport(finalImplResult.output);
|
|
985
|
-
finalImplReport = reworkReport.summary ? reworkReport : buildFallbackImplReport(finalImplResult);
|
|
986
|
-
fileContents = await readImplementerFileContents(finalImplResult.filesWritten, task.cwd);
|
|
987
|
-
heartbeat?.transition({ stage: 'spec_review', stageIndex: 2, reviewRound: specAttemptIndex + 1, attemptCap: maxSpecRows });
|
|
988
|
-
const reviewCall = await runWithFallback({ assigned: decision.reviewer, providerFor, unavailableTiers: specUnavailable, isTransportFailure: (r) => isReviewTransportFailure(r), getStatus: (r) => r.status, makeSyntheticFailure: () => makeSkippedReviewResult('all_tiers_unavailable'), call: (provider) => runSpecReview(provider, packet, finalImplReport, fileContents, finalImplResult.toolCalls, task.planContext, evidence.block) });
|
|
989
|
-
if (reviewCall.bothUnavailable) {
|
|
990
|
-
emitFallbackUnavailable({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'spec', attempt: specAttemptIndex, role: 'specReviewer', assignedTier: decision.reviewer, reason: reviewCall.unavailableReason });
|
|
991
|
-
fallbackOverrides.push({ role: 'specReviewer', loop: 'spec', attempt: specAttemptIndex, assigned: decision.reviewer, used: reviewCall.usedTier, reason: reviewCall.unavailableReason, triggeringStatus: reviewCall.fallbackTriggeringStatus, bothUnavailable: true });
|
|
929
|
+
let specReviewT0 = 0;
|
|
930
|
+
let specReviewC0 = null;
|
|
931
|
+
if (reviewPolicy !== 'quality_only') {
|
|
932
|
+
heartbeat?.transition({ stage: 'spec_review', stageIndex: 2, reviewRound: 1, attemptCap: maxSpecRows });
|
|
933
|
+
const initialReviewerTier = pickReviewer({ loop: 'spec', attemptIndex: 0, baseTier: resolved.slot });
|
|
934
|
+
specReviewT0 = Date.now();
|
|
935
|
+
specReviewC0 = runningCostUSD();
|
|
936
|
+
const initialSpecReview = await runWithFallback({
|
|
937
|
+
assigned: initialReviewerTier,
|
|
938
|
+
providerFor,
|
|
939
|
+
unavailableTiers: specUnavailable,
|
|
940
|
+
isTransportFailure: (r) => isReviewTransportFailure(r),
|
|
941
|
+
getStatus: (r) => r.status,
|
|
942
|
+
makeSyntheticFailure: () => makeSkippedReviewResult('all_tiers_unavailable'),
|
|
943
|
+
call: (provider) => runSpecReview(provider, packet, effectiveImplReport, fileContents, implResult.toolCalls, task.planContext, evidence.block),
|
|
944
|
+
});
|
|
945
|
+
if (initialSpecReview.bothUnavailable) {
|
|
946
|
+
emitFallbackUnavailable({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'spec', attempt: 0, role: 'specReviewer', assignedTier: initialReviewerTier, reason: initialSpecReview.unavailableReason });
|
|
947
|
+
fallbackOverrides.push({ role: 'specReviewer', loop: 'spec', attempt: 0, assigned: initialReviewerTier, used: initialSpecReview.usedTier, reason: initialSpecReview.unavailableReason, triggeringStatus: initialSpecReview.fallbackTriggeringStatus, bothUnavailable: true });
|
|
992
948
|
specReviewerHistory.push('skipped');
|
|
993
949
|
}
|
|
994
950
|
else {
|
|
995
|
-
specReviewerHistory.push(
|
|
996
|
-
if (
|
|
997
|
-
emitFallback({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'spec', attempt:
|
|
998
|
-
fallbackOverrides.push({ role: 'specReviewer', loop: 'spec', attempt:
|
|
951
|
+
specReviewerHistory.push(initialSpecReview.usedTier);
|
|
952
|
+
if (initialSpecReview.fallbackFired) {
|
|
953
|
+
emitFallback({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'spec', attempt: 0, role: 'specReviewer', assignedTier: initialReviewerTier, usedTier: initialSpecReview.usedTier, reason: initialSpecReview.fallbackReason, triggeringStatus: initialSpecReview.fallbackTriggeringStatus, violatesSeparation: initialSpecReview.usedTier === implementerHistory[implementerHistory.length - 1] });
|
|
954
|
+
fallbackOverrides.push({ role: 'specReviewer', loop: 'spec', attempt: 0, assigned: initialReviewerTier, used: initialSpecReview.usedTier, reason: initialSpecReview.fallbackReason, triggeringStatus: initialSpecReview.fallbackTriggeringStatus, bothUnavailable: false });
|
|
999
955
|
}
|
|
1000
956
|
}
|
|
1001
|
-
specResult =
|
|
957
|
+
specResult = initialSpecReview.bothUnavailable
|
|
958
|
+
? makeSkippedReviewResult('all_tiers_unavailable')
|
|
959
|
+
: initialSpecReview.result;
|
|
1002
960
|
specStatus = specResult.status;
|
|
1003
961
|
specReport = 'report' in specResult ? specResult.report : undefined;
|
|
1004
962
|
specReviewReason = specStatus === 'skipped' ? 'all_tiers_unavailable' : ('errorReason' in specResult ? specResult.errorReason : undefined);
|
|
1005
|
-
|
|
1006
|
-
|
|
1007
|
-
|
|
1008
|
-
|
|
1009
|
-
|
|
1010
|
-
|
|
1011
|
-
|
|
1012
|
-
|
|
1013
|
-
|
|
1014
|
-
|
|
963
|
+
let prevSpecFindings = [...(specResult.findings ?? [])];
|
|
964
|
+
while (specStatus === 'changes_required') {
|
|
965
|
+
if (specAttemptIndex >= maxSpecRows)
|
|
966
|
+
return abortReviewLoop(finalImplResult, 'round_cap', 'review round cap reached before spec rework', 'spec');
|
|
967
|
+
const currentCostUSD = taskCostUSD();
|
|
968
|
+
if (currentCostUSD !== null && maxCostUSD !== undefined && currentCostUSD >= 0.8 * maxCostUSD) {
|
|
969
|
+
emitTaskEvent('cost_check', { stage: 'spec_rework', tripped: true, cost_used_usd: currentCostUSD, cost_cap_usd: maxCostUSD, cost_available: true });
|
|
970
|
+
return abortReviewLoop(finalImplResult, 'cost_ceiling', 'cost ceiling reached before spec rework', 'spec');
|
|
971
|
+
}
|
|
972
|
+
const decision = pickEscalation({ loop: 'spec', attemptIndex: specAttemptIndex, baseTier: resolved.slot });
|
|
973
|
+
if (decision.isEscalated)
|
|
974
|
+
emitEscalationEvent('spec', specAttemptIndex, decision);
|
|
975
|
+
emitTaskEvent('stage_change', { from: 'spec_review', to: 'spec_rework', attempt: specAttemptIndex, attemptCap: maxSpecRows, implTier: decision.impl, reviewerTier: decision.reviewer, escalated: decision.isEscalated });
|
|
976
|
+
heartbeat?.transition({ stage: 'spec_rework', stageIndex: 3, reviewRound: specAttemptIndex, attemptCap: maxSpecRows });
|
|
977
|
+
const feedback = specResult.findings.length > 0 ? `\n\n## Spec Review Feedback (round ${specAttemptIndex}):\n${specResult.findings.map(f => `- ${f}`).join('\n')}` : '';
|
|
978
|
+
const reworkTask = withDoneCondition({ ...task, prompt: `${task.prompt}${feedback}` });
|
|
979
|
+
const reworkCall = await runWithFallback({ assigned: decision.impl, providerFor, unavailableTiers: specUnavailable, isTransportFailure: (r) => TRANSPORT_FAILURES.has(r.status) && r.capExhausted === undefined, getStatus: (r) => r.status, makeSyntheticFailure: (assigned) => makeSyntheticRunResult(assigned, 'all_tiers_unavailable'), call: (provider) => delegateWithEscalation(reworkTask, [provider], { explicitlyPinned: true, onProgress: wrappedOnProgress, taskDeadlineMs, abortSignal: stallController.signal, assignedTier: decision.impl }) });
|
|
980
|
+
if (reworkCall.fallbackFired || reworkCall.bothUnavailable)
|
|
981
|
+
fallbackOverrides.push({ role: 'implementer', loop: 'spec', attempt: specAttemptIndex, assigned: decision.impl, used: reworkCall.usedTier, reason: (reworkCall.fallbackReason ?? reworkCall.unavailableReason), triggeringStatus: reworkCall.fallbackTriggeringStatus, bothUnavailable: reworkCall.bothUnavailable });
|
|
982
|
+
if (reworkCall.fallbackFired) {
|
|
983
|
+
emitFallback({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'spec', attempt: specAttemptIndex, role: 'implementer', assignedTier: decision.impl, usedTier: reworkCall.usedTier, reason: reworkCall.fallbackReason, triggeringStatus: reworkCall.fallbackTriggeringStatus, violatesSeparation: false });
|
|
984
|
+
if (decision.isEscalated && reworkCall.fallbackReason === 'not_configured')
|
|
985
|
+
emitEscalationUnavailable({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'spec', attempt: specAttemptIndex, role: 'implementer', wantedTier: decision.impl, reason: reworkCall.fallbackReason });
|
|
986
|
+
}
|
|
987
|
+
if (reworkCall.bothUnavailable) {
|
|
988
|
+
emitFallbackUnavailable({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'spec', attempt: specAttemptIndex, role: 'implementer', assignedTier: decision.impl, reason: reworkCall.unavailableReason });
|
|
989
|
+
if (decision.isEscalated)
|
|
990
|
+
emitEscalationUnavailable({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'spec', attempt: specAttemptIndex, role: 'implementer', wantedTier: decision.impl, reason: reworkCall.unavailableReason });
|
|
991
|
+
return __recordOnce(adaptForAllTiersUnavailable(reworkCall.result, 'spec', specAttemptIndex));
|
|
992
|
+
}
|
|
993
|
+
finalImplResult = reworkCall.result;
|
|
994
|
+
latestAttemptedImpl = { tier: reworkCall.usedTier, result: finalImplResult };
|
|
995
|
+
implementerHistory.push(reworkCall.usedTier);
|
|
996
|
+
const reworkReport = parseStructuredReport(finalImplResult.output);
|
|
997
|
+
finalImplReport = reworkReport.summary ? reworkReport : buildFallbackImplReport(finalImplResult);
|
|
998
|
+
fileContents = await readImplementerFileContents(finalImplResult.filesWritten, task.cwd);
|
|
999
|
+
heartbeat?.transition({ stage: 'spec_review', stageIndex: 2, reviewRound: specAttemptIndex + 1, attemptCap: maxSpecRows });
|
|
1000
|
+
const reviewCall = await runWithFallback({ assigned: decision.reviewer, providerFor, unavailableTiers: specUnavailable, isTransportFailure: (r) => isReviewTransportFailure(r), getStatus: (r) => r.status, makeSyntheticFailure: () => makeSkippedReviewResult('all_tiers_unavailable'), call: (provider) => runSpecReview(provider, packet, finalImplReport, fileContents, finalImplResult.toolCalls, task.planContext, evidence.block) });
|
|
1001
|
+
if (reviewCall.bothUnavailable) {
|
|
1002
|
+
emitFallbackUnavailable({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'spec', attempt: specAttemptIndex, role: 'specReviewer', assignedTier: decision.reviewer, reason: reviewCall.unavailableReason });
|
|
1003
|
+
fallbackOverrides.push({ role: 'specReviewer', loop: 'spec', attempt: specAttemptIndex, assigned: decision.reviewer, used: reviewCall.usedTier, reason: reviewCall.unavailableReason, triggeringStatus: reviewCall.fallbackTriggeringStatus, bothUnavailable: true });
|
|
1004
|
+
specReviewerHistory.push('skipped');
|
|
1005
|
+
}
|
|
1006
|
+
else {
|
|
1007
|
+
specReviewerHistory.push(reviewCall.usedTier);
|
|
1008
|
+
if (reviewCall.fallbackFired) {
|
|
1009
|
+
emitFallback({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'spec', attempt: specAttemptIndex, role: 'specReviewer', assignedTier: decision.reviewer, usedTier: reviewCall.usedTier, reason: reviewCall.fallbackReason, triggeringStatus: reviewCall.fallbackTriggeringStatus, violatesSeparation: reviewCall.usedTier === implementerHistory[implementerHistory.length - 1] });
|
|
1010
|
+
fallbackOverrides.push({ role: 'specReviewer', loop: 'spec', attempt: specAttemptIndex, assigned: decision.reviewer, used: reviewCall.usedTier, reason: reviewCall.fallbackReason, triggeringStatus: reviewCall.fallbackTriggeringStatus, bothUnavailable: false });
|
|
1011
|
+
}
|
|
1012
|
+
}
|
|
1013
|
+
specResult = reviewCall.result;
|
|
1014
|
+
specStatus = specResult.status;
|
|
1015
|
+
specReport = 'report' in specResult ? specResult.report : undefined;
|
|
1016
|
+
specReviewReason = specStatus === 'skipped' ? 'all_tiers_unavailable' : ('errorReason' in specResult ? specResult.errorReason : undefined);
|
|
1017
|
+
if (reviewDidNotReject(specStatus))
|
|
1018
|
+
lastNonRejectedImpl = { tier: implementerHistory[implementerHistory.length - 1], result: finalImplResult };
|
|
1019
|
+
specAttemptIndex++;
|
|
1020
|
+
if (specStatus === 'approved' || specStatus === 'skipped')
|
|
1021
|
+
break;
|
|
1022
|
+
const currentFindings = [...(specResult.findings ?? [])].sort().join('\0');
|
|
1023
|
+
const prevFindings = [...prevSpecFindings].sort().join('\0');
|
|
1024
|
+
if (currentFindings === prevFindings && currentFindings !== '')
|
|
1025
|
+
break;
|
|
1026
|
+
prevSpecFindings = [...(specResult.findings ?? [])];
|
|
1027
|
+
}
|
|
1028
|
+
}
|
|
1029
|
+
else {
|
|
1030
|
+
specResult = { status: 'skipped', report: undefined, findings: [], reason: 'all_tiers_unavailable' };
|
|
1031
|
+
specStatus = 'not_applicable';
|
|
1032
|
+
specReport = undefined;
|
|
1033
|
+
specReviewReason = 'skipped: reviewPolicy is quality_only';
|
|
1015
1034
|
}
|
|
1016
|
-
let qualityResult = { status: 'skipped', report: undefined, findings: [], errorReason: reviewPolicy === 'full' ? 'all_tiers_unavailable' : 'skipped: reviewPolicy is spec_only' };
|
|
1035
|
+
let qualityResult = { status: 'skipped', report: undefined, findings: [], errorReason: (reviewPolicy === 'full' || reviewPolicy === 'quality_only') ? 'all_tiers_unavailable' : 'skipped: reviewPolicy is spec_only' };
|
|
1017
1036
|
// Hoisted so endReviewStage (called after this block) can read them on the
|
|
1018
1037
|
// success path. When the quality review is skipped (`reviewPolicy` is neither 'full' nor 'quality_only'),
|
|
1019
1038
|
// the values stay at 0/null and the corresponding stage entry remains in its
|
|
1020
1039
|
// `entered: false` default — endReviewStage is never called.
|
|
1021
1040
|
let qualityReviewT0 = 0;
|
|
1022
1041
|
let qualityReviewC0 = null;
|
|
1023
|
-
if (reviewPolicy === 'full') {
|
|
1042
|
+
if (reviewPolicy === 'full' || reviewPolicy === 'quality_only') {
|
|
1024
1043
|
qualityUnavailable = new Map();
|
|
1025
1044
|
const qualityReviewerTier = pickReviewer({ loop: 'quality', attemptIndex: 0, baseTier: resolved.slot });
|
|
1026
1045
|
heartbeat?.transition({ stage: 'quality_review', stageIndex: 4, reviewRound: 1, attemptCap: maxQualityRows });
|
|
1027
1046
|
qualityReviewT0 = Date.now();
|
|
1028
1047
|
qualityReviewC0 = runningCostUSD();
|
|
1029
|
-
const initialQuality = await runWithFallback({ assigned: qualityReviewerTier, providerFor, unavailableTiers: qualityUnavailable, isTransportFailure: (r) => isReviewTransportFailure(r), getStatus: (r) => r.status, makeSyntheticFailure: () => makeSkippedReviewResult('all_tiers_unavailable'), call: (provider) => runQualityReview(provider, packet, specReport ?? finalImplReport, fileContents, finalImplResult.toolCalls, finalImplResult.filesWritten, evidence.block) });
|
|
1048
|
+
const initialQuality = await runWithFallback({ assigned: qualityReviewerTier, providerFor, unavailableTiers: qualityUnavailable, isTransportFailure: (r) => isReviewTransportFailure(r), getStatus: (r) => r.status, makeSyntheticFailure: () => makeSkippedReviewResult('all_tiers_unavailable'), call: (provider) => runQualityReview(provider, packet, specReport ?? finalImplReport, fileContents, finalImplResult.toolCalls, finalImplResult.filesWritten, evidence.block, qualityReviewPromptBuilder, finalImplResult.output) });
|
|
1030
1049
|
if (initialQuality.bothUnavailable) {
|
|
1031
1050
|
emitFallbackUnavailable({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'quality', attempt: 0, role: 'qualityReviewer', assignedTier: qualityReviewerTier, reason: initialQuality.unavailableReason });
|
|
1032
1051
|
fallbackOverrides.push({ role: 'qualityReviewer', loop: 'quality', attempt: 0, assigned: qualityReviewerTier, used: initialQuality.usedTier, reason: initialQuality.unavailableReason, triggeringStatus: initialQuality.fallbackTriggeringStatus, bothUnavailable: true });
|
|
@@ -1040,65 +1059,92 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
1040
1059
|
}
|
|
1041
1060
|
}
|
|
1042
1061
|
qualityResult = initialQuality.result;
|
|
1043
|
-
let prevQualityFindings = [...(qualityResult.findings ?? [])];
|
|
1044
1062
|
qualityAttemptIndex = 1;
|
|
1045
|
-
|
|
1046
|
-
|
|
1047
|
-
|
|
1048
|
-
|
|
1049
|
-
|
|
1050
|
-
|
|
1051
|
-
|
|
1052
|
-
|
|
1053
|
-
|
|
1054
|
-
|
|
1055
|
-
|
|
1056
|
-
|
|
1057
|
-
|
|
1058
|
-
|
|
1059
|
-
|
|
1060
|
-
|
|
1061
|
-
|
|
1062
|
-
|
|
1063
|
-
|
|
1064
|
-
|
|
1065
|
-
|
|
1066
|
-
|
|
1063
|
+
if (reviewDidNotReject(qualityResult.status))
|
|
1064
|
+
lastNonRejectedImpl = { tier: implementerHistory[implementerHistory.length - 1], result: finalImplResult };
|
|
1065
|
+
if (reviewPolicy === 'quality_only') {
|
|
1066
|
+
// Annotation model: emit one quality event per pass with severity-correction
|
|
1067
|
+
// and mean-confidence summary fields. Then we are done — no rework loop.
|
|
1068
|
+
const annotated = qualityResult.annotatedFindings ?? [];
|
|
1069
|
+
const severityCorrections = annotated.filter(f => f.reviewerSeverity !== undefined).length;
|
|
1070
|
+
const meanConfidence = annotated.length > 0
|
|
1071
|
+
? Math.round((annotated.reduce((s, f) => s + f.reviewerConfidence, 0) / annotated.length) * 100) / 100
|
|
1072
|
+
: null;
|
|
1073
|
+
emitTaskEvent('read_only_review.quality', {
|
|
1074
|
+
route: routeKey,
|
|
1075
|
+
verdict: qualityResult.status === 'annotated' ? 'annotated'
|
|
1076
|
+
: qualityResult.status === 'skipped' ? 'skipped'
|
|
1077
|
+
: 'error',
|
|
1078
|
+
iterationIndex: 1,
|
|
1079
|
+
findingsReviewed: annotated.length,
|
|
1080
|
+
findingsFlagged: severityCorrections,
|
|
1081
|
+
severityCorrections,
|
|
1082
|
+
meanConfidence,
|
|
1083
|
+
durationMs: Date.now() - qualityReviewT0,
|
|
1084
|
+
costUSD: runningCostUSD() !== null && qualityReviewC0 !== null ? runningCostUSD() - qualityReviewC0 : null,
|
|
1085
|
+
});
|
|
1086
|
+
}
|
|
1087
|
+
else {
|
|
1088
|
+
// Artifact-route gating model — keep the rework loop.
|
|
1089
|
+
let prevQualityFindings = [...(qualityResult.findings ?? [])];
|
|
1090
|
+
while (qualityResult.status === 'changes_required') {
|
|
1091
|
+
if (qualityAttemptIndex >= maxQualityRows)
|
|
1092
|
+
return abortReviewLoop(finalImplResult, 'round_cap', 'review round cap reached before quality rework', 'quality');
|
|
1093
|
+
const currentCostUSD = taskCostUSD();
|
|
1094
|
+
if (currentCostUSD !== null && maxCostUSD !== undefined && currentCostUSD >= 0.8 * maxCostUSD) {
|
|
1095
|
+
emitTaskEvent('cost_check', { stage: 'quality_rework', tripped: true, cost_used_usd: currentCostUSD, cost_cap_usd: maxCostUSD, cost_available: true });
|
|
1096
|
+
return abortReviewLoop(finalImplResult, 'cost_ceiling', 'cost ceiling reached before quality rework', 'quality');
|
|
1097
|
+
}
|
|
1098
|
+
const decision = pickEscalation({ loop: 'quality', attemptIndex: qualityAttemptIndex, baseTier: resolved.slot });
|
|
1067
1099
|
if (decision.isEscalated)
|
|
1068
|
-
|
|
1069
|
-
|
|
1070
|
-
|
|
1071
|
-
|
|
1072
|
-
|
|
1073
|
-
|
|
1074
|
-
|
|
1075
|
-
|
|
1076
|
-
|
|
1077
|
-
|
|
1078
|
-
|
|
1079
|
-
|
|
1080
|
-
|
|
1081
|
-
|
|
1082
|
-
|
|
1083
|
-
|
|
1084
|
-
|
|
1085
|
-
|
|
1086
|
-
|
|
1087
|
-
|
|
1088
|
-
|
|
1100
|
+
emitEscalationEvent('quality', qualityAttemptIndex, decision);
|
|
1101
|
+
emitTaskEvent('stage_change', { from: 'quality_review', to: 'quality_rework', attempt: qualityAttemptIndex, attemptCap: maxQualityRows, implTier: decision.impl, reviewerTier: decision.reviewer, escalated: decision.isEscalated });
|
|
1102
|
+
heartbeat?.transition({ stage: 'quality_rework', stageIndex: 5, reviewRound: qualityAttemptIndex, attemptCap: maxQualityRows });
|
|
1103
|
+
const feedback = qualityResult.findings.length > 0 ? `\n\n## Quality Review Feedback (round ${qualityAttemptIndex}):\n${qualityResult.findings.map(f => `- ${f}`).join('\n')}` : '';
|
|
1104
|
+
const reworkTask = withDoneCondition({ ...task, prompt: `${task.prompt}${feedback}` });
|
|
1105
|
+
const reworkCall = await runWithFallback({ assigned: decision.impl, providerFor, unavailableTiers: qualityUnavailable, isTransportFailure: (r) => TRANSPORT_FAILURES.has(r.status) && r.capExhausted === undefined, getStatus: (r) => r.status, makeSyntheticFailure: (assigned) => makeSyntheticRunResult(assigned, 'all_tiers_unavailable'), call: (provider) => delegateWithEscalation(reworkTask, [provider], { explicitlyPinned: true, onProgress: wrappedOnProgress, taskDeadlineMs, abortSignal: stallController.signal, assignedTier: decision.impl }) });
|
|
1106
|
+
if (reworkCall.fallbackFired || reworkCall.bothUnavailable)
|
|
1107
|
+
fallbackOverrides.push({ role: 'implementer', loop: 'quality', attempt: qualityAttemptIndex, assigned: decision.impl, used: reworkCall.usedTier, reason: (reworkCall.fallbackReason ?? reworkCall.unavailableReason), triggeringStatus: reworkCall.fallbackTriggeringStatus, bothUnavailable: reworkCall.bothUnavailable });
|
|
1108
|
+
if (reworkCall.fallbackFired)
|
|
1109
|
+
emitFallback({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'quality', attempt: qualityAttemptIndex, role: 'implementer', assignedTier: decision.impl, usedTier: reworkCall.usedTier, reason: reworkCall.fallbackReason, triggeringStatus: reworkCall.fallbackTriggeringStatus, violatesSeparation: false });
|
|
1110
|
+
if (reworkCall.bothUnavailable) {
|
|
1111
|
+
emitFallbackUnavailable({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'quality', attempt: qualityAttemptIndex, role: 'implementer', assignedTier: decision.impl, reason: reworkCall.unavailableReason });
|
|
1112
|
+
if (decision.isEscalated)
|
|
1113
|
+
emitEscalationUnavailable({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'quality', attempt: qualityAttemptIndex, role: 'implementer', wantedTier: decision.impl, reason: reworkCall.unavailableReason });
|
|
1114
|
+
return __recordOnce(adaptForAllTiersUnavailable(reworkCall.result, 'quality', qualityAttemptIndex));
|
|
1115
|
+
}
|
|
1116
|
+
finalImplResult = reworkCall.result;
|
|
1117
|
+
latestAttemptedImpl = { tier: reworkCall.usedTier, result: finalImplResult };
|
|
1118
|
+
implementerHistory.push(reworkCall.usedTier);
|
|
1119
|
+
const reworkReport = parseStructuredReport(finalImplResult.output);
|
|
1120
|
+
finalImplReport = reworkReport.summary ? reworkReport : buildFallbackImplReport(finalImplResult);
|
|
1121
|
+
fileContents = await readImplementerFileContents(finalImplResult.filesWritten, task.cwd);
|
|
1122
|
+
heartbeat?.transition({ stage: 'quality_review', stageIndex: 4, reviewRound: qualityAttemptIndex + 1, attemptCap: maxQualityRows });
|
|
1123
|
+
const reviewCall = await runWithFallback({ assigned: decision.reviewer, providerFor, unavailableTiers: qualityUnavailable, isTransportFailure: (r) => isReviewTransportFailure(r), getStatus: (r) => r.status, makeSyntheticFailure: () => makeSkippedReviewResult('all_tiers_unavailable'), call: (provider) => runQualityReview(provider, packet, finalImplReport, fileContents, finalImplResult.toolCalls, finalImplResult.filesWritten, evidence.block, qualityReviewPromptBuilder, finalImplResult.output) });
|
|
1124
|
+
if (reviewCall.bothUnavailable) {
|
|
1125
|
+
emitFallbackUnavailable({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'quality', attempt: qualityAttemptIndex, role: 'qualityReviewer', assignedTier: decision.reviewer, reason: reviewCall.unavailableReason });
|
|
1126
|
+
fallbackOverrides.push({ role: 'qualityReviewer', loop: 'quality', attempt: qualityAttemptIndex, assigned: decision.reviewer, used: reviewCall.usedTier, reason: reviewCall.unavailableReason, triggeringStatus: reviewCall.fallbackTriggeringStatus, bothUnavailable: true });
|
|
1127
|
+
qualityReviewerHistory.push('skipped');
|
|
1128
|
+
}
|
|
1129
|
+
else {
|
|
1130
|
+
qualityReviewerHistory.push(reviewCall.usedTier);
|
|
1131
|
+
if (reviewCall.fallbackFired) {
|
|
1132
|
+
emitFallback({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'quality', attempt: qualityAttemptIndex, role: 'qualityReviewer', assignedTier: decision.reviewer, usedTier: reviewCall.usedTier, reason: reviewCall.fallbackReason, triggeringStatus: reviewCall.fallbackTriggeringStatus, violatesSeparation: reviewCall.usedTier === implementerHistory[implementerHistory.length - 1] });
|
|
1133
|
+
fallbackOverrides.push({ role: 'qualityReviewer', loop: 'quality', attempt: qualityAttemptIndex, assigned: decision.reviewer, used: reviewCall.usedTier, reason: reviewCall.fallbackReason, triggeringStatus: reviewCall.fallbackTriggeringStatus, bothUnavailable: false });
|
|
1134
|
+
}
|
|
1089
1135
|
}
|
|
1136
|
+
qualityResult = reviewCall.result;
|
|
1137
|
+
if (reviewDidNotReject(qualityResult.status))
|
|
1138
|
+
lastNonRejectedImpl = { tier: implementerHistory[implementerHistory.length - 1], result: finalImplResult };
|
|
1139
|
+
qualityAttemptIndex++;
|
|
1140
|
+
if (qualityResult.status === 'approved' || qualityResult.status === 'skipped')
|
|
1141
|
+
break;
|
|
1142
|
+
const currentFindings = [...(qualityResult.findings ?? [])].sort().join('\0');
|
|
1143
|
+
const prevFindings = [...prevQualityFindings].sort().join('\0');
|
|
1144
|
+
if (currentFindings === prevFindings && currentFindings !== '')
|
|
1145
|
+
break;
|
|
1146
|
+
prevQualityFindings = [...(qualityResult.findings ?? [])];
|
|
1090
1147
|
}
|
|
1091
|
-
qualityResult = reviewCall.result;
|
|
1092
|
-
if (reviewDidNotReject(qualityResult.status))
|
|
1093
|
-
lastNonRejectedImpl = { tier: implementerHistory[implementerHistory.length - 1], result: finalImplResult };
|
|
1094
|
-
qualityAttemptIndex++;
|
|
1095
|
-
if (qualityResult.status === 'approved' || qualityResult.status === 'skipped')
|
|
1096
|
-
break;
|
|
1097
|
-
const currentFindings = [...(qualityResult.findings ?? [])].sort().join('\0');
|
|
1098
|
-
const prevFindings = [...prevQualityFindings].sort().join('\0');
|
|
1099
|
-
if (currentFindings === prevFindings && currentFindings !== '')
|
|
1100
|
-
break;
|
|
1101
|
-
prevQualityFindings = [...(qualityResult.findings ?? [])];
|
|
1102
1148
|
}
|
|
1103
1149
|
}
|
|
1104
1150
|
const finalReport = specReport ?? finalImplReport;
|
|
@@ -1120,17 +1166,22 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
1120
1166
|
message: 'Implementation diff exceeded the reviewer evidence byte cap and was truncated.',
|
|
1121
1167
|
});
|
|
1122
1168
|
}
|
|
1123
|
-
const specAggregateStatus =
|
|
1124
|
-
|
|
1125
|
-
:
|
|
1126
|
-
|
|
1127
|
-
|
|
1128
|
-
|
|
1169
|
+
const specAggregateStatus = reviewPolicy === 'quality_only'
|
|
1170
|
+
? 'skipped'
|
|
1171
|
+
: (['approved', 'changes_required', 'skipped', 'error', 'api_error', 'network_error', 'timeout'].includes(specStatus) ? specStatus : 'error');
|
|
1172
|
+
if (reviewPolicy !== 'quality_only') {
|
|
1173
|
+
endReviewStage(stats, 'spec_review', specReviewT0, specReviewC0, implementerAgentInfo, runningCostUSD(), specStatus === 'approved' ? 'approved'
|
|
1174
|
+
: specStatus === 'changes_required' ? 'changes_required'
|
|
1175
|
+
: specStatus === 'skipped' ? 'skipped'
|
|
1176
|
+
: specStatus === 'not_applicable' ? 'not_applicable'
|
|
1177
|
+
: 'error', specAttemptIndex - 1);
|
|
1178
|
+
}
|
|
1129
1179
|
const qualityAggregateStatus = qualityResult.status;
|
|
1130
1180
|
endReviewStage(stats, 'quality_review', qualityReviewT0, qualityReviewC0, implementerAgentInfo, runningCostUSD(), qualityResult.status === 'approved' ? 'approved'
|
|
1131
1181
|
: qualityResult.status === 'changes_required' ? 'changes_required'
|
|
1132
|
-
: qualityResult.status === '
|
|
1133
|
-
: '
|
|
1182
|
+
: qualityResult.status === 'annotated' ? 'annotated'
|
|
1183
|
+
: qualityResult.status === 'skipped' ? 'skipped'
|
|
1184
|
+
: 'error', qualityAttemptIndex - 1);
|
|
1134
1185
|
const aggregated = aggregateResult(finalReport, specReport, qualityResult.report, specAggregateStatus, qualityAggregateStatus);
|
|
1135
1186
|
// File artifact verification: check whether output targets exist on disk after all work.
|
|
1136
1187
|
// Only applies when status is ok; non-ok statuses skip verification entirely.
|
|
@@ -1155,6 +1206,7 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
1155
1206
|
specReviewStatus: specEnvelopeStatus,
|
|
1156
1207
|
qualityReviewStatus: qualityEnvelopeStatus,
|
|
1157
1208
|
stageStats: stats,
|
|
1209
|
+
reviewRounds: reviewRounds(),
|
|
1158
1210
|
specReviewReason: 'errorReason' in specResult ? specResult.errorReason : undefined,
|
|
1159
1211
|
qualityReviewReason: 'errorReason' in qualityResult ? qualityResult.errorReason : undefined,
|
|
1160
1212
|
structuredReport: aggregated,
|
|
@@ -1162,21 +1214,41 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
|
|
|
1162
1214
|
specReviewReport: specReport,
|
|
1163
1215
|
qualityReviewReport: qualityResult.report,
|
|
1164
1216
|
filePathsSkipped,
|
|
1165
|
-
agents: agentEnvelope(specReviewerHistory[specReviewerHistory.length - 1] ?? 'not_applicable', qualityReviewerHistory[qualityReviewerHistory.length - 1] ?? (reviewPolicy === 'full' ? 'not_applicable' : 'skipped')),
|
|
1217
|
+
agents: agentEnvelope(specReviewerHistory[specReviewerHistory.length - 1] ?? 'not_applicable', qualityReviewerHistory[qualityReviewerHistory.length - 1] ?? ((reviewPolicy === 'full' || reviewPolicy === 'quality_only') ? 'not_applicable' : 'skipped')),
|
|
1166
1218
|
models: {
|
|
1167
1219
|
implementer: implModel,
|
|
1168
|
-
specReviewer: reviewModel,
|
|
1169
|
-
qualityReviewer: reviewPolicy === 'full' ? reviewModel : null,
|
|
1220
|
+
specReviewer: reviewPolicy !== 'quality_only' ? reviewModel : null,
|
|
1221
|
+
qualityReviewer: (reviewPolicy === 'full' || reviewPolicy === 'quality_only') ? reviewModel : null,
|
|
1170
1222
|
},
|
|
1171
1223
|
fileArtifactsMissing,
|
|
1172
1224
|
commits,
|
|
1173
1225
|
commitError,
|
|
1174
1226
|
verification,
|
|
1175
1227
|
};
|
|
1228
|
+
if (reviewPolicy === 'quality_only') {
|
|
1229
|
+
emitTaskEvent('read_only_review.terminal', {
|
|
1230
|
+
route: routeKey,
|
|
1231
|
+
roundsUsed: qualityAttemptIndex,
|
|
1232
|
+
finalQualityVerdict: qualityResult.status === 'annotated' ? 'annotated'
|
|
1233
|
+
: qualityResult.status === 'skipped' ? 'skipped'
|
|
1234
|
+
: 'error',
|
|
1235
|
+
costUSD: taskCostUSD(),
|
|
1236
|
+
durationMs: Date.now() - taskStartMs,
|
|
1237
|
+
});
|
|
1238
|
+
}
|
|
1176
1239
|
return __recordOnce(runResult);
|
|
1177
1240
|
}
|
|
1178
1241
|
catch (err) {
|
|
1179
1242
|
const errorRunResult = withVerification(workerErrorResult(err));
|
|
1243
|
+
if (reviewPolicy === 'quality_only') {
|
|
1244
|
+
emitTaskEvent('read_only_review.terminal', {
|
|
1245
|
+
route: routeKey,
|
|
1246
|
+
roundsUsed: qualityAttemptIndex,
|
|
1247
|
+
finalQualityVerdict: 'error',
|
|
1248
|
+
costUSD: taskCostUSD(),
|
|
1249
|
+
durationMs: Date.now() - taskStartMs,
|
|
1250
|
+
});
|
|
1251
|
+
}
|
|
1180
1252
|
return __recordOnce(errorRunResult);
|
|
1181
1253
|
}
|
|
1182
1254
|
finally {
|