@zhixuan92/multi-model-agent-core 3.7.0 → 3.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82)
  1. package/README.md +1 -1
  2. package/dist/config/read-only-review-flag.d.ts +8 -0
  3. package/dist/config/read-only-review-flag.d.ts.map +1 -0
  4. package/dist/config/read-only-review-flag.js +13 -0
  5. package/dist/config/read-only-review-flag.js.map +1 -0
  6. package/dist/executors/_shared/findings-schema.d.ts +40 -0
  7. package/dist/executors/_shared/findings-schema.d.ts.map +1 -0
  8. package/dist/executors/_shared/findings-schema.js +23 -0
  9. package/dist/executors/_shared/findings-schema.js.map +1 -0
  10. package/dist/executors/_shared/review-verdict-mapping.d.ts +16 -0
  11. package/dist/executors/_shared/review-verdict-mapping.d.ts.map +1 -0
  12. package/dist/executors/_shared/review-verdict-mapping.js +24 -0
  13. package/dist/executors/_shared/review-verdict-mapping.js.map +1 -0
  14. package/dist/executors/audit.d.ts.map +1 -1
  15. package/dist/executors/audit.js +21 -5
  16. package/dist/executors/audit.js.map +1 -1
  17. package/dist/executors/debug.d.ts.map +1 -1
  18. package/dist/executors/debug.js +11 -2
  19. package/dist/executors/debug.js.map +1 -1
  20. package/dist/executors/investigate.d.ts.map +1 -1
  21. package/dist/executors/investigate.js +22 -17
  22. package/dist/executors/investigate.js.map +1 -1
  23. package/dist/executors/review.d.ts.map +1 -1
  24. package/dist/executors/review.js +48 -48
  25. package/dist/executors/review.js.map +1 -1
  26. package/dist/executors/types.d.ts +4 -1
  27. package/dist/executors/types.d.ts.map +1 -1
  28. package/dist/executors/verify.d.ts.map +1 -1
  29. package/dist/executors/verify.js +43 -8
  30. package/dist/executors/verify.js.map +1 -1
  31. package/dist/intake/compilers/investigate.d.ts.map +1 -1
  32. package/dist/intake/compilers/investigate.js +3 -4
  33. package/dist/intake/compilers/investigate.js.map +1 -1
  34. package/dist/intake/resolve.js +10 -10
  35. package/dist/intake/resolve.js.map +1 -1
  36. package/dist/intake/types.d.ts +1 -1
  37. package/dist/intake/types.d.ts.map +1 -1
  38. package/dist/observability/events.d.ts +91 -0
  39. package/dist/observability/events.d.ts.map +1 -1
  40. package/dist/observability/events.js +27 -0
  41. package/dist/observability/events.js.map +1 -1
  42. package/dist/review/quality-only-prompts.d.ts +20 -0
  43. package/dist/review/quality-only-prompts.d.ts.map +1 -0
  44. package/dist/review/quality-only-prompts.js +108 -0
  45. package/dist/review/quality-only-prompts.js.map +1 -0
  46. package/dist/review/quality-reviewer.d.ts +4 -1
  47. package/dist/review/quality-reviewer.d.ts.map +1 -1
  48. package/dist/review/quality-reviewer.js +6 -4
  49. package/dist/review/quality-reviewer.js.map +1 -1
  50. package/dist/run-tasks/index.d.ts +5 -0
  51. package/dist/run-tasks/index.d.ts.map +1 -1
  52. package/dist/run-tasks/index.js +1 -1
  53. package/dist/run-tasks/index.js.map +1 -1
  54. package/dist/run-tasks/reviewed-lifecycle.d.ts +4 -1
  55. package/dist/run-tasks/reviewed-lifecycle.d.ts.map +1 -1
  56. package/dist/run-tasks/reviewed-lifecycle.js +188 -106
  57. package/dist/run-tasks/reviewed-lifecycle.js.map +1 -1
  58. package/dist/tool-schemas/audit.d.ts +17 -0
  59. package/dist/tool-schemas/audit.d.ts.map +1 -1
  60. package/dist/tool-schemas/debug.d.ts +17 -0
  61. package/dist/tool-schemas/debug.d.ts.map +1 -1
  62. package/dist/tool-schemas/delegate.d.ts +17 -0
  63. package/dist/tool-schemas/delegate.d.ts.map +1 -1
  64. package/dist/tool-schemas/execute-plan.d.ts +17 -0
  65. package/dist/tool-schemas/execute-plan.d.ts.map +1 -1
  66. package/dist/tool-schemas/investigate.d.ts +17 -4
  67. package/dist/tool-schemas/investigate.d.ts.map +1 -1
  68. package/dist/tool-schemas/investigate.js +0 -1
  69. package/dist/tool-schemas/investigate.js.map +1 -1
  70. package/dist/tool-schemas/retry.d.ts +17 -0
  71. package/dist/tool-schemas/retry.d.ts.map +1 -1
  72. package/dist/tool-schemas/review.d.ts +17 -0
  73. package/dist/tool-schemas/review.d.ts.map +1 -1
  74. package/dist/tool-schemas/shared-output.d.ts +17 -0
  75. package/dist/tool-schemas/shared-output.d.ts.map +1 -1
  76. package/dist/tool-schemas/shared-output.js +6 -0
  77. package/dist/tool-schemas/shared-output.js.map +1 -1
  78. package/dist/tool-schemas/verify.d.ts +17 -0
  79. package/dist/tool-schemas/verify.d.ts.map +1 -1
  80. package/dist/types.d.ts +1 -1
  81. package/dist/types.d.ts.map +1 -1
  82. package/package.json +5 -1
@@ -23,6 +23,9 @@ import { buildFallbackImplReport, readImplementerFileContents } from './fallback
23
23
  import { composeVerboseLine, toVerboseFields } from '../diagnostics/verbose-line.js';
24
24
  import { withDoneCondition } from './execute-task.js';
25
25
  const exec = promisify(execFile);
26
+ const READ_ONLY_TOOL_NAMES = new Set([
27
+ 'audit', 'review', 'verify', 'investigate', 'debug',
28
+ ]);
26
29
  export function emptyStats() {
27
30
  return {
28
31
  implementing: { stage: 'implementing', entered: false, durationMs: null, costUSD: null, agentTier: null, modelFamily: null, model: null },
@@ -87,8 +90,13 @@ export function endVerifyStage(stats, t0, c0, agent, finalCostUSD, outcome, skip
87
90
  skipReason,
88
91
  };
89
92
  }
90
- export async function executeReviewedLifecycle(task, resolved, config, taskIndex, onProgress, heartbeatWiring, diagnostics, recorder, _route, _client, _triggeringSkill, bus) {
93
+ export async function executeReviewedLifecycle(task, resolved, config, taskIndex, onProgress, heartbeatWiring, diagnostics, recorder, _route, _client, _triggeringSkill, bus, qualityReviewPromptBuilder) {
91
94
  const reviewPolicy = task.reviewPolicy ?? 'full';
95
+ const routeKey = _route ?? '';
96
+ if (reviewPolicy === 'quality_only' && !READ_ONLY_TOOL_NAMES.has(routeKey)) {
97
+ throw new Error(`reviewPolicy 'quality_only' is only valid for read-only routes; received '${routeKey}'. ` +
98
+ `Use 'full', 'spec_only', 'diff_only', or 'off' for artifact-producing routes.`);
99
+ }
92
100
  const otherSlot = resolved.slot === 'standard' ? 'complex' : 'standard';
93
101
  let escalationProvider;
94
102
  try {
@@ -110,7 +118,8 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
110
118
  const { outputTargets } = partitionFilePaths(task.filePaths, task.cwd ?? process.cwd());
111
119
  const stageCount = reviewPolicy === 'off' ? 1 :
112
120
  reviewPolicy === 'spec_only' ? 3 :
113
- 5;
121
+ reviewPolicy === 'quality_only' ? 3 :
122
+ 5;
114
123
  const verbose = diagnostics?.verbose ?? false;
115
124
  const verboseStreamRaw = verbose
116
125
  ? (diagnostics?.verboseStream ?? ((line) => { process.stderr.write(line + '\n'); }))
@@ -361,7 +370,7 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
361
370
  terminationReason: 'all_tiers_unavailable',
362
371
  reviewRounds: reviewRounds(),
363
372
  error: `runWithFallback: both tiers unavailable (loop=${loop}, attempt=${attempt}, role=implementer)`,
364
- agents: agentEnvelope(specReviewerHistory[specReviewerHistory.length - 1] ?? 'not_applicable', qualityReviewerHistory[qualityReviewerHistory.length - 1] ?? (reviewPolicy === 'full' ? 'not_applicable' : 'skipped')),
373
+ agents: agentEnvelope(specReviewerHistory[specReviewerHistory.length - 1] ?? 'not_applicable', qualityReviewerHistory[qualityReviewerHistory.length - 1] ?? ((reviewPolicy === 'full' || reviewPolicy === 'quality_only') ? 'not_applicable' : 'skipped')),
365
374
  stageStats: stats,
366
375
  };
367
376
  }
@@ -395,7 +404,7 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
395
404
  error: message,
396
405
  specReviewStatus: aborting === 'spec' ? 'changes_required' : (base.specReviewStatus ?? 'approved'),
397
406
  qualityReviewStatus: aborting === 'quality' ? 'changes_required' : (base.qualityReviewStatus ?? 'skipped'),
398
- agents: agentEnvelope(specReviewerHistory[specReviewerHistory.length - 1] ?? 'not_applicable', qualityReviewerHistory[qualityReviewerHistory.length - 1] ?? (reviewPolicy === 'full' ? 'not_applicable' : 'skipped')),
407
+ agents: agentEnvelope(specReviewerHistory[specReviewerHistory.length - 1] ?? 'not_applicable', qualityReviewerHistory[qualityReviewerHistory.length - 1] ?? ((reviewPolicy === 'full' || reviewPolicy === 'quality_only') ? 'not_applicable' : 'skipped')),
399
408
  stageStats: stats,
400
409
  });
401
410
  const defaultVerification = { status: 'skipped', steps: [], totalDurationMs: 0, skipReason: 'no_command' };
@@ -751,7 +760,7 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
751
760
  ? [...(implResult.filesRead ?? []), ...implResult.filesWritten].some(f => task.filePaths.some(fp => f === fp || f.endsWith('/' + fp) || f.endsWith(fp)))
752
761
  : true;
753
762
  const filePathsSkipped = !filePathsInteracted;
754
- if (implResult.filesWritten.length === 0) {
763
+ if (implResult.filesWritten.length === 0 && reviewPolicy !== 'quality_only') {
755
764
  if (reviewPolicy === 'off') {
756
765
  emitTaskEvent('stage_change', { from: 'verifying', to: 'terminal' });
757
766
  const terminal = resolveOffTerminal({
@@ -859,7 +868,7 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
859
868
  };
860
869
  let fileContents = await readImplementerFileContents(implResult.filesWritten, task.cwd);
861
870
  const effectiveImplReport = implReport ?? buildFallbackImplReport(implResult);
862
- const evidence = isArtifactProducing
871
+ const evidence = (isArtifactProducing && reviewPolicy !== 'quality_only')
863
872
  ? await buildEvidence({ cwd, baselineHead, commits, verification, reviewPolicy })
864
873
  : { block: '', diffTruncated: false, fullDiff: '' };
865
874
  if (reviewPolicy === 'diff_only') {
@@ -917,116 +926,126 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
917
926
  let specStatus;
918
927
  let specReport;
919
928
  let specReviewReason;
920
- heartbeat?.transition({ stage: 'spec_review', stageIndex: 2, reviewRound: 1, attemptCap: maxSpecRows });
921
- const initialReviewerTier = pickReviewer({ loop: 'spec', attemptIndex: 0, baseTier: resolved.slot });
922
- const specReviewT0 = Date.now();
923
- const specReviewC0 = runningCostUSD();
924
- const initialSpecReview = await runWithFallback({
925
- assigned: initialReviewerTier,
926
- providerFor,
927
- unavailableTiers: specUnavailable,
928
- isTransportFailure: (r) => isReviewTransportFailure(r),
929
- getStatus: (r) => r.status,
930
- makeSyntheticFailure: () => makeSkippedReviewResult('all_tiers_unavailable'),
931
- call: (provider) => runSpecReview(provider, packet, effectiveImplReport, fileContents, implResult.toolCalls, task.planContext, evidence.block),
932
- });
933
- if (initialSpecReview.bothUnavailable) {
934
- emitFallbackUnavailable({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'spec', attempt: 0, role: 'specReviewer', assignedTier: initialReviewerTier, reason: initialSpecReview.unavailableReason });
935
- fallbackOverrides.push({ role: 'specReviewer', loop: 'spec', attempt: 0, assigned: initialReviewerTier, used: initialSpecReview.usedTier, reason: initialSpecReview.unavailableReason, triggeringStatus: initialSpecReview.fallbackTriggeringStatus, bothUnavailable: true });
936
- specReviewerHistory.push('skipped');
937
- }
938
- else {
939
- specReviewerHistory.push(initialSpecReview.usedTier);
940
- if (initialSpecReview.fallbackFired) {
941
- emitFallback({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'spec', attempt: 0, role: 'specReviewer', assignedTier: initialReviewerTier, usedTier: initialSpecReview.usedTier, reason: initialSpecReview.fallbackReason, triggeringStatus: initialSpecReview.fallbackTriggeringStatus, violatesSeparation: initialSpecReview.usedTier === implementerHistory[implementerHistory.length - 1] });
942
- fallbackOverrides.push({ role: 'specReviewer', loop: 'spec', attempt: 0, assigned: initialReviewerTier, used: initialSpecReview.usedTier, reason: initialSpecReview.fallbackReason, triggeringStatus: initialSpecReview.fallbackTriggeringStatus, bothUnavailable: false });
943
- }
944
- }
945
- specResult = initialSpecReview.bothUnavailable
946
- ? makeSkippedReviewResult('all_tiers_unavailable')
947
- : initialSpecReview.result;
948
- specStatus = specResult.status;
949
- specReport = 'report' in specResult ? specResult.report : undefined;
950
- specReviewReason = specStatus === 'skipped' ? 'all_tiers_unavailable' : ('errorReason' in specResult ? specResult.errorReason : undefined);
951
- let prevSpecFindings = [...(specResult.findings ?? [])];
952
- while (specStatus === 'changes_required') {
953
- if (specAttemptIndex >= maxSpecRows)
954
- return abortReviewLoop(finalImplResult, 'round_cap', 'review round cap reached before spec rework', 'spec');
955
- const currentCostUSD = taskCostUSD();
956
- if (currentCostUSD !== null && maxCostUSD !== undefined && currentCostUSD >= 0.8 * maxCostUSD) {
957
- emitTaskEvent('cost_check', { stage: 'spec_rework', tripped: true, cost_used_usd: currentCostUSD, cost_cap_usd: maxCostUSD, cost_available: true });
958
- return abortReviewLoop(finalImplResult, 'cost_ceiling', 'cost ceiling reached before spec rework', 'spec');
959
- }
960
- const decision = pickEscalation({ loop: 'spec', attemptIndex: specAttemptIndex, baseTier: resolved.slot });
961
- if (decision.isEscalated)
962
- emitEscalationEvent('spec', specAttemptIndex, decision);
963
- emitTaskEvent('stage_change', { from: 'spec_review', to: 'spec_rework', attempt: specAttemptIndex, attemptCap: maxSpecRows, implTier: decision.impl, reviewerTier: decision.reviewer, escalated: decision.isEscalated });
964
- heartbeat?.transition({ stage: 'spec_rework', stageIndex: 3, reviewRound: specAttemptIndex, attemptCap: maxSpecRows });
965
- const feedback = specResult.findings.length > 0 ? `\n\n## Spec Review Feedback (round ${specAttemptIndex}):\n${specResult.findings.map(f => `- ${f}`).join('\n')}` : '';
966
- const reworkTask = withDoneCondition({ ...task, prompt: `${task.prompt}${feedback}` });
967
- const reworkCall = await runWithFallback({ assigned: decision.impl, providerFor, unavailableTiers: specUnavailable, isTransportFailure: (r) => TRANSPORT_FAILURES.has(r.status) && r.capExhausted === undefined, getStatus: (r) => r.status, makeSyntheticFailure: (assigned) => makeSyntheticRunResult(assigned, 'all_tiers_unavailable'), call: (provider) => delegateWithEscalation(reworkTask, [provider], { explicitlyPinned: true, onProgress: wrappedOnProgress, taskDeadlineMs, abortSignal: stallController.signal, assignedTier: decision.impl }) });
968
- if (reworkCall.fallbackFired || reworkCall.bothUnavailable)
969
- fallbackOverrides.push({ role: 'implementer', loop: 'spec', attempt: specAttemptIndex, assigned: decision.impl, used: reworkCall.usedTier, reason: (reworkCall.fallbackReason ?? reworkCall.unavailableReason), triggeringStatus: reworkCall.fallbackTriggeringStatus, bothUnavailable: reworkCall.bothUnavailable });
970
- if (reworkCall.fallbackFired) {
971
- emitFallback({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'spec', attempt: specAttemptIndex, role: 'implementer', assignedTier: decision.impl, usedTier: reworkCall.usedTier, reason: reworkCall.fallbackReason, triggeringStatus: reworkCall.fallbackTriggeringStatus, violatesSeparation: false });
972
- if (decision.isEscalated && reworkCall.fallbackReason === 'not_configured')
973
- emitEscalationUnavailable({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'spec', attempt: specAttemptIndex, role: 'implementer', wantedTier: decision.impl, reason: reworkCall.fallbackReason });
974
- }
975
- if (reworkCall.bothUnavailable) {
976
- emitFallbackUnavailable({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'spec', attempt: specAttemptIndex, role: 'implementer', assignedTier: decision.impl, reason: reworkCall.unavailableReason });
977
- if (decision.isEscalated)
978
- emitEscalationUnavailable({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'spec', attempt: specAttemptIndex, role: 'implementer', wantedTier: decision.impl, reason: reworkCall.unavailableReason });
979
- return __recordOnce(adaptForAllTiersUnavailable(reworkCall.result, 'spec', specAttemptIndex));
980
- }
981
- finalImplResult = reworkCall.result;
982
- latestAttemptedImpl = { tier: reworkCall.usedTier, result: finalImplResult };
983
- implementerHistory.push(reworkCall.usedTier);
984
- const reworkReport = parseStructuredReport(finalImplResult.output);
985
- finalImplReport = reworkReport.summary ? reworkReport : buildFallbackImplReport(finalImplResult);
986
- fileContents = await readImplementerFileContents(finalImplResult.filesWritten, task.cwd);
987
- heartbeat?.transition({ stage: 'spec_review', stageIndex: 2, reviewRound: specAttemptIndex + 1, attemptCap: maxSpecRows });
988
- const reviewCall = await runWithFallback({ assigned: decision.reviewer, providerFor, unavailableTiers: specUnavailable, isTransportFailure: (r) => isReviewTransportFailure(r), getStatus: (r) => r.status, makeSyntheticFailure: () => makeSkippedReviewResult('all_tiers_unavailable'), call: (provider) => runSpecReview(provider, packet, finalImplReport, fileContents, finalImplResult.toolCalls, task.planContext, evidence.block) });
989
- if (reviewCall.bothUnavailable) {
990
- emitFallbackUnavailable({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'spec', attempt: specAttemptIndex, role: 'specReviewer', assignedTier: decision.reviewer, reason: reviewCall.unavailableReason });
991
- fallbackOverrides.push({ role: 'specReviewer', loop: 'spec', attempt: specAttemptIndex, assigned: decision.reviewer, used: reviewCall.usedTier, reason: reviewCall.unavailableReason, triggeringStatus: reviewCall.fallbackTriggeringStatus, bothUnavailable: true });
929
+ let specReviewT0 = 0;
930
+ let specReviewC0 = null;
931
+ if (reviewPolicy !== 'quality_only') {
932
+ heartbeat?.transition({ stage: 'spec_review', stageIndex: 2, reviewRound: 1, attemptCap: maxSpecRows });
933
+ const initialReviewerTier = pickReviewer({ loop: 'spec', attemptIndex: 0, baseTier: resolved.slot });
934
+ specReviewT0 = Date.now();
935
+ specReviewC0 = runningCostUSD();
936
+ const initialSpecReview = await runWithFallback({
937
+ assigned: initialReviewerTier,
938
+ providerFor,
939
+ unavailableTiers: specUnavailable,
940
+ isTransportFailure: (r) => isReviewTransportFailure(r),
941
+ getStatus: (r) => r.status,
942
+ makeSyntheticFailure: () => makeSkippedReviewResult('all_tiers_unavailable'),
943
+ call: (provider) => runSpecReview(provider, packet, effectiveImplReport, fileContents, implResult.toolCalls, task.planContext, evidence.block),
944
+ });
945
+ if (initialSpecReview.bothUnavailable) {
946
+ emitFallbackUnavailable({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'spec', attempt: 0, role: 'specReviewer', assignedTier: initialReviewerTier, reason: initialSpecReview.unavailableReason });
947
+ fallbackOverrides.push({ role: 'specReviewer', loop: 'spec', attempt: 0, assigned: initialReviewerTier, used: initialSpecReview.usedTier, reason: initialSpecReview.unavailableReason, triggeringStatus: initialSpecReview.fallbackTriggeringStatus, bothUnavailable: true });
992
948
  specReviewerHistory.push('skipped');
993
949
  }
994
950
  else {
995
- specReviewerHistory.push(reviewCall.usedTier);
996
- if (reviewCall.fallbackFired) {
997
- emitFallback({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'spec', attempt: specAttemptIndex, role: 'specReviewer', assignedTier: decision.reviewer, usedTier: reviewCall.usedTier, reason: reviewCall.fallbackReason, triggeringStatus: reviewCall.fallbackTriggeringStatus, violatesSeparation: reviewCall.usedTier === implementerHistory[implementerHistory.length - 1] });
998
- fallbackOverrides.push({ role: 'specReviewer', loop: 'spec', attempt: specAttemptIndex, assigned: decision.reviewer, used: reviewCall.usedTier, reason: reviewCall.fallbackReason, triggeringStatus: reviewCall.fallbackTriggeringStatus, bothUnavailable: false });
951
+ specReviewerHistory.push(initialSpecReview.usedTier);
952
+ if (initialSpecReview.fallbackFired) {
953
+ emitFallback({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'spec', attempt: 0, role: 'specReviewer', assignedTier: initialReviewerTier, usedTier: initialSpecReview.usedTier, reason: initialSpecReview.fallbackReason, triggeringStatus: initialSpecReview.fallbackTriggeringStatus, violatesSeparation: initialSpecReview.usedTier === implementerHistory[implementerHistory.length - 1] });
954
+ fallbackOverrides.push({ role: 'specReviewer', loop: 'spec', attempt: 0, assigned: initialReviewerTier, used: initialSpecReview.usedTier, reason: initialSpecReview.fallbackReason, triggeringStatus: initialSpecReview.fallbackTriggeringStatus, bothUnavailable: false });
999
955
  }
1000
956
  }
1001
- specResult = reviewCall.result;
957
+ specResult = initialSpecReview.bothUnavailable
958
+ ? makeSkippedReviewResult('all_tiers_unavailable')
959
+ : initialSpecReview.result;
1002
960
  specStatus = specResult.status;
1003
961
  specReport = 'report' in specResult ? specResult.report : undefined;
1004
962
  specReviewReason = specStatus === 'skipped' ? 'all_tiers_unavailable' : ('errorReason' in specResult ? specResult.errorReason : undefined);
1005
- if (reviewDidNotReject(specStatus))
1006
- lastNonRejectedImpl = { tier: implementerHistory[implementerHistory.length - 1], result: finalImplResult };
1007
- specAttemptIndex++;
1008
- if (specStatus === 'approved' || specStatus === 'skipped')
1009
- break;
1010
- const currentFindings = [...(specResult.findings ?? [])].sort().join('\0');
1011
- const prevFindings = [...prevSpecFindings].sort().join('\0');
1012
- if (currentFindings === prevFindings && currentFindings !== '')
1013
- break;
1014
- prevSpecFindings = [...(specResult.findings ?? [])];
963
+ let prevSpecFindings = [...(specResult.findings ?? [])];
964
+ while (specStatus === 'changes_required') {
965
+ if (specAttemptIndex >= maxSpecRows)
966
+ return abortReviewLoop(finalImplResult, 'round_cap', 'review round cap reached before spec rework', 'spec');
967
+ const currentCostUSD = taskCostUSD();
968
+ if (currentCostUSD !== null && maxCostUSD !== undefined && currentCostUSD >= 0.8 * maxCostUSD) {
969
+ emitTaskEvent('cost_check', { stage: 'spec_rework', tripped: true, cost_used_usd: currentCostUSD, cost_cap_usd: maxCostUSD, cost_available: true });
970
+ return abortReviewLoop(finalImplResult, 'cost_ceiling', 'cost ceiling reached before spec rework', 'spec');
971
+ }
972
+ const decision = pickEscalation({ loop: 'spec', attemptIndex: specAttemptIndex, baseTier: resolved.slot });
973
+ if (decision.isEscalated)
974
+ emitEscalationEvent('spec', specAttemptIndex, decision);
975
+ emitTaskEvent('stage_change', { from: 'spec_review', to: 'spec_rework', attempt: specAttemptIndex, attemptCap: maxSpecRows, implTier: decision.impl, reviewerTier: decision.reviewer, escalated: decision.isEscalated });
976
+ heartbeat?.transition({ stage: 'spec_rework', stageIndex: 3, reviewRound: specAttemptIndex, attemptCap: maxSpecRows });
977
+ const feedback = specResult.findings.length > 0 ? `\n\n## Spec Review Feedback (round ${specAttemptIndex}):\n${specResult.findings.map(f => `- ${f}`).join('\n')}` : '';
978
+ const reworkTask = withDoneCondition({ ...task, prompt: `${task.prompt}${feedback}` });
979
+ const reworkCall = await runWithFallback({ assigned: decision.impl, providerFor, unavailableTiers: specUnavailable, isTransportFailure: (r) => TRANSPORT_FAILURES.has(r.status) && r.capExhausted === undefined, getStatus: (r) => r.status, makeSyntheticFailure: (assigned) => makeSyntheticRunResult(assigned, 'all_tiers_unavailable'), call: (provider) => delegateWithEscalation(reworkTask, [provider], { explicitlyPinned: true, onProgress: wrappedOnProgress, taskDeadlineMs, abortSignal: stallController.signal, assignedTier: decision.impl }) });
980
+ if (reworkCall.fallbackFired || reworkCall.bothUnavailable)
981
+ fallbackOverrides.push({ role: 'implementer', loop: 'spec', attempt: specAttemptIndex, assigned: decision.impl, used: reworkCall.usedTier, reason: (reworkCall.fallbackReason ?? reworkCall.unavailableReason), triggeringStatus: reworkCall.fallbackTriggeringStatus, bothUnavailable: reworkCall.bothUnavailable });
982
+ if (reworkCall.fallbackFired) {
983
+ emitFallback({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'spec', attempt: specAttemptIndex, role: 'implementer', assignedTier: decision.impl, usedTier: reworkCall.usedTier, reason: reworkCall.fallbackReason, triggeringStatus: reworkCall.fallbackTriggeringStatus, violatesSeparation: false });
984
+ if (decision.isEscalated && reworkCall.fallbackReason === 'not_configured')
985
+ emitEscalationUnavailable({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'spec', attempt: specAttemptIndex, role: 'implementer', wantedTier: decision.impl, reason: reworkCall.fallbackReason });
986
+ }
987
+ if (reworkCall.bothUnavailable) {
988
+ emitFallbackUnavailable({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'spec', attempt: specAttemptIndex, role: 'implementer', assignedTier: decision.impl, reason: reworkCall.unavailableReason });
989
+ if (decision.isEscalated)
990
+ emitEscalationUnavailable({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'spec', attempt: specAttemptIndex, role: 'implementer', wantedTier: decision.impl, reason: reworkCall.unavailableReason });
991
+ return __recordOnce(adaptForAllTiersUnavailable(reworkCall.result, 'spec', specAttemptIndex));
992
+ }
993
+ finalImplResult = reworkCall.result;
994
+ latestAttemptedImpl = { tier: reworkCall.usedTier, result: finalImplResult };
995
+ implementerHistory.push(reworkCall.usedTier);
996
+ const reworkReport = parseStructuredReport(finalImplResult.output);
997
+ finalImplReport = reworkReport.summary ? reworkReport : buildFallbackImplReport(finalImplResult);
998
+ fileContents = await readImplementerFileContents(finalImplResult.filesWritten, task.cwd);
999
+ heartbeat?.transition({ stage: 'spec_review', stageIndex: 2, reviewRound: specAttemptIndex + 1, attemptCap: maxSpecRows });
1000
+ const reviewCall = await runWithFallback({ assigned: decision.reviewer, providerFor, unavailableTiers: specUnavailable, isTransportFailure: (r) => isReviewTransportFailure(r), getStatus: (r) => r.status, makeSyntheticFailure: () => makeSkippedReviewResult('all_tiers_unavailable'), call: (provider) => runSpecReview(provider, packet, finalImplReport, fileContents, finalImplResult.toolCalls, task.planContext, evidence.block) });
1001
+ if (reviewCall.bothUnavailable) {
1002
+ emitFallbackUnavailable({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'spec', attempt: specAttemptIndex, role: 'specReviewer', assignedTier: decision.reviewer, reason: reviewCall.unavailableReason });
1003
+ fallbackOverrides.push({ role: 'specReviewer', loop: 'spec', attempt: specAttemptIndex, assigned: decision.reviewer, used: reviewCall.usedTier, reason: reviewCall.unavailableReason, triggeringStatus: reviewCall.fallbackTriggeringStatus, bothUnavailable: true });
1004
+ specReviewerHistory.push('skipped');
1005
+ }
1006
+ else {
1007
+ specReviewerHistory.push(reviewCall.usedTier);
1008
+ if (reviewCall.fallbackFired) {
1009
+ emitFallback({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'spec', attempt: specAttemptIndex, role: 'specReviewer', assignedTier: decision.reviewer, usedTier: reviewCall.usedTier, reason: reviewCall.fallbackReason, triggeringStatus: reviewCall.fallbackTriggeringStatus, violatesSeparation: reviewCall.usedTier === implementerHistory[implementerHistory.length - 1] });
1010
+ fallbackOverrides.push({ role: 'specReviewer', loop: 'spec', attempt: specAttemptIndex, assigned: decision.reviewer, used: reviewCall.usedTier, reason: reviewCall.fallbackReason, triggeringStatus: reviewCall.fallbackTriggeringStatus, bothUnavailable: false });
1011
+ }
1012
+ }
1013
+ specResult = reviewCall.result;
1014
+ specStatus = specResult.status;
1015
+ specReport = 'report' in specResult ? specResult.report : undefined;
1016
+ specReviewReason = specStatus === 'skipped' ? 'all_tiers_unavailable' : ('errorReason' in specResult ? specResult.errorReason : undefined);
1017
+ if (reviewDidNotReject(specStatus))
1018
+ lastNonRejectedImpl = { tier: implementerHistory[implementerHistory.length - 1], result: finalImplResult };
1019
+ specAttemptIndex++;
1020
+ if (specStatus === 'approved' || specStatus === 'skipped')
1021
+ break;
1022
+ const currentFindings = [...(specResult.findings ?? [])].sort().join('\0');
1023
+ const prevFindings = [...prevSpecFindings].sort().join('\0');
1024
+ if (currentFindings === prevFindings && currentFindings !== '')
1025
+ break;
1026
+ prevSpecFindings = [...(specResult.findings ?? [])];
1027
+ }
1015
1028
  }
1016
- let qualityResult = { status: 'skipped', report: undefined, findings: [], errorReason: reviewPolicy === 'full' ? 'all_tiers_unavailable' : 'skipped: reviewPolicy is spec_only' };
1029
+ else {
1030
+ specResult = { status: 'skipped', report: undefined, findings: [], reason: 'all_tiers_unavailable' };
1031
+ specStatus = 'not_applicable';
1032
+ specReport = undefined;
1033
+ specReviewReason = 'skipped: reviewPolicy is quality_only';
1034
+ }
1035
+ let qualityResult = { status: 'skipped', report: undefined, findings: [], errorReason: (reviewPolicy === 'full' || reviewPolicy === 'quality_only') ? 'all_tiers_unavailable' : 'skipped: reviewPolicy is spec_only' };
1017
1036
  // Hoisted so endReviewStage (called after this block) can read them on the
1018
1037
  // success path. When the quality review is skipped (`reviewPolicy` is neither 'full' nor 'quality_only'),
1019
1038
  // the values stay at 0/null and the corresponding stage entry remains in its
1020
1039
  // `entered: false` default — endReviewStage is never called.
1021
1040
  let qualityReviewT0 = 0;
1022
1041
  let qualityReviewC0 = null;
1023
- if (reviewPolicy === 'full') {
1042
+ if (reviewPolicy === 'full' || reviewPolicy === 'quality_only') {
1024
1043
  qualityUnavailable = new Map();
1025
1044
  const qualityReviewerTier = pickReviewer({ loop: 'quality', attemptIndex: 0, baseTier: resolved.slot });
1026
1045
  heartbeat?.transition({ stage: 'quality_review', stageIndex: 4, reviewRound: 1, attemptCap: maxQualityRows });
1027
1046
  qualityReviewT0 = Date.now();
1028
1047
  qualityReviewC0 = runningCostUSD();
1029
- const initialQuality = await runWithFallback({ assigned: qualityReviewerTier, providerFor, unavailableTiers: qualityUnavailable, isTransportFailure: (r) => isReviewTransportFailure(r), getStatus: (r) => r.status, makeSyntheticFailure: () => makeSkippedReviewResult('all_tiers_unavailable'), call: (provider) => runQualityReview(provider, packet, specReport ?? finalImplReport, fileContents, finalImplResult.toolCalls, finalImplResult.filesWritten, evidence.block) });
1048
+ const initialQuality = await runWithFallback({ assigned: qualityReviewerTier, providerFor, unavailableTiers: qualityUnavailable, isTransportFailure: (r) => isReviewTransportFailure(r), getStatus: (r) => r.status, makeSyntheticFailure: () => makeSkippedReviewResult('all_tiers_unavailable'), call: (provider) => runQualityReview(provider, packet, specReport ?? finalImplReport, fileContents, finalImplResult.toolCalls, finalImplResult.filesWritten, evidence.block, qualityReviewPromptBuilder, finalImplResult.output) });
1030
1049
  if (initialQuality.bothUnavailable) {
1031
1050
  emitFallbackUnavailable({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'quality', attempt: 0, role: 'qualityReviewer', assignedTier: qualityReviewerTier, reason: initialQuality.unavailableReason });
1032
1051
  fallbackOverrides.push({ role: 'qualityReviewer', loop: 'quality', attempt: 0, assigned: qualityReviewerTier, used: initialQuality.usedTier, reason: initialQuality.unavailableReason, triggeringStatus: initialQuality.fallbackTriggeringStatus, bothUnavailable: true });
@@ -1040,6 +1059,20 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
1040
1059
  }
1041
1060
  }
1042
1061
  qualityResult = initialQuality.result;
1062
+ if (reviewPolicy === 'quality_only') {
1063
+ emitTaskEvent('read_only_review.quality', {
1064
+ route: routeKey,
1065
+ verdict: qualityResult.status === 'approved' ? 'approved'
1066
+ : qualityResult.status === 'changes_required' ? 'changes_required'
1067
+ : qualityResult.status === 'skipped' ? 'skipped'
1068
+ : 'error',
1069
+ iterationIndex: 1,
1070
+ findingsReviewed: qualityResult.findings?.length ?? 0,
1071
+ findingsFlagged: qualityResult.status === 'changes_required' ? (qualityResult.findings?.length ?? 0) : 0,
1072
+ durationMs: Date.now() - qualityReviewT0,
1073
+ costUSD: runningCostUSD() !== null && qualityReviewC0 !== null ? runningCostUSD() - qualityReviewC0 : null,
1074
+ });
1075
+ }
1043
1076
  let prevQualityFindings = [...(qualityResult.findings ?? [])];
1044
1077
  qualityAttemptIndex = 1;
1045
1078
  while (qualityResult.status === 'changes_required') {
@@ -1054,6 +1087,13 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
1054
1087
  if (decision.isEscalated)
1055
1088
  emitEscalationEvent('quality', qualityAttemptIndex, decision);
1056
1089
  emitTaskEvent('stage_change', { from: 'quality_review', to: 'quality_rework', attempt: qualityAttemptIndex, attemptCap: maxQualityRows, implTier: decision.impl, reviewerTier: decision.reviewer, escalated: decision.isEscalated });
1090
+ if (reviewPolicy === 'quality_only') {
1091
+ emitTaskEvent('read_only_review.rework', {
1092
+ route: routeKey,
1093
+ iterationIndex: qualityAttemptIndex,
1094
+ triggeringIssues: qualityResult.findings?.length ?? 0,
1095
+ });
1096
+ }
1057
1097
  heartbeat?.transition({ stage: 'quality_rework', stageIndex: 5, reviewRound: qualityAttemptIndex, attemptCap: maxQualityRows });
1058
1098
  const feedback = qualityResult.findings.length > 0 ? `\n\n## Quality Review Feedback (round ${qualityAttemptIndex}):\n${qualityResult.findings.map(f => `- ${f}`).join('\n')}` : '';
1059
1099
  const reworkTask = withDoneCondition({ ...task, prompt: `${task.prompt}${feedback}` });
@@ -1075,7 +1115,9 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
1075
1115
  finalImplReport = reworkReport.summary ? reworkReport : buildFallbackImplReport(finalImplResult);
1076
1116
  fileContents = await readImplementerFileContents(finalImplResult.filesWritten, task.cwd);
1077
1117
  heartbeat?.transition({ stage: 'quality_review', stageIndex: 4, reviewRound: qualityAttemptIndex + 1, attemptCap: maxQualityRows });
1078
- const reviewCall = await runWithFallback({ assigned: decision.reviewer, providerFor, unavailableTiers: qualityUnavailable, isTransportFailure: (r) => isReviewTransportFailure(r), getStatus: (r) => r.status, makeSyntheticFailure: () => makeSkippedReviewResult('all_tiers_unavailable'), call: (provider) => runQualityReview(provider, packet, finalImplReport, fileContents, finalImplResult.toolCalls, finalImplResult.filesWritten, evidence.block) });
1118
+ const reworkQualityT0 = Date.now();
1119
+ const reworkQualityC0 = runningCostUSD();
1120
+ const reviewCall = await runWithFallback({ assigned: decision.reviewer, providerFor, unavailableTiers: qualityUnavailable, isTransportFailure: (r) => isReviewTransportFailure(r), getStatus: (r) => r.status, makeSyntheticFailure: () => makeSkippedReviewResult('all_tiers_unavailable'), call: (provider) => runQualityReview(provider, packet, finalImplReport, fileContents, finalImplResult.toolCalls, finalImplResult.filesWritten, evidence.block, qualityReviewPromptBuilder, finalImplResult.output) });
1079
1121
  if (reviewCall.bothUnavailable) {
1080
1122
  emitFallbackUnavailable({ batchId: heartbeatWiring?.batchId ?? '', taskIndex, loop: 'quality', attempt: qualityAttemptIndex, role: 'qualityReviewer', assignedTier: decision.reviewer, reason: reviewCall.unavailableReason });
1081
1123
  fallbackOverrides.push({ role: 'qualityReviewer', loop: 'quality', attempt: qualityAttemptIndex, assigned: decision.reviewer, used: reviewCall.usedTier, reason: reviewCall.unavailableReason, triggeringStatus: reviewCall.fallbackTriggeringStatus, bothUnavailable: true });
@@ -1089,6 +1131,20 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
1089
1131
  }
1090
1132
  }
1091
1133
  qualityResult = reviewCall.result;
1134
+ if (reviewPolicy === 'quality_only') {
1135
+ emitTaskEvent('read_only_review.quality', {
1136
+ route: routeKey,
1137
+ verdict: qualityResult.status === 'approved' ? 'approved'
1138
+ : qualityResult.status === 'changes_required' ? 'changes_required'
1139
+ : qualityResult.status === 'skipped' ? 'skipped'
1140
+ : 'error',
1141
+ iterationIndex: qualityAttemptIndex + 1,
1142
+ findingsReviewed: qualityResult.findings?.length ?? 0,
1143
+ findingsFlagged: qualityResult.status === 'changes_required' ? (qualityResult.findings?.length ?? 0) : 0,
1144
+ durationMs: Date.now() - reworkQualityT0,
1145
+ costUSD: runningCostUSD() !== null && reworkQualityC0 !== null ? runningCostUSD() - reworkQualityC0 : null,
1146
+ });
1147
+ }
1092
1148
  if (reviewDidNotReject(qualityResult.status))
1093
1149
  lastNonRejectedImpl = { tier: implementerHistory[implementerHistory.length - 1], result: finalImplResult };
1094
1150
  qualityAttemptIndex++;
@@ -1120,12 +1176,16 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
1120
1176
  message: 'Implementation diff exceeded the reviewer evidence byte cap and was truncated.',
1121
1177
  });
1122
1178
  }
1123
- const specAggregateStatus = (['approved', 'changes_required', 'skipped', 'error', 'api_error', 'network_error', 'timeout'].includes(specStatus) ? specStatus : 'error');
1124
- endReviewStage(stats, 'spec_review', specReviewT0, specReviewC0, implementerAgentInfo, runningCostUSD(), specStatus === 'approved' ? 'approved'
1125
- : specStatus === 'changes_required' ? 'changes_required'
1126
- : specStatus === 'skipped' ? 'skipped'
1127
- : specStatus === 'not_applicable' ? 'not_applicable'
1128
- : 'error', specAttemptIndex - 1);
1179
+ const specAggregateStatus = reviewPolicy === 'quality_only'
1180
+ ? 'skipped'
1181
+ : (['approved', 'changes_required', 'skipped', 'error', 'api_error', 'network_error', 'timeout'].includes(specStatus) ? specStatus : 'error');
1182
+ if (reviewPolicy !== 'quality_only') {
1183
+ endReviewStage(stats, 'spec_review', specReviewT0, specReviewC0, implementerAgentInfo, runningCostUSD(), specStatus === 'approved' ? 'approved'
1184
+ : specStatus === 'changes_required' ? 'changes_required'
1185
+ : specStatus === 'skipped' ? 'skipped'
1186
+ : specStatus === 'not_applicable' ? 'not_applicable'
1187
+ : 'error', specAttemptIndex - 1);
1188
+ }
1129
1189
  const qualityAggregateStatus = qualityResult.status;
1130
1190
  endReviewStage(stats, 'quality_review', qualityReviewT0, qualityReviewC0, implementerAgentInfo, runningCostUSD(), qualityResult.status === 'approved' ? 'approved'
1131
1191
  : qualityResult.status === 'changes_required' ? 'changes_required'
@@ -1155,6 +1215,7 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
1155
1215
  specReviewStatus: specEnvelopeStatus,
1156
1216
  qualityReviewStatus: qualityEnvelopeStatus,
1157
1217
  stageStats: stats,
1218
+ reviewRounds: reviewRounds(),
1158
1219
  specReviewReason: 'errorReason' in specResult ? specResult.errorReason : undefined,
1159
1220
  qualityReviewReason: 'errorReason' in qualityResult ? qualityResult.errorReason : undefined,
1160
1221
  structuredReport: aggregated,
@@ -1162,21 +1223,42 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
1162
1223
  specReviewReport: specReport,
1163
1224
  qualityReviewReport: qualityResult.report,
1164
1225
  filePathsSkipped,
1165
- agents: agentEnvelope(specReviewerHistory[specReviewerHistory.length - 1] ?? 'not_applicable', qualityReviewerHistory[qualityReviewerHistory.length - 1] ?? (reviewPolicy === 'full' ? 'not_applicable' : 'skipped')),
1226
+ agents: agentEnvelope(specReviewerHistory[specReviewerHistory.length - 1] ?? 'not_applicable', qualityReviewerHistory[qualityReviewerHistory.length - 1] ?? ((reviewPolicy === 'full' || reviewPolicy === 'quality_only') ? 'not_applicable' : 'skipped')),
1166
1227
  models: {
1167
1228
  implementer: implModel,
1168
- specReviewer: reviewModel,
1169
- qualityReviewer: reviewPolicy === 'full' ? reviewModel : null,
1229
+ specReviewer: reviewPolicy !== 'quality_only' ? reviewModel : null,
1230
+ qualityReviewer: (reviewPolicy === 'full' || reviewPolicy === 'quality_only') ? reviewModel : null,
1170
1231
  },
1171
1232
  fileArtifactsMissing,
1172
1233
  commits,
1173
1234
  commitError,
1174
1235
  verification,
1175
1236
  };
1237
+ if (reviewPolicy === 'quality_only') {
1238
+ emitTaskEvent('read_only_review.terminal', {
1239
+ route: routeKey,
1240
+ roundsUsed: qualityAttemptIndex,
1241
+ finalQualityVerdict: qualityResult.status === 'approved' ? 'approved'
1242
+ : qualityResult.status === 'changes_required' ? 'changes_required'
1243
+ : qualityResult.status === 'skipped' ? 'skipped'
1244
+ : 'error',
1245
+ costUSD: taskCostUSD(),
1246
+ durationMs: Date.now() - taskStartMs,
1247
+ });
1248
+ }
1176
1249
  return __recordOnce(runResult);
1177
1250
  }
1178
1251
  catch (err) {
1179
1252
  const errorRunResult = withVerification(workerErrorResult(err));
1253
+ if (reviewPolicy === 'quality_only') {
1254
+ emitTaskEvent('read_only_review.terminal', {
1255
+ route: routeKey,
1256
+ roundsUsed: qualityAttemptIndex,
1257
+ finalQualityVerdict: 'error',
1258
+ costUSD: taskCostUSD(),
1259
+ durationMs: Date.now() - taskStartMs,
1260
+ });
1261
+ }
1180
1262
  return __recordOnce(errorRunResult);
1181
1263
  }
1182
1264
  finally {