@zhixuan92/multi-model-agent-core 3.1.7 → 3.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/auto-commit.d.ts +8 -1
- package/dist/auto-commit.d.ts.map +1 -1
- package/dist/auto-commit.js +6 -3
- package/dist/auto-commit.js.map +1 -1
- package/dist/batch-cache.d.ts +1 -1
- package/dist/batch-cache.d.ts.map +1 -1
- package/dist/batch-cache.js +3 -5
- package/dist/batch-cache.js.map +1 -1
- package/dist/config/schema.d.ts +13 -13
- package/dist/delegate-with-escalation.d.ts +2 -1
- package/dist/delegate-with-escalation.d.ts.map +1 -1
- package/dist/delegate-with-escalation.js.map +1 -1
- package/dist/diagnostics/request-spill.d.ts +16 -0
- package/dist/diagnostics/request-spill.d.ts.map +1 -0
- package/dist/diagnostics/request-spill.js +23 -0
- package/dist/diagnostics/request-spill.js.map +1 -0
- package/dist/diagnostics/verbose-line.d.ts +12 -0
- package/dist/diagnostics/verbose-line.d.ts.map +1 -0
- package/dist/diagnostics/verbose-line.js +80 -0
- package/dist/diagnostics/verbose-line.js.map +1 -0
- package/dist/executors/audit.js +1 -1
- package/dist/executors/audit.js.map +1 -1
- package/dist/executors/debug.js +2 -2
- package/dist/executors/debug.js.map +1 -1
- package/dist/executors/delegate.d.ts +2 -2
- package/dist/executors/delegate.d.ts.map +1 -1
- package/dist/executors/delegate.js +7 -3
- package/dist/executors/delegate.js.map +1 -1
- package/dist/executors/execute-plan.d.ts.map +1 -1
- package/dist/executors/execute-plan.js +10 -3
- package/dist/executors/execute-plan.js.map +1 -1
- package/dist/executors/execution-context.d.ts +3 -0
- package/dist/executors/execution-context.d.ts.map +1 -0
- package/dist/executors/execution-context.js +20 -0
- package/dist/executors/execution-context.js.map +1 -0
- package/dist/executors/retry.d.ts +1 -1
- package/dist/executors/retry.d.ts.map +1 -1
- package/dist/executors/retry.js +5 -2
- package/dist/executors/retry.js.map +1 -1
- package/dist/executors/review.js +1 -1
- package/dist/executors/review.js.map +1 -1
- package/dist/executors/shared-compute.d.ts +2 -1
- package/dist/executors/shared-compute.d.ts.map +1 -1
- package/dist/executors/shared-compute.js.map +1 -1
- package/dist/executors/types.d.ts +29 -26
- package/dist/executors/types.d.ts.map +1 -1
- package/dist/executors/verify.js +1 -1
- package/dist/executors/verify.js.map +1 -1
- package/dist/heartbeat.d.ts +8 -1
- package/dist/heartbeat.d.ts.map +1 -1
- package/dist/heartbeat.js +28 -1
- package/dist/heartbeat.js.map +1 -1
- package/dist/index.d.ts +7 -3
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +1 -1
- package/dist/index.js.map +1 -1
- package/dist/intake/compilers/delegate.d.ts +3 -1
- package/dist/intake/compilers/delegate.d.ts.map +1 -1
- package/dist/intake/compilers/delegate.js +23 -12
- package/dist/intake/compilers/delegate.js.map +1 -1
- package/dist/intake/compilers/execute-plan.d.ts +6 -1
- package/dist/intake/compilers/execute-plan.d.ts.map +1 -1
- package/dist/intake/compilers/execute-plan.js +8 -1
- package/dist/intake/compilers/execute-plan.js.map +1 -1
- package/dist/intake/resolve.js +1 -1
- package/dist/intake/resolve.js.map +1 -1
- package/dist/intake/types.d.ts +10 -0
- package/dist/intake/types.d.ts.map +1 -1
- package/dist/provider.d.ts.map +1 -1
- package/dist/provider.js.map +1 -1
- package/dist/readiness/readiness.d.ts +2 -1
- package/dist/readiness/readiness.d.ts.map +1 -1
- package/dist/readiness/readiness.js.map +1 -1
- package/dist/reporting/structured-report.d.ts +19 -0
- package/dist/reporting/structured-report.d.ts.map +1 -1
- package/dist/reporting/structured-report.js +50 -1
- package/dist/reporting/structured-report.js.map +1 -1
- package/dist/review/diff-review.d.ts +29 -0
- package/dist/review/diff-review.d.ts.map +1 -0
- package/dist/review/diff-review.js +53 -0
- package/dist/review/diff-review.js.map +1 -0
- package/dist/review/evidence.d.ts +15 -0
- package/dist/review/evidence.d.ts.map +1 -0
- package/dist/review/evidence.js +26 -0
- package/dist/review/evidence.js.map +1 -0
- package/dist/review/quality-reviewer.d.ts +1 -1
- package/dist/review/quality-reviewer.d.ts.map +1 -1
- package/dist/review/quality-reviewer.js +5 -3
- package/dist/review/quality-reviewer.js.map +1 -1
- package/dist/review/spec-reviewer.d.ts +1 -1
- package/dist/review/spec-reviewer.d.ts.map +1 -1
- package/dist/review/spec-reviewer.js +3 -2
- package/dist/review/spec-reviewer.js.map +1 -1
- package/dist/routing/model-profiles.d.ts +1 -1
- package/dist/routing/types.d.ts +15 -0
- package/dist/routing/types.d.ts.map +1 -0
- package/dist/routing/types.js +2 -0
- package/dist/routing/types.js.map +1 -0
- package/dist/run-tasks/commit-stage.d.ts +16 -0
- package/dist/run-tasks/commit-stage.d.ts.map +1 -0
- package/dist/run-tasks/commit-stage.js +43 -0
- package/dist/run-tasks/commit-stage.js.map +1 -0
- package/dist/run-tasks/execute-task.d.ts +20 -0
- package/dist/run-tasks/execute-task.d.ts.map +1 -0
- package/dist/run-tasks/execute-task.js +29 -0
- package/dist/run-tasks/execute-task.js.map +1 -0
- package/dist/run-tasks/fallback-report.d.ts +5 -0
- package/dist/run-tasks/fallback-report.d.ts.map +1 -0
- package/dist/run-tasks/fallback-report.js +33 -0
- package/dist/run-tasks/fallback-report.js.map +1 -0
- package/dist/{run-tasks.d.ts → run-tasks/index.d.ts} +8 -5
- package/dist/run-tasks/index.d.ts.map +1 -0
- package/dist/run-tasks/index.js +118 -0
- package/dist/run-tasks/index.js.map +1 -0
- package/dist/run-tasks/metadata-repair.d.ts +15 -0
- package/dist/run-tasks/metadata-repair.d.ts.map +1 -0
- package/dist/run-tasks/metadata-repair.js +30 -0
- package/dist/run-tasks/metadata-repair.js.map +1 -0
- package/dist/run-tasks/plan-extraction.d.ts +2 -0
- package/dist/run-tasks/plan-extraction.d.ts.map +1 -0
- package/dist/run-tasks/plan-extraction.js +44 -0
- package/dist/run-tasks/plan-extraction.js.map +1 -0
- package/dist/run-tasks/reviewed-lifecycle.d.ts +15 -0
- package/dist/run-tasks/reviewed-lifecycle.d.ts.map +1 -0
- package/dist/run-tasks/reviewed-lifecycle.js +839 -0
- package/dist/run-tasks/reviewed-lifecycle.js.map +1 -0
- package/dist/run-tasks/verify-stage.d.ts +25 -0
- package/dist/run-tasks/verify-stage.d.ts.map +1 -0
- package/dist/run-tasks/verify-stage.js +168 -0
- package/dist/run-tasks/verify-stage.js.map +1 -0
- package/dist/run-tasks/worker-status.d.ts +3 -0
- package/dist/run-tasks/worker-status.d.ts.map +1 -0
- package/dist/run-tasks/worker-status.js +13 -0
- package/dist/run-tasks/worker-status.js.map +1 -0
- package/dist/runners/base/result-builders.d.ts +81 -0
- package/dist/runners/base/result-builders.d.ts.map +1 -0
- package/dist/runners/base/result-builders.js +103 -0
- package/dist/runners/base/result-builders.js.map +1 -0
- package/dist/runners/base/types.d.ts +53 -0
- package/dist/runners/base/types.d.ts.map +1 -0
- package/dist/runners/base/types.js +2 -0
- package/dist/runners/base/types.js.map +1 -0
- package/dist/runners/claude-runner.d.ts +2 -1
- package/dist/runners/claude-runner.d.ts.map +1 -1
- package/dist/runners/claude-runner.js +44 -109
- package/dist/runners/claude-runner.js.map +1 -1
- package/dist/runners/codex-runner.d.ts +2 -1
- package/dist/runners/codex-runner.d.ts.map +1 -1
- package/dist/runners/codex-runner.js +45 -110
- package/dist/runners/codex-runner.js.map +1 -1
- package/dist/runners/error-classification.d.ts +1 -1
- package/dist/runners/error-classification.d.ts.map +1 -1
- package/dist/runners/openai-runner.d.ts +2 -1
- package/dist/runners/openai-runner.d.ts.map +1 -1
- package/dist/runners/openai-runner.js +34 -84
- package/dist/runners/openai-runner.js.map +1 -1
- package/dist/runners/prevention.d.ts.map +1 -1
- package/dist/runners/prevention.js +18 -0
- package/dist/runners/prevention.js.map +1 -1
- package/dist/runners/types.d.ts +126 -0
- package/dist/runners/types.d.ts.map +1 -0
- package/dist/runners/types.js +2 -0
- package/dist/runners/types.js.map +1 -0
- package/dist/tool-schemas/audit.d.ts +2 -2
- package/dist/tool-schemas/delegate.d.ts +9 -0
- package/dist/tool-schemas/delegate.d.ts.map +1 -1
- package/dist/tool-schemas/delegate.js +4 -0
- package/dist/tool-schemas/delegate.js.map +1 -1
- package/dist/tool-schemas/execute-plan.d.ts +13 -2
- package/dist/tool-schemas/execute-plan.d.ts.map +1 -1
- package/dist/tool-schemas/execute-plan.js +22 -4
- package/dist/tool-schemas/execute-plan.js.map +1 -1
- package/dist/tool-schemas/review.d.ts +1 -1
- package/dist/types.d.ts +36 -327
- package/dist/types.d.ts.map +1 -1
- package/dist/types.js +8 -37
- package/dist/types.js.map +1 -1
- package/package.json +35 -3
- package/dist/run-tasks.d.ts.map +0 -1
- package/dist/run-tasks.js +0 -687
- package/dist/run-tasks.js.map +0 -1
|
@@ -0,0 +1,839 @@
|
|
|
1
|
+
import { execFile } from 'node:child_process';
|
|
2
|
+
import { promisify } from 'node:util';
|
|
3
|
+
import { computeCostUSD, computeSavedCostUSD } from '../types.js';
|
|
4
|
+
import { createProvider } from '../provider.js';
|
|
5
|
+
import { delegateWithEscalation } from '../delegate-with-escalation.js';
|
|
6
|
+
import { HeartbeatTimer } from '../heartbeat.js';
|
|
7
|
+
import { runSpecReview } from '../review/spec-reviewer.js';
|
|
8
|
+
import { runQualityReview } from '../review/quality-reviewer.js';
|
|
9
|
+
import { runDiffReview } from '../review/diff-review.js';
|
|
10
|
+
import { aggregateResult } from '../review/aggregate-result.js';
|
|
11
|
+
import { buildEvidence } from '../review/evidence.js';
|
|
12
|
+
import { parseStructuredReport } from '../reporting/structured-report.js';
|
|
13
|
+
import { runCommitStage, readbackCommit } from './commit-stage.js';
|
|
14
|
+
import { runVerifyStage } from './verify-stage.js';
|
|
15
|
+
import { runMetadataRepairTurn } from './metadata-repair.js';
|
|
16
|
+
import { partitionFilePaths, checkOutputTargets } from '../file-artifact-check.js';
|
|
17
|
+
import { extractWorkerStatus } from './worker-status.js';
|
|
18
|
+
import { buildFallbackImplReport, readImplementerFileContents } from './fallback-report.js';
|
|
19
|
+
import { composeVerboseLine } from '../diagnostics/verbose-line.js';
|
|
20
|
+
import { withDoneCondition } from './execute-task.js';
|
|
21
|
+
const exec = promisify(execFile);
|
|
22
|
+
export async function executeReviewedLifecycle(task, resolved, config, taskIndex, onProgress, heartbeatWiring, diagnostics) {
|
|
23
|
+
const reviewPolicy = task.reviewPolicy ?? 'full';
|
|
24
|
+
const otherSlot = resolved.slot === 'standard' ? 'complex' : 'standard';
|
|
25
|
+
// Partition filePaths into output targets before the worker runs.
|
|
26
|
+
// Output targets are paths that do not yet exist on disk.
|
|
27
|
+
const { outputTargets } = partitionFilePaths(task.filePaths, task.cwd ?? process.cwd());
|
|
28
|
+
let escalationProvider;
|
|
29
|
+
try {
|
|
30
|
+
escalationProvider = createProvider(otherSlot, config);
|
|
31
|
+
}
|
|
32
|
+
catch {
|
|
33
|
+
// Other slot not configured — auto-escalation not available
|
|
34
|
+
}
|
|
35
|
+
const stageCount = reviewPolicy === 'off' ? 1 :
|
|
36
|
+
reviewPolicy === 'spec_only' ? 3 :
|
|
37
|
+
5;
|
|
38
|
+
const verbose = diagnostics?.verbose ?? false;
|
|
39
|
+
let lastStageSeen;
|
|
40
|
+
const verboseStreamRaw = verbose
|
|
41
|
+
? (diagnostics?.verboseStream ?? ((line) => { process.stderr.write(line + '\n'); }))
|
|
42
|
+
: undefined;
|
|
43
|
+
const verboseBatchIdEarly = heartbeatWiring?.batchId;
|
|
44
|
+
const shortBatchEarly = verboseBatchIdEarly ? verboseBatchIdEarly.slice(0, 8) : '????????';
|
|
45
|
+
// Start the heartbeat whenever there's a downstream consumer:
|
|
46
|
+
// - onProgress (external progress callback from the runTasks caller)
|
|
47
|
+
// - verbose (stderr stream needs the heartbeat's tool_call / turn_complete relay)
|
|
48
|
+
// - recordHeartbeat (server needs heartbeat ticks to update BatchRegistry)
|
|
49
|
+
// - logger (post-mortem JSONL logging needs the events too)
|
|
50
|
+
// Otherwise there is no point creating a timer.
|
|
51
|
+
const needHeartbeat = onProgress !== undefined ||
|
|
52
|
+
verbose ||
|
|
53
|
+
heartbeatWiring?.recordHeartbeat !== undefined ||
|
|
54
|
+
diagnostics?.logger !== undefined;
|
|
55
|
+
// Synthesize an onProgress sink when the caller didn't pass one — the
|
|
56
|
+
// heartbeat needs a place to emit heartbeat events so the stage-change
|
|
57
|
+
// detector below fires. Discards events if there is no external consumer.
|
|
58
|
+
const synthOnProgress = onProgress ?? (() => { });
|
|
59
|
+
const heartbeat = needHeartbeat
|
|
60
|
+
? new HeartbeatTimer((event) => {
|
|
61
|
+
if (verboseStreamRaw && event.kind === 'heartbeat') {
|
|
62
|
+
// Emit on every heartbeat tick so the operator can confirm
|
|
63
|
+
// the timer is actually firing. Stage-change lines are richer
|
|
64
|
+
// but fire only on transitions; plain ticks let you see
|
|
65
|
+
// per-5s progress inside a long-running stage.
|
|
66
|
+
if (event.stage !== lastStageSeen) {
|
|
67
|
+
if (lastStageSeen !== undefined) {
|
|
68
|
+
verboseStreamRaw(composeVerboseLine({
|
|
69
|
+
event: 'stage_change',
|
|
70
|
+
ts: new Date().toISOString(),
|
|
71
|
+
batch: shortBatchEarly,
|
|
72
|
+
task: taskIndex,
|
|
73
|
+
from: lastStageSeen,
|
|
74
|
+
to: event.stage,
|
|
75
|
+
}));
|
|
76
|
+
}
|
|
77
|
+
lastStageSeen = event.stage;
|
|
78
|
+
}
|
|
79
|
+
const sinceLastMs = Date.now() - prevEventAtMs;
|
|
80
|
+
verboseStreamRaw(composeVerboseLine({
|
|
81
|
+
event: 'heartbeat',
|
|
82
|
+
ts: new Date().toISOString(),
|
|
83
|
+
batch: shortBatchEarly,
|
|
84
|
+
task: taskIndex,
|
|
85
|
+
elapsed: event.elapsed,
|
|
86
|
+
stage: event.stage,
|
|
87
|
+
round: event.reviewRound,
|
|
88
|
+
cap: event.maxReviewRounds,
|
|
89
|
+
tools: event.progress.toolCalls,
|
|
90
|
+
read: event.progress.filesRead,
|
|
91
|
+
wrote: event.progress.filesWritten,
|
|
92
|
+
text: textEmissionChars,
|
|
93
|
+
cost: event.costUSD,
|
|
94
|
+
idle_ms: sinceLastMs,
|
|
95
|
+
}));
|
|
96
|
+
}
|
|
97
|
+
synthOnProgress(taskIndex, event);
|
|
98
|
+
}, {
|
|
99
|
+
provider: resolved.provider.config.model,
|
|
100
|
+
parentModel: task.parentModel,
|
|
101
|
+
...(heartbeatWiring?.batchId !== undefined && { batchId: heartbeatWiring.batchId }),
|
|
102
|
+
...(heartbeatWiring?.recordHeartbeat !== undefined && { recordHeartbeat: heartbeatWiring.recordHeartbeat }),
|
|
103
|
+
})
|
|
104
|
+
: undefined;
|
|
105
|
+
heartbeat?.start(stageCount);
|
|
106
|
+
if (verboseStreamRaw) {
|
|
107
|
+
verboseStreamRaw(composeVerboseLine({
|
|
108
|
+
event: 'heartbeat_timer',
|
|
109
|
+
ts: new Date().toISOString(),
|
|
110
|
+
batch: shortBatchEarly,
|
|
111
|
+
task: taskIndex,
|
|
112
|
+
state: heartbeat ? 'started' : 'disabled',
|
|
113
|
+
stage_count: stageCount,
|
|
114
|
+
tick_ms: heartbeat ? 5000 : undefined,
|
|
115
|
+
reason: heartbeat ? undefined : 'no_consumer',
|
|
116
|
+
}));
|
|
117
|
+
}
|
|
118
|
+
const implModel = resolved.provider.config.model;
|
|
119
|
+
const progressCounters = { filesRead: 0, filesWritten: 0, toolCalls: 0 };
|
|
120
|
+
const verboseLogger = verbose && diagnostics?.logger ? diagnostics.logger : undefined;
|
|
121
|
+
const verboseBatchId = verboseBatchIdEarly;
|
|
122
|
+
const verboseStream = verboseStreamRaw;
|
|
123
|
+
const shortBatch = shortBatchEarly;
|
|
124
|
+
if (verboseStream) {
|
|
125
|
+
verboseStream(composeVerboseLine({
|
|
126
|
+
event: 'worker_start',
|
|
127
|
+
ts: new Date().toISOString(),
|
|
128
|
+
batch: shortBatch,
|
|
129
|
+
task: taskIndex,
|
|
130
|
+
worker: resolved.provider.config.model,
|
|
131
|
+
}));
|
|
132
|
+
}
|
|
133
|
+
let prevEventAtMs = verbose ? Date.now() : 0;
|
|
134
|
+
// Wrap whenever we have ANY consumer for InternalRunnerEvent (heartbeat,
|
|
135
|
+
// verbose stream, or verbose logger). Previously this only wrapped when
|
|
136
|
+
// the caller passed onProgress, so --verbose + HTTP handlers (which don't
|
|
137
|
+
// pass onProgress) silently dropped every tool_call / turn_complete event.
|
|
138
|
+
let textEmissionChars = 0;
|
|
139
|
+
const wrappedOnProgress = needHeartbeat
|
|
140
|
+
? (event) => {
|
|
141
|
+
if (event.kind === 'turn_start') {
|
|
142
|
+
heartbeat?.markEvent('llm');
|
|
143
|
+
if (verbose)
|
|
144
|
+
prevEventAtMs = Date.now();
|
|
145
|
+
if (verboseStream) {
|
|
146
|
+
verboseStream(composeVerboseLine({
|
|
147
|
+
event: 'turn_start',
|
|
148
|
+
ts: new Date().toISOString(),
|
|
149
|
+
batch: shortBatch,
|
|
150
|
+
task: taskIndex,
|
|
151
|
+
turn: event.turn,
|
|
152
|
+
provider: event.provider,
|
|
153
|
+
}));
|
|
154
|
+
}
|
|
155
|
+
}
|
|
156
|
+
if (event.kind === 'text_emission') {
|
|
157
|
+
heartbeat?.markEvent('text');
|
|
158
|
+
textEmissionChars += event.chars;
|
|
159
|
+
if (verboseStream && event.chars > 0) {
|
|
160
|
+
const preview = event.preview.length > 60
|
|
161
|
+
? event.preview.slice(0, 57) + '...'
|
|
162
|
+
: event.preview;
|
|
163
|
+
verboseStream(composeVerboseLine({
|
|
164
|
+
event: 'text_emission',
|
|
165
|
+
ts: new Date().toISOString(),
|
|
166
|
+
batch: shortBatch,
|
|
167
|
+
task: taskIndex,
|
|
168
|
+
chars: event.chars,
|
|
169
|
+
total: textEmissionChars,
|
|
170
|
+
preview,
|
|
171
|
+
}));
|
|
172
|
+
}
|
|
173
|
+
}
|
|
174
|
+
if (event.kind === 'tool_call') {
|
|
175
|
+
heartbeat?.markEvent('tool');
|
|
176
|
+
progressCounters.toolCalls++;
|
|
177
|
+
const name = event.toolSummary.split('(')[0];
|
|
178
|
+
if (name === 'readFile' || name === 'grep' || name === 'glob' || name === 'listFiles') {
|
|
179
|
+
progressCounters.filesRead++;
|
|
180
|
+
}
|
|
181
|
+
else if (name === 'writeFile' || name === 'editFile') {
|
|
182
|
+
progressCounters.filesWritten++;
|
|
183
|
+
}
|
|
184
|
+
heartbeat?.updateProgress(progressCounters.filesRead, progressCounters.filesWritten, progressCounters.toolCalls);
|
|
185
|
+
const now = verbose ? Date.now() : 0;
|
|
186
|
+
const sincePrevMs = verbose ? now - prevEventAtMs : 0;
|
|
187
|
+
if (verbose)
|
|
188
|
+
prevEventAtMs = now;
|
|
189
|
+
if (verboseLogger && verboseBatchId) {
|
|
190
|
+
verboseLogger.toolCall({
|
|
191
|
+
batchId: verboseBatchId,
|
|
192
|
+
taskIndex,
|
|
193
|
+
tool: event.toolSummary,
|
|
194
|
+
durationMs: sincePrevMs,
|
|
195
|
+
});
|
|
196
|
+
}
|
|
197
|
+
if (verboseStream) {
|
|
198
|
+
verboseStream(composeVerboseLine({
|
|
199
|
+
event: 'tool_call',
|
|
200
|
+
ts: new Date().toISOString(),
|
|
201
|
+
batch: shortBatch,
|
|
202
|
+
task: taskIndex,
|
|
203
|
+
tool: event.toolSummary,
|
|
204
|
+
duration_ms: sincePrevMs,
|
|
205
|
+
}));
|
|
206
|
+
}
|
|
207
|
+
}
|
|
208
|
+
if (event.kind === 'turn_complete') {
|
|
209
|
+
heartbeat?.markEvent('llm');
|
|
210
|
+
const costUSD = computeCostUSD(event.cumulativeInputTokens, event.cumulativeOutputTokens, resolved.provider.config);
|
|
211
|
+
const savedCostUSD = computeSavedCostUSD(costUSD, event.cumulativeInputTokens, event.cumulativeOutputTokens, task.parentModel);
|
|
212
|
+
heartbeat?.updateCost(costUSD, savedCostUSD);
|
|
213
|
+
const nowTurn = verbose ? Date.now() : 0;
|
|
214
|
+
const turnDurMs = verbose ? nowTurn - prevEventAtMs : 0;
|
|
215
|
+
if (verbose)
|
|
216
|
+
prevEventAtMs = nowTurn;
|
|
217
|
+
if (verboseLogger && verboseBatchId) {
|
|
218
|
+
verboseLogger.llmTurn({
|
|
219
|
+
batchId: verboseBatchId,
|
|
220
|
+
taskIndex,
|
|
221
|
+
turnIndex: progressCounters.toolCalls,
|
|
222
|
+
provider: resolved.provider.config.model,
|
|
223
|
+
inputTokens: event.cumulativeInputTokens,
|
|
224
|
+
outputTokens: event.cumulativeOutputTokens,
|
|
225
|
+
costUSD,
|
|
226
|
+
});
|
|
227
|
+
}
|
|
228
|
+
if (verboseStream) {
|
|
229
|
+
verboseStream(composeVerboseLine({
|
|
230
|
+
event: 'turn_complete',
|
|
231
|
+
ts: new Date().toISOString(),
|
|
232
|
+
batch: shortBatch,
|
|
233
|
+
task: taskIndex,
|
|
234
|
+
input_tokens: event.cumulativeInputTokens,
|
|
235
|
+
output_tokens: event.cumulativeOutputTokens,
|
|
236
|
+
cost: costUSD,
|
|
237
|
+
duration_ms: turnDurMs,
|
|
238
|
+
provider: resolved.provider.config.model,
|
|
239
|
+
}));
|
|
240
|
+
}
|
|
241
|
+
}
|
|
242
|
+
}
|
|
243
|
+
: undefined;
|
|
244
|
+
const cwd = task.cwd ?? process.cwd();
|
|
245
|
+
const taskStartMs = Date.now();
|
|
246
|
+
const commits = [];
|
|
247
|
+
let commitError;
|
|
248
|
+
let specRework = 0;
|
|
249
|
+
let qualityRework = 0;
|
|
250
|
+
let metadataRepair = 0;
|
|
251
|
+
const maxReviewRounds = task.maxReviewRounds ?? 3;
|
|
252
|
+
const maxCostUSD = task.maxCostUSD;
|
|
253
|
+
const reviewRounds = () => ({ spec: specRework, quality: qualityRework, metadata: metadataRepair, cap: maxReviewRounds });
|
|
254
|
+
const taskCostUSD = () => (heartbeat ? heartbeat.getHeartbeatTickInfo().costUSD : null);
|
|
255
|
+
// When the review loop aborts mid-flight, preserve any review-status info already set
|
|
256
|
+
// on the base result (set by callers via abortReviewLoop({ ...res, specReviewStatus, ... })).
|
|
257
|
+
// Defaults to 'changes_required' for whichever loop tripped — that's the only state the
|
|
258
|
+
// loop ever fires from, by construction.
|
|
259
|
+
// Builds the result returned when a review loop is aborted.
// `base` is copied, then overlaid with an 'incomplete' status, the
// 'review_loop_aborted' worker status, the termination reason, the current
// review-round counters and the error message. The loop named by `aborting`
// ('spec' or 'quality') is marked 'changes_required'; the other keeps the
// status already present on `base`, or a default when `base` has none.
const abortReviewLoop = (base, terminationReason, message, aborting) => {
    const specTripped = aborting === 'spec';
    const qualityTripped = aborting === 'quality';
    return {
        ...base,
        status: 'incomplete',
        workerStatus: 'review_loop_aborted',
        terminationReason,
        reviewRounds: reviewRounds(),
        error: message,
        specReviewStatus: specTripped ? 'changes_required' : (base.specReviewStatus ?? 'approved'),
        qualityReviewStatus: qualityTripped ? 'changes_required' : (base.qualityReviewStatus ?? 'skipped'),
    };
};
|
|
269
|
+
const defaultVerification = { status: 'skipped', steps: [], totalDurationMs: 0, skipReason: 'no_command' };
|
|
270
|
+
let latestVerification = defaultVerification;
|
|
271
|
+
// Writes one verbose diagnostic line for `event` when a verbose stream is
// configured; does nothing otherwise. The line always carries a timestamp,
// the short batch id and the task index; entries in `fields` are spread last
// and therefore override those defaults on key collision.
const emitVerbose = (event, fields) => {
    if (verboseStream) {
        const payload = {
            event,
            ts: new Date().toISOString(),
            batch: shortBatch,
            task: taskIndex,
            ...fields,
        };
        verboseStream(composeVerboseLine(payload));
    }
};
|
|
282
|
+
// Runs the verify stage for the current task and publishes its outcome.
// Steps, in order:
//   1. announce the committing -> verifying transition on the verbose stream
//      and on the heartbeat (when one exists);
//   2. await runVerifyStage with the task's verify command and timeout;
//   3. remember the outcome in `latestVerification`;
//   4. emit one 'verify_step' verbose line per step, plus 'verify_skipped'
//      when the stage reports status 'skipped'.
// Returns the verification object produced by runVerifyStage.
async function runVerificationStage() {
    emitVerbose('stage_change', { from: 'committing', to: 'verifying' });
    heartbeat?.transition({
        stage: 'verifying',
        stageIndex: 4,
        reviewRound: undefined,
        // Use the function-level cap so the heartbeat reports the same value
        // as reviewRounds().cap. The previous `task.maxReviewRounds ?? 5`
        // disagreed with the default of 3 applied everywhere else.
        maxReviewRounds,
    });
    const verification = await runVerifyStage({
        cwd,
        verifyCommand: task.verifyCommand,
        // Task-level timeout wins, then the configured default, then 30 minutes.
        taskTimeoutMs: task.timeoutMs ?? config.defaults.timeoutMs ?? 1_800_000,
        taskStartMs,
    });
    latestVerification = verification;
    for (const step of verification.steps) {
        emitVerbose('verify_step', {
            command: step.command,
            status: step.status,
            exit_code: step.exitCode,
            signal: step.signal,
            duration_ms: step.durationMs,
            // Normalise a null error message to undefined.
            error_message: step.errorMessage ?? undefined,
        });
    }
    if (verification.status === 'skipped') {
        emitVerbose('verify_skipped', { reason: verification.skipReason ?? 'no_command', stage: 'verifying' });
    }
    return verification;
}
|
|
312
|
+
// Returns a shallow copy of `result` whose `verification` property is set to
// `verification`. When the second argument is omitted, the most recently
// recorded verification (`latestVerification`) is used.
function withVerification(result, verification = latestVerification) {
    const stamped = { ...result };
    stamped.verification = verification;
    return stamped;
}
|
|
315
|
+
// Converts a verification outcome with status 'error' into a terminal error
// result built on top of `base`. Returns null for any other verification
// status, so callers can continue with the normal flow. The error text is
// taken from the first step whose status is not 'passed', falling back to a
// generic message when that step has no error message or no such step exists.
function verificationErrorResult(base, verification) {
    if (verification.status !== 'error') {
        return null;
    }
    const firstBadStep = verification.steps.find((step) => step.status !== 'passed');
    const errorText = firstBadStep?.errorMessage ?? 'verify command error';
    return withVerification({
        ...base,
        status: 'error',
        workerStatus: 'done_with_concerns',
        error: errorText,
        errorCode: 'verify_command_error',
        commits,
        commitError,
    }, verification);
}
|
|
331
|
+
// Builds the terminal result from the implementation result `base` and the
// verification outcome:
//   - 'failed' verification: appends a high-severity verification concern and
//     sets the worker status to 'done_with_concerns';
//   - 'error' verification: returns an error result (worker status 'failed')
//     whose message comes from the first step that did not pass;
//   - anything else: keeps base.status and the worker status normalised by
//     workerStatusForTerminal.
// Every returned result carries the recorded commits, commit error and the
// verification object.
function resolveOffTerminal(base, verification) {
    const mergedConcerns = [...(base.concerns ?? [])];
    const inheritedStatus = workerStatusForTerminal(base.workerStatus);
    const verifyFailed = verification.status === 'failed';
    if (verifyFailed) {
        mergedConcerns.push({
            source: 'verification',
            severity: 'high',
            message: 'Verification failed after implementation.',
        });
    }
    if (verification.status === 'error') {
        const firstBadStep = verification.steps.find((step) => step.status !== 'passed');
        return withVerification({
            ...base,
            status: 'error',
            workerStatus: 'failed',
            error: firstBadStep?.errorMessage ?? 'verify command error',
            errorCode: 'verify_command_error',
            commits,
            commitError,
        }, verification);
    }
    return withVerification({
        ...base,
        status: base.status,
        workerStatus: verifyFailed ? 'done_with_concerns' : inheritedStatus,
        concerns: mergedConcerns,
        commits,
        commitError,
    }, verification);
}
|
|
366
|
+
// Builds the terminal result from a diff-review verdict.
//   - verdict.kind === 'reject': returns an error result with code
//     'diff_review_rejected', carrying only the concerns already on `base`.
//   - otherwise: merges the verdict's concerns, adds a high-severity concern
//     when verification failed and a medium-severity one when the diff was
//     truncated, then reports 'done_with_concerns' if any concern exists, or
//     the worker status normalised by workerStatusForTerminal if none does.
function resolveDiffOnlyTerminal(base, verdict, verification, diffTruncated) {
    const mergedConcerns = [...(base.concerns ?? [])];
    if (verdict.kind === 'reject') {
        const rejection = verdict.message || 'diff review rejected implementation';
        return withVerification({
            ...base,
            status: 'error',
            workerStatus: 'failed',
            error: rejection,
            errorCode: 'diff_review_rejected',
            structuredError: {
                code: 'diff_review_rejected',
                message: rejection,
            },
            concerns: mergedConcerns,
            commits,
            commitError,
        }, verification);
    }
    mergedConcerns.push(...verdict.concerns);
    if (verification.status === 'failed') {
        mergedConcerns.push({
            source: 'verification',
            severity: 'high',
            message: 'Verification failed after implementation.',
        });
    }
    if (diffTruncated) {
        mergedConcerns.push({
            source: 'diff_truncated',
            severity: 'medium',
            message: 'Implementation diff exceeded the reviewer evidence byte cap and was truncated.',
        });
    }
    // A failed verification always pushed a concern above, so a non-empty
    // list covers that case as well.
    const finalWorkerStatus = mergedConcerns.length > 0
        ? 'done_with_concerns'
        : workerStatusForTerminal(base.workerStatus);
    return withVerification({
        ...base,
        status: base.status,
        workerStatus: finalWorkerStatus,
        concerns: mergedConcerns,
        commits,
        commitError,
    }, verification);
}
|
|
411
|
+
// Normalises a worker status for a terminal result: the four statuses listed
// below pass through unchanged; every other value (including undefined)
// becomes 'done'.
function workerStatusForTerminal(status) {
    switch (status) {
        case 'needs_context':
        case 'blocked':
        case 'failed':
        case 'done_with_concerns':
            return status;
        default:
            return 'done';
    }
}
|
|
416
|
+
// Appends to `commits` one entry per revision in the git range `from..to`
// (`to` defaults to 'HEAD'). Revisions are listed with `git rev-list
// --reverse` and read back one at a time, in the order git printed them.
async function recordWorkerCommits(from, to = 'HEAD') {
    const range = `${from}..${to}`;
    const listing = await exec('git', ['rev-list', '--reverse', range], { cwd });
    const shas = listing.stdout.trim().split('\n').filter((line) => line.length > 0);
    for (const sha of shas) {
        const entry = await readbackCommit(sha, cwd);
        commits.push(entry);
    }
}
|
|
423
|
+
// Asks the provider for valid commit metadata, making at most two attempts.
// `git status --porcelain` output is captured before and after each attempt;
// if the two differ, the repair is abandoned with
// commitError = 'commit_metadata_repair_modified_files' and null is returned.
// Returns the first commit object an attempt yields. If no attempt yields one,
// sets commitError to 'commit_metadata_invalid: <last diagnostic>' and
// returns null.
async function repairCommitMetadata(initialDiagnostic) {
    const maxAttempts = 2;
    const snapshotStatus = async () => {
        const { stdout } = await exec('git', ['status', '--porcelain=v1', '-z'], { cwd });
        return stdout;
    };
    let diagnostic = initialDiagnostic || 'no commit block emitted';
    for (let attempt = 0; attempt < maxAttempts; attempt += 1) {
        const before = await snapshotStatus();
        const repaired = await runMetadataRepairTurn({ task, zodError: diagnostic, cwd, providerSlot: resolved.slot, provider: resolved.provider });
        const after = await snapshotStatus();
        if (before !== after) {
            commitError = 'commit_metadata_repair_modified_files';
            return null;
        }
        if (repaired.commit) {
            return repaired.commit;
        }
        // Feed this attempt's diagnostic into the next attempt.
        diagnostic = repaired.commitDiagnostic ?? 'no commit block emitted';
    }
    commitError = `commit_metadata_invalid: ${diagnostic}`;
    return null;
}
|
|
445
|
+
// Records the commits that resulted from the implementation step.
//   - If HEAD differs from `baselineHead`, every revision in
//     baselineHead..HEAD is appended to `commits`.
//   - If `git status --porcelain` reports anything, the commit metadata from
//     `implReport` (or, failing that, from repairCommitMetadata) is passed to
//     runCommitStage and the resulting commit is appended to `commits`. When
//     no valid metadata can be obtained, nothing further is committed.
// Does nothing when HEAD is unchanged and the status output is empty.
async function captureCommitsAfterImplementation(implResult, implReport, baselineHead) {
    const { stdout: porcelain } = await exec('git', ['status', '--porcelain=v1'], { cwd });
    const { stdout: headOutput } = await exec('git', ['rev-parse', 'HEAD'], { cwd });
    const headMoved = headOutput.trim() !== baselineHead;
    const treeDirty = porcelain.length > 0;
    if (headMoved) {
        await recordWorkerCommits(baselineHead, 'HEAD');
    }
    if (!treeDirty) {
        return;
    }
    let commitMeta = implReport?.commit;
    if (commitMeta === undefined || commitMeta === null) {
        commitMeta = await repairCommitMetadata(implReport?.commitDiagnostic ?? 'no commit block emitted');
    }
    if (!commitMeta) {
        return;
    }
    const created = await runCommitStage({ cwd, filesWritten: implResult.filesWritten, commit: commitMeta });
    commits.push(created);
}
|
|
462
|
+
try {
|
|
463
|
+
// The dirty-tree precondition + git baseline only apply to artifact-producing tasks
|
|
464
|
+
// (those with autoCommit === true). Non-artifact presets — audit, review, verify,
|
|
465
|
+
// debug — neither produce commits nor read git state, so they bypass the check
|
|
466
|
+
// entirely. Per spec Section A: "Non-artifact tasks (audits, analyses, read-only
|
|
467
|
+
// investigations) skip stages 3 and 4."
|
|
468
|
+
const isArtifactProducing = task.autoCommit === true;
|
|
469
|
+
let baselineHead = '';
|
|
470
|
+
if (isArtifactProducing) {
|
|
471
|
+
baselineHead = (await exec('git', ['rev-parse', 'HEAD'], { cwd })).stdout.trim();
|
|
472
|
+
const baselinePorcelain = (await exec('git', ['status', '--porcelain=v1', '-z'], { cwd })).stdout;
|
|
473
|
+
if (baselinePorcelain.length !== 0) {
|
|
474
|
+
return withVerification({
|
|
475
|
+
output: `Sub-agent error: task.cwd ${cwd} had pre-existing modifications`,
|
|
476
|
+
status: 'error',
|
|
477
|
+
usage: { inputTokens: 0, outputTokens: 0, totalTokens: 0, costUSD: null },
|
|
478
|
+
turns: 0,
|
|
479
|
+
filesRead: [],
|
|
480
|
+
filesWritten: [],
|
|
481
|
+
toolCalls: [],
|
|
482
|
+
outputIsDiagnostic: true,
|
|
483
|
+
escalationLog: [],
|
|
484
|
+
error: `task.cwd ${cwd} had pre-existing modifications`,
|
|
485
|
+
errorCode: 'dirty_worktree',
|
|
486
|
+
commits,
|
|
487
|
+
});
|
|
488
|
+
}
|
|
489
|
+
}
|
|
490
|
+
const implResult = await delegateWithEscalation(withDoneCondition(task), [resolved.provider], { explicitlyPinned: false, escalateToProvider: escalationProvider, onProgress: wrappedOnProgress });
|
|
491
|
+
const implReport = implResult.status === 'ok' ? parseStructuredReport(implResult.output) : undefined;
|
|
492
|
+
const workerStatus = extractWorkerStatus(implReport);
|
|
493
|
+
if (implResult.status === 'ok' && isArtifactProducing) {
|
|
494
|
+
await captureCommitsAfterImplementation(implResult, implReport, baselineHead);
|
|
495
|
+
}
|
|
496
|
+
const verification = isArtifactProducing ? await runVerificationStage() : defaultVerification;
|
|
497
|
+
const verifyError = verificationErrorResult(implResult, verification);
|
|
498
|
+
if (verifyError)
|
|
499
|
+
return verifyError;
|
|
500
|
+
const filePathsInteracted = task.filePaths && task.filePaths.length > 0
|
|
501
|
+
? [...(implResult.filesRead ?? []), ...implResult.filesWritten].some(f => task.filePaths.some(fp => f === fp || f.endsWith('/' + fp) || f.endsWith(fp)))
|
|
502
|
+
: true;
|
|
503
|
+
const filePathsSkipped = !filePathsInteracted;
|
|
504
|
+
if (implResult.filesWritten.length === 0) {
|
|
505
|
+
heartbeat?.updateStageCount(1);
|
|
506
|
+
const effectiveImplReport = implReport ?? buildFallbackImplReport(implResult);
|
|
507
|
+
const earlyFileArtifactsMissing = implResult.status === 'ok' ? checkOutputTargets(outputTargets) : undefined;
|
|
508
|
+
const earlyStatus = implResult.status === 'ok' && earlyFileArtifactsMissing
|
|
509
|
+
? 'incomplete'
|
|
510
|
+
: implResult.status;
|
|
511
|
+
return {
|
|
512
|
+
...implResult,
|
|
513
|
+
status: earlyStatus,
|
|
514
|
+
workerStatus,
|
|
515
|
+
specReviewStatus: 'not_applicable',
|
|
516
|
+
qualityReviewStatus: 'not_applicable',
|
|
517
|
+
specReviewReason: 'task produced no file artifacts to review',
|
|
518
|
+
qualityReviewReason: 'task produced no file artifacts to review',
|
|
519
|
+
implementationReport: effectiveImplReport,
|
|
520
|
+
structuredReport: {
|
|
521
|
+
summary: '[No artifacts] task produced no file artifacts to review',
|
|
522
|
+
filesChanged: effectiveImplReport.filesChanged,
|
|
523
|
+
validationsRun: effectiveImplReport.validationsRun,
|
|
524
|
+
deviationsFromBrief: effectiveImplReport.deviationsFromBrief,
|
|
525
|
+
unresolved: effectiveImplReport.unresolved,
|
|
526
|
+
},
|
|
527
|
+
filePathsSkipped,
|
|
528
|
+
agents: {
|
|
529
|
+
implementer: resolved.slot,
|
|
530
|
+
specReviewer: 'not_applicable',
|
|
531
|
+
qualityReviewer: 'not_applicable',
|
|
532
|
+
},
|
|
533
|
+
models: {
|
|
534
|
+
implementer: implModel,
|
|
535
|
+
specReviewer: null,
|
|
536
|
+
qualityReviewer: null,
|
|
537
|
+
},
|
|
538
|
+
fileArtifactsMissing: earlyFileArtifactsMissing,
|
|
539
|
+
commits,
|
|
540
|
+
commitError,
|
|
541
|
+
verification,
|
|
542
|
+
};
|
|
543
|
+
}
|
|
544
|
+
if (workerStatus === 'needs_context' || workerStatus === 'blocked') {
|
|
545
|
+
return {
|
|
546
|
+
...implResult,
|
|
547
|
+
workerStatus,
|
|
548
|
+
specReviewStatus: 'skipped',
|
|
549
|
+
qualityReviewStatus: 'skipped',
|
|
550
|
+
specReviewReason: 'skipped: worker reported ' + workerStatus,
|
|
551
|
+
qualityReviewReason: 'skipped: worker reported ' + workerStatus,
|
|
552
|
+
agents: {
|
|
553
|
+
implementer: resolved.slot,
|
|
554
|
+
specReviewer: 'skipped',
|
|
555
|
+
qualityReviewer: 'skipped',
|
|
556
|
+
},
|
|
557
|
+
models: {
|
|
558
|
+
implementer: implModel,
|
|
559
|
+
specReviewer: null,
|
|
560
|
+
qualityReviewer: null,
|
|
561
|
+
},
|
|
562
|
+
fileArtifactsMissing: implResult.status === 'ok' ? checkOutputTargets(outputTargets) : undefined,
|
|
563
|
+
commits,
|
|
564
|
+
commitError,
|
|
565
|
+
verification,
|
|
566
|
+
};
|
|
567
|
+
}
|
|
568
|
+
if (reviewPolicy === 'off') {
|
|
569
|
+
emitVerbose('stage_change', { from: 'verifying', to: 'terminal' });
|
|
570
|
+
const terminal = resolveOffTerminal({
|
|
571
|
+
...implResult,
|
|
572
|
+
workerStatus,
|
|
573
|
+
specReviewStatus: 'skipped',
|
|
574
|
+
qualityReviewStatus: 'skipped',
|
|
575
|
+
specReviewReason: 'skipped: reviewPolicy is off',
|
|
576
|
+
qualityReviewReason: 'skipped: reviewPolicy is off',
|
|
577
|
+
agents: {
|
|
578
|
+
implementer: resolved.slot,
|
|
579
|
+
specReviewer: 'skipped',
|
|
580
|
+
qualityReviewer: 'skipped',
|
|
581
|
+
},
|
|
582
|
+
models: {
|
|
583
|
+
implementer: implModel,
|
|
584
|
+
specReviewer: null,
|
|
585
|
+
qualityReviewer: null,
|
|
586
|
+
},
|
|
587
|
+
implementationReport: implReport,
|
|
588
|
+
fileArtifactsMissing: implResult.status === 'ok' ? checkOutputTargets(outputTargets) : undefined,
|
|
589
|
+
}, verification);
|
|
590
|
+
return terminal;
|
|
591
|
+
}
|
|
592
|
+
let otherProvider;
|
|
593
|
+
try {
|
|
594
|
+
otherProvider = createProvider(otherSlot, config);
|
|
595
|
+
}
|
|
596
|
+
catch {
|
|
597
|
+
return {
|
|
598
|
+
...implResult,
|
|
599
|
+
workerStatus,
|
|
600
|
+
specReviewStatus: 'skipped',
|
|
601
|
+
qualityReviewStatus: 'skipped',
|
|
602
|
+
specReviewReason: 'skipped: no review agent configured',
|
|
603
|
+
qualityReviewReason: 'skipped: no review agent configured',
|
|
604
|
+
agents: {
|
|
605
|
+
implementer: resolved.slot,
|
|
606
|
+
specReviewer: 'skipped',
|
|
607
|
+
qualityReviewer: 'skipped',
|
|
608
|
+
},
|
|
609
|
+
models: {
|
|
610
|
+
implementer: implModel,
|
|
611
|
+
specReviewer: null,
|
|
612
|
+
qualityReviewer: null,
|
|
613
|
+
},
|
|
614
|
+
fileArtifactsMissing: implResult.status === 'ok' ? checkOutputTargets(outputTargets) : undefined,
|
|
615
|
+
commits,
|
|
616
|
+
commitError,
|
|
617
|
+
verification,
|
|
618
|
+
};
|
|
619
|
+
}
|
|
620
|
+
const reviewModel = otherProvider.config.model;
|
|
621
|
+
const packet = {
|
|
622
|
+
prompt: task.prompt,
|
|
623
|
+
scope: task.filePaths ?? [],
|
|
624
|
+
doneCondition: task.done ?? 'tsc passes',
|
|
625
|
+
};
|
|
626
|
+
let fileContents = await readImplementerFileContents(implResult.filesWritten, task.cwd);
|
|
627
|
+
const effectiveImplReport = implReport ?? buildFallbackImplReport(implResult);
|
|
628
|
+
const evidence = isArtifactProducing
|
|
629
|
+
? await buildEvidence({ cwd, baselineHead, commits, verification, reviewPolicy })
|
|
630
|
+
: { block: '', diffTruncated: false, fullDiff: '' };
|
|
631
|
+
if (reviewPolicy === 'diff_only') {
|
|
632
|
+
emitVerbose('stage_change', { from: 'verifying', to: 'diff_review' });
|
|
633
|
+
heartbeat?.transition({
|
|
634
|
+
stage: 'diff_review',
|
|
635
|
+
stageIndex: 2,
|
|
636
|
+
reviewRound: 1,
|
|
637
|
+
maxReviewRounds,
|
|
638
|
+
});
|
|
639
|
+
const verdict = await runDiffReview({
|
|
640
|
+
cwd,
|
|
641
|
+
diff: evidence.fullDiff,
|
|
642
|
+
diffTruncated: evidence.diffTruncated,
|
|
643
|
+
verification,
|
|
644
|
+
worker: { call: (prompt) => otherProvider.run(prompt) },
|
|
645
|
+
});
|
|
646
|
+
emitVerbose('review_decision', { stage: 'diff_review', verdict: verdict.kind, round: 1 });
|
|
647
|
+
return resolveDiffOnlyTerminal({
|
|
648
|
+
...implResult,
|
|
649
|
+
workerStatus,
|
|
650
|
+
specReviewStatus: 'skipped',
|
|
651
|
+
qualityReviewStatus: 'skipped',
|
|
652
|
+
specReviewReason: 'skipped: reviewPolicy is diff_only',
|
|
653
|
+
qualityReviewReason: 'skipped: reviewPolicy is diff_only',
|
|
654
|
+
implementationReport: effectiveImplReport,
|
|
655
|
+
fileArtifactsMissing: implResult.status === 'ok' ? checkOutputTargets(outputTargets) : undefined,
|
|
656
|
+
agents: {
|
|
657
|
+
implementer: resolved.slot,
|
|
658
|
+
specReviewer: 'skipped',
|
|
659
|
+
qualityReviewer: 'skipped',
|
|
660
|
+
},
|
|
661
|
+
models: {
|
|
662
|
+
implementer: implModel,
|
|
663
|
+
specReviewer: reviewModel,
|
|
664
|
+
qualityReviewer: null,
|
|
665
|
+
},
|
|
666
|
+
}, verdict, verification, evidence.diffTruncated);
|
|
667
|
+
}
|
|
668
|
+
heartbeat?.transition({
|
|
669
|
+
stage: 'spec_review', stageIndex: 2,
|
|
670
|
+
reviewRound: 1, maxReviewRounds: task.maxReviewRounds ?? 5,
|
|
671
|
+
});
|
|
672
|
+
let specResult = await runSpecReview(otherProvider, packet, effectiveImplReport, fileContents, implResult.toolCalls, task.planContext, evidence.block);
|
|
673
|
+
let finalImplResult = implResult;
|
|
674
|
+
let finalImplReport = effectiveImplReport;
|
|
675
|
+
let specStatus = specResult.status;
|
|
676
|
+
let specReport = specResult.report;
|
|
677
|
+
if (specStatus === 'changes_required') {
|
|
678
|
+
let prevSpecFindings = [];
|
|
679
|
+
while (true) {
|
|
680
|
+
if (specRework + qualityRework >= maxReviewRounds) {
|
|
681
|
+
return abortReviewLoop(finalImplResult, 'round_cap', 'review round cap reached before spec rework', 'spec');
|
|
682
|
+
}
|
|
683
|
+
const currentCostUSD = taskCostUSD();
|
|
684
|
+
if (currentCostUSD !== null && maxCostUSD !== undefined && currentCostUSD >= 0.8 * maxCostUSD) {
|
|
685
|
+
emitVerbose('cost_check', { stage: 'spec_rework', tripped: true, cost_used_usd: currentCostUSD, cost_cap_usd: maxCostUSD, cost_available: true });
|
|
686
|
+
return abortReviewLoop(finalImplResult, 'cost_ceiling', 'cost ceiling reached before spec rework', 'spec');
|
|
687
|
+
}
|
|
688
|
+
emitVerbose('stage_change', { from: 'spec_review', to: 'spec_rework', round: specRework + 1, cap: maxReviewRounds });
|
|
689
|
+
specRework++;
|
|
690
|
+
const round = specRework;
|
|
691
|
+
heartbeat?.transition({
|
|
692
|
+
stage: 'spec_rework', stageIndex: 3,
|
|
693
|
+
reviewRound: round, maxReviewRounds,
|
|
694
|
+
});
|
|
695
|
+
const feedback = specResult.findings.length > 0
|
|
696
|
+
? `\n\n## Spec Review Feedback (round ${round}):\n${specResult.findings.map(f => `- ${f}`).join('\n')}`
|
|
697
|
+
: '';
|
|
698
|
+
const reworkPrompt = `${task.prompt}${feedback}`;
|
|
699
|
+
const reworkTask = withDoneCondition({ ...task, prompt: reworkPrompt });
|
|
700
|
+
const reworkResult = await delegateWithEscalation(reworkTask, [resolved.provider], { explicitlyPinned: true, onProgress: wrappedOnProgress });
|
|
701
|
+
finalImplResult = reworkResult;
|
|
702
|
+
const reworkReport = parseStructuredReport(reworkResult.output);
|
|
703
|
+
finalImplReport = reworkReport.summary ? reworkReport : buildFallbackImplReport(reworkResult);
|
|
704
|
+
const reworkContents = await readImplementerFileContents(reworkResult.filesWritten, task.cwd);
|
|
705
|
+
fileContents = reworkContents;
|
|
706
|
+
heartbeat?.transition({
|
|
707
|
+
stage: 'spec_review', stageIndex: 2,
|
|
708
|
+
reviewRound: round + 1, maxReviewRounds,
|
|
709
|
+
});
|
|
710
|
+
specResult = await runSpecReview(otherProvider, packet, finalImplReport, reworkContents, reworkResult.toolCalls, task.planContext, evidence.block);
|
|
711
|
+
specStatus = specResult.status;
|
|
712
|
+
specReport = specResult.report;
|
|
713
|
+
if (specStatus === 'approved')
|
|
714
|
+
break;
|
|
715
|
+
const currentFindings = [...specResult.findings].sort().join('\0');
|
|
716
|
+
const prevFindings = prevSpecFindings.sort().join('\0');
|
|
717
|
+
if (currentFindings === prevFindings && currentFindings !== '')
|
|
718
|
+
break;
|
|
719
|
+
prevSpecFindings = specResult.findings;
|
|
720
|
+
}
|
|
721
|
+
}
|
|
722
|
+
let qualityResult = { status: 'skipped', report: undefined, findings: [] };
|
|
723
|
+
if (reviewPolicy === 'full') {
|
|
724
|
+
heartbeat?.transition({
|
|
725
|
+
stage: 'quality_review', stageIndex: 4,
|
|
726
|
+
reviewRound: 1, maxReviewRounds,
|
|
727
|
+
});
|
|
728
|
+
qualityResult = await runQualityReview(otherProvider, packet, specReport ?? finalImplReport, fileContents, finalImplResult.toolCalls, finalImplResult.filesWritten, evidence.block);
|
|
729
|
+
if (qualityResult.status === 'changes_required') {
|
|
730
|
+
let prevQualityFindings = [];
|
|
731
|
+
while (true) {
|
|
732
|
+
if (specRework + qualityRework >= maxReviewRounds) {
|
|
733
|
+
return abortReviewLoop(finalImplResult, 'round_cap', 'review round cap reached before quality rework', 'quality');
|
|
734
|
+
}
|
|
735
|
+
const currentCostUSD = taskCostUSD();
|
|
736
|
+
if (currentCostUSD !== null && maxCostUSD !== undefined && currentCostUSD >= 0.8 * maxCostUSD) {
|
|
737
|
+
emitVerbose('cost_check', { stage: 'quality_rework', tripped: true, cost_used_usd: currentCostUSD, cost_cap_usd: maxCostUSD, cost_available: true });
|
|
738
|
+
return abortReviewLoop(finalImplResult, 'cost_ceiling', 'cost ceiling reached before quality rework', 'quality');
|
|
739
|
+
}
|
|
740
|
+
emitVerbose('stage_change', { from: 'quality_review', to: 'quality_rework', round: qualityRework + 1, cap: maxReviewRounds });
|
|
741
|
+
qualityRework++;
|
|
742
|
+
const round = qualityRework;
|
|
743
|
+
heartbeat?.transition({
|
|
744
|
+
stage: 'quality_rework', stageIndex: 5,
|
|
745
|
+
reviewRound: round, maxReviewRounds,
|
|
746
|
+
});
|
|
747
|
+
const feedback = qualityResult.findings.length > 0
|
|
748
|
+
? `\n\n## Quality Review Feedback (round ${round}):\n${qualityResult.findings.map(f => `- ${f}`).join('\n')}`
|
|
749
|
+
: '';
|
|
750
|
+
const reworkPrompt = `${task.prompt}${feedback}`;
|
|
751
|
+
const reworkTask = withDoneCondition({ ...task, prompt: reworkPrompt });
|
|
752
|
+
const reworkResult = await delegateWithEscalation(reworkTask, [resolved.provider], { explicitlyPinned: true, onProgress: wrappedOnProgress });
|
|
753
|
+
finalImplResult = reworkResult;
|
|
754
|
+
const reworkReport = parseStructuredReport(reworkResult.output);
|
|
755
|
+
finalImplReport = reworkReport.summary ? reworkReport : buildFallbackImplReport(reworkResult);
|
|
756
|
+
const reworkContents = await readImplementerFileContents(reworkResult.filesWritten, task.cwd);
|
|
757
|
+
heartbeat?.transition({
|
|
758
|
+
stage: 'quality_review', stageIndex: 4,
|
|
759
|
+
reviewRound: round + 1, maxReviewRounds,
|
|
760
|
+
});
|
|
761
|
+
qualityResult = await runQualityReview(otherProvider, packet, finalImplReport, reworkContents, reworkResult.toolCalls, reworkResult.filesWritten, evidence.block);
|
|
762
|
+
if (qualityResult.status === 'approved')
|
|
763
|
+
break;
|
|
764
|
+
const currentFindings = [...qualityResult.findings].sort().join('\0');
|
|
765
|
+
const prevFindings = prevQualityFindings.sort().join('\0');
|
|
766
|
+
if (currentFindings === prevFindings && currentFindings !== '')
|
|
767
|
+
break;
|
|
768
|
+
prevQualityFindings = qualityResult.findings;
|
|
769
|
+
}
|
|
770
|
+
}
|
|
771
|
+
}
|
|
772
|
+
const finalReport = specReport ?? finalImplReport;
|
|
773
|
+
const concerns = [...(finalImplResult.concerns ?? [])];
|
|
774
|
+
let finalWorkerStatus = workerStatus;
|
|
775
|
+
if (verification.status === 'failed') {
|
|
776
|
+
concerns.push({
|
|
777
|
+
source: 'verification',
|
|
778
|
+
severity: 'high',
|
|
779
|
+
message: 'Verification failed after implementation.',
|
|
780
|
+
});
|
|
781
|
+
if (finalWorkerStatus === 'done')
|
|
782
|
+
finalWorkerStatus = 'done_with_concerns';
|
|
783
|
+
}
|
|
784
|
+
if (evidence.diffTruncated) {
|
|
785
|
+
concerns.push({
|
|
786
|
+
source: 'diff_truncated',
|
|
787
|
+
severity: 'medium',
|
|
788
|
+
message: 'Implementation diff exceeded the reviewer evidence byte cap and was truncated.',
|
|
789
|
+
});
|
|
790
|
+
}
|
|
791
|
+
const aggregated = aggregateResult(finalReport, specReport, qualityResult.report, specStatus, qualityResult.status);
|
|
792
|
+
// File artifact verification: check whether output targets exist on disk after all work.
|
|
793
|
+
// Only applies when status is ok; non-ok statuses skip verification entirely.
|
|
794
|
+
const fileArtifactsMissing = finalImplResult.status === 'ok' && outputTargets.length > 0
|
|
795
|
+
? checkOutputTargets(outputTargets)
|
|
796
|
+
: undefined;
|
|
797
|
+
// Status downgrade: review verdicts are authoritative. File artifact verification
|
|
798
|
+
// is also authoritative — missing output targets downgrade ok → incomplete.
|
|
799
|
+
const finalStatus = finalImplResult.status === 'ok' &&
|
|
800
|
+
(specStatus === 'changes_required' || qualityResult.status === 'changes_required')
|
|
801
|
+
? 'incomplete'
|
|
802
|
+
: finalImplResult.status === 'ok' && fileArtifactsMissing
|
|
803
|
+
? 'incomplete'
|
|
804
|
+
: finalImplResult.status;
|
|
805
|
+
return {
|
|
806
|
+
...finalImplResult,
|
|
807
|
+
status: finalStatus,
|
|
808
|
+
workerStatus: finalWorkerStatus,
|
|
809
|
+
concerns,
|
|
810
|
+
specReviewStatus: specStatus,
|
|
811
|
+
qualityReviewStatus: qualityResult.status,
|
|
812
|
+
specReviewReason: specResult.errorReason,
|
|
813
|
+
qualityReviewReason: qualityResult.errorReason,
|
|
814
|
+
structuredReport: aggregated,
|
|
815
|
+
implementationReport: finalImplReport,
|
|
816
|
+
specReviewReport: specReport,
|
|
817
|
+
qualityReviewReport: qualityResult.report,
|
|
818
|
+
filePathsSkipped,
|
|
819
|
+
agents: {
|
|
820
|
+
implementer: resolved.slot,
|
|
821
|
+
specReviewer: otherSlot,
|
|
822
|
+
qualityReviewer: reviewPolicy === 'full' ? otherSlot : 'skipped',
|
|
823
|
+
},
|
|
824
|
+
models: {
|
|
825
|
+
implementer: implModel,
|
|
826
|
+
specReviewer: reviewModel,
|
|
827
|
+
qualityReviewer: reviewPolicy === 'full' ? reviewModel : null,
|
|
828
|
+
},
|
|
829
|
+
fileArtifactsMissing,
|
|
830
|
+
commits,
|
|
831
|
+
commitError,
|
|
832
|
+
verification,
|
|
833
|
+
};
|
|
834
|
+
}
|
|
835
|
+
finally {
|
|
836
|
+
heartbeat?.stop();
|
|
837
|
+
}
|
|
838
|
+
}
|
|
839
|
+
//# sourceMappingURL=reviewed-lifecycle.js.map
|