@zhixuan92/multi-model-agent-core 3.2.0 → 3.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (101)
  1. package/dist/auto-commit.d.ts +8 -1
  2. package/dist/auto-commit.d.ts.map +1 -1
  3. package/dist/auto-commit.js +6 -3
  4. package/dist/auto-commit.js.map +1 -1
  5. package/dist/batch-cache.d.ts +1 -1
  6. package/dist/batch-cache.d.ts.map +1 -1
  7. package/dist/batch-cache.js +3 -5
  8. package/dist/batch-cache.js.map +1 -1
  9. package/dist/diagnostics/request-spill.d.ts +16 -0
  10. package/dist/diagnostics/request-spill.d.ts.map +1 -0
  11. package/dist/diagnostics/request-spill.js +23 -0
  12. package/dist/diagnostics/request-spill.js.map +1 -0
  13. package/dist/diagnostics/verbose-line.d.ts +12 -0
  14. package/dist/diagnostics/verbose-line.d.ts.map +1 -0
  15. package/dist/diagnostics/verbose-line.js +80 -0
  16. package/dist/diagnostics/verbose-line.js.map +1 -0
  17. package/dist/executors/debug.js +1 -1
  18. package/dist/executors/debug.js.map +1 -1
  19. package/dist/executors/delegate.d.ts.map +1 -1
  20. package/dist/executors/delegate.js +6 -2
  21. package/dist/executors/delegate.js.map +1 -1
  22. package/dist/executors/execute-plan.d.ts.map +1 -1
  23. package/dist/executors/execute-plan.js +9 -2
  24. package/dist/executors/execute-plan.js.map +1 -1
  25. package/dist/executors/retry.d.ts.map +1 -1
  26. package/dist/executors/retry.js +4 -1
  27. package/dist/executors/retry.js.map +1 -1
  28. package/dist/heartbeat.d.ts +7 -0
  29. package/dist/heartbeat.d.ts.map +1 -1
  30. package/dist/heartbeat.js +28 -1
  31. package/dist/heartbeat.js.map +1 -1
  32. package/dist/intake/compilers/delegate.d.ts +3 -1
  33. package/dist/intake/compilers/delegate.d.ts.map +1 -1
  34. package/dist/intake/compilers/delegate.js +23 -12
  35. package/dist/intake/compilers/delegate.js.map +1 -1
  36. package/dist/intake/compilers/execute-plan.d.ts +6 -1
  37. package/dist/intake/compilers/execute-plan.d.ts.map +1 -1
  38. package/dist/intake/compilers/execute-plan.js +8 -1
  39. package/dist/intake/compilers/execute-plan.js.map +1 -1
  40. package/dist/intake/resolve.js +1 -1
  41. package/dist/intake/resolve.js.map +1 -1
  42. package/dist/intake/types.d.ts +1 -0
  43. package/dist/intake/types.d.ts.map +1 -1
  44. package/dist/reporting/structured-report.d.ts +19 -0
  45. package/dist/reporting/structured-report.d.ts.map +1 -1
  46. package/dist/reporting/structured-report.js +50 -1
  47. package/dist/reporting/structured-report.js.map +1 -1
  48. package/dist/review/diff-review.d.ts +29 -0
  49. package/dist/review/diff-review.d.ts.map +1 -0
  50. package/dist/review/diff-review.js +53 -0
  51. package/dist/review/diff-review.js.map +1 -0
  52. package/dist/review/evidence.d.ts +15 -0
  53. package/dist/review/evidence.d.ts.map +1 -0
  54. package/dist/review/evidence.js +26 -0
  55. package/dist/review/evidence.js.map +1 -0
  56. package/dist/review/quality-reviewer.d.ts +1 -1
  57. package/dist/review/quality-reviewer.d.ts.map +1 -1
  58. package/dist/review/quality-reviewer.js +5 -3
  59. package/dist/review/quality-reviewer.js.map +1 -1
  60. package/dist/review/spec-reviewer.d.ts +1 -1
  61. package/dist/review/spec-reviewer.d.ts.map +1 -1
  62. package/dist/review/spec-reviewer.js +3 -2
  63. package/dist/review/spec-reviewer.js.map +1 -1
  64. package/dist/run-tasks/commit-stage.d.ts +16 -0
  65. package/dist/run-tasks/commit-stage.d.ts.map +1 -0
  66. package/dist/run-tasks/commit-stage.js +43 -0
  67. package/dist/run-tasks/commit-stage.js.map +1 -0
  68. package/dist/run-tasks/metadata-repair.d.ts +15 -0
  69. package/dist/run-tasks/metadata-repair.d.ts.map +1 -0
  70. package/dist/run-tasks/metadata-repair.js +30 -0
  71. package/dist/run-tasks/metadata-repair.js.map +1 -0
  72. package/dist/run-tasks/reviewed-lifecycle.d.ts.map +1 -1
  73. package/dist/run-tasks/reviewed-lifecycle.js +443 -68
  74. package/dist/run-tasks/reviewed-lifecycle.js.map +1 -1
  75. package/dist/run-tasks/verify-stage.d.ts +25 -0
  76. package/dist/run-tasks/verify-stage.d.ts.map +1 -0
  77. package/dist/run-tasks/verify-stage.js +168 -0
  78. package/dist/run-tasks/verify-stage.js.map +1 -0
  79. package/dist/runners/base/result-builders.d.ts +26 -1
  80. package/dist/runners/base/result-builders.d.ts.map +1 -1
  81. package/dist/runners/base/result-builders.js +5 -0
  82. package/dist/runners/base/result-builders.js.map +1 -1
  83. package/dist/runners/prevention.d.ts.map +1 -1
  84. package/dist/runners/prevention.js +18 -0
  85. package/dist/runners/prevention.js.map +1 -1
  86. package/dist/runners/types.d.ts +4 -1
  87. package/dist/runners/types.d.ts.map +1 -1
  88. package/dist/tool-schemas/audit.d.ts +2 -2
  89. package/dist/tool-schemas/delegate.d.ts +9 -0
  90. package/dist/tool-schemas/delegate.d.ts.map +1 -1
  91. package/dist/tool-schemas/delegate.js +4 -0
  92. package/dist/tool-schemas/delegate.js.map +1 -1
  93. package/dist/tool-schemas/execute-plan.d.ts +13 -2
  94. package/dist/tool-schemas/execute-plan.d.ts.map +1 -1
  95. package/dist/tool-schemas/execute-plan.js +22 -4
  96. package/dist/tool-schemas/execute-plan.js.map +1 -1
  97. package/dist/tool-schemas/review.d.ts +1 -1
  98. package/dist/types.d.ts +33 -4
  99. package/dist/types.d.ts.map +1 -1
  100. package/dist/types.js.map +1 -1
  101. package/package.json +29 -1
@@ -1,16 +1,24 @@
1
+ import { execFile } from 'node:child_process';
2
+ import { promisify } from 'node:util';
1
3
  import { computeCostUSD, computeSavedCostUSD } from '../types.js';
2
4
  import { createProvider } from '../provider.js';
3
5
  import { delegateWithEscalation } from '../delegate-with-escalation.js';
4
6
  import { HeartbeatTimer } from '../heartbeat.js';
5
7
  import { runSpecReview } from '../review/spec-reviewer.js';
6
8
  import { runQualityReview } from '../review/quality-reviewer.js';
9
+ import { runDiffReview } from '../review/diff-review.js';
7
10
  import { aggregateResult } from '../review/aggregate-result.js';
11
+ import { buildEvidence } from '../review/evidence.js';
8
12
  import { parseStructuredReport } from '../reporting/structured-report.js';
9
- import { autoCommitFiles } from '../auto-commit.js';
13
+ import { runCommitStage, readbackCommit } from './commit-stage.js';
14
+ import { runVerifyStage } from './verify-stage.js';
15
+ import { runMetadataRepairTurn } from './metadata-repair.js';
10
16
  import { partitionFilePaths, checkOutputTargets } from '../file-artifact-check.js';
11
17
  import { extractWorkerStatus } from './worker-status.js';
12
18
  import { buildFallbackImplReport, readImplementerFileContents } from './fallback-report.js';
19
+ import { composeVerboseLine } from '../diagnostics/verbose-line.js';
13
20
  import { withDoneCondition } from './execute-task.js';
21
+ const exec = promisify(execFile);
14
22
  export async function executeReviewedLifecycle(task, resolved, config, taskIndex, onProgress, heartbeatWiring, diagnostics) {
15
23
  const reviewPolicy = task.reviewPolicy ?? 'full';
16
24
  const otherSlot = resolved.slot === 'standard' ? 'complex' : 'standard';
@@ -57,16 +65,34 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
57
65
  // per-5s progress inside a long-running stage.
58
66
  if (event.stage !== lastStageSeen) {
59
67
  if (lastStageSeen !== undefined) {
60
- verboseStreamRaw(`[mmagent verbose] batch=${shortBatchEarly} task=${taskIndex} stage ${lastStageSeen} → ${event.stage}`);
68
+ verboseStreamRaw(composeVerboseLine({
69
+ event: 'stage_change',
70
+ ts: new Date().toISOString(),
71
+ batch: shortBatchEarly,
72
+ task: taskIndex,
73
+ from: lastStageSeen,
74
+ to: event.stage,
75
+ }));
61
76
  }
62
77
  lastStageSeen = event.stage;
63
78
  }
64
- const costStr = event.costUSD !== null ? ` cost=$${event.costUSD.toFixed(4)}` : '';
65
- const roundStr = event.reviewRound !== undefined && event.maxReviewRounds !== undefined
66
- ? ` round=${event.reviewRound}/${event.maxReviewRounds}`
67
- : '';
68
79
  const sinceLastMs = Date.now() - prevEventAtMs;
69
- verboseStreamRaw(`[mmagent verbose] batch=${shortBatchEarly} task=${taskIndex} heartbeat ${event.elapsed} stage=${event.stage}${roundStr} tools=${event.progress.toolCalls} read=${event.progress.filesRead} wrote=${event.progress.filesWritten} text=${textEmissionChars}c${costStr} idle=${sinceLastMs}ms`);
80
+ verboseStreamRaw(composeVerboseLine({
81
+ event: 'heartbeat',
82
+ ts: new Date().toISOString(),
83
+ batch: shortBatchEarly,
84
+ task: taskIndex,
85
+ elapsed: event.elapsed,
86
+ stage: event.stage,
87
+ round: event.reviewRound,
88
+ cap: event.maxReviewRounds,
89
+ tools: event.progress.toolCalls,
90
+ read: event.progress.filesRead,
91
+ wrote: event.progress.filesWritten,
92
+ text: textEmissionChars,
93
+ cost: event.costUSD,
94
+ idle_ms: sinceLastMs,
95
+ }));
70
96
  }
71
97
  synthOnProgress(taskIndex, event);
72
98
  }, {
@@ -78,8 +104,16 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
78
104
  : undefined;
79
105
  heartbeat?.start(stageCount);
80
106
  if (verboseStreamRaw) {
81
- verboseStreamRaw(`[mmagent verbose] batch=${shortBatchEarly} task=${taskIndex} heartbeat ` +
82
- (heartbeat ? `started (stageCount=${stageCount}, 5s tick)` : 'DISABLED (no consumer)'));
107
+ verboseStreamRaw(composeVerboseLine({
108
+ event: 'heartbeat_timer',
109
+ ts: new Date().toISOString(),
110
+ batch: shortBatchEarly,
111
+ task: taskIndex,
112
+ state: heartbeat ? 'started' : 'disabled',
113
+ stage_count: stageCount,
114
+ tick_ms: heartbeat ? 5000 : undefined,
115
+ reason: heartbeat ? undefined : 'no_consumer',
116
+ }));
83
117
  }
84
118
  const implModel = resolved.provider.config.model;
85
119
  const progressCounters = { filesRead: 0, filesWritten: 0, toolCalls: 0 };
@@ -88,7 +122,13 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
88
122
  const verboseStream = verboseStreamRaw;
89
123
  const shortBatch = shortBatchEarly;
90
124
  if (verboseStream) {
91
- verboseStream(`[mmagent verbose] batch=${shortBatch} task=${taskIndex} start worker=${resolved.provider.config.model}`);
125
+ verboseStream(composeVerboseLine({
126
+ event: 'worker_start',
127
+ ts: new Date().toISOString(),
128
+ batch: shortBatch,
129
+ task: taskIndex,
130
+ worker: resolved.provider.config.model,
131
+ }));
92
132
  }
93
133
  let prevEventAtMs = verbose ? Date.now() : 0;
94
134
  // Wrap whenever we have ANY consumer for InternalRunnerEvent (heartbeat,
@@ -99,22 +139,40 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
99
139
  const wrappedOnProgress = needHeartbeat
100
140
  ? (event) => {
101
141
  if (event.kind === 'turn_start') {
142
+ heartbeat?.markEvent('llm');
102
143
  if (verbose)
103
144
  prevEventAtMs = Date.now();
104
145
  if (verboseStream) {
105
- verboseStream(`[mmagent verbose] batch=${shortBatch} task=${taskIndex} turn_start turn=${event.turn} provider=${event.provider}`);
146
+ verboseStream(composeVerboseLine({
147
+ event: 'turn_start',
148
+ ts: new Date().toISOString(),
149
+ batch: shortBatch,
150
+ task: taskIndex,
151
+ turn: event.turn,
152
+ provider: event.provider,
153
+ }));
106
154
  }
107
155
  }
108
156
  if (event.kind === 'text_emission') {
157
+ heartbeat?.markEvent('text');
109
158
  textEmissionChars += event.chars;
110
159
  if (verboseStream && event.chars > 0) {
111
160
  const preview = event.preview.length > 60
112
161
  ? event.preview.slice(0, 57) + '...'
113
162
  : event.preview;
114
- verboseStream(`[mmagent verbose] batch=${shortBatch} task=${taskIndex} text +${event.chars}c (total ${textEmissionChars}) preview="${preview.replace(/\n/g, '\\n')}"`);
163
+ verboseStream(composeVerboseLine({
164
+ event: 'text_emission',
165
+ ts: new Date().toISOString(),
166
+ batch: shortBatch,
167
+ task: taskIndex,
168
+ chars: event.chars,
169
+ total: textEmissionChars,
170
+ preview,
171
+ }));
115
172
  }
116
173
  }
117
174
  if (event.kind === 'tool_call') {
175
+ heartbeat?.markEvent('tool');
118
176
  progressCounters.toolCalls++;
119
177
  const name = event.toolSummary.split('(')[0];
120
178
  if (name === 'readFile' || name === 'grep' || name === 'glob' || name === 'listFiles') {
@@ -137,10 +195,18 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
137
195
  });
138
196
  }
139
197
  if (verboseStream) {
140
- verboseStream(`[mmagent verbose] batch=${shortBatch} task=${taskIndex} tool=${event.toolSummary} +${sincePrevMs}ms`);
198
+ verboseStream(composeVerboseLine({
199
+ event: 'tool_call',
200
+ ts: new Date().toISOString(),
201
+ batch: shortBatch,
202
+ task: taskIndex,
203
+ tool: event.toolSummary,
204
+ duration_ms: sincePrevMs,
205
+ }));
141
206
  }
142
207
  }
143
208
  if (event.kind === 'turn_complete') {
209
+ heartbeat?.markEvent('llm');
144
210
  const costUSD = computeCostUSD(event.cumulativeInputTokens, event.cumulativeOutputTokens, resolved.provider.config);
145
211
  const savedCostUSD = computeSavedCostUSD(costUSD, event.cumulativeInputTokens, event.cumulativeOutputTokens, task.parentModel);
146
212
  heartbeat?.updateCost(costUSD, savedCostUSD);
@@ -160,27 +226,277 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
160
226
  });
161
227
  }
162
228
  if (verboseStream) {
163
- const costStr = costUSD !== null ? ` $${costUSD.toFixed(4)}` : '';
164
- verboseStream(`[mmagent verbose] batch=${shortBatch} task=${taskIndex} ` +
165
- `turn in=${event.cumulativeInputTokens} out=${event.cumulativeOutputTokens}${costStr} ` +
166
- `+${turnDurMs}ms (${resolved.provider.config.model})`);
229
+ verboseStream(composeVerboseLine({
230
+ event: 'turn_complete',
231
+ ts: new Date().toISOString(),
232
+ batch: shortBatch,
233
+ task: taskIndex,
234
+ input_tokens: event.cumulativeInputTokens,
235
+ output_tokens: event.cumulativeOutputTokens,
236
+ cost: costUSD,
237
+ duration_ms: turnDurMs,
238
+ provider: resolved.provider.config.model,
239
+ }));
167
240
  }
168
241
  }
169
242
  }
170
243
  : undefined;
171
- // Track auto-commit state across all rounds
172
- let commitSha;
244
+ const cwd = task.cwd ?? process.cwd();
245
+ const taskStartMs = Date.now();
246
+ const commits = [];
173
247
  let commitError;
248
+ let specRework = 0;
249
+ let qualityRework = 0;
250
+ let metadataRepair = 0;
251
+ const maxReviewRounds = task.maxReviewRounds ?? 3;
252
+ const maxCostUSD = task.maxCostUSD;
253
+ const reviewRounds = () => ({ spec: specRework, quality: qualityRework, metadata: metadataRepair, cap: maxReviewRounds });
254
+ const taskCostUSD = () => (heartbeat ? heartbeat.getHeartbeatTickInfo().costUSD : null);
255
+ // When the review loop aborts mid-flight, preserve any review-status info already set
256
+ // on the base result (set by callers via abortReviewLoop({ ...res, specReviewStatus, ... })).
257
+ // Defaults to 'changes_required' for whichever loop tripped — that's the only state the
258
+ // loop ever fires from, by construction.
259
+ const abortReviewLoop = (base, terminationReason, message, aborting) => ({
260
+ ...base,
261
+ status: 'incomplete',
262
+ workerStatus: 'review_loop_aborted',
263
+ terminationReason,
264
+ reviewRounds: reviewRounds(),
265
+ error: message,
266
+ specReviewStatus: aborting === 'spec' ? 'changes_required' : (base.specReviewStatus ?? 'approved'),
267
+ qualityReviewStatus: aborting === 'quality' ? 'changes_required' : (base.qualityReviewStatus ?? 'skipped'),
268
+ });
269
+ const defaultVerification = { status: 'skipped', steps: [], totalDurationMs: 0, skipReason: 'no_command' };
270
+ let latestVerification = defaultVerification;
271
+ const emitVerbose = (event, fields) => {
272
+ if (!verboseStream)
273
+ return;
274
+ verboseStream(composeVerboseLine({
275
+ event,
276
+ ts: new Date().toISOString(),
277
+ batch: shortBatch,
278
+ task: taskIndex,
279
+ ...fields,
280
+ }));
281
+ };
282
+ async function runVerificationStage() {
283
+ emitVerbose('stage_change', { from: 'committing', to: 'verifying' });
284
+ heartbeat?.transition({
285
+ stage: 'verifying',
286
+ stageIndex: 4,
287
+ reviewRound: undefined,
288
+ maxReviewRounds: task.maxReviewRounds ?? 5,
289
+ });
290
+ const verification = await runVerifyStage({
291
+ cwd,
292
+ verifyCommand: task.verifyCommand,
293
+ taskTimeoutMs: task.timeoutMs ?? config.defaults.timeoutMs ?? 1_800_000,
294
+ taskStartMs,
295
+ });
296
+ latestVerification = verification;
297
+ for (const step of verification.steps) {
298
+ emitVerbose('verify_step', {
299
+ command: step.command,
300
+ status: step.status,
301
+ exit_code: step.exitCode,
302
+ signal: step.signal,
303
+ duration_ms: step.durationMs,
304
+ error_message: step.errorMessage ?? undefined,
305
+ });
306
+ }
307
+ if (verification.status === 'skipped') {
308
+ emitVerbose('verify_skipped', { reason: verification.skipReason ?? 'no_command', stage: 'verifying' });
309
+ }
310
+ return verification;
311
+ }
312
+ function withVerification(result, verification = latestVerification) {
313
+ return { ...result, verification };
314
+ }
315
+ function verificationErrorResult(base, verification) {
316
+ if (verification.status !== 'error')
317
+ return null;
318
+ const failedIndex = verification.steps.findIndex((step) => step.status !== 'passed');
319
+ const failedStep = failedIndex >= 0 ? verification.steps[failedIndex] : undefined;
320
+ return withVerification({
321
+ ...base,
322
+ status: 'error',
323
+ workerStatus: 'done_with_concerns',
324
+ error: failedStep?.errorMessage ?? 'verify command error',
325
+ errorCode: 'verify_command_error',
326
+ commits,
327
+ commitError,
328
+ verification,
329
+ }, verification);
330
+ }
331
+ function resolveOffTerminal(base, verification) {
332
+ const concerns = [...(base.concerns ?? [])];
333
+ let workerStatus = workerStatusForTerminal(base.workerStatus);
334
+ if (verification.status === 'failed') {
335
+ concerns.push({
336
+ source: 'verification',
337
+ severity: 'high',
338
+ message: 'Verification failed after implementation.',
339
+ });
340
+ workerStatus = 'done_with_concerns';
341
+ }
342
+ if (verification.status === 'error') {
343
+ const failedIndex = verification.steps.findIndex((step) => step.status !== 'passed');
344
+ const failedStep = failedIndex >= 0 ? verification.steps[failedIndex] : undefined;
345
+ return withVerification({
346
+ ...base,
347
+ status: 'error',
348
+ workerStatus: 'failed',
349
+ error: failedStep?.errorMessage ?? 'verify command error',
350
+ errorCode: 'verify_command_error',
351
+ commits,
352
+ commitError,
353
+ verification,
354
+ }, verification);
355
+ }
356
+ return withVerification({
357
+ ...base,
358
+ status: base.status === 'ok' ? 'ok' : base.status,
359
+ workerStatus,
360
+ concerns,
361
+ commits,
362
+ commitError,
363
+ verification,
364
+ }, verification);
365
+ }
366
+ function resolveDiffOnlyTerminal(base, verdict, verification, diffTruncated) {
367
+ const concerns = [...(base.concerns ?? [])];
368
+ if (verdict.kind === 'reject') {
369
+ return withVerification({
370
+ ...base,
371
+ status: 'error',
372
+ workerStatus: 'failed',
373
+ error: verdict.message || 'diff review rejected implementation',
374
+ errorCode: 'diff_review_rejected',
375
+ structuredError: {
376
+ code: 'diff_review_rejected',
377
+ message: verdict.message || 'diff review rejected implementation',
378
+ },
379
+ concerns,
380
+ commits,
381
+ commitError,
382
+ verification,
383
+ }, verification);
384
+ }
385
+ concerns.push(...verdict.concerns);
386
+ if (verification.status === 'failed') {
387
+ concerns.push({
388
+ source: 'verification',
389
+ severity: 'high',
390
+ message: 'Verification failed after implementation.',
391
+ });
392
+ }
393
+ if (diffTruncated) {
394
+ concerns.push({
395
+ source: 'diff_truncated',
396
+ severity: 'medium',
397
+ message: 'Implementation diff exceeded the reviewer evidence byte cap and was truncated.',
398
+ });
399
+ }
400
+ const hasConcerns = concerns.length > 0 || verification.status === 'failed';
401
+ return withVerification({
402
+ ...base,
403
+ status: base.status === 'ok' ? 'ok' : base.status,
404
+ workerStatus: hasConcerns ? 'done_with_concerns' : workerStatusForTerminal(base.workerStatus),
405
+ concerns,
406
+ commits,
407
+ commitError,
408
+ verification,
409
+ }, verification);
410
+ }
411
+ function workerStatusForTerminal(status) {
412
+ return status === 'needs_context' || status === 'blocked' || status === 'failed' || status === 'done_with_concerns'
413
+ ? status
414
+ : 'done';
415
+ }
416
+ async function recordWorkerCommits(from, to = 'HEAD') {
417
+ const { stdout: revs } = await exec('git', ['rev-list', '--reverse', `${from}..${to}`], { cwd });
418
+ for (const sha of revs.trim().split('\n').filter(Boolean)) {
419
+ const c = await readbackCommit(sha, cwd);
420
+ commits.push(c);
421
+ }
422
+ }
423
+ async function repairCommitMetadata(initialDiagnostic) {
424
+ let metadataAttempts = 0;
425
+ let lastZodError = initialDiagnostic || 'no commit block emitted';
426
+ let validCommit = null;
427
+ while (metadataAttempts < 2 && !validCommit) {
428
+ const preStatus = (await exec('git', ['status', '--porcelain=v1', '-z'], { cwd })).stdout;
429
+ const repaired = await runMetadataRepairTurn({ task, zodError: lastZodError, cwd, providerSlot: resolved.slot, provider: resolved.provider });
430
+ const postStatus = (await exec('git', ['status', '--porcelain=v1', '-z'], { cwd })).stdout;
431
+ metadataAttempts += 1;
432
+ if (preStatus !== postStatus) {
433
+ commitError = 'commit_metadata_repair_modified_files';
434
+ return null;
435
+ }
436
+ if (repaired.commit)
437
+ validCommit = repaired.commit;
438
+ else
439
+ lastZodError = repaired.commitDiagnostic ?? 'no commit block emitted';
440
+ }
441
+ if (!validCommit)
442
+ commitError = `commit_metadata_invalid: ${lastZodError}`;
443
+ return validCommit;
444
+ }
445
+ async function captureCommitsAfterImplementation(implResult, implReport, baselineHead) {
446
+ const porcelain = (await exec('git', ['status', '--porcelain=v1'], { cwd })).stdout;
447
+ const headNow = (await exec('git', ['rev-parse', 'HEAD'], { cwd })).stdout.trim();
448
+ const headMoved = headNow !== baselineHead;
449
+ const treeDirty = porcelain.length > 0;
450
+ if (!headMoved && !treeDirty)
451
+ return;
452
+ if (headMoved)
453
+ await recordWorkerCommits(baselineHead, 'HEAD');
454
+ if (treeDirty) {
455
+ const validCommit = implReport?.commit ?? await repairCommitMetadata(implReport?.commitDiagnostic ?? 'no commit block emitted');
456
+ if (!validCommit)
457
+ return;
458
+ const c = await runCommitStage({ cwd, filesWritten: implResult.filesWritten, commit: validCommit });
459
+ commits.push(c);
460
+ }
461
+ }
174
462
  try {
463
+ // The dirty-tree precondition + git baseline only apply to artifact-producing tasks
464
+ // (those with autoCommit === true). Non-artifact presets — audit, review, verify,
465
+ // debug — neither produce commits nor read git state, so they bypass the check
466
+ // entirely. Per spec Section A: "Non-artifact tasks (audits, analyses, read-only
467
+ // investigations) skip stages 3 and 4."
468
+ const isArtifactProducing = task.autoCommit === true;
469
+ let baselineHead = '';
470
+ if (isArtifactProducing) {
471
+ baselineHead = (await exec('git', ['rev-parse', 'HEAD'], { cwd })).stdout.trim();
472
+ const baselinePorcelain = (await exec('git', ['status', '--porcelain=v1', '-z'], { cwd })).stdout;
473
+ if (baselinePorcelain.length !== 0) {
474
+ return withVerification({
475
+ output: `Sub-agent error: task.cwd ${cwd} had pre-existing modifications`,
476
+ status: 'error',
477
+ usage: { inputTokens: 0, outputTokens: 0, totalTokens: 0, costUSD: null },
478
+ turns: 0,
479
+ filesRead: [],
480
+ filesWritten: [],
481
+ toolCalls: [],
482
+ outputIsDiagnostic: true,
483
+ escalationLog: [],
484
+ error: `task.cwd ${cwd} had pre-existing modifications`,
485
+ errorCode: 'dirty_worktree',
486
+ commits,
487
+ });
488
+ }
489
+ }
175
490
  const implResult = await delegateWithEscalation(withDoneCondition(task), [resolved.provider], { explicitlyPinned: false, escalateToProvider: escalationProvider, onProgress: wrappedOnProgress });
176
491
  const implReport = implResult.status === 'ok' ? parseStructuredReport(implResult.output) : undefined;
177
492
  const workerStatus = extractWorkerStatus(implReport);
178
- // Auto-commit: commit the worker's file changes
179
- if (task.autoCommit && implResult.status === 'ok' && implResult.filesWritten.length > 0) {
180
- const commitResult = autoCommitFiles(implResult.filesWritten, implReport?.summary ?? undefined, task.cwd ?? process.cwd());
181
- commitSha = commitResult.sha;
182
- commitError = commitResult.error;
493
+ if (implResult.status === 'ok' && isArtifactProducing) {
494
+ await captureCommitsAfterImplementation(implResult, implReport, baselineHead);
183
495
  }
496
+ const verification = isArtifactProducing ? await runVerificationStage() : defaultVerification;
497
+ const verifyError = verificationErrorResult(implResult, verification);
498
+ if (verifyError)
499
+ return verifyError;
184
500
  const filePathsInteracted = task.filePaths && task.filePaths.length > 0
185
501
  ? [...(implResult.filesRead ?? []), ...implResult.filesWritten].some(f => task.filePaths.some(fp => f === fp || f.endsWith('/' + fp) || f.endsWith(fp)))
186
502
  : true;
@@ -220,8 +536,9 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
220
536
  qualityReviewer: null,
221
537
  },
222
538
  fileArtifactsMissing: earlyFileArtifactsMissing,
223
- commitSha,
539
+ commits,
224
540
  commitError,
541
+ verification,
225
542
  };
226
543
  }
227
544
  if (workerStatus === 'needs_context' || workerStatus === 'blocked') {
@@ -243,12 +560,14 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
243
560
  qualityReviewer: null,
244
561
  },
245
562
  fileArtifactsMissing: implResult.status === 'ok' ? checkOutputTargets(outputTargets) : undefined,
246
- commitSha,
563
+ commits,
247
564
  commitError,
565
+ verification,
248
566
  };
249
567
  }
250
568
  if (reviewPolicy === 'off') {
251
- return {
569
+ emitVerbose('stage_change', { from: 'verifying', to: 'terminal' });
570
+ const terminal = resolveOffTerminal({
252
571
  ...implResult,
253
572
  workerStatus,
254
573
  specReviewStatus: 'skipped',
@@ -267,9 +586,8 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
267
586
  },
268
587
  implementationReport: implReport,
269
588
  fileArtifactsMissing: implResult.status === 'ok' ? checkOutputTargets(outputTargets) : undefined,
270
- commitSha,
271
- commitError,
272
- };
589
+ }, verification);
590
+ return terminal;
273
591
  }
274
592
  let otherProvider;
275
593
  try {
@@ -294,8 +612,9 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
294
612
  qualityReviewer: null,
295
613
  },
296
614
  fileArtifactsMissing: implResult.status === 'ok' ? checkOutputTargets(outputTargets) : undefined,
297
- commitSha,
615
+ commits,
298
616
  commitError,
617
+ verification,
299
618
  };
300
619
  }
301
620
  const reviewModel = otherProvider.config.model;
@@ -306,23 +625,72 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
306
625
  };
307
626
  let fileContents = await readImplementerFileContents(implResult.filesWritten, task.cwd);
308
627
  const effectiveImplReport = implReport ?? buildFallbackImplReport(implResult);
628
+ const evidence = isArtifactProducing
629
+ ? await buildEvidence({ cwd, baselineHead, commits, verification, reviewPolicy })
630
+ : { block: '', diffTruncated: false, fullDiff: '' };
631
+ if (reviewPolicy === 'diff_only') {
632
+ emitVerbose('stage_change', { from: 'verifying', to: 'diff_review' });
633
+ heartbeat?.transition({
634
+ stage: 'diff_review',
635
+ stageIndex: 2,
636
+ reviewRound: 1,
637
+ maxReviewRounds,
638
+ });
639
+ const verdict = await runDiffReview({
640
+ cwd,
641
+ diff: evidence.fullDiff,
642
+ diffTruncated: evidence.diffTruncated,
643
+ verification,
644
+ worker: { call: (prompt) => otherProvider.run(prompt) },
645
+ });
646
+ emitVerbose('review_decision', { stage: 'diff_review', verdict: verdict.kind, round: 1 });
647
+ return resolveDiffOnlyTerminal({
648
+ ...implResult,
649
+ workerStatus,
650
+ specReviewStatus: 'skipped',
651
+ qualityReviewStatus: 'skipped',
652
+ specReviewReason: 'skipped: reviewPolicy is diff_only',
653
+ qualityReviewReason: 'skipped: reviewPolicy is diff_only',
654
+ implementationReport: effectiveImplReport,
655
+ fileArtifactsMissing: implResult.status === 'ok' ? checkOutputTargets(outputTargets) : undefined,
656
+ agents: {
657
+ implementer: resolved.slot,
658
+ specReviewer: 'skipped',
659
+ qualityReviewer: 'skipped',
660
+ },
661
+ models: {
662
+ implementer: implModel,
663
+ specReviewer: reviewModel,
664
+ qualityReviewer: null,
665
+ },
666
+ }, verdict, verification, evidence.diffTruncated);
667
+ }
309
668
  heartbeat?.transition({
310
669
  stage: 'spec_review', stageIndex: 2,
311
670
  reviewRound: 1, maxReviewRounds: task.maxReviewRounds ?? 5,
312
671
  });
313
- let specResult = await runSpecReview(otherProvider, packet, effectiveImplReport, fileContents, implResult.toolCalls, task.planContext);
672
+ let specResult = await runSpecReview(otherProvider, packet, effectiveImplReport, fileContents, implResult.toolCalls, task.planContext, evidence.block);
314
673
  let finalImplResult = implResult;
315
674
  let finalImplReport = effectiveImplReport;
316
675
  let specStatus = specResult.status;
317
676
  let specReport = specResult.report;
318
677
  if (specStatus === 'changes_required') {
319
678
  let prevSpecFindings = [];
320
- let round = 0;
321
679
  while (true) {
322
- round++;
680
+ if (specRework + qualityRework >= maxReviewRounds) {
681
+ return abortReviewLoop(finalImplResult, 'round_cap', 'review round cap reached before spec rework', 'spec');
682
+ }
683
+ const currentCostUSD = taskCostUSD();
684
+ if (currentCostUSD !== null && maxCostUSD !== undefined && currentCostUSD >= 0.8 * maxCostUSD) {
685
+ emitVerbose('cost_check', { stage: 'spec_rework', tripped: true, cost_used_usd: currentCostUSD, cost_cap_usd: maxCostUSD, cost_available: true });
686
+ return abortReviewLoop(finalImplResult, 'cost_ceiling', 'cost ceiling reached before spec rework', 'spec');
687
+ }
688
+ emitVerbose('stage_change', { from: 'spec_review', to: 'spec_rework', round: specRework + 1, cap: maxReviewRounds });
689
+ specRework++;
690
+ const round = specRework;
323
691
  heartbeat?.transition({
324
692
  stage: 'spec_rework', stageIndex: 3,
325
- reviewRound: round, maxReviewRounds: task.maxReviewRounds ?? 5,
693
+ reviewRound: round, maxReviewRounds,
326
694
  });
327
695
  const feedback = specResult.findings.length > 0
328
696
  ? `\n\n## Spec Review Feedback (round ${round}):\n${specResult.findings.map(f => `- ${f}`).join('\n')}`
@@ -330,15 +698,6 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
330
698
  const reworkPrompt = `${task.prompt}${feedback}`;
331
699
  const reworkTask = withDoneCondition({ ...task, prompt: reworkPrompt });
332
700
  const reworkResult = await delegateWithEscalation(reworkTask, [resolved.provider], { explicitlyPinned: true, onProgress: wrappedOnProgress });
333
- // Auto-commit rework changes
334
- if (task.autoCommit && reworkResult.status === 'ok' && reworkResult.filesWritten.length > 0) {
335
- const reworkReport = parseStructuredReport(reworkResult.output);
336
- const reworkCommit = autoCommitFiles(reworkResult.filesWritten, reworkReport.summary ?? undefined, task.cwd ?? process.cwd());
337
- if (reworkCommit.sha)
338
- commitSha = reworkCommit.sha;
339
- if (reworkCommit.error)
340
- commitError = reworkCommit.error;
341
- }
342
701
  finalImplResult = reworkResult;
343
702
  const reworkReport = parseStructuredReport(reworkResult.output);
344
703
  finalImplReport = reworkReport.summary ? reworkReport : buildFallbackImplReport(reworkResult);
@@ -346,9 +705,9 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
346
705
  fileContents = reworkContents;
347
706
  heartbeat?.transition({
348
707
  stage: 'spec_review', stageIndex: 2,
349
- reviewRound: round + 1, maxReviewRounds: task.maxReviewRounds ?? 5,
708
+ reviewRound: round + 1, maxReviewRounds,
350
709
  });
351
- specResult = await runSpecReview(otherProvider, packet, finalImplReport, reworkContents, reworkResult.toolCalls, task.planContext);
710
+ specResult = await runSpecReview(otherProvider, packet, finalImplReport, reworkContents, reworkResult.toolCalls, task.planContext, evidence.block);
352
711
  specStatus = specResult.status;
353
712
  specReport = specResult.report;
354
713
  if (specStatus === 'approved')
@@ -358,25 +717,32 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
358
717
  if (currentFindings === prevFindings && currentFindings !== '')
359
718
  break;
360
719
  prevSpecFindings = specResult.findings;
361
- if (round >= (task.maxReviewRounds ?? 5))
362
- break;
363
720
  }
364
721
  }
365
722
  let qualityResult = { status: 'skipped', report: undefined, findings: [] };
366
723
  if (reviewPolicy === 'full') {
367
724
  heartbeat?.transition({
368
725
  stage: 'quality_review', stageIndex: 4,
369
- reviewRound: 1, maxReviewRounds: task.maxReviewRounds ?? 5,
726
+ reviewRound: 1, maxReviewRounds,
370
727
  });
371
- qualityResult = await runQualityReview(otherProvider, packet, specReport ?? finalImplReport, fileContents, finalImplResult.toolCalls, finalImplResult.filesWritten);
728
+ qualityResult = await runQualityReview(otherProvider, packet, specReport ?? finalImplReport, fileContents, finalImplResult.toolCalls, finalImplResult.filesWritten, evidence.block);
372
729
  if (qualityResult.status === 'changes_required') {
373
730
  let prevQualityFindings = [];
374
- let round = 0;
375
731
  while (true) {
376
- round++;
732
+ if (specRework + qualityRework >= maxReviewRounds) {
733
+ return abortReviewLoop(finalImplResult, 'round_cap', 'review round cap reached before quality rework', 'quality');
734
+ }
735
+ const currentCostUSD = taskCostUSD();
736
+ if (currentCostUSD !== null && maxCostUSD !== undefined && currentCostUSD >= 0.8 * maxCostUSD) {
737
+ emitVerbose('cost_check', { stage: 'quality_rework', tripped: true, cost_used_usd: currentCostUSD, cost_cap_usd: maxCostUSD, cost_available: true });
738
+ return abortReviewLoop(finalImplResult, 'cost_ceiling', 'cost ceiling reached before quality rework', 'quality');
739
+ }
740
+ emitVerbose('stage_change', { from: 'quality_review', to: 'quality_rework', round: qualityRework + 1, cap: maxReviewRounds });
741
+ qualityRework++;
742
+ const round = qualityRework;
377
743
  heartbeat?.transition({
378
744
  stage: 'quality_rework', stageIndex: 5,
379
- reviewRound: round, maxReviewRounds: task.maxReviewRounds ?? 5,
745
+ reviewRound: round, maxReviewRounds,
380
746
  });
381
747
  const feedback = qualityResult.findings.length > 0
382
748
  ? `\n\n## Quality Review Feedback (round ${round}):\n${qualityResult.findings.map(f => `- ${f}`).join('\n')}`
@@ -384,24 +750,15 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
384
750
  const reworkPrompt = `${task.prompt}${feedback}`;
385
751
  const reworkTask = withDoneCondition({ ...task, prompt: reworkPrompt });
386
752
  const reworkResult = await delegateWithEscalation(reworkTask, [resolved.provider], { explicitlyPinned: true, onProgress: wrappedOnProgress });
387
- // Auto-commit rework changes
388
- if (task.autoCommit && reworkResult.status === 'ok' && reworkResult.filesWritten.length > 0) {
389
- const reworkReport = parseStructuredReport(reworkResult.output);
390
- const reworkCommit = autoCommitFiles(reworkResult.filesWritten, reworkReport.summary ?? undefined, task.cwd ?? process.cwd());
391
- if (reworkCommit.sha)
392
- commitSha = reworkCommit.sha;
393
- if (reworkCommit.error)
394
- commitError = reworkCommit.error;
395
- }
396
753
  finalImplResult = reworkResult;
397
754
  const reworkReport = parseStructuredReport(reworkResult.output);
398
755
  finalImplReport = reworkReport.summary ? reworkReport : buildFallbackImplReport(reworkResult);
399
756
  const reworkContents = await readImplementerFileContents(reworkResult.filesWritten, task.cwd);
400
757
  heartbeat?.transition({
401
758
  stage: 'quality_review', stageIndex: 4,
402
- reviewRound: round + 1, maxReviewRounds: task.maxReviewRounds ?? 5,
759
+ reviewRound: round + 1, maxReviewRounds,
403
760
  });
404
- qualityResult = await runQualityReview(otherProvider, packet, finalImplReport, reworkContents, reworkResult.toolCalls, reworkResult.filesWritten);
761
+ qualityResult = await runQualityReview(otherProvider, packet, finalImplReport, reworkContents, reworkResult.toolCalls, reworkResult.filesWritten, evidence.block);
405
762
  if (qualityResult.status === 'approved')
406
763
  break;
407
764
  const currentFindings = [...qualityResult.findings].sort().join('\0');
@@ -409,12 +766,28 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
409
766
  if (currentFindings === prevFindings && currentFindings !== '')
410
767
  break;
411
768
  prevQualityFindings = qualityResult.findings;
412
- if (round >= (task.maxReviewRounds ?? 5))
413
- break;
414
769
  }
415
770
  }
416
771
  }
417
772
  const finalReport = specReport ?? finalImplReport;
773
+ const concerns = [...(finalImplResult.concerns ?? [])];
774
+ let finalWorkerStatus = workerStatus;
775
+ if (verification.status === 'failed') {
776
+ concerns.push({
777
+ source: 'verification',
778
+ severity: 'high',
779
+ message: 'Verification failed after implementation.',
780
+ });
781
+ if (finalWorkerStatus === 'done')
782
+ finalWorkerStatus = 'done_with_concerns';
783
+ }
784
+ if (evidence.diffTruncated) {
785
+ concerns.push({
786
+ source: 'diff_truncated',
787
+ severity: 'medium',
788
+ message: 'Implementation diff exceeded the reviewer evidence byte cap and was truncated.',
789
+ });
790
+ }
418
791
  const aggregated = aggregateResult(finalReport, specReport, qualityResult.report, specStatus, qualityResult.status);
419
792
  // File artifact verification: check whether output targets exist on disk after all work.
420
793
  // Only applies when status is ok; non-ok statuses skip verification entirely.
@@ -432,7 +805,8 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
432
805
  return {
433
806
  ...finalImplResult,
434
807
  status: finalStatus,
435
- workerStatus,
808
+ workerStatus: finalWorkerStatus,
809
+ concerns,
436
810
  specReviewStatus: specStatus,
437
811
  qualityReviewStatus: qualityResult.status,
438
812
  specReviewReason: specResult.errorReason,
@@ -453,8 +827,9 @@ export async function executeReviewedLifecycle(task, resolved, config, taskIndex
453
827
  qualityReviewer: reviewPolicy === 'full' ? reviewModel : null,
454
828
  },
455
829
  fileArtifactsMissing,
456
- commitSha,
830
+ commits,
457
831
  commitError,
832
+ verification,
458
833
  };
459
834
  }
460
835
  finally {