@weldr/runr 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73)
  1. package/CHANGELOG.md +216 -0
  2. package/LICENSE +190 -0
  3. package/NOTICE +4 -0
  4. package/README.md +200 -0
  5. package/dist/cli.js +464 -0
  6. package/dist/commands/__tests__/report.test.js +202 -0
  7. package/dist/commands/compare.js +168 -0
  8. package/dist/commands/doctor.js +124 -0
  9. package/dist/commands/follow.js +251 -0
  10. package/dist/commands/gc.js +161 -0
  11. package/dist/commands/guards-only.js +89 -0
  12. package/dist/commands/metrics.js +441 -0
  13. package/dist/commands/orchestrate.js +800 -0
  14. package/dist/commands/paths.js +31 -0
  15. package/dist/commands/preflight.js +152 -0
  16. package/dist/commands/report.js +478 -0
  17. package/dist/commands/resume.js +149 -0
  18. package/dist/commands/run.js +538 -0
  19. package/dist/commands/status.js +189 -0
  20. package/dist/commands/summarize.js +220 -0
  21. package/dist/commands/version.js +82 -0
  22. package/dist/commands/wait.js +170 -0
  23. package/dist/config/__tests__/presets.test.js +104 -0
  24. package/dist/config/load.js +66 -0
  25. package/dist/config/schema.js +160 -0
  26. package/dist/context/__tests__/artifact.test.js +130 -0
  27. package/dist/context/__tests__/pack.test.js +191 -0
  28. package/dist/context/artifact.js +67 -0
  29. package/dist/context/index.js +2 -0
  30. package/dist/context/pack.js +273 -0
  31. package/dist/diagnosis/analyzer.js +678 -0
  32. package/dist/diagnosis/formatter.js +136 -0
  33. package/dist/diagnosis/index.js +6 -0
  34. package/dist/diagnosis/types.js +7 -0
  35. package/dist/env/__tests__/fingerprint.test.js +116 -0
  36. package/dist/env/fingerprint.js +111 -0
  37. package/dist/orchestrator/__tests__/policy.test.js +185 -0
  38. package/dist/orchestrator/__tests__/schema-version.test.js +65 -0
  39. package/dist/orchestrator/artifacts.js +405 -0
  40. package/dist/orchestrator/state-machine.js +646 -0
  41. package/dist/orchestrator/types.js +88 -0
  42. package/dist/ownership/normalize.js +45 -0
  43. package/dist/repo/context.js +90 -0
  44. package/dist/repo/git.js +13 -0
  45. package/dist/repo/worktree.js +239 -0
  46. package/dist/store/run-store.js +107 -0
  47. package/dist/store/run-utils.js +69 -0
  48. package/dist/store/runs-root.js +126 -0
  49. package/dist/supervisor/__tests__/evidence-gate.test.js +111 -0
  50. package/dist/supervisor/__tests__/ownership.test.js +103 -0
  51. package/dist/supervisor/__tests__/state-machine.test.js +290 -0
  52. package/dist/supervisor/collision.js +240 -0
  53. package/dist/supervisor/evidence-gate.js +98 -0
  54. package/dist/supervisor/planner.js +18 -0
  55. package/dist/supervisor/runner.js +1562 -0
  56. package/dist/supervisor/scope-guard.js +55 -0
  57. package/dist/supervisor/state-machine.js +121 -0
  58. package/dist/supervisor/verification-policy.js +64 -0
  59. package/dist/tasks/task-metadata.js +72 -0
  60. package/dist/types/schemas.js +1 -0
  61. package/dist/verification/engine.js +49 -0
  62. package/dist/workers/__tests__/claude.test.js +88 -0
  63. package/dist/workers/__tests__/codex.test.js +81 -0
  64. package/dist/workers/claude.js +119 -0
  65. package/dist/workers/codex.js +162 -0
  66. package/dist/workers/json.js +22 -0
  67. package/dist/workers/mock.js +193 -0
  68. package/dist/workers/prompts.js +98 -0
  69. package/dist/workers/schemas.js +39 -0
  70. package/package.json +47 -0
  71. package/templates/prompts/implementer.md +70 -0
  72. package/templates/prompts/planner.md +62 -0
  73. package/templates/prompts/reviewer.md +77 -0
@@ -0,0 +1,1562 @@
1
+ import fs from 'node:fs';
2
+ import path from 'node:path';
3
+ import crypto from 'node:crypto';
4
+ import picomatch from 'picomatch';
5
+ import { SCOPE_PRESETS } from '../config/schema.js';
6
+ import { git } from '../repo/git.js';
7
+ import { listChangedFiles } from '../repo/context.js';
8
+ import { buildImplementPrompt, buildPlanPrompt, buildReviewPrompt } from '../workers/prompts.js';
9
+ import { buildContextPack, formatContextPackForPrompt, writeContextPackArtifact } from '../context/index.js';
10
+ import { runClaude } from '../workers/claude.js';
11
+ import { runCodex } from '../workers/codex.js';
12
+ import { isMockWorkerEnabled, runMockWorker } from '../workers/mock.js';
13
+ import { implementerOutputSchema, planOutputSchema, reviewOutputSchema } from '../workers/schemas.js';
14
+ import { parseJsonWithSchema } from '../workers/json.js';
15
+ import { checkLockfiles, checkScope, partitionChangedFiles } from './scope-guard.js';
16
+ import { commandsForTier, selectTiersWithReasons } from './verification-policy.js';
17
+ import { runVerification } from '../verification/engine.js';
18
+ import { stopRun, updatePhase, prepareForResume } from './state-machine.js';
19
+ import { getActiveRuns, checkFileCollisions, formatFileCollisionError } from './collision.js';
20
+ import { validateNoChangesEvidence, formatEvidenceErrors } from './evidence-gate.js';
21
+ import { normalizeOwnsPatterns, toPosixPath } from '../ownership/normalize.js';
22
/**
 * Verify that every semantic change falls inside the task's declared ownership.
 *
 * @param changedFiles - Files changed by the run.
 * @param ownedPaths - Ownership glob patterns declared by the task; an empty
 *   list disables enforcement.
 * @param envAllowlist - Passed through to partitionChangedFiles so environment
 *   artifacts are excluded from the check.
 * @returns `{ ok, owned_paths, semantic_changed, violating_files }`, where
 *   `ok` is true when no semantic change lies outside the owned patterns.
 */
export function checkOwnership(changedFiles, ownedPaths, envAllowlist) {
  // Every exit path returns the same shape; `ok` is derived from the violations.
  const verdict = (owned_paths, semantic_changed, violating_files) => ({
    ok: violating_files.length === 0,
    owned_paths,
    semantic_changed,
    violating_files
  });

  // Nothing declared means nothing to enforce.
  if (ownedPaths.length === 0) {
    return verdict([], [], []);
  }

  // Normalize defensively so matching is consistent even for raw caller patterns.
  const normalizedOwned = normalizeOwnsPatterns(ownedPaths);

  // Only semantic changes count; env artifacts are filtered out by the partition.
  const { semantic_changed } = partitionChangedFiles(changedFiles, envAllowlist);
  if (semantic_changed.length === 0) {
    return verdict(normalizedOwned, [], []);
  }

  // Compile each ownership glob once, then test every file in POSIX form.
  const matchers = normalizedOwned.map((pattern) => picomatch(pattern));
  const violating_files = semantic_changed.filter((file) => {
    const posixFile = toPosixPath(file);
    return !matchers.some((matches) => matches(posixFile));
  });

  return verdict(normalizedOwned, semantic_changed, violating_files);
}
63
/**
 * Stop reasons the supervisor is allowed to resume from automatically.
 * Only transient / infrastructure failures are listed, never logic errors.
 *
 * Deliberately excluded:
 * - time_budget_exceeded: resuming would create a "budget treadmill"
 * - verification_failed_max_retries: a real code issue
 * - guard_violation: a real scope/policy issue
 * - implement_blocked: the worker cannot proceed
 * - *_parse_failed: persistent format issues
 * - complete: success, nothing to resume
 */
const AUTO_RESUMABLE_REASONS = new Set(['stalled_timeout', 'worker_call_timeout']);

/**
 * Tell whether a stop reason qualifies for auto-resume.
 *
 * @param reason - Stop reason read from run state; may be undefined.
 * @returns true only for reasons listed in AUTO_RESUMABLE_REASONS.
 */
function isAutoResumable(reason) {
  if (reason === undefined) {
    return false;
  }
  return AUTO_RESUMABLE_REASONS.has(reason);
}
82
/**
 * Upper bound on verification retries for a single milestone; the run stops
 * once it is reached. Every retry sends the run back to IMPLEMENT together
 * with fix instructions.
 */
const MAX_MILESTONE_RETRIES = 3;

/** Floor, in minutes, for the stall timeout computed by resolveStallTimeoutMs. */
const DEFAULT_STALL_TIMEOUT_MINUTES = 15;

/** Floor, in minutes, for the worker timeout computed by resolveWorkerTimeoutMs. */
const DEFAULT_WORKER_TIMEOUT_MINUTES = 30;
89
/**
 * Recommend scope presets that would cover the given violations.
 *
 * Each violating path is tested against every pattern of every entry in
 * SCOPE_PRESETS; a preset is suggested as soon as one of its patterns matches.
 *
 * @param violations - File paths that fell outside the allowlist.
 * @returns Preset names without duplicates, in first-match order.
 */
function suggestPresetsForViolations(violations) {
  const matchedPresets = new Set();
  for (const filePath of violations) {
    Object.entries(SCOPE_PRESETS)
      // One matching pattern is enough for a preset to qualify.
      .filter(([, globs]) => globs.some((glob) => picomatch.isMatch(filePath, glob)))
      .forEach(([presetName]) => matchedPresets.add(presetName));
  }
  return [...matchedPresets];
}
108
/**
 * Work out the stall timeout in milliseconds.
 *
 * Precedence:
 *   1. AGENT_STALL_TIMEOUT_MS  (milliseconds, handy for fast tests)
 *   2. STALL_TIMEOUT_MINUTES   (minutes)
 *   3. max(DEFAULT_STALL_TIMEOUT_MINUTES, verify time in minutes + 5)
 *
 * An environment value is honored only when it parses to a positive integer.
 *
 * @param config - Run configuration; `verification.max_verify_time_per_milestone`
 *   is read only when neither environment override applies.
 * @returns Timeout in milliseconds.
 */
function resolveStallTimeoutMs(config) {
  // Returns the positive integer stored in an env var, or null when unusable.
  const positiveIntFromEnv = (name) => {
    const parsed = Number.parseInt(process.env[name] ?? '', 10);
    return Number.isFinite(parsed) && parsed > 0 ? parsed : null;
  };

  const overrideMs = positiveIntFromEnv('AGENT_STALL_TIMEOUT_MS');
  if (overrideMs !== null) {
    return overrideMs;
  }

  const overrideMinutes = positiveIntFromEnv('STALL_TIMEOUT_MINUTES');
  if (overrideMinutes !== null) {
    return overrideMinutes * 60 * 1000;
  }

  // Leave five minutes of headroom on top of the verification budget.
  const verifyMinutes = Math.ceil(config.verification.max_verify_time_per_milestone / 60);
  const fallbackMinutes = Math.max(DEFAULT_STALL_TIMEOUT_MINUTES, verifyMinutes + 5);
  return fallbackMinutes * 60 * 1000;
}
128
/**
 * Work out the worker call timeout in milliseconds.
 *
 * Precedence:
 *   1. AGENT_WORKER_CALL_TIMEOUT_MS  (milliseconds, handy for fast tests)
 *   2. WORKER_TIMEOUT_MINUTES        (minutes)
 *   3. max(DEFAULT_WORKER_TIMEOUT_MINUTES, twice the stall timeout)
 *
 * An environment value is honored only when it parses to a positive integer.
 *
 * @param stallTimeoutMs - Stall timeout in milliseconds, used for the default.
 * @returns Timeout in milliseconds.
 */
function resolveWorkerTimeoutMs(stallTimeoutMs) {
  // Returns the positive integer stored in an env var, or null when unusable.
  const positiveIntFromEnv = (name) => {
    const parsed = Number.parseInt(process.env[name] ?? '', 10);
    return Number.isFinite(parsed) && parsed > 0 ? parsed : null;
  };

  const overrideMs = positiveIntFromEnv('AGENT_WORKER_CALL_TIMEOUT_MS');
  if (overrideMs !== null) {
    return overrideMs;
  }

  const overrideMinutes = positiveIntFromEnv('WORKER_TIMEOUT_MINUTES');
  if (overrideMinutes !== null) {
    return overrideMinutes * 60 * 1000;
  }

  const floorMs = DEFAULT_WORKER_TIMEOUT_MINUTES * 60 * 1000;
  return Math.max(floorMs, stallTimeoutMs * 2);
}
147
/**
 * Blank stop-memo template: a title followed by five headed sections, each
 * holding one empty bullet to be filled in.
 */
const DEFAULT_STOP_MEMO = [
  '# Stop Memo',
  ...[
    "What's done:",
    "What's broken:",
    'Best next step (one command):',
    'Risk notes:',
    'Where to look:'
  ].flatMap((heading) => ['', heading, '- '])
].join('\n');
165
/**
 * Build a structured stop memo with clear next actions.
 * Phase 6.3: Structured Stop Output
 *
 * @param params - Memo inputs:
 *   - reason: stop reason key (unknown keys fall back to generic text)
 *   - runId: run identifier used in the suggested resume command
 *   - phase: phase to report in the memo
 *   - milestoneIndex: zero-based index of the current milestone
 *   - milestonesTotal: number of planned milestones (may be 0 before planning)
 *   - lastError: optional error detail; truncated to 500 characters
 *   - suggestedTime / suggestedTicks: optional budgets for the resume command
 *   - scopeViolations / lockfileViolations: optional lists of offending files
 * @returns The memo as a markdown string.
 */
function buildStructuredStopMemo(params) {
  const { reason, runId, phase, milestoneIndex, milestonesTotal, lastError, suggestedTime, suggestedTicks } = params;
  // Own-property lookup only: a reason such as "constructor" must not resolve
  // to an Object.prototype member and leak function source into the memo.
  const ownEntry = (table, key) => (Object.prototype.hasOwnProperty.call(table, key) ? table[key] : undefined);
  const reasonDescriptions = {
    time_budget_exceeded: 'Time budget was exhausted before completing all milestones.',
    max_ticks_reached: 'Maximum phase transitions (ticks) reached before completion.',
    stalled_timeout: 'No progress detected for too long (worker may have hung).',
    worker_call_timeout: 'Worker call exceeded maximum duration hard cap.',
    verification_failed_max_retries: 'Verification failed too many times on the same milestone.',
    implement_blocked: 'Implementer reported it could not proceed.',
    guard_violation: 'Changes violated scope or lockfile constraints.',
    ownership_violation: 'Task modified files outside its declared ownership.',
    parallel_file_collision: 'Stopped to avoid merge conflicts with another active run.',
    insufficient_evidence: 'Implementer claimed no changes needed but provided insufficient evidence.',
    plan_parse_failed: 'Planner output could not be parsed.',
    implement_parse_failed: 'Implementer output could not be parsed.',
    review_parse_failed: 'Reviewer output could not be parsed.',
    review_loop_detected: 'Reviewer requested the same changes repeatedly or max review rounds exceeded.',
    plan_scope_violation: 'Planner proposed files outside the allowed scope.',
    complete: 'Run completed successfully.'
  };
  const likelyCauses = {
    time_budget_exceeded: 'Task took longer than expected, or time budget was too short.',
    max_ticks_reached: 'Complex task with many iterations, or tick budget was too low.',
    stalled_timeout: 'Worker CLI hung, network issues, or API timeout.',
    worker_call_timeout: 'Worker process hung indefinitely. Check worker CLI health with `agent doctor`.',
    verification_failed_max_retries: 'Code changes broke tests/lint and fixes kept failing.',
    implement_blocked: 'Missing dependencies, unclear requirements, or environment issue.',
    guard_violation: 'Implementer modified files outside allowed scope.',
    ownership_violation: 'Task declared owns: paths in frontmatter but touched files outside that claim.',
    parallel_file_collision: 'Another run is expected to modify the same files. Running in parallel would create merge conflicts.',
    insufficient_evidence: 'Worker claimed work was already done without proving it. This prevents false certainty.',
    plan_parse_failed: 'Planner returned malformed JSON.',
    implement_parse_failed: 'Implementer returned malformed JSON.',
    review_parse_failed: 'Reviewer returned malformed JSON.',
    review_loop_detected: 'Implementer unable to satisfy reviewer feedback, or reviewer expectations are unclear/impossible.',
    plan_scope_violation: 'Task requires files outside allowlist. Update scope.allowlist or scope.presets in agent.config.json.',
    // A successful run has no failure cause; avoid reporting "Unknown cause."
    complete: 'Not applicable - the run finished without errors.'
  };
  let nextAction;
  switch (reason) {
    case 'time_budget_exceeded':
      nextAction = `agent resume ${runId}${suggestedTime ? ` --time ${suggestedTime}` : ''}`;
      break;
    case 'max_ticks_reached':
      nextAction = `agent resume ${runId}${suggestedTicks ? ` --max-ticks ${suggestedTicks}` : ''}`;
      break;
    case 'parallel_file_collision':
      nextAction = `# Wait for conflicting run to complete, then:\nagent resume ${runId}`;
      break;
    case 'complete':
      nextAction = 'None - run completed successfully.';
      break;
    default:
      nextAction = `agent resume ${runId} --force # Review state first`;
  }
  // A run can stop before any milestone exists (e.g. during PLAN), and the
  // index can sit one past the end; never print "1 of 0" or "4 of 3".
  const progress = milestonesTotal > 0
    ? `Milestone ${Math.min(milestoneIndex + 1, milestonesTotal)} of ${milestonesTotal}`
    : 'No milestones planned yet';
  const lines = [
    '# Stop Memo',
    '',
    '## What Happened',
    `- **Stop reason**: ${reason}`,
    `- **Phase**: ${phase}`,
    `- **Progress**: ${progress}`,
    '',
    '## Description',
    ownEntry(reasonDescriptions, reason) || 'Unknown stop reason.',
    '',
    '## Likely Cause',
    ownEntry(likelyCauses, reason) || 'Unknown cause.'
  ];
  if (lastError) {
    // Coerce first so an Error object (or any non-string) cannot throw on slice.
    lines.push('', '## Last Error', '```', String(lastError).slice(0, 500), '```');
  }
  // Violation details (supplied for guard violations); the scope list is capped at 10.
  if (params.scopeViolations && params.scopeViolations.length > 0) {
    lines.push('', '## Scope Violations', 'Files modified outside allowlist:');
    for (const file of params.scopeViolations.slice(0, 10)) {
      lines.push(`- \`${file}\``);
    }
    if (params.scopeViolations.length > 10) {
      lines.push(`- ... and ${params.scopeViolations.length - 10} more`);
    }
  }
  if (params.lockfileViolations && params.lockfileViolations.length > 0) {
    lines.push('', '## Lockfile Violations', 'Lockfiles modified without --allow-deps:');
    for (const file of params.lockfileViolations) {
      lines.push(`- \`${file}\``);
    }
  }
  const tipsByReason = {
    time_budget_exceeded: '- Consider increasing --time if task is complex',
    max_ticks_reached: '- ~5 ticks per milestone is typical. Increase --max-ticks for complex tasks.',
    stalled_timeout: '- Check if workers are authenticated. Run `agent doctor` to diagnose.',
    worker_call_timeout: '- Worker hung indefinitely. Check API status, network, and run `agent doctor`.',
    parallel_file_collision: '- Use `agent status --all` to see conflicting runs. If you must proceed, use --force-parallel (may require manual merge resolution).',
    insufficient_evidence: '- Worker must provide files_checked, grep_output, or commands_run to prove no changes needed. Re-run with clearer task instructions.',
    review_loop_detected: '- Check review_digest.md for the requested changes. Consider simplifying the task or adjusting verification commands.',
    plan_scope_violation: '- Add missing file patterns to scope.allowlist, or use scope.presets for common stacks (vitest, nextjs, drizzle, etc.).',
    guard_violation: '- Add missing file patterns to scope.allowlist, or use --allow-deps for lockfile changes.'
  };
  lines.push(
    '',
    '## Next Action',
    '```bash',
    nextAction,
    '```',
    '',
    '## Tips',
    ownEntry(tipsByReason, reason) ?? '- Review the timeline.jsonl for detailed event history.'
  );
  return lines.join('\n');
}
269
/**
 * Main supervisor entry point with auto-resume support.
 *
 * Wraps runSupervisorOnce in a loop that automatically resumes after
 * transient failures (stalled_timeout, worker_call_timeout) up to a
 * configured limit.
 *
 * Auto-resume is enabled if:
 * - options.autoResume is true (CLI flag), OR
 * - config.resilience.auto_resume is true (config file)
 *
 * The loop ends when the stop reason is not auto-resumable, when the run's
 * auto_resume_count reaches config.resilience.max_auto_resumes (default 1),
 * or when the same stop reason occurs twice in a row.
 *
 * @param options - Supervisor configuration including run store, config, and budgets
 * @returns Resolves once the run has stopped and no further resume is attempted.
 */
export async function runSupervisorLoop(options) {
  // CLI flag overrides config; auto-resume is off unless one of them enables it.
  const autoResumeEnabled = options.autoResume ?? options.config.resilience?.auto_resume ?? false;
  if (!autoResumeEnabled) {
    // No auto-resume, just run once
    await runSupervisorOnce(options);
    return;
  }
  const maxResumes = options.config.resilience?.max_auto_resumes ?? 1;
  const delays = options.config.resilience?.auto_resume_delays_ms ?? [30000, 120000, 300000];
  // Track consecutive same-stop-reason to detect loops
  let lastStopReason;
  let consecutiveSameStops = 0;
  const MAX_CONSECUTIVE_SAME_STOPS = 2; // Cut off if same reason 2x in a row
  // Which auto-resume attempt the upcoming pass belongs to (0 = initial run).
  let currentAttempt = 0;
  while (true) {
    await runSupervisorOnce(options);
    // Inspect the state the pass left behind.
    const finalState = options.runStore.readState();
    const stopReason = finalState.stop_reason;
    const autoResumeCount = finalState.auto_resume_count ?? 0;
    // Emit auto_resume_result for metrics (only after auto-resume attempts, not initial run)
    if (currentAttempt > 0) {
      const outcome = stopReason === 'complete' ? 'completed' : 'stopped_again';
      options.runStore.appendEvent({
        type: 'auto_resume_result',
        source: 'supervisor',
        payload: {
          attempt: currentAttempt,
          outcome,
          stop_reason: stopReason ?? undefined
        }
      });
    }
    // Only transient failures are resumed automatically.
    if (!isAutoResumable(stopReason)) {
      if (stopReason && stopReason !== 'complete') {
        options.runStore.appendEvent({
          type: 'auto_resume_skipped',
          source: 'supervisor',
          payload: {
            reason: stopReason,
            resumable: false,
            auto_resume_count: autoResumeCount
          }
        });
      }
      break;
    }
    // Check if we've hit the auto-resume cap
    if (autoResumeCount >= maxResumes) {
      options.runStore.appendEvent({
        type: 'auto_resume_exhausted',
        source: 'supervisor',
        payload: {
          count: autoResumeCount,
          max: maxResumes,
          reason: stopReason
        }
      });
      console.log(`\nAuto-resume cap reached (${autoResumeCount}/${maxResumes}). Manual intervention required.`);
      console.log(`Tip: Use \`agent resume ${finalState.run_id}\` to continue manually.\n`);
      break;
    }
    // Same-stop-repeat protection: cut off if same reason repeats too many times
    if (stopReason === lastStopReason) {
      consecutiveSameStops++;
      if (consecutiveSameStops >= MAX_CONSECUTIVE_SAME_STOPS) {
        options.runStore.appendEvent({
          type: 'auto_resume_loop_detected',
          source: 'supervisor',
          payload: {
            reason: stopReason,
            consecutive_count: consecutiveSameStops,
            auto_resume_count: autoResumeCount
          }
        });
        console.log(`\nAuto-resume loop detected: ${stopReason} repeated ${consecutiveSameStops}x. Stopping.`);
        console.log(`Tip: Investigate root cause before resuming manually.\n`);
        break;
      }
    }
    else {
      consecutiveSameStops = 1;
    }
    lastStopReason = stopReason;
    // Backoff: use the delay for this resume count, reusing the last entry once
    // the list is exhausted. An empty list means "no delay" rather than an
    // undefined delay (which would log "NaNs" and pass undefined to sleep).
    const delayMs = delays.length > 0 ? delays[Math.min(autoResumeCount, delays.length - 1)] : 0;
    options.runStore.appendEvent({
      type: 'auto_resume_scheduled',
      source: 'supervisor',
      payload: {
        attempt: autoResumeCount + 1,
        max: maxResumes,
        delay_ms: delayMs,
        reason: stopReason,
        previous_stop_reason: stopReason,
        resume_phase: finalState.last_successful_phase
      }
    });
    console.log(`\nAuto-resuming in ${Math.round(delayMs / 1000)}s (attempt ${autoResumeCount + 1}/${maxResumes})...`);
    await sleep(delayMs);
    // Prepare state for resume
    const resumedState = prepareForResume(finalState, { incrementAutoResumeCount: true });
    options.runStore.writeState(resumedState);
    options.runStore.appendEvent({
      type: 'auto_resume_started',
      source: 'supervisor',
      payload: {
        attempt: autoResumeCount + 1,
        run_id: finalState.run_id,
        previous_stop_reason: stopReason,
        resume_phase: resumedState.phase
      }
    });
    console.log(`Auto-resume started. Resuming from phase: ${resumedState.phase}\n`);
    // Mark the next pass as an auto-resume attempt so its outcome is reported
    // through auto_resume_result; without this the event is never emitted.
    currentAttempt = autoResumeCount + 1;
    // Loop continues, runSupervisorOnce will be called again
  }
}
401
/**
 * Single execution of the supervisor loop (no auto-resume).
 *
 * Executes up to `maxTicks` phase transitions, stopping early if:
 * - Time budget is exceeded
 * - Run reaches STOPPED phase
 * - A phase handler stops the run (e.g., guard violation, max retries)
 * - The watchdog declares the run stalled or a worker call hung
 *
 * Phase flow: INIT -> PLAN -> IMPLEMENT -> VERIFY -> REVIEW -> CHECKPOINT -> FINALIZE
 *
 * A watchdog timer checks every 10 seconds for lack of progress and is always
 * cleared before this function returns. Every stop written here reports the
 * phase the run was in *before* it was stopped, and comes with a structured
 * stop memo.
 *
 * @param options - Supervisor configuration including run store, config, and budgets
 */
async function runSupervisorOnce(options) {
  const startTime = Date.now();
  const stallTimeoutMs = resolveStallTimeoutMs(options.config);
  let lastProgressAt = Date.now();
  let stalled = false;
  // Stamps the state with the current time and resets the watchdog's stall clock.
  const recordProgress = (state) => {
    const now = new Date().toISOString();
    lastProgressAt = Date.now();
    return {
      ...state,
      last_progress_at: now,
      updated_at: now
    };
  };
  // Worker calls can take 5-20 minutes; use longer timeout when worker is in-flight
  // Configurable via WORKER_TIMEOUT_MINUTES env var (default: 30min or 2x stall timeout)
  const workerTimeoutMs = resolveWorkerTimeoutMs(stallTimeoutMs);
  // Hard cap on worker call duration (prevents infinite in-flight)
  const maxWorkerCallMinutes = options.config.resilience?.max_worker_call_minutes ?? 45;
  const maxWorkerCallMs = maxWorkerCallMinutes * 60 * 1000;
  const watchdog = setInterval(() => {
    if (stalled)
      return;
    const lastWorkerCall = options.runStore.getLastWorkerCall();
    const lastEvent = options.runStore.getLastEvent();
    // Check if worker is in-flight (started after last progress)
    let workerInFlight = false;
    let workerStartedAt = 0;
    let workerCallDurationMs = 0;
    if (lastWorkerCall?.at) {
      workerStartedAt = new Date(lastWorkerCall.at).getTime();
      workerInFlight = workerStartedAt > lastProgressAt;
      workerCallDurationMs = Date.now() - workerStartedAt;
    }
    // Hard cap: if worker call exceeds max duration, force stop
    // This catches hung workers that never return
    if (workerInFlight && workerCallDurationMs >= maxWorkerCallMs) {
      stalled = true;
      const current = options.runStore.readState();
      const stopped = stopRun(current, 'worker_call_timeout');
      options.runStore.writeState(stopped);
      options.runStore.appendEvent({
        type: 'stop',
        source: 'supervisor',
        payload: {
          reason: 'worker_call_timeout',
          phase: current.phase,
          milestone_index: current.milestone_index,
          last_worker_call: lastWorkerCall,
          worker_call_duration_ms: workerCallDurationMs,
          max_worker_call_ms: maxWorkerCallMs
        }
      });
      const memo = buildStructuredStopMemo({
        reason: 'worker_call_timeout',
        runId: current.run_id,
        phase: current.phase,
        milestoneIndex: current.milestone_index,
        milestonesTotal: current.milestones.length,
        lastError: `Worker call exceeded ${maxWorkerCallMinutes} minute hard cap`
      });
      writeStopMemo(options.runStore, memo);
      return;
    }
    // Soft timeout: a longer allowance applies while a worker call is in flight.
    const elapsedMs = Date.now() - lastProgressAt;
    const effectiveTimeoutMs = workerInFlight ? workerTimeoutMs : stallTimeoutMs;
    if (elapsedMs < effectiveTimeoutMs)
      return;
    stalled = true;
    const current = options.runStore.readState();
    const stopped = stopRun(current, 'stalled_timeout');
    options.runStore.writeState(stopped);
    options.runStore.appendEvent({
      type: 'stop',
      source: 'supervisor',
      payload: {
        reason: 'stalled_timeout',
        phase: current.phase,
        milestone_index: current.milestone_index,
        last_event_type: lastEvent?.type ?? null,
        last_worker_call: lastWorkerCall ?? null,
        worker_in_flight: workerInFlight,
        elapsed_ms: elapsedMs,
        timeout_ms: effectiveTimeoutMs
      }
    });
    // Same structured memo as the other stop paths, so a stalled run also gets
    // a description, likely cause and next action instead of a blank template.
    const stallMemo = buildStructuredStopMemo({
      reason: 'stalled_timeout',
      runId: current.run_id,
      phase: current.phase,
      milestoneIndex: current.milestone_index,
      milestonesTotal: current.milestones.length,
      lastError: `No progress for ${Math.round(elapsedMs / 1000)}s (timeout ${Math.round(effectiveTimeoutMs / 1000)}s, worker in flight: ${workerInFlight})`
    });
    writeStopMemo(options.runStore, stallMemo);
  }, 10000);
  let ticksUsed = 0;
  try {
    for (let tick = 0; tick < options.maxTicks; tick += 1) {
      ticksUsed = tick + 1;
      if (stalled) {
        break;
      }
      let state = options.runStore.readState();
      if (state.phase === 'STOPPED') {
        break;
      }
      const elapsedMinutes = (Date.now() - startTime) / 60000;
      if (elapsedMinutes >= options.timeBudgetMinutes) {
        // Remember the interrupted phase: the loop relies on stopRun moving the
        // run to STOPPED, so reading state.phase afterwards would hide it.
        const phaseAtStop = state.phase;
        state = stopRun(state, 'time_budget_exceeded');
        options.runStore.writeState(state);
        options.runStore.appendEvent({
          type: 'stop',
          source: 'supervisor',
          payload: { reason: 'time_budget_exceeded', ticks_used: ticksUsed }
        });
        const memo = buildStructuredStopMemo({
          reason: 'time_budget_exceeded',
          runId: state.run_id,
          phase: phaseAtStop,
          milestoneIndex: state.milestone_index,
          milestonesTotal: state.milestones.length,
          lastError: state.last_error,
          suggestedTime: Math.ceil(options.timeBudgetMinutes * 1.5),
          suggestedTicks: options.maxTicks
        });
        writeStopMemo(options.runStore, memo);
        console.log(`\nTime budget exceeded (${Math.floor(elapsedMinutes)}/${options.timeBudgetMinutes} min) at milestone ${state.milestone_index + 1}/${state.milestones.length}.`);
        console.log(`Tip: Use \`agent resume ${state.run_id} --time ${Math.ceil(options.timeBudgetMinutes * 1.5)}\` to continue with more time.\n`);
        break;
      }
      // Progress is recorded both before and after the phase so the watchdog
      // measures idle time from the most recent supervisor activity.
      state = recordProgress(state);
      options.runStore.writeState(state);
      state = await runPhase(state, options);
      if (stalled) {
        // The watchdog already wrote the stopped state; do not overwrite it.
        break;
      }
      state = recordProgress(state);
      options.runStore.writeState(state);
    }
    // Check if we exited due to maxTicks (run not complete, not stalled, not time-exceeded)
    if (!stalled) {
      let finalState = options.runStore.readState();
      if (finalState.phase !== 'STOPPED') {
        // Capture the phase before stopping so the event and memo report where
        // the run actually was when the tick budget ran out.
        const phaseAtStop = finalState.phase;
        // Mark as stopped with max_ticks_reached reason (resumable, not a failure)
        finalState = stopRun(finalState, 'max_ticks_reached');
        options.runStore.writeState(finalState);
        options.runStore.appendEvent({
          type: 'max_ticks_reached',
          source: 'supervisor',
          payload: {
            ticks_used: ticksUsed,
            max_ticks: options.maxTicks,
            phase: phaseAtStop,
            milestone_index: finalState.milestone_index,
            milestones_total: finalState.milestones.length
          }
        });
        const memo = buildStructuredStopMemo({
          reason: 'max_ticks_reached',
          runId: finalState.run_id,
          phase: phaseAtStop,
          milestoneIndex: finalState.milestone_index,
          milestonesTotal: finalState.milestones.length,
          lastError: finalState.last_error,
          suggestedTicks: Math.ceil(options.maxTicks * 1.5)
        });
        writeStopMemo(options.runStore, memo);
        console.log(`\nMax ticks reached (${ticksUsed}/${options.maxTicks}) at milestone ${finalState.milestone_index + 1}/${finalState.milestones.length}.`);
        console.log(`Tip: ~5 ticks per milestone. Use \`agent resume ${finalState.run_id} --max-ticks ${Math.ceil(options.maxTicks * 1.5)}\` to continue.\n`);
      }
    }
  }
  finally {
    // Always stop the watchdog, even when a phase handler throws.
    clearInterval(watchdog);
  }
}
581
/**
 * Run the handler that belongs to the state's current phase and return the
 * resulting state.
 *
 * INIT has no handler of its own: it moves the run to PLAN, or straight to
 * IMPLEMENT in fast mode (--fast), recording a `fast_path_skip` event.
 * The VERIFY -> CHECKPOINT shortcut of fast mode is not decided here.
 * A phase without a handler leaves the state untouched.
 *
 * @param state - Current run state; `state.phase` selects the handler.
 * @param options - Supervisor options passed through to the handler.
 * @returns The state produced by the handler, or `state` itself for unknown phases.
 */
async function runPhase(state, options) {
  const { phase } = state;

  if (phase === 'INIT') {
    if (!options.fast) {
      return updatePhase(state, 'PLAN');
    }
    // Fast path: skip PLAN, go directly to IMPLEMENT
    options.runStore.appendEvent({
      type: 'fast_path_skip',
      source: 'supervisor',
      payload: { skipped_phase: 'PLAN', reason: 'fast_mode' }
    });
    return updatePhase(state, 'IMPLEMENT');
  }

  // Thunks keep the lookup lazy: only the selected handler is ever referenced.
  const dispatch = new Map([
    ['PLAN', () => handlePlan(state, options)],
    ['IMPLEMENT', () => handleImplement(state, options)],
    ['VERIFY', () => handleVerify(state, options)],
    ['REVIEW', () => handleReview(state, options)],
    ['CHECKPOINT', () => handleCheckpoint(state, options)],
    ['FINALIZE', () => handleFinalize(state, options)]
  ]);
  const runHandler = dispatch.get(phase);
  return runHandler ? runHandler() : state;
}
618
/**
 * PLAN phase handler.
 *
 * Asks the configured planner worker for a milestone plan and gates the
 * result before accepting it:
 *   1. every `files_expected` entry must be covered by the scope allowlist;
 *   2. unless `options.forceParallel` is set, the predicted files must not
 *      collide with files claimed by other active runs.
 * An accepted plan is persisted through `runStore.writePlan` and the run
 * moves on to IMPLEMENT. Every failure path stops the run with a reason.
 *
 * @param {object} state - Current run state (reads `scope_lock`, `run_id`, `worker_stats`).
 * @param {object} options - Supervisor options (`runStore`, `config`, `repoPath`, `taskText`, `forceParallel`).
 * @returns {Promise<object>} The next run state.
 */
async function handlePlan(state, options) {
    const { runStore } = options;
    runStore.appendEvent({
        type: 'phase_start',
        source: 'supervisor',
        payload: { phase: 'PLAN' }
    });
    const allowlist = state.scope_lock.allowlist;
    const planPrompt = buildPlanPrompt({
        taskText: options.taskText,
        scopeAllowlist: allowlist
    });
    const workerResult = await callWorkerJson({
        prompt: planPrompt,
        repoPath: options.repoPath,
        workerType: options.config.phases.plan,
        workers: options.config.workers,
        schema: planOutputSchema,
        runStore,
        stage: 'plan'
    });
    // The run may have been stopped (e.g. by the stall watchdog) while the
    // worker was still busy; in that case its result is discarded.
    const stoppedState = checkForLateResult(options, 'plan', workerResult.worker);
    if (stoppedState) {
        return stoppedState;
    }
    if (!workerResult.data) {
        runStore.appendEvent({
            type: 'parse_failed',
            source: workerResult.worker,
            payload: {
                stage: 'plan',
                parser_context: 'plan',
                retry_count: workerResult.retry_count ?? 0,
                error: workerResult.error,
                output_snippet: snippet(workerResult.output)
            }
        });
        return stopWithError(state, options, 'plan_parse_failed', workerResult.error ?? 'Unknown error');
    }
    const plan = workerResult.data;
    // Gate 1: the planner may only predict files inside the allowlist.
    const outOfScope = validateFilesExpected(plan.milestones, allowlist);
    if (outOfScope.length > 0) {
        // Strip everything from the first `*` of the first pattern to get a
        // human-readable root prefix for the event payload.
        const expectedPrefix = allowlist[0]?.replace(/\*.*$/, '') || '';
        const presetHints = suggestPresetsForViolations(outOfScope);
        runStore.appendEvent({
            type: 'plan_scope_violation',
            source: 'supervisor',
            payload: {
                violations: outOfScope,
                allowlist,
                expected_prefix: expectedPrefix,
                suggested_presets: presetHints,
                hint: 'All files_expected must start with a path matching allowlist patterns'
            }
        });
        const presetAdvice = presetHints.length > 0
            ? `. Try adding presets: [${presetHints.join(', ')}] to scope.presets in agent.config.json`
            : '';
        return stopWithError(state, options, 'plan_scope_violation', `Planner produced files_expected outside allowlist: ${outOfScope.join(', ')}${presetAdvice}`);
    }
    // Gate 2: post-PLAN collision check against other active runs.
    if (!options.forceParallel) {
        const predictedFiles = [];
        for (const { files_expected: files } of plan.milestones) {
            if (files) {
                predictedFiles.push(...files);
            }
        }
        // The current run id is passed so this run is excluded from the result.
        const otherRuns = getActiveRuns(options.repoPath, state.run_id);
        if (otherRuns.length > 0 && predictedFiles.length > 0) {
            const collisions = checkFileCollisions(predictedFiles, otherRuns);
            if (collisions.length > 0) {
                runStore.appendEvent({
                    type: 'parallel_file_collision',
                    source: 'supervisor',
                    payload: {
                        stage: 'post_plan',
                        predicted_files: predictedFiles,
                        collisions: collisions.map((hit) => ({
                            run_id: hit.runId,
                            colliding_files: hit.collidingFiles,
                            run_phase: hit.phase,
                            run_updated_at: hit.updatedAt
                        }))
                    }
                });
                // One entry per colliding run, showing at most three files each.
                const describeCollision = ({ runId, collidingFiles }) => {
                    const shown = collidingFiles.slice(0, 3).join(', ');
                    const overflow = collidingFiles.length > 3
                        ? ` (+${collidingFiles.length - 3} more)`
                        : '';
                    return `Run ${runId}: ${shown}${overflow}`;
                };
                const collisionSummary = collisions.map(describeCollision).join('; ');
                console.error(`\n${formatFileCollisionError(collisions)}`);
                return stopWithError(state, options, 'parallel_file_collision', `File collision detected with active runs: ${collisionSummary}`);
            }
        }
    }
    const plannedState = {
        ...state,
        milestones: plan.milestones,
        worker_stats: incrementWorkerStats(state.worker_stats, workerResult.worker, 'plan')
    };
    runStore.writePlan(JSON.stringify(plan, null, 2));
    runStore.appendEvent({
        type: 'plan_generated',
        source: workerResult.worker,
        payload: plan
    });
    return updatePhase(plannedState, 'IMPLEMENT');
}
736
/**
 * IMPLEMENT phase handler.
 *
 * Runs the implementer worker against the current milestone. When the
 * previous VERIFY attempt failed, its details are passed along as fix
 * instructions. Afterwards:
 *   - a `no_changes_needed` answer is only accepted with validated evidence,
 *     and then jumps straight to CHECKPOINT (there is nothing to verify);
 *   - any other non-`ok` status stops the run as `implement_blocked`;
 *   - an `ok` answer is checked against the scope and lockfile guards and,
 *     when `options.ownedPaths` is non-empty, the ownership guard, before
 *     the run continues to VERIFY.
 * The worker's handoff memo is written as soon as its output parses.
 *
 * @param {object} state - Current run state.
 * @param {object} options - Supervisor options (`runStore`, `config`, `repoPath`, `taskText`, `allowDeps`, `ownedPaths`).
 * @returns {Promise<object>} The next run state.
 */
async function handleImplement(state, options) {
    const { runStore } = options;
    runStore.appendEvent({
        type: 'phase_start',
        source: 'supervisor',
        payload: { phase: 'IMPLEMENT' }
    });
    const milestone = state.milestones[state.milestone_index];
    if (!milestone) {
        return stopWithError(state, options, 'milestone_missing', 'No milestone found.');
    }
    const scopeLock = state.scope_lock;
    // The context pack is opt-in through the CONTEXT_PACK env var so that it
    // needs no config schema change.
    let contextPack = null;
    if (process.env.CONTEXT_PACK === '1') {
        // v1 reference extraction: plain substring matching on the task text.
        const loweredTask = options.taskText.toLowerCase();
        const references = [];
        if (loweredTask.includes('rng') && loweredTask.includes('deckbuilder')) {
            references.push({ pattern: 'RNG pattern from deckbuilder' });
        }
        if (loweredTask.includes('rng pattern')) {
            references.push({ pattern: 'RNG pattern' });
        }
        contextPack = buildContextPack({
            repoRoot: options.repoPath,
            targetRoot: scopeLock.allowlist[0]?.replace('/**', '') ?? options.repoPath,
            config: {
                verification: options.config.verification,
                scope: {
                    allowlist: scopeLock.allowlist,
                    denylist: scopeLock.denylist
                },
                lockfiles: options.config.scope?.lockfiles
            },
            references,
            allowDeps: options.allowDeps
        });
    }
    // Written in both cases: with the pack when enabled, with null otherwise.
    writeContextPackArtifact(runStore.path, contextPack);
    const previousFailure = state.last_verify_failure;
    const implementPrompt = buildImplementPrompt({
        milestone,
        scopeAllowlist: scopeLock.allowlist,
        scopeDenylist: scopeLock.denylist,
        allowDeps: options.allowDeps,
        contextPack: contextPack ? formatContextPackForPrompt(contextPack) : undefined,
        fixInstructions: previousFailure
            ? {
                failedCommand: previousFailure.failedCommand,
                errorOutput: previousFailure.errorOutput,
                changedFiles: previousFailure.changedFiles,
                attemptNumber: state.milestone_retries + 1
            }
            : undefined
    });
    const workerResult = await callWorkerJson({
        prompt: implementPrompt,
        repoPath: options.repoPath,
        workerType: options.config.phases.implement,
        workers: options.config.workers,
        schema: implementerOutputSchema,
        runStore,
        stage: 'implement'
    });
    // The run may have been stopped (e.g. by the stall watchdog) while the
    // worker was still busy; in that case its result is discarded.
    const stoppedState = checkForLateResult(options, 'implement', workerResult.worker);
    if (stoppedState) {
        return stoppedState;
    }
    if (!workerResult.data) {
        runStore.appendEvent({
            type: 'parse_failed',
            source: workerResult.worker,
            payload: {
                stage: 'implement',
                parser_context: 'implement',
                retry_count: workerResult.retry_count ?? 0,
                error: workerResult.error,
                output_snippet: snippet(workerResult.output)
            }
        });
        return stopWithError(state, options, 'implement_parse_failed', workerResult.error ?? 'Unknown error');
    }
    const implementer = workerResult.data;
    const milestoneTag = String(state.milestone_index + 1).padStart(2, '0');
    runStore.writeMemo(`milestone_${milestoneTag}_handoff.md`, implementer.handoff_memo);
    // Both success exits carry the same worker-stats increment.
    const withImplementStats = () => ({
        ...state,
        worker_stats: incrementWorkerStats(state.worker_stats, workerResult.worker, 'implement')
    });
    if (implementer.status === 'no_changes_needed') {
        const evidenceResult = validateNoChangesEvidence(implementer.evidence, scopeLock.allowlist);
        if (!evidenceResult.ok) {
            runStore.appendEvent({
                type: 'no_changes_evidence_failed',
                source: workerResult.worker,
                payload: {
                    errors: evidenceResult.errors,
                    evidence_provided: implementer.evidence ?? null
                }
            });
            return stopWithError(state, options, 'insufficient_evidence', formatEvidenceErrors(evidenceResult));
        }
        runStore.appendEvent({
            type: 'no_changes_evidence_ok',
            source: workerResult.worker,
            payload: {
                satisfied_by: evidenceResult.satisfied_by,
                evidence: implementer.evidence
            }
        });
        runStore.appendEvent({
            type: 'implement_complete',
            source: workerResult.worker,
            payload: {
                changed_files: [],
                handoff_memo: implementer.handoff_memo,
                no_changes_needed: true,
                evidence_satisfied_by: evidenceResult.satisfied_by
            }
        });
        // Nothing changed, so VERIFY is skipped.
        return updatePhase(withImplementStats(), 'CHECKPOINT');
    }
    if (implementer.status !== 'ok') {
        return stopWithError(state, options, 'implement_blocked', implementer.handoff_memo);
    }
    const changedFiles = await listChangedFiles(options.repoPath);
    const scopeCheck = checkScope(changedFiles, scopeLock.allowlist, scopeLock.denylist);
    const lockfileCheck = checkLockfiles(changedFiles, options.config.scope.lockfiles, options.allowDeps);
    if (!(scopeCheck.ok && lockfileCheck.ok)) {
        const violations = [...scopeCheck.violations, ...lockfileCheck.violations];
        let guardMessage = 'Guard violation detected.';
        if (violations.length > 0) {
            const overflow = violations.length > 5 ? ` (+${violations.length - 5} more)` : '';
            guardMessage = `Guard violation: ${violations.slice(0, 5).join(', ')}${overflow}`;
        }
        runStore.appendEvent({
            type: 'guard_violation',
            source: 'supervisor',
            payload: {
                scope_violations: scopeCheck.violations,
                lockfile_violations: lockfileCheck.violations
            }
        });
        // This path writes a structured stop memo instead of the default one,
        // which is why it does not go through stopWithError().
        const structuredMemo = buildStructuredStopMemo({
            reason: 'guard_violation',
            runId: state.run_id,
            phase: state.phase,
            milestoneIndex: state.milestone_index,
            milestonesTotal: state.milestones.length,
            lastError: guardMessage,
            scopeViolations: scopeCheck.violations,
            lockfileViolations: lockfileCheck.violations
        });
        const stoppedForGuard = stopRun({
            ...state,
            last_error: guardMessage
        }, 'guard_violation');
        runStore.appendEvent({
            type: 'stop',
            source: 'supervisor',
            payload: { reason: 'guard_violation', error: guardMessage }
        });
        writeStopMemo(runStore, structuredMemo);
        return stoppedForGuard;
    }
    // Ownership is enforced only when owned paths were declared.
    if (options.ownedPaths && options.ownedPaths.length > 0) {
        const ownershipCheck = checkOwnership(changedFiles, options.ownedPaths, options.config.scope.env_allowlist ?? []);
        if (!ownershipCheck.ok) {
            runStore.appendEvent({
                type: 'ownership_violation',
                source: 'supervisor',
                payload: {
                    owned_paths: ownershipCheck.owned_paths,
                    semantic_changed: ownershipCheck.semantic_changed,
                    violating_files: ownershipCheck.violating_files
                }
            });
            return stopWithError(state, options, 'ownership_violation', `Task modified files outside declared ownership: ${ownershipCheck.violating_files.join(', ')}`);
        }
    }
    runStore.appendEvent({
        type: 'implement_complete',
        source: workerResult.worker,
        payload: {
            changed_files: changedFiles,
            handoff_memo: implementer.handoff_memo
        }
    });
    return updatePhase(withImplementStats(), 'VERIFY');
}
942
/**
 * VERIFY phase handler.
 *
 * Selects verification tiers from the changed files and the milestone's
 * risk level, then runs each tier's commands in order within the
 * per-milestone time budget. Each executed tier's output is written to a
 * `tests_<tier>.log` artifact.
 *
 * On the first failing tier the run goes back to IMPLEMENT with the failure
 * recorded in `last_verify_failure`, unless `milestone_retries` has already
 * reached MAX_MILESTONE_RETRIES, in which case the run stops. When every
 * executed tier passes, the collected evidence is stored in
 * `last_verification_evidence` and the run continues to REVIEW, or directly
 * to CHECKPOINT in fast mode.
 *
 * @param {object} state - Current run state.
 * @param {object} options - Supervisor options (`runStore`, `config`, `repoPath`, `fast`).
 * @returns {Promise<object>} The next run state.
 */
async function handleVerify(state, options) {
    const { runStore } = options;
    runStore.appendEvent({
        type: 'phase_start',
        source: 'supervisor',
        payload: { phase: 'VERIFY' }
    });
    const verification = options.config.verification;
    const changedFiles = await listChangedFiles(options.repoPath);
    const isLastMilestone = state.milestone_index === state.milestones.length - 1;
    const selection = selectTiersWithReasons(verification, {
        changed_files: changedFiles,
        risk_level: state.milestones[state.milestone_index]?.risk_level ?? 'medium',
        is_milestone_end: isLastMilestone,
        is_run_end: isLastMilestone
    });
    const results = [];
    const startedAt = Date.now();
    // Commands run from the configured sub-directory, or the repo root.
    const verifyCwd = verification.cwd
        ? path.join(options.repoPath, verification.cwd)
        : options.repoPath;
    // Evidence bookkeeping handed to the REVIEW phase.
    const allCommandsRequired = [];
    const allCommandsRun = [];
    const tiersRun = [];
    for (const tier of selection.tiers) {
        const elapsedSeconds = (Date.now() - startedAt) / 1000;
        const remainingSeconds = verification.max_verify_time_per_milestone - elapsedSeconds;
        if (remainingSeconds <= 0) {
            // NOTE(review): this tier and any later ones never reach
            // allCommandsRequired, so their commands are not reported in
            // commands_missing below. Confirm that is intended.
            results.push(`Tier ${tier} skipped: time budget exceeded.`);
            break;
        }
        const commands = commandsForTier(verification, tier);
        allCommandsRequired.push(...commands);
        if (commands.length === 0) {
            results.push(`Tier ${tier}: no commands configured.`);
            continue;
        }
        tiersRun.push(tier);
        const verifyResult = await runVerification(tier, commands, verifyCwd, Math.floor(remainingSeconds));
        for (const { command, exit_code } of verifyResult.command_results) {
            allCommandsRun.push({ command, exit_code });
        }
        runStore.writeArtifact(`tests_${tier}.log`, verifyResult.output);
        results.push(`Tier ${tier}: ${verifyResult.ok ? 'ok' : 'failed'}`);
        runStore.appendEvent({
            type: 'verification',
            source: 'verifier',
            payload: {
                tier,
                ok: verifyResult.ok,
                commands,
                command_results: verifyResult.command_results,
                duration_ms: verifyResult.duration_ms
            }
        });
        if (verifyResult.ok) {
            continue;
        }
        // First failing tier ends the phase: either give up or retry.
        if (state.milestone_retries >= MAX_MILESTONE_RETRIES) {
            runStore.appendEvent({
                type: 'verify_failed_max_retries',
                source: 'verifier',
                payload: {
                    tier,
                    retries: state.milestone_retries,
                    max_retries: MAX_MILESTONE_RETRIES
                }
            });
            return stopWithError(state, options, 'verification_failed_max_retries', verifyResult.output);
        }
        // Re-list the changed files after the commands ran for the retry record.
        const filesAtFailure = await listChangedFiles(options.repoPath);
        const failedCommand = commands.join(' && ');
        const nextRetry = state.milestone_retries + 1;
        runStore.appendEvent({
            type: 'verify_failed_retry',
            source: 'verifier',
            payload: {
                tier,
                failed_command: failedCommand,
                retry_count: nextRetry,
                max_retries: MAX_MILESTONE_RETRIES
            }
        });
        return updatePhase({
            ...state,
            milestone_retries: nextRetry,
            last_verify_failure: {
                failedCommand,
                errorOutput: verifyResult.output,
                changedFiles: filesAtFailure,
                tier
            }
        }, 'IMPLEMENT');
    }
    // Required commands that never produced a result.
    const executed = new Set(allCommandsRun.map((entry) => entry.command));
    const commandsMissing = allCommandsRequired.filter((command) => !executed.has(command));
    const verificationEvidence = {
        commands_required: allCommandsRequired,
        commands_run: allCommandsRun,
        commands_missing: commandsMissing,
        tiers_run: tiersRun
    };
    runStore.appendEvent({
        type: 'verify_complete',
        source: 'verifier',
        payload: {
            results,
            tier_reasons: selection.reasons,
            verification_evidence: verificationEvidence
        }
    });
    // Success clears any earlier failure record and stores the evidence.
    const verifiedState = {
        ...state,
        last_verify_failure: undefined,
        last_verification_evidence: verificationEvidence
    };
    if (!options.fast) {
        return updatePhase(verifiedState, 'REVIEW');
    }
    // Fast mode skips REVIEW.
    runStore.appendEvent({
        type: 'fast_path_skip',
        source: 'supervisor',
        payload: { skipped_phase: 'REVIEW', reason: 'fast_mode' }
    });
    return updatePhase(verifiedState, 'CHECKPOINT');
}
1084
/**
 * REVIEW phase handler.
 *
 * Gives the reviewer worker the working-tree diff (stat summary plus a diff
 * body truncated to 8000 characters), the tier0 verification log when it
 * exists, and a verification summary with a single `evidence_gates_passed`
 * flag. An approving review moves the run to CHECKPOINT. A
 * `request_changes` or `reject` review sends it back to IMPLEMENT, unless a
 * review loop is detected (the same feedback twice in a row, or more rounds
 * than `config.resilience.max_review_rounds`, default 2), in which case the
 * run stops and a `review_digest.md` memo is written.
 *
 * @param {object} state - Current run state.
 * @param {object} options - Supervisor options (`runStore`, `config`, `repoPath`).
 * @returns {Promise<object>} The next run state.
 */
async function handleReview(state, options) {
    options.runStore.appendEvent({
        type: 'phase_start',
        source: 'supervisor',
        payload: { phase: 'REVIEW' }
    });
    const milestone = state.milestones[state.milestone_index];
    if (!milestone) {
        return stopWithError(state, options, 'milestone_missing', 'No milestone found.');
    }
    // Intent-to-add untracked files so `git diff` shows their content and
    // the reviewer sees real file contents, not just "untracked: foo.ts".
    await git(['add', '-N', '.'], options.repoPath);
    const diffSummary = await git(['diff', '--stat'], options.repoPath);
    const diffContent = await git(['diff'], options.repoPath);
    // Truncate the diff body to avoid overwhelming the reviewer.
    const truncatedDiff = diffContent.stdout.length > 8000
        ? diffContent.stdout.slice(0, 8000) + '\n... (truncated)'
        : diffContent.stdout;
    // NOTE(review): only the tier0 log is read here, even when later tiers ran.
    const verifyLogPath = path.join(options.runStore.path, 'artifacts', 'tests_tier0.log');
    const verificationOutput = fs.existsSync(verifyLogPath)
        ? fs.readFileSync(verifyLogPath, 'utf-8')
        : '';
    // Build the verification summary used for evidence gating.
    const filesExpected = milestone.files_expected ?? [];
    const filesExist = filesExpected.map((file) => ({
        path: file,
        exists: fs.existsSync(path.join(options.repoPath, file))
    }));
    const verificationEvidence = state.last_verification_evidence;
    // Without stored evidence the gate must fail, hence the placeholder entry.
    const commandsMissing = verificationEvidence?.commands_missing ?? ['(no verification evidence available)'];
    const allCommandsPassed = verificationEvidence?.commands_run?.every((c) => c.exit_code === 0) ?? false;
    const allFilesExist = filesExist.every((f) => f.exists);
    // `every` is true for an empty list, so at least one executed command
    // is required explicitly.
    const evidenceGatesPassed = commandsMissing.length === 0 &&
        allCommandsPassed &&
        allFilesExist &&
        (verificationEvidence?.commands_run?.length ?? 0) > 0;
    const verificationSummary = {
        evidence_gates_passed: evidenceGatesPassed,
        commands_required: verificationEvidence?.commands_required ?? [],
        commands_run: verificationEvidence?.commands_run ?? [],
        commands_missing: commandsMissing,
        files_expected: filesExpected,
        files_exist: filesExist
    };
    // Drop empty parts first, then separate the stat summary from the diff
    // body with a blank line. Previously the '' spacer sat inside the array
    // and was removed by filter(Boolean), so the separator was never emitted.
    const combinedDiff = [diffSummary.stdout.trim(), truncatedDiff]
        .filter(Boolean)
        .join('\n\n');
    const prompt = buildReviewPrompt({
        milestone,
        diffSummary: combinedDiff,
        verificationOutput,
        verificationSummary
    });
    const reviewWorker = options.config.phases.review;
    const parsed = await callWorkerJson({
        prompt,
        repoPath: options.repoPath,
        workerType: reviewWorker,
        workers: options.config.workers,
        schema: reviewOutputSchema,
        runStore: options.runStore,
        stage: 'review'
    });
    // The run may have been stopped (e.g. by the stall watchdog) while the
    // worker was still busy; in that case its result is discarded.
    const lateStopReview = checkForLateResult(options, 'review', parsed.worker);
    if (lateStopReview) {
        return lateStopReview;
    }
    if (!parsed.data) {
        options.runStore.appendEvent({
            type: 'parse_failed',
            source: parsed.worker,
            payload: {
                stage: 'review',
                parser_context: 'review',
                retry_count: parsed.retry_count ?? 0,
                error: parsed.error,
                output_snippet: snippet(parsed.output)
            }
        });
        return stopWithError(state, options, 'review_parse_failed', parsed.error ?? 'Unknown error');
    }
    const review = parsed.data;
    options.runStore.appendEvent({
        type: 'review_complete',
        source: parsed.worker,
        payload: review
    });
    const updatedWithStats = {
        ...state,
        worker_stats: incrementWorkerStats(state.worker_stats, parsed.worker, 'review')
    };
    if (review.status === 'request_changes' || review.status === 'reject') {
        // Fingerprint the requested changes so repeated feedback can be detected.
        const changesText = review.changes.join('\n');
        const fingerprint = crypto.createHash('sha256').update(changesText).digest('hex').slice(0, 16);
        const currentRounds = (updatedWithStats.review_rounds ?? 0) + 1;
        const maxRounds = options.config.resilience?.max_review_rounds ?? 2;
        const lastFingerprint = updatedWithStats.last_review_fingerprint;
        // Loop: same fingerprint twice in a row OR more rounds than allowed.
        const sameFingerprint = lastFingerprint === fingerprint;
        const exceededRounds = currentRounds > maxRounds;
        if (sameFingerprint || exceededRounds) {
            const reason = sameFingerprint ? 'identical_review_feedback' : 'max_review_rounds_exceeded';
            options.runStore.appendEvent({
                type: 'review_loop_detected',
                source: 'supervisor',
                payload: {
                    milestone_index: state.milestone_index,
                    review_rounds: currentRounds,
                    max_review_rounds: maxRounds,
                    same_fingerprint: sameFingerprint,
                    last_changes: review.changes.slice(0, 2) // First 2 items for context
                }
            });
            // Write a review digest for debugging.
            const digestLines = [
                '# Review Digest',
                '',
                `**Milestone:** ${state.milestone_index + 1} of ${state.milestones.length}`,
                `**Review Rounds:** ${currentRounds}`,
                `**Stop Reason:** ${reason}`,
                '',
                '## Last Requested Changes',
                '',
                ...review.changes.map((change, i) => `${i + 1}. ${change}`),
                '',
                '## Status',
                `- **Verdict:** ${review.status}`
            ];
            options.runStore.writeMemo('review_digest.md', digestLines.join('\n'));
            const errorMsg = sameFingerprint
                ? `Identical review feedback detected after ${currentRounds} rounds. Manual intervention required.`
                : `Review loop detected after ${currentRounds} rounds (max: ${maxRounds}). Manual intervention required.`;
            return stopWithError(updatedWithStats, options, 'review_loop_detected', errorMsg);
        }
        // Remember this round so the next review can be compared against it.
        const stateWithReviewTracking = {
            ...updatedWithStats,
            review_rounds: currentRounds,
            last_review_fingerprint: fingerprint
        };
        options.runStore.writeMemo(`milestone_${String(state.milestone_index + 1).padStart(2, '0')}_review.md`, changesText);
        return updatePhase(stateWithReviewTracking, 'IMPLEMENT');
    }
    return updatePhase(updatedWithStats, 'CHECKPOINT');
}
1236
/**
 * CHECKPOINT phase handler.
 *
 * Commits any pending working-tree changes under a standard message,
 * records the resulting HEAD sha, resets the per-milestone retry and review
 * counters, and advances the milestone index. The run continues with
 * IMPLEMENT while milestones remain and with FINALIZE once the last one is
 * done.
 *
 * @param {object} state - Current run state.
 * @param {object} options - Supervisor options (`runStore`, `repoPath`).
 * @returns {Promise<object>} The next run state.
 */
async function handleCheckpoint(state, options) {
    const { runStore, repoPath } = options;
    runStore.appendEvent({
        type: 'phase_start',
        source: 'supervisor',
        payload: { phase: 'CHECKPOINT' }
    });
    const finishedIndex = state.milestone_index;
    const porcelain = await git(['status', '--porcelain'], repoPath);
    const hasPendingChanges = porcelain.stdout.trim().length > 0;
    if (hasPendingChanges) {
        // NOTE(review): the results of add/commit are not inspected here;
        // confirm git() surfaces failures on its own.
        await git(['add', '-A'], repoPath);
        await git(['commit', '-m', `chore(agent): checkpoint milestone ${finishedIndex + 1}`], repoPath);
    }
    const head = await git(['rev-parse', 'HEAD'], repoPath);
    const nextIndex = finishedIndex + 1;
    const advanced = {
        ...state,
        checkpoint_commit_sha: head.stdout.trim(),
        milestone_index: nextIndex,
        // Per-milestone bookkeeping starts fresh for the next milestone.
        milestone_retries: 0,
        last_verify_failure: undefined,
        review_rounds: 0,
        last_review_fingerprint: undefined
    };
    runStore.appendEvent({
        type: 'checkpoint_complete',
        source: 'supervisor',
        payload: {
            commit: advanced.checkpoint_commit_sha,
            milestone_index: finishedIndex
        }
    });
    const nextPhase = nextIndex >= advanced.milestones.length ? 'FINALIZE' : 'IMPLEMENT';
    return updatePhase(advanced, nextPhase);
}
1277
/**
 * FINALIZE phase handler.
 *
 * Writes the markdown run summary with a per-worker call table, emits a
 * `worker_stats` event, writes the `complete.json` memo and the default
 * stop memo, then stops the run with reason `complete`.
 *
 * @param {object} state - Current run state (reads `worker_stats`, `milestones`, `run_id`).
 * @param {object} options - Supervisor options (`runStore`).
 * @returns {Promise<object>} The stopped run state.
 */
async function handleFinalize(state, options) {
    const { runStore } = options;
    runStore.appendEvent({
        type: 'phase_start',
        source: 'supervisor',
        payload: { phase: 'FINALIZE' }
    });
    const stats = state.worker_stats;
    // One table row per worker: total calls followed by the per-phase counts.
    const statsRow = (label, key) => `| ${label} | ${stats[key]} | ${stats.by_phase.plan[key]} | ${stats.by_phase.implement[key]} | ${stats.by_phase.review[key]} |`;
    const summaryLines = [
        '# Summary',
        '',
        'Run completed.',
        '',
        '## Worker Stats',
        '',
        '| Worker | Total | Plan | Implement | Review |',
        '|--------|-------|------|-----------|--------|',
        statsRow('Claude', 'claude'),
        statsRow('Codex', 'codex')
    ];
    runStore.writeSummary(summaryLines.join('\n'));
    // Stats are also emitted as an event so they can be queried directly.
    runStore.appendEvent({
        type: 'worker_stats',
        source: 'supervisor',
        payload: stats
    });
    // Completion artifact for meta-agent coordination.
    const milestoneCount = state.milestones.length;
    const completion = {
        run_id: state.run_id,
        status: 'complete',
        phase: 'FINALIZE',
        progress: {
            milestone: milestoneCount,
            of: milestoneCount
        },
        worker_stats: stats,
        ts: new Date().toISOString()
    };
    runStore.writeMemo('complete.json', JSON.stringify(completion, null, 2));
    writeStopMemo(runStore, DEFAULT_STOP_MEMO);
    return stopRun(state, 'complete');
}
1323
/**
 * Stop the run because of an error.
 *
 * Stores `error` as `last_error` on the state, stops the run with `reason`,
 * appends a `stop` event carrying both values, and writes the default stop
 * memo.
 *
 * @param {object} state - Current run state.
 * @param {object} options - Supervisor options (`runStore`).
 * @param {string} reason - Stop reason recorded on the run and in the event.
 * @param {string} error - Error text stored as `last_error`.
 * @returns {object} The stopped run state.
 */
function stopWithError(state, options, reason, error) {
    const { runStore } = options;
    const failedState = { ...state, last_error: error };
    const stoppedState = stopRun(failedState, reason);
    const stopEvent = {
        type: 'stop',
        source: 'supervisor',
        payload: { reason, error }
    };
    runStore.appendEvent(stopEvent);
    writeStopMemo(runStore, DEFAULT_STOP_MEMO);
    return stoppedState;
}
1336
/**
 * Write the stop memo for a run.
 *
 * @param {object} runStore - Run store; its `writeMemo` method is called.
 * @param {string} content - Memo text stored under the name `stop.md`.
 */
function writeStopMemo(runStore, content) {
    const memoName = 'stop.md';
    runStore.writeMemo(memoName, content);
}
1339
/**
 * Resolve after roughly `ms` milliseconds.
 *
 * @param {number} ms - Delay handed to setTimeout.
 * @returns {Promise<void>} Promise that resolves once the timer fires.
 */
function sleep(ms) {
    return new Promise((wake) => {
        setTimeout(wake, ms);
    });
}
1342
/**
 * Add random jitter of 0-50% on top of a base delay.
 *
 * @param {number} baseMs - Base delay in milliseconds.
 * @returns {number} A value in the range [baseMs, baseMs * 1.5).
 */
function jitter(baseMs) {
    const extraMs = Math.random() * baseMs * 0.5;
    return baseMs + extraMs;
}
1346
/**
 * Detect a worker result that arrived after the run was already stopped
 * (for example by the stall watchdog).
 *
 * Re-reads the persisted state. If its phase is STOPPED, a
 * `late_worker_result_ignored` event is appended and that state is
 * returned so the caller can bail out; otherwise null is returned.
 *
 * @param {object} options - Supervisor options (`runStore`).
 * @param {string} stage - Stage name recorded in the event payload.
 * @param {string} workerType - Worker recorded in the event payload.
 * @returns {object|null} The stopped state, or null when the run is still active.
 */
function checkForLateResult(options, stage, workerType) {
    const { runStore } = options;
    const persisted = runStore.readState();
    if (persisted.phase !== 'STOPPED') {
        return null;
    }
    runStore.appendEvent({
        type: 'late_worker_result_ignored',
        source: 'supervisor',
        payload: { stage, worker: workerType }
    });
    return persisted;
}
1363
// Base delays (ms) applied before each parse retry: 250ms, then 1s. The
// array length is also the maximum number of retries per worker, and each
// delay is passed through jitter() before sleeping. Frozen because it is
// shared module-level state that is only ever read.
const RETRY_DELAYS_MS = Object.freeze([250, 1000]);
1365
/**
 * Classify worker output as an infrastructure failure by keyword.
 *
 * Matching is a case-insensitive substring search. Categories are tried in
 * the order auth, network, rate_limit, and the first one with a matching
 * keyword wins.
 *
 * @param {string} output - Raw worker output.
 * @returns {'auth'|'network'|'rate_limit'|'unknown'} The detected category.
 */
function classifyInfraOutput(output) {
    const haystack = output.toLowerCase();
    const categories = [
        ['auth', ['oauth', 'token expired', 'authentication', 'login', '401', 'unauthorized', 'not authenticated', 'sign in']],
        ['network', ['enotfound', 'econnrefused', 'network', 'timeout', 'econnreset', 'socket']],
        ['rate_limit', ['rate limit', '429', 'too many requests', 'quota']]
    ];
    for (const [category, keywords] of categories) {
        if (keywords.some((keyword) => haystack.includes(keyword))) {
            return category;
        }
    }
    return 'unknown';
}
1387
/**
 * Map worker output to the reason reported when falling back to another
 * worker. Empty output, or output with no recognised infrastructure
 * keyword, is reported as a plain `parse` failure.
 *
 * @param {string|undefined} output - Raw worker output, if any.
 * @returns {string} `'parse'` or the category from classifyInfraOutput.
 */
function resolveInfraReason(output) {
    const detected = output ? classifyInfraOutput(output) : 'unknown';
    return detected !== 'unknown' ? detected : 'parse';
}
1393
/**
 * Call one worker until its output parses against the schema.
 *
 * Makes up to `RETRY_DELAYS_MS.length + 1` attempts. The first attempt uses
 * `input.prompt`; later ones use `input.retryPrompt` after a jittered
 * delay. The loop ends early when the persisted run state is STOPPED,
 * checked before each retry and after each worker call. When no attempt
 * parses, the collected raw outputs are logged to an artifact and an error
 * result is returned.
 *
 * @param {object} input - `prompt`, `retryPrompt`, `repoPath`, `workerType`, `workers`, `schema`, `runStore`, `stage`.
 * @returns {Promise<object>} `{ data, output, retry_count, rawOutputs, worker }` on success,
 *   `{ error, output, rawOutputs, retry_count, worker }` on failure.
 */
async function runWorkerWithRetries(input) {
    const { runStore, workerType, stage } = input;
    const workerConfig = input.workers[workerType];
    // The mock worker takes precedence over both real workers when enabled.
    let invokeWorker;
    if (isMockWorkerEnabled()) {
        invokeWorker = runMockWorker;
    } else if (workerType === 'claude') {
        invokeWorker = runClaude;
    } else {
        invokeWorker = runCodex;
    }
    const runIsStopped = () => runStore.readState().phase === 'STOPPED';
    const maxAttempts = RETRY_DELAYS_MS.length + 1;
    const rawOutputs = [];
    let lastError;
    let lastOutput;
    for (let attempt = 0; attempt < maxAttempts; attempt += 1) {
        const isRetry = attempt > 0;
        if (isRetry) {
            // Do not retry a run the watchdog has already stopped.
            if (runIsStopped()) {
                break;
            }
            await sleep(jitter(RETRY_DELAYS_MS[attempt - 1]));
        }
        runStore.recordWorkerCall({
            worker: workerType,
            stage,
            attempt: attempt + 1,
            at: new Date().toISOString()
        });
        const runResult = await invokeWorker({
            prompt: isRetry ? input.retryPrompt : input.prompt,
            repo_path: input.repoPath,
            worker: workerConfig
        });
        // A result that arrives after the run was stopped is dropped unrecorded.
        if (runIsStopped()) {
            break;
        }
        const output = runResult.observations.join('\n');
        rawOutputs.push(output);
        lastOutput = output;
        const parsed = parseJsonWithSchema(output, input.schema);
        if (parsed.data) {
            return { data: parsed.data, output, retry_count: attempt, rawOutputs, worker: workerType };
        }
        lastError = parsed.error ?? lastError;
    }
    logRawOutputsToArtifact(runStore, stage, workerType, rawOutputs);
    return {
        error: lastError ?? 'JSON parse failed after retries',
        output: lastOutput ?? rawOutputs[rawOutputs.length - 1],
        rawOutputs,
        retry_count: RETRY_DELAYS_MS.length,
        worker: workerType
    };
}
1448
/**
 * Unified worker call that dispatches to the worker configured for a phase,
 * so phases can use either Claude or Codex.
 *
 * The primary worker is tried first, with up to 2 retries after jittered
 * delays (250ms, 1s). If it never produces schema-valid JSON and the other
 * worker is configured, a `worker_fallback` event is logged and the other
 * worker gets the same prompts. No fallback is attempted when the run has
 * been stopped in the meantime (e.g. by the stall watchdog); the failed
 * primary result is returned so the caller's late-result check can bail out.
 *
 * @param {object} input - `prompt`, `repoPath`, `workerType`, `workers`, `schema`, `runStore`, `stage`.
 * @returns {Promise<object>} The successful result, or the last failed result
 *   (including `rawOutputs` for artifact logging).
 */
async function callWorkerJson(input) {
    const retryPrompt = `${input.prompt}\n\nOutput JSON only between BEGIN_JSON and END_JSON. No other text.`;
    const primary = await runWorkerWithRetries({
        prompt: input.prompt,
        retryPrompt,
        repoPath: input.repoPath,
        workerType: input.workerType,
        workers: input.workers,
        schema: input.schema,
        runStore: input.runStore,
        stage: input.stage
    });
    if (primary.data) {
        return primary;
    }
    const fallbackWorker = input.workerType === 'claude' ? 'codex' : 'claude';
    if (!input.workers[fallbackWorker]) {
        return primary;
    }
    // The primary loop also exits without data when the run was stopped.
    // The fallback's first attempt has no stop check of its own, so without
    // this guard a stopped run would still trigger a second worker call.
    if (input.runStore.readState().phase === 'STOPPED') {
        return primary;
    }
    const reason = resolveInfraReason(primary.output);
    input.runStore.appendEvent({
        type: 'worker_fallback',
        source: 'supervisor',
        payload: {
            stage: input.stage,
            from: input.workerType,
            to: fallbackWorker,
            reason
        }
    });
    return runWorkerWithRetries({
        prompt: input.prompt,
        retryPrompt,
        repoPath: input.repoPath,
        workerType: fallbackWorker,
        workers: input.workers,
        schema: input.schema,
        runStore: input.runStore,
        stage: input.stage
    });
}
1496
/**
 * Produce a short excerpt of worker output.
 *
 * Surrounding whitespace is trimmed. Output longer than 800 UTF-16 code units
 * is cut to at most 800 units and suffixed with "...". The cut never lands
 * between the two halves of a surrogate pair, so the excerpt does not end in
 * a dangling high surrogate.
 *
 * @param {string | null | undefined} output - Raw worker output.
 * @returns {string} The trimmed (and possibly truncated) excerpt; '' for
 *   empty, null or undefined input.
 */
function snippet(output) {
    if (!output) {
        return '';
    }
    const MAX_LENGTH = 800;
    const trimmed = output.trim();
    if (trimmed.length <= MAX_LENGTH) {
        return trimmed;
    }
    let end = MAX_LENGTH;
    const lastUnit = trimmed.charCodeAt(end - 1);
    // A high surrogate as the final kept unit means its low half would be cut
    // off; drop the high half too rather than emit an unpaired surrogate.
    if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
        end -= 1;
    }
    return `${trimmed.slice(0, end)}...`;
}
1506
/**
 * Log raw worker outputs to artifact for debugging parse failures.
 *
 * Writes one Markdown section per attempt ("Initial attempt", "Retry 1", ...)
 * containing the tail of that attempt's output inside a fenced code block.
 * The tail is limited to the last 2048 UTF-16 code units (roughly 2KB for
 * ASCII output).
 *
 * The fence is sized per section: it is always longer than the longest run of
 * backticks inside the content, so worker output that itself contains
 * Markdown code fences cannot terminate the section's fence early.
 *
 * Does nothing when `rawOutputs` is missing or empty.
 *
 * @param {object} runStore - Store exposing `writeArtifact(name, content)`.
 * @param {string} stage - Stage name; used in the heading and file name.
 * @param {string} worker - Worker name; used in the file name.
 * @param {string[]} rawOutputs - Raw output of each attempt, in order.
 * @returns {void}
 */
function logRawOutputsToArtifact(runStore, stage, worker, rawOutputs) {
    if (!rawOutputs || rawOutputs.length === 0) {
        return;
    }
    const MAX_CHARS = 2048;
    const lines = [`# Raw Worker Outputs (${stage})`, ''];
    rawOutputs.forEach((output, index) => {
        const label = index === 0 ? 'Initial attempt' : `Retry ${index}`;
        let tail = output.length > MAX_CHARS ? output.slice(-MAX_CHARS) : output;
        if (tail.length < output.length) {
            // Truncation may have split a surrogate pair; drop an orphaned
            // low surrogate at the start of the tail.
            const firstUnit = tail.charCodeAt(0);
            if (firstUnit >= 0xdc00 && firstUnit <= 0xdfff) {
                tail = tail.slice(1);
            }
        }
        // Use a fence strictly longer than any backtick run in the content
        // (minimum of three backticks).
        const backtickRuns = tail.match(/`+/g) ?? [];
        const longestRun = backtickRuns.reduce((max, run) => Math.max(max, run.length), 0);
        const fence = '`'.repeat(Math.max(3, longestRun + 1));
        lines.push(`## ${label}`, fence, tail, fence, '');
    });
    runStore.writeArtifact(`raw-outputs-${stage}-${worker}.md`, lines.join('\n'));
}
1529
/**
 * Increment worker stats for a given worker and phase.
 *
 * Returns a new stats object; the input object and its nested `by_phase`
 * objects are not mutated. Both the top-level counter `stats[worker]` and the
 * per-phase counter `stats.by_phase[phase][worker]` are increased by one.
 *
 * Missing counters are treated as 0, and a missing `by_phase` map or phase
 * entry is created, so stats that lack an entry for this worker or phase
 * neither throw nor produce NaN.
 *
 * @param {object} stats - Current stats: per-worker totals plus a `by_phase` map.
 * @param {string} worker - Worker whose counters are incremented.
 * @param {string} phase - Phase whose per-worker counter is incremented.
 * @returns {object} Updated copy of `stats`.
 */
function incrementWorkerStats(stats, worker, phase) {
    const byPhase = stats.by_phase ?? {};
    const phaseCounts = byPhase[phase] ?? {};
    return {
        ...stats,
        [worker]: (stats[worker] ?? 0) + 1,
        by_phase: {
            ...byPhase,
            [phase]: {
                ...phaseCounts,
                [worker]: (phaseCounts[worker] ?? 0) + 1
            }
        }
    };
}
1545
/**
 * Check every milestone's `files_expected` entries against the allowlist.
 *
 * Each allowlist pattern is compiled once with picomatch; a file is in scope
 * when at least one compiled matcher accepts it. Milestones without
 * `files_expected` contribute nothing.
 *
 * @param {Iterable<object>} milestones - Milestones, each optionally carrying `files_expected`.
 * @param {string[]} allowlist - Glob patterns describing the allowed scope.
 * @returns {string[]} Files matched by no allowlist pattern, in encounter order.
 */
function validateFilesExpected(milestones, allowlist) {
    const scopeMatchers = allowlist.map((glob) => picomatch(glob));
    const isAllowed = (path) => scopeMatchers.some((matches) => matches(path));
    const offenders = [];
    for (const { files_expected: expected } of milestones) {
        if (expected == null) {
            continue;
        }
        for (const path of expected) {
            if (!isAllowed(path)) {
                offenders.push(path);
            }
        }
    }
    return offenders;
}