@weldr/runr 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73) hide show
  1. package/CHANGELOG.md +216 -0
  2. package/LICENSE +190 -0
  3. package/NOTICE +4 -0
  4. package/README.md +200 -0
  5. package/dist/cli.js +464 -0
  6. package/dist/commands/__tests__/report.test.js +202 -0
  7. package/dist/commands/compare.js +168 -0
  8. package/dist/commands/doctor.js +124 -0
  9. package/dist/commands/follow.js +251 -0
  10. package/dist/commands/gc.js +161 -0
  11. package/dist/commands/guards-only.js +89 -0
  12. package/dist/commands/metrics.js +441 -0
  13. package/dist/commands/orchestrate.js +800 -0
  14. package/dist/commands/paths.js +31 -0
  15. package/dist/commands/preflight.js +152 -0
  16. package/dist/commands/report.js +478 -0
  17. package/dist/commands/resume.js +149 -0
  18. package/dist/commands/run.js +538 -0
  19. package/dist/commands/status.js +189 -0
  20. package/dist/commands/summarize.js +220 -0
  21. package/dist/commands/version.js +82 -0
  22. package/dist/commands/wait.js +170 -0
  23. package/dist/config/__tests__/presets.test.js +104 -0
  24. package/dist/config/load.js +66 -0
  25. package/dist/config/schema.js +160 -0
  26. package/dist/context/__tests__/artifact.test.js +130 -0
  27. package/dist/context/__tests__/pack.test.js +191 -0
  28. package/dist/context/artifact.js +67 -0
  29. package/dist/context/index.js +2 -0
  30. package/dist/context/pack.js +273 -0
  31. package/dist/diagnosis/analyzer.js +678 -0
  32. package/dist/diagnosis/formatter.js +136 -0
  33. package/dist/diagnosis/index.js +6 -0
  34. package/dist/diagnosis/types.js +7 -0
  35. package/dist/env/__tests__/fingerprint.test.js +116 -0
  36. package/dist/env/fingerprint.js +111 -0
  37. package/dist/orchestrator/__tests__/policy.test.js +185 -0
  38. package/dist/orchestrator/__tests__/schema-version.test.js +65 -0
  39. package/dist/orchestrator/artifacts.js +405 -0
  40. package/dist/orchestrator/state-machine.js +646 -0
  41. package/dist/orchestrator/types.js +88 -0
  42. package/dist/ownership/normalize.js +45 -0
  43. package/dist/repo/context.js +90 -0
  44. package/dist/repo/git.js +13 -0
  45. package/dist/repo/worktree.js +239 -0
  46. package/dist/store/run-store.js +107 -0
  47. package/dist/store/run-utils.js +69 -0
  48. package/dist/store/runs-root.js +126 -0
  49. package/dist/supervisor/__tests__/evidence-gate.test.js +111 -0
  50. package/dist/supervisor/__tests__/ownership.test.js +103 -0
  51. package/dist/supervisor/__tests__/state-machine.test.js +290 -0
  52. package/dist/supervisor/collision.js +240 -0
  53. package/dist/supervisor/evidence-gate.js +98 -0
  54. package/dist/supervisor/planner.js +18 -0
  55. package/dist/supervisor/runner.js +1562 -0
  56. package/dist/supervisor/scope-guard.js +55 -0
  57. package/dist/supervisor/state-machine.js +121 -0
  58. package/dist/supervisor/verification-policy.js +64 -0
  59. package/dist/tasks/task-metadata.js +72 -0
  60. package/dist/types/schemas.js +1 -0
  61. package/dist/verification/engine.js +49 -0
  62. package/dist/workers/__tests__/claude.test.js +88 -0
  63. package/dist/workers/__tests__/codex.test.js +81 -0
  64. package/dist/workers/claude.js +119 -0
  65. package/dist/workers/codex.js +162 -0
  66. package/dist/workers/json.js +22 -0
  67. package/dist/workers/mock.js +193 -0
  68. package/dist/workers/prompts.js +98 -0
  69. package/dist/workers/schemas.js +39 -0
  70. package/package.json +47 -0
  71. package/templates/prompts/implementer.md +70 -0
  72. package/templates/prompts/planner.md +62 -0
  73. package/templates/prompts/reviewer.md +77 -0
@@ -0,0 +1,31 @@
1
+ /**
2
+ * runr paths command - Display canonical runr directory paths.
3
+ *
4
+ * Single source of truth for all runr paths. Use this in scripts,
5
+ * CI, and external tools instead of hardcoding paths.
6
+ */
7
+ import { getRunrPaths } from '../store/runs-root.js';
8
/**
 * Run the `paths` command: print the directory layout that getRunrPaths()
 * reports for the given repository.
 *
 * With `options.json` the paths object is printed as pretty-printed JSON.
 * Otherwise a plain-text listing is printed, followed by a yellow warning
 * when the paths object has `using_legacy` set.
 *
 * @param {object} options
 * @param {*} options.repo - Forwarded unchanged to getRunrPaths().
 * @param {boolean} [options.json] - Emit JSON instead of the text listing.
 * @returns {Promise<void>} Resolves after printing; sets process.exitCode to 0.
 */
export async function pathsCommand(options) {
    const resolved = getRunrPaths(options.repo);
    const outputLines = [];
    if (options.json) {
        outputLines.push(JSON.stringify(resolved, null, 2));
    }
    else {
        outputLines.push(
            'Runr Paths',
            '==========',
            '',
            `repo_root: ${resolved.repo_root}`,
            `runr_root: ${resolved.runr_root}`,
            `runs_dir: ${resolved.runs_dir}`,
            `worktrees_dir: ${resolved.worktrees_dir}`,
            `orchestrations_dir: ${resolved.orchestrations_dir}`
        );
        if (resolved.using_legacy) {
            // ANSI yellow (33) around the warning, reset (0) afterwards.
            outputLines.push('', '\x1b[33m⚠ Using legacy .agent/ directory. Consider migrating to .runr/\x1b[0m');
        }
    }
    // One console.log call per output line, in order.
    for (const outputLine of outputLines) {
        console.log(outputLine);
    }
    process.exitCode = 0;
}
@@ -0,0 +1,152 @@
1
+ import fs from 'node:fs';
2
+ import path from 'node:path';
3
+ import { execa } from 'execa';
4
+ import { buildRepoContext } from '../repo/context.js';
5
+ import { checkLockfiles, checkScope, partitionChangedFiles } from '../supervisor/scope-guard.js';
6
+ import { selectTiersWithReasons } from '../supervisor/verification-policy.js';
7
+ import { pingClaude } from '../workers/claude.js';
8
+ import { pingCodex } from '../workers/codex.js';
9
/**
 * Check worker binary exists and can report version.
 * This is cheaper than ping (no API call).
 *
 * The process is started with `reject: false`, which makes execa resolve with
 * the failure object instead of throwing. A missing binary or a timeout
 * therefore usually arrives through the resolved result, not through `catch`,
 * so both paths are classified by the same helper.
 *
 * @param {string} name - Worker label copied into the result as `worker`.
 * @param {{ bin: string }} worker - Worker config; only `bin` is read here.
 * @returns {Promise<{worker: string, bin: string, ok: boolean, version?: string, error?: string}>}
 *   `version` is the first line of stdout on success; `error` describes the failure otherwise.
 */
async function checkWorkerBinary(name, worker) {
    const failed = (error) => ({ worker: name, bin: worker.bin, ok: false, error });
    // Recognise spawn-level failures on either a resolved result or a thrown error.
    const spawnFailureReason = (failure) => {
        const message = typeof failure?.message === 'string' ? failure.message : '';
        if (failure?.code === 'ENOENT' || message.includes('ENOENT')) {
            return `Command not found: ${worker.bin}`;
        }
        if (failure?.timedOut) {
            return `Version check timed out: ${worker.bin}`;
        }
        return null;
    };
    try {
        const result = await execa(worker.bin, ['--version'], {
            timeout: 5000,
            reject: false
        });
        if (result.exitCode === 0) {
            const version = result.stdout.trim().split('\n')[0];
            return { worker: name, bin: worker.bin, ok: true, version };
        }
        return failed(spawnFailureReason(result) ?? (result.stderr || 'Version check failed'));
    }
    catch (err) {
        // Still reachable for errors raised before the process is spawned.
        const fallback = typeof err?.message === 'string' && err.message
            ? err.message
            : 'Version check failed';
        return failed(spawnFailureReason(err) ?? fallback);
    }
}
42
/**
 * Run the pre-run checks for a repository and gather them into one report.
 *
 * Checks, in the order their failure reasons are appended:
 *   1. `dirty_worktree`       - semantic (non-env) changes exist and `allowDirty` is off
 *   2. `scope_violation`      - checkScope() rejected the semantic changes
 *   3. `lockfile_violation`   - checkLockfiles() rejected the semantic changes
 *   4. `verification_cwd_missing:<cwd>` - configured verification cwd does not exist
 *   5. `binary_missing:<worker>:<error>` - a configured worker's `--version` check failed
 *   6. `ping_failed:<worker>:<category>` - a worker ping failed
 *
 * Pings are skipped (and reported as `ok: true, skipped: true`) when
 * `options.skipPing` is set or any binary check failed.
 *
 * @param {object} options - Reads `repoPath`, `runId`, `slug`, `config`,
 *   `allowDirty`, `allowDeps`, `skipPing` and `milestoneRiskLevel`.
 * @returns {Promise<object>} `{ repo_context, guard, binary, ping, tiers, tier_reasons }`.
 */
export async function runPreflight(options) {
    const { config } = options;
    const repoContext = await buildRepoContext(options.repoPath, options.runId, options.slug, config.repo.default_branch ?? 'main');
    // Agent artifacts are always treated as environment noise, on top of
    // whatever the config allowlists; the Set removes duplicates.
    const envPatterns = [
        ...new Set([...(config.scope.env_allowlist ?? []), '.agent/**', '.agent-worktrees/**'])
    ];
    const { env_touched, semantic_changed } = partitionChangedFiles(repoContext.changed_files, envPatterns);
    const dirty_files = repoContext.changed_files;
    const hasSemanticChanges = semantic_changed.length > 0;
    const dirty_is_env_only = dirty_files.length > 0 && !hasSemanticChanges;
    // Scope and lockfile guards only look at semantic changes.
    const scopeCheck = checkScope(semantic_changed, config.scope.allowlist, config.scope.denylist);
    const lockfileCheck = checkLockfiles(semantic_changed, config.scope.lockfiles, options.allowDeps);
    const reasons = [];
    if (hasSemanticChanges && !options.allowDirty) {
        reasons.push('dirty_worktree');
    }
    if (!scopeCheck.ok) {
        reasons.push('scope_violation');
    }
    if (!lockfileCheck.ok) {
        reasons.push('lockfile_violation');
    }
    const verificationCwd = config.verification.cwd;
    if (verificationCwd && !fs.existsSync(path.join(options.repoPath, verificationCwd))) {
        reasons.push(`verification_cwd_missing:${verificationCwd}`);
    }
    // Binary checks run concurrently, claude first so result order is stable.
    const workers = config.workers;
    const pendingBinaryChecks = [];
    for (const workerName of ['claude', 'codex']) {
        if (workers[workerName]) {
            pendingBinaryChecks.push(checkWorkerBinary(workerName, workers[workerName]));
        }
    }
    const binaryResults = await Promise.all(pendingBinaryChecks);
    binaryResults
        .filter((check) => !check.ok)
        .forEach((check) => reasons.push(`binary_missing:${check.worker}:${check.error}`));
    const binaryStatus = {
        ok: binaryResults.every((check) => check.ok),
        results: binaryResults
    };
    // Only ping when asked to and when every binary check passed.
    let pingStatus;
    if (!options.skipPing && binaryStatus.ok) {
        const pendingPings = [];
        if (workers.claude) {
            pendingPings.push(pingClaude(workers.claude));
        }
        if (workers.codex) {
            pendingPings.push(pingCodex(workers.codex));
        }
        const pingResults = [...(await Promise.all(pendingPings))];
        for (const ping of pingResults) {
            if (ping.ok) {
                continue;
            }
            reasons.push(`ping_failed:${ping.worker}:${ping.category || 'unknown'}`);
        }
        pingStatus = {
            ok: pingResults.every((ping) => ping.ok),
            skipped: false,
            results: pingResults
        };
    }
    else {
        pingStatus = { ok: true, skipped: true, results: [] };
    }
    // Tier selection sees every changed file, env noise included.
    const selection = selectTiersWithReasons(config.verification, {
        changed_files: repoContext.changed_files,
        risk_level: options.milestoneRiskLevel,
        is_milestone_end: false,
        is_run_end: false
    });
    return {
        repo_context: repoContext,
        guard: {
            ok: reasons.length === 0,
            reasons,
            dirty: hasSemanticChanges,
            scope_violations: scopeCheck.violations,
            lockfile_violations: lockfileCheck.violations,
            dirty_files,
            env_touched,
            dirty_is_env_only,
        },
        binary: binaryStatus,
        ping: pingStatus,
        tiers: selection.tiers,
        tier_reasons: selection.reasons
    };
}
@@ -0,0 +1,478 @@
1
+ import fs from 'node:fs';
2
+ import path from 'node:path';
3
+ import readline from 'node:readline';
4
+ import { readContextPackArtifact, formatContextPackStatus } from '../context/index.js';
5
+ import { findLatestRunId } from '../store/run-utils.js';
6
+ import { getRunsRoot } from '../store/runs-root.js';
7
+ // Re-export for backward compatibility with cli.ts
8
+ export { findLatestRunId };
9
/**
 * Print a report for one run: header, KPIs, the last timeline events and
 * pointers to the files on disk.
 *
 * The run directory is `<runs root>/<runId>` and must contain `state.json`.
 * `timeline.jsonl` is optional; without it the KPIs fall back to an
 * all-unknown default and no events are listed.
 *
 * @param {object} options
 * @param {*} options.repo - Forwarded to getRunsRoot().
 * @param {string} options.runId - Run directory name.
 * @param {number} [options.tail] - How many trailing events to show.
 * @param {boolean} [options.kpiOnly] - Print a single compact KPI line instead.
 * @returns {Promise<void>}
 * @throws {Error} When the run directory or its state.json is missing.
 */
export async function reportCommand(options) {
    const runDir = path.join(getRunsRoot(options.repo), options.runId);
    if (!fs.existsSync(runDir)) {
        throw new Error(missingRunMessage(runDir));
    }
    const statePath = path.join(runDir, 'state.json');
    if (!fs.existsSync(statePath)) {
        throw new Error(`State not found: ${statePath}`);
    }
    const state = JSON.parse(fs.readFileSync(statePath, 'utf-8'));
    const timelinePath = path.join(runDir, 'timeline.jsonl');
    let scan;
    if (fs.existsSync(timelinePath)) {
        scan = await scanTimeline(timelinePath, options.tail);
    }
    else {
        // No timeline yet: report every KPI as unknown/zero.
        scan = {
            tailEvents: [],
            kpi: {
                version: 1,
                total_duration_ms: null,
                unattributed_ms: null,
                started_at: null,
                ended_at: null,
                phases: {},
                workers: { claude: 'unknown', codex: 'unknown' },
                verify: { attempts: 0, retries: 0, total_duration_ms: 0 },
                milestones: { completed: 0 },
                reliability: {
                    infra_retries: 0,
                    fallback_used: false,
                    fallback_count: 0,
                    stalls_triggered: 0,
                    late_results_ignored: 0
                },
                outcome: 'unknown',
                stop_reason: null
            }
        };
    }
    const flags = readFlags(scan.runStarted);
    const contextPackArtifact = readContextPackArtifact(runDir);
    const headerFields = [
        ['run_id', options.runId],
        ['repo', state.repo_path],
        ['run_dir', runDir],
        ['current_phase', state.phase],
        ['milestone_index', state.milestone_index],
        ['phase_attempt', state.phase_attempt ?? 0],
        ['last_error', state.last_error ?? 'none'],
        ['dry_run', flags.dry_run ?? 'unknown'],
        ['no_branch', flags.no_branch ?? 'unknown'],
        ['allow_dirty', flags.allow_dirty ?? 'unknown'],
        ['allow_deps', flags.allow_deps ?? 'unknown']
    ];
    const header = ['Run', ...headerFields.map(([label, value]) => `${label}: ${value}`)].join('\n');
    const kpiBlock = formatKpiBlock(scan.kpi, contextPackArtifact);
    if (options.kpiOnly) {
        // Compact output: just run_id and KPIs
        const { kpi } = scan;
        console.log(`${options.runId}: ${kpi.outcome} ${formatDuration(kpi.total_duration_ms)} milestones=${kpi.milestones.completed}`);
        return;
    }
    const events = formatEvents(scan.tailEvents);
    const pointers = formatPointers({
        statePath,
        timelinePath,
        runDir,
        checkpoint: state.checkpoint_commit_sha
    });
    const sections = [header, '', 'KPIs', kpiBlock, '', 'Last events', events, '', 'Pointers', pointers];
    console.log(sections.join('\n'));
}
74
/**
 * Format a duration that may be missing or negative.
 *
 * @param {number|null|undefined} ms - Duration in milliseconds.
 * @returns {string} 'unknown' when `ms` is not a finite number (null,
 *   undefined, NaN from an unparseable timestamp, ...); otherwise the
 *   formatPositiveDuration() rendering, prefixed with '-' for negative values.
 */
function formatDuration(ms) {
    // Anything that is not a real number would otherwise render as "NaNh".
    if (typeof ms !== 'number' || !Number.isFinite(ms)) {
        return 'unknown';
    }
    if (ms < 0) {
        return `-${formatPositiveDuration(Math.abs(ms))}`;
    }
    return formatPositiveDuration(ms);
}
81
/**
 * Render a non-negative millisecond count using its two largest units.
 *
 *   < 1s   -> "<n>ms"
 *   < 1min -> "<n>s"
 *   < 1h   -> "<m>m<s>s" (seconds omitted when zero)
 *   else   -> "<h>h<m>m" (minutes omitted when zero; seconds are dropped)
 *
 * @param {number} ms - Duration in milliseconds.
 * @returns {string} The formatted duration, or 'unknown' when `ms` is not a
 *   finite number (previously such input rendered as "NaNh").
 */
function formatPositiveDuration(ms) {
    if (typeof ms !== 'number' || !Number.isFinite(ms)) {
        return 'unknown';
    }
    if (ms < 1000) {
        return `${ms}ms`;
    }
    const seconds = Math.floor(ms / 1000);
    if (seconds < 60) {
        return `${seconds}s`;
    }
    const minutes = Math.floor(seconds / 60);
    const remainingSeconds = seconds % 60;
    if (minutes < 60) {
        return remainingSeconds > 0 ? `${minutes}m${remainingSeconds}s` : `${minutes}m`;
    }
    const hours = Math.floor(minutes / 60);
    const remainingMinutes = minutes % 60;
    return remainingMinutes > 0 ? `${hours}h${remainingMinutes}m` : `${hours}h`;
}
96
/**
 * Build the "run not found" error text, listing up to five sibling run
 * directories (sorted by name, descending) as a hint.
 *
 * @param {string} runDir - The run directory that was looked for.
 * @returns {string} Message ending in "Known runs: <names>." or "Known runs: none."
 */
function missingRunMessage(runDir) {
    const parentDir = path.dirname(runDir);
    let knownRuns = 'none';
    if (fs.existsSync(parentDir)) {
        const dirNames = [];
        for (const entry of fs.readdirSync(parentDir, { withFileTypes: true })) {
            if (entry.isDirectory()) {
                dirNames.push(entry.name);
            }
        }
        // Descending name order, keeping only the first five.
        dirNames.sort();
        dirNames.reverse();
        const shown = dirNames.slice(0, 5);
        if (shown.length) {
            knownRuns = shown.join(', ');
        }
    }
    return `Run not found: ${runDir}. Known runs: ${knownRuns}.`;
}
111
/**
 * Extract the four run flags from a `run_started` event.
 *
 * @param {object|undefined} runStarted - The event, if one was found.
 * @returns {object} `{ dry_run, no_branch, allow_dirty, allow_deps }` copied
 *   from the event payload, or `{}` when there is no object payload.
 */
function readFlags(runStarted) {
    const payload = runStarted?.payload;
    const hasObjectPayload = Boolean(payload) && typeof payload === 'object';
    if (!hasObjectPayload) {
        return {};
    }
    const { dry_run, no_branch, allow_dirty, allow_deps } = payload;
    return { dry_run, no_branch, allow_dirty, allow_deps };
}
123
/**
 * Render the KPI object as the multi-line "KPIs" section of the report.
 *
 * Lines, in order: total duration (with unattributed time), outcome,
 * milestones completed, worker call counts, per-phase durations,
 * verification stats, reliability counters, context pack status.
 *
 * @param {object} kpi - KPI object in the shape computeKpiFromEvents() returns.
 * @param {*} contextPackArtifact - Passed to formatContextPackStatus(); null/undefined become null.
 * @returns {string} The lines joined with '\n'.
 */
function formatKpiBlock(kpi, contextPackArtifact) {
    const PHASE_ORDER = ['plan', 'implement', 'review', 'verify'];
    // Known phases first in their usual order, then unknown ones alphabetically.
    const byPhaseOrder = ([left], [right]) => {
        const leftRank = PHASE_ORDER.indexOf(left);
        const rightRank = PHASE_ORDER.indexOf(right);
        const leftKnown = leftRank !== -1;
        const rightKnown = rightRank !== -1;
        if (leftKnown && rightKnown) {
            return leftRank - rightRank;
        }
        if (leftKnown) {
            return -1;
        }
        if (rightKnown) {
            return 1;
        }
        return left.localeCompare(right);
    };
    // Total duration + unattributed
    const gapMs = kpi.unattributed_ms;
    let gapText;
    if (gapMs === null) {
        gapText = 'unknown';
    }
    else if (gapMs >= 0 || Number.isNaN(gapMs)) {
        gapText = formatPositiveDuration(gapMs);
    }
    else {
        // Negative = phases exceed tracked time (e.g., resumed runs with gaps)
        gapText = `-${formatPositiveDuration(Math.abs(gapMs))} (resume/gap)`;
    }
    const output = [
        `total_duration: ${formatDuration(kpi.total_duration_ms)} (unattributed: ${gapText})`,
        `outcome: ${kpi.stop_reason ? `${kpi.outcome} (${kpi.stop_reason})` : kpi.outcome}`,
        `milestones_completed: ${kpi.milestones.completed}`,
        `worker_calls: claude=${kpi.workers.claude} codex=${kpi.workers.codex}`
    ];
    // Phase durations
    const phasePairs = Object.entries(kpi.phases);
    if (phasePairs.length === 0) {
        output.push('phases: (no phase data)');
    }
    else {
        const rendered = phasePairs
            .sort(byPhaseOrder)
            .map(([phase, stats]) => `${phase}=${formatDuration(stats.duration_ms)}(x${stats.count})`);
        output.push(`phases: ${rendered.join(' ')}`);
    }
    // Verification stats
    const { verify } = kpi;
    output.push(verify.attempts > 0
        ? `verify: attempts=${verify.attempts} retries=${verify.retries} duration=${formatDuration(verify.total_duration_ms)}`
        : 'verify: (no verification data)');
    // Reliability metrics: only the non-zero counters are listed.
    const { reliability } = kpi;
    const counters = [
        [reliability.infra_retries > 0, `retries=${reliability.infra_retries}`],
        [reliability.fallback_used, `fallback=${reliability.fallback_count}`],
        [reliability.stalls_triggered > 0, `stalls=${reliability.stalls_triggered}`],
        [reliability.late_results_ignored > 0, `late_ignored=${reliability.late_results_ignored}`]
    ];
    const shown = counters.filter(([active]) => active).map(([, text]) => text);
    output.push(`reliability: ${shown.length ? shown.join(' ') : 'clean'}`);
    // Context pack status
    output.push(formatContextPackStatus(contextPackArtifact ?? null));
    return output.join('\n');
}
195
// Exported for testing - computes KPIs from an array of timeline events
/**
 * Fold a run's timeline events into a KPI summary.
 *
 * Phase time is measured from each `phase_start` to the next `phase_start`,
 * `stop` or `run_complete`. A `stop`/`run_complete` ends the open phase, so a
 * later resume does not count the already-recorded time (or the idle gap)
 * again; that idle time shows up as `unattributed_ms` instead.
 *
 * Timestamps that are missing or cannot be parsed are treated as absent:
 * they never contribute NaN to a duration.
 *
 * @param {Array<object>} events - Parsed timeline events (`type`, `timestamp`, `payload`).
 *   Entries that are not objects are ignored.
 * @returns {object} KPI object: `version`, `total_duration_ms`, `unattributed_ms`,
 *   `started_at`, `ended_at`, `phases`, `workers`, `verify`, `milestones`,
 *   `reliability`, `outcome`, `stop_reason`.
 */
export function computeKpiFromEvents(events) {
    let startedAt = null;
    let endedAt = null;
    const phases = {};
    let currentPhase = null;
    let currentPhaseStart = null;
    let workersClaude = 'unknown';
    let workersCodex = 'unknown';
    let verifyAttempts = 0;
    let verifyRetries = 0;
    let verifyDurationMs = 0;
    let milestonesCompleted = 0;
    let outcome = 'unknown';
    let stopReason = null;
    // Reliability metrics
    let infraRetries = 0;
    let fallbackCount = 0;
    let stallsTriggered = 0;
    let lateResultsIgnored = 0;
    // Epoch milliseconds for a timestamp, or null when absent/unparseable.
    const toEpochMs = (timestamp) => {
        if (!timestamp) {
            return null;
        }
        const ms = new Date(timestamp).getTime();
        return Number.isFinite(ms) ? ms : null;
    };
    // Fetch (creating on first use) the accumulator for a phase.
    const phaseBucket = (phase) => {
        if (!phases[phase]) {
            phases[phase] = { duration_ms: 0, count: 0 };
        }
        return phases[phase];
    };
    // Add the time since the open phase started, if both ends are known.
    const closeOpenPhase = (timestamp) => {
        const endMs = toEpochMs(timestamp);
        if (currentPhase && currentPhaseStart !== null && endMs !== null) {
            phaseBucket(currentPhase).duration_ms += endMs - currentPhaseStart;
        }
    };
    // Record the end of the run (or of this run segment) and end the open phase.
    const endRun = (timestamp, newOutcome) => {
        endedAt = timestamp ?? null;
        outcome = newOutcome;
        closeOpenPhase(timestamp);
        // Forget the phase so a resumed run cannot close it a second time.
        currentPhase = null;
        currentPhaseStart = null;
    };
    for (const event of events) {
        if (event === null || typeof event !== 'object') {
            continue;
        }
        const eventType = event.type;
        const timestamp = event.timestamp;
        const payload = event.payload && typeof event.payload === 'object'
            ? event.payload
            : {};
        // Only the first run_started sets the start time.
        if (eventType === 'run_started' && startedAt === null) {
            startedAt = timestamp ?? null;
            outcome = 'running';
        }
        if (eventType === 'phase_start' && payload.phase) {
            closeOpenPhase(timestamp);
            currentPhase = payload.phase;
            currentPhaseStart = toEpochMs(timestamp);
            phaseBucket(currentPhase).count += 1;
        }
        // worker_stats is emitted at finalize; the last one wins.
        if (eventType === 'worker_stats' && payload.stats) {
            const stats = payload.stats;
            if (typeof stats.claude === 'number') {
                workersClaude = stats.claude;
            }
            if (typeof stats.codex === 'number') {
                workersCodex = stats.codex;
            }
        }
        if (eventType === 'verification') {
            verifyAttempts += 1;
            if (Number.isFinite(payload.duration_ms)) {
                verifyDurationMs += payload.duration_ms;
            }
            // Retries are read from the payload's numeric `retry` field.
            if (typeof payload.retry === 'number') {
                verifyRetries += payload.retry;
            }
        }
        if (eventType === 'milestone_complete') {
            milestonesCompleted += 1;
        }
        if (eventType === 'stop') {
            stopReason = payload.reason ?? null;
            endRun(timestamp, 'stopped');
            if (payload.reason === 'stalled_timeout') {
                stallsTriggered += 1;
            }
        }
        if (eventType === 'run_complete') {
            endRun(timestamp, 'complete');
        }
        // Reliability metrics
        if (eventType === 'parse_failed' && typeof payload.retry_count === 'number') {
            // Numeric guard: a string here would turn += into concatenation.
            infraRetries += payload.retry_count;
        }
        if (eventType === 'worker_fallback') {
            fallbackCount += 1;
        }
        if (eventType === 'late_worker_result_ignored') {
            lateResultsIgnored += 1;
        }
    }
    // Total duration: first start to last recorded end.
    const startMs = toEpochMs(startedAt);
    const endMs = toEpochMs(endedAt);
    const totalDurationMs = startMs !== null && endMs !== null ? endMs - startMs : null;
    // Unattributed time = total - sum of phase durations.
    let unattributedMs = null;
    if (totalDurationMs !== null) {
        const attributedMs = Object.values(phases).reduce((sum, p) => sum + p.duration_ms, 0);
        unattributedMs = totalDurationMs - attributedMs;
    }
    return {
        version: 1,
        total_duration_ms: totalDurationMs,
        unattributed_ms: unattributedMs,
        started_at: startedAt,
        ended_at: endedAt,
        phases,
        workers: {
            claude: workersClaude,
            codex: workersCodex
        },
        verify: {
            attempts: verifyAttempts,
            retries: verifyRetries,
            total_duration_ms: verifyDurationMs
        },
        milestones: {
            completed: milestonesCompleted
        },
        reliability: {
            infra_retries: infraRetries,
            fallback_used: fallbackCount > 0,
            fallback_count: fallbackCount,
            stalls_triggered: stallsTriggered,
            late_results_ignored: lateResultsIgnored
        },
        outcome,
        stop_reason: stopReason
    };
}
355
/**
 * Read a JSON-lines timeline file and summarise it.
 *
 * Blank lines, lines that are not valid JSON, and lines whose JSON value is
 * not an object (for example a bare `null`) are skipped. Skipping non-objects
 * matters because every parsed event is later handed to
 * computeKpiFromEvents(), outside the per-line try/catch.
 *
 * @param {string} timelinePath - Path to the timeline file.
 * @param {number} tailCount - Maximum number of trailing events to keep.
 * @returns {Promise<{runStarted: (object|undefined), tailEvents: object[], kpi: object}>}
 *   The first `run_started` event (if any), the last `tailCount` events, and
 *   the KPIs computed over all events.
 */
async function scanTimeline(timelinePath, tailCount) {
    const allEvents = [];
    const tailEvents = [];
    let runStarted;
    const stream = fs.createReadStream(timelinePath, { encoding: 'utf-8' });
    const rl = readline.createInterface({ input: stream, crlfDelay: Infinity });
    for await (const line of rl) {
        const trimmed = line.trim();
        if (!trimmed) {
            continue;
        }
        let event;
        try {
            event = JSON.parse(trimmed);
        }
        catch {
            // Best effort: a corrupt line should not hide the rest of the timeline.
            continue;
        }
        if (event === null || typeof event !== 'object') {
            continue;
        }
        allEvents.push(event);
        if (!runStarted && event.type === 'run_started') {
            runStarted = event;
        }
        // Sliding window over the most recent events.
        tailEvents.push(event);
        if (tailEvents.length > tailCount) {
            tailEvents.shift();
        }
    }
    const kpi = computeKpiFromEvents(allEvents);
    return { runStarted, tailEvents, kpi };
}
384
/**
 * Render timeline events as one line each:
 * `<seq> <timestamp> <type> <source> <summary>`.
 *
 * Missing `seq`/`timestamp` show as '?', missing `type`/`source` as
 * 'unknown'. Each line is trimmed, so an empty summary leaves no trailing space.
 *
 * @param {Array<object>} events - Events to list.
 * @returns {string} Lines joined with '\n', or '(no events)' for an empty list.
 */
function formatEvents(events) {
    if (events.length === 0) {
        return '(no events)';
    }
    const rendered = [];
    for (const event of events) {
        const { seq = null, timestamp = null, type = null, source = null } = event;
        const columns = `${seq ?? '?'} ${timestamp ?? '?'} ${type ?? 'unknown'} ${source ?? 'unknown'}`;
        rendered.push(`${columns} ${summarizeEvent(event)}`.trim());
    }
    return rendered.join('\n');
}
398
/**
 * Produce a short, type-specific summary of one timeline event.
 *
 * Known event types get a tailored summary when the payload carries the
 * fields they need. Everything else (including known types with those fields
 * missing) falls back to listing up to four payload keys, or '' when the
 * payload is empty or not an object.
 *
 * @param {object} event - Timeline event (`type`, optional `payload`).
 * @returns {string} The summary text.
 */
function summarizeEvent(event) {
    const rawPayload = event.payload;
    const payload = rawPayload && typeof rawPayload === 'object' ? rawPayload : {};
    switch (event.type) {
        case 'phase_start':
            if (payload.phase) {
                return `phase=${payload.phase}`;
            }
            break;
        case 'verification':
            if (payload.tier) {
                return `tier=${payload.tier} ok=${payload.ok}`;
            }
            break;
        case 'verify_complete':
            if (Array.isArray(payload.results)) {
                return `results=${payload.results.join('; ')}`;
            }
            break;
        case 'guard_violation': {
            const { guard } = payload;
            if (!guard || typeof guard !== 'object') {
                return 'guard_violation';
            }
            const reasonList = Array.isArray(guard.reasons) ? guard.reasons.join(',') : '';
            return `guard=${reasonList || 'violation'}`;
        }
        case 'stop':
            if (payload.reason) {
                return `reason=${payload.reason}`;
            }
            break;
        case 'parse_failed': {
            const parts = [
                `context=${payload.parser_context ?? 'unknown'}`,
                `retry=${payload.retry_count ?? 0}`
            ];
            // Long worker output is clipped to 120 characters.
            const snippet = payload.output_snippet ? clip(String(payload.output_snippet), 120) : '';
            parts.push(snippet ? `snippet="${snippet}"` : '');
            return parts.join(' ').trim();
        }
        case 'run_started':
            return ['dry_run', 'no_branch', 'allow_dirty', 'allow_deps']
                .map((flag) => `${flag}=${payload[flag]}`)
                .join(' ');
        case 'run_resumed':
            return `max_ticks=${payload.max_ticks ?? '?'} time=${payload.time ?? '?'}`;
        default:
            break;
    }
    // Generic fallback: name the first few payload keys.
    const payloadKeys = Object.keys(payload);
    return payloadKeys.length ? `keys=${payloadKeys.slice(0, 4).join(',')}` : '';
}
446
/**
 * Render the "Pointers" section: where to find the run's state, timeline,
 * latest verification log and checkpoint commit.
 *
 * @param {object} input
 * @param {string} input.statePath - Path to state.json.
 * @param {string} input.timelinePath - Path to timeline.jsonl.
 * @param {string} input.runDir - Run directory; logs are looked up in `<runDir>/artifacts`.
 * @param {string} [input.checkpoint] - Checkpoint commit SHA, shown as 'none' when absent.
 * @returns {string} Four `label: value` lines joined with '\n'.
 */
function formatPointers(input) {
    const latestLog = findLatestVerifyLog(path.join(input.runDir, 'artifacts'));
    const rows = [
        ['state', input.statePath],
        ['timeline', input.timelinePath],
        ['last_verification_log', latestLog ?? 'none'],
        ['checkpoint_sha', input.checkpoint ?? 'none']
    ];
    return rows.map(([label, value]) => `${label}: ${value}`).join('\n');
}
457
/**
 * Find the most recently modified `tests_*.log` file in a directory.
 *
 * @param {string} artifactsDir - Directory to search (not recursive).
 * @returns {string|null} Full path of the newest matching log, or null when
 *   the directory is missing or holds no matching file.
 */
function findLatestVerifyLog(artifactsDir) {
    if (!fs.existsSync(artifactsDir)) {
        return null;
    }
    let newestPath = null;
    let newestMtime = -Infinity;
    for (const name of fs.readdirSync(artifactsDir)) {
        const isVerifyLog = name.startsWith('tests_') && name.endsWith('.log');
        if (!isVerifyLog) {
            continue;
        }
        const candidate = path.join(artifactsDir, name);
        const { mtimeMs } = fs.statSync(candidate);
        // Strictly newer only, so the first of several equal mtimes wins.
        if (newestPath === null || mtimeMs > newestMtime) {
            newestPath = candidate;
            newestMtime = mtimeMs;
        }
    }
    return newestPath;
}
473
/**
 * Shorten a string to at most `max` characters, marking the cut with '...'.
 *
 * @param {string} value - Text to shorten.
 * @param {number} max - Maximum number of characters kept from `value`.
 * @returns {string} `value` unchanged when it fits, otherwise its first
 *   `max` characters followed by '...'.
 */
function clip(value, max) {
    const fits = value.length <= max;
    return fits ? value : `${value.slice(0, max)}...`;
}