@weldr/runr 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73)
  1. package/CHANGELOG.md +216 -0
  2. package/LICENSE +190 -0
  3. package/NOTICE +4 -0
  4. package/README.md +200 -0
  5. package/dist/cli.js +464 -0
  6. package/dist/commands/__tests__/report.test.js +202 -0
  7. package/dist/commands/compare.js +168 -0
  8. package/dist/commands/doctor.js +124 -0
  9. package/dist/commands/follow.js +251 -0
  10. package/dist/commands/gc.js +161 -0
  11. package/dist/commands/guards-only.js +89 -0
  12. package/dist/commands/metrics.js +441 -0
  13. package/dist/commands/orchestrate.js +800 -0
  14. package/dist/commands/paths.js +31 -0
  15. package/dist/commands/preflight.js +152 -0
  16. package/dist/commands/report.js +478 -0
  17. package/dist/commands/resume.js +149 -0
  18. package/dist/commands/run.js +538 -0
  19. package/dist/commands/status.js +189 -0
  20. package/dist/commands/summarize.js +220 -0
  21. package/dist/commands/version.js +82 -0
  22. package/dist/commands/wait.js +170 -0
  23. package/dist/config/__tests__/presets.test.js +104 -0
  24. package/dist/config/load.js +66 -0
  25. package/dist/config/schema.js +160 -0
  26. package/dist/context/__tests__/artifact.test.js +130 -0
  27. package/dist/context/__tests__/pack.test.js +191 -0
  28. package/dist/context/artifact.js +67 -0
  29. package/dist/context/index.js +2 -0
  30. package/dist/context/pack.js +273 -0
  31. package/dist/diagnosis/analyzer.js +678 -0
  32. package/dist/diagnosis/formatter.js +136 -0
  33. package/dist/diagnosis/index.js +6 -0
  34. package/dist/diagnosis/types.js +7 -0
  35. package/dist/env/__tests__/fingerprint.test.js +116 -0
  36. package/dist/env/fingerprint.js +111 -0
  37. package/dist/orchestrator/__tests__/policy.test.js +185 -0
  38. package/dist/orchestrator/__tests__/schema-version.test.js +65 -0
  39. package/dist/orchestrator/artifacts.js +405 -0
  40. package/dist/orchestrator/state-machine.js +646 -0
  41. package/dist/orchestrator/types.js +88 -0
  42. package/dist/ownership/normalize.js +45 -0
  43. package/dist/repo/context.js +90 -0
  44. package/dist/repo/git.js +13 -0
  45. package/dist/repo/worktree.js +239 -0
  46. package/dist/store/run-store.js +107 -0
  47. package/dist/store/run-utils.js +69 -0
  48. package/dist/store/runs-root.js +126 -0
  49. package/dist/supervisor/__tests__/evidence-gate.test.js +111 -0
  50. package/dist/supervisor/__tests__/ownership.test.js +103 -0
  51. package/dist/supervisor/__tests__/state-machine.test.js +290 -0
  52. package/dist/supervisor/collision.js +240 -0
  53. package/dist/supervisor/evidence-gate.js +98 -0
  54. package/dist/supervisor/planner.js +18 -0
  55. package/dist/supervisor/runner.js +1562 -0
  56. package/dist/supervisor/scope-guard.js +55 -0
  57. package/dist/supervisor/state-machine.js +121 -0
  58. package/dist/supervisor/verification-policy.js +64 -0
  59. package/dist/tasks/task-metadata.js +72 -0
  60. package/dist/types/schemas.js +1 -0
  61. package/dist/verification/engine.js +49 -0
  62. package/dist/workers/__tests__/claude.test.js +88 -0
  63. package/dist/workers/__tests__/codex.test.js +81 -0
  64. package/dist/workers/claude.js +119 -0
  65. package/dist/workers/codex.js +162 -0
  66. package/dist/workers/json.js +22 -0
  67. package/dist/workers/mock.js +193 -0
  68. package/dist/workers/prompts.js +98 -0
  69. package/dist/workers/schemas.js +39 -0
  70. package/package.json +47 -0
  71. package/templates/prompts/implementer.md +70 -0
  72. package/templates/prompts/planner.md +62 -0
  73. package/templates/prompts/reviewer.md +77 -0
package/dist/cli.js ADDED
@@ -0,0 +1,464 @@
1
+ #!/usr/bin/env node
2
+ import { Command } from 'commander';
3
+ import { runCommand } from './commands/run.js';
4
+ import { resumeCommand } from './commands/resume.js';
5
+ import { statusCommand, statusAllCommand } from './commands/status.js';
6
+ import { reportCommand, findLatestRunId } from './commands/report.js';
7
+ import { summarizeCommand } from './commands/summarize.js';
8
+ import { compareCommand } from './commands/compare.js';
9
+ import { guardsOnlyCommand } from './commands/guards-only.js';
10
+ import { doctorCommand } from './commands/doctor.js';
11
+ import { followCommand, findBestRunToFollow } from './commands/follow.js';
12
+ import { gcCommand } from './commands/gc.js';
13
+ import { waitCommand, findLatestRunId as findLatestRunIdForWait } from './commands/wait.js';
14
+ import { orchestrateCommand, resumeOrchestrationCommand, waitOrchestrationCommand } from './commands/orchestrate.js';
15
+ import { pathsCommand } from './commands/paths.js';
16
+ import { metricsCommand } from './commands/metrics.js';
17
+ import { versionCommand } from './commands/version.js';
18
+ const program = new Command();
19
+ // Check if invoked as deprecated 'agent' command
20
+ const invokedAs = process.argv[1]?.split('/').pop() || 'runr';
21
+ if (invokedAs === 'agent') {
22
+ console.warn('\x1b[33m⚠ Deprecation: The "agent" command is deprecated. Use "runr" instead.\x1b[0m\n');
23
+ }
24
+ program
25
+ .name('runr')
26
+ .description('Phase-gated orchestration for agent tasks');
27
+ program // 'run' subcommand: collects the CLI flags and hands a normalized options object to runCommand
28
+ .command('run')
29
+ .option('--repo <path>', 'Target repo path (default: current directory)', '.')
30
+ .requiredOption('--task <path>', 'Task brief file')
31
+ .option('--time <minutes>', 'Time budget in minutes', '120')
32
+ .option('--config <path>', 'Path to runr.config.json (or agent.config.json)')
33
+ .option('--allow-deps', 'Allow lockfile changes', false)
34
+ .option('--allow-dirty', 'Allow dirty worktree', false)
35
+ .option('--no-branch', 'Do not checkout run branch') // negated flag: read in the action as options.branch === false
36
+ .option('--no-write', 'Do not write run artifacts') // negated flag: read in the action as options.write === false
37
+ .option('--web', 'Allow web access for unblock', false)
38
+ .option('--dry-run', 'Initialize run without executing', false)
39
+ .option('--max-ticks <count>', 'Max supervisor ticks (default: 50)', '50')
40
+ .option('--skip-doctor', 'Skip worker health checks', false)
41
+ .option('--fresh-target', 'Wipe target root before starting', false)
42
+ .option('--worktree', 'Create isolated git worktree for this run', false)
43
+ .option('--fast', 'Fast path: skip PLAN and REVIEW phases for small tasks', false)
44
+ .option('--auto-resume', 'Auto-resume on transient failures (stall, worker timeout)', false)
45
+ .option('--force-parallel', 'Bypass file collision checks with active runs', false)
46
+ .option('--json', 'Output JSON with run_id (for orchestrator consumption)', false)
47
+ .action(async (options) => {
48
+ const noBranch = options.branch === false; // translate the negated flags into the noBranch/noWrite booleans runCommand receives
49
+ const noWrite = options.write === false;
50
+ await runCommand({
51
+ repo: options.repo,
52
+ task: options.task,
53
+ time: Number.parseInt(options.time, 10), // NOTE(review): a non-numeric --time parses to NaN and is forwarded unchecked — confirm runCommand validates it
54
+ config: options.config,
55
+ allowDeps: options.allowDeps,
56
+ allowDirty: options.allowDirty,
57
+ web: options.web,
58
+ dryRun: options.dryRun,
59
+ noBranch,
60
+ noWrite,
61
+ maxTicks: Number.parseInt(options.maxTicks, 10), // NOTE(review): same NaN caveat as --time
62
+ skipDoctor: options.skipDoctor,
63
+ freshTarget: options.freshTarget,
64
+ worktree: options.worktree,
65
+ fast: options.fast,
66
+ autoResume: options.autoResume,
67
+ forceParallel: options.forceParallel,
68
+ json: options.json
69
+ });
70
+ });
71
+ program // 'guards-only' subcommand: forwards repo/task/config and the allow flags to guardsOnlyCommand
72
+ .command('guards-only')
73
+ .option('--repo <path>', 'Target repo path (default: current directory)', '.')
74
+ .requiredOption('--task <path>', 'Task brief file')
75
+ .option('--config <path>', 'Path to runr.config.json (or agent.config.json)')
76
+ .option('--allow-deps', 'Allow lockfile changes', false)
77
+ .option('--allow-dirty', 'Allow dirty worktree', false)
78
+ .option('--no-write', 'Do not write run artifacts') // negated flag: read in the action as options.write === false
79
+ .action(async (options) => {
80
+ const noWrite = options.write === false; // true only when --no-write was passed
81
+ await guardsOnlyCommand({
82
+ repo: options.repo,
83
+ task: options.task,
84
+ config: options.config,
85
+ allowDeps: options.allowDeps,
86
+ allowDirty: options.allowDirty,
87
+ noWrite
88
+ });
89
+ });
90
+ program // 'resume' subcommand: passes the run ID and budget/override flags to resumeCommand
91
+ .command('resume')
92
+ .argument('<runId>', 'Run ID')
93
+ .option('--repo <path>', 'Target repo path (default: current directory)', '.')
94
+ .option('--time <minutes>', 'Time budget in minutes', '120')
95
+ .option('--max-ticks <count>', 'Max supervisor ticks (default: 50)', '50')
96
+ .option('--allow-deps', 'Allow lockfile changes', false)
97
+ .option('--config <path>', 'Path to runr.config.json (or agent.config.json)')
98
+ .option('--force', 'Resume despite env fingerprint mismatch', false)
99
+ .option('--auto-resume', 'Continue auto-resuming on transient failures', false)
100
+ .action(async (runId, options) => {
101
+ await resumeCommand({
102
+ runId,
103
+ repo: options.repo,
104
+ time: Number.parseInt(options.time, 10), // NOTE(review): non-numeric input becomes NaN and is forwarded unchecked — confirm resumeCommand validates it
105
+ maxTicks: Number.parseInt(options.maxTicks, 10), // NOTE(review): same NaN caveat as --time
106
+ allowDeps: options.allowDeps,
107
+ config: options.config,
108
+ force: options.force,
109
+ autoResume: options.autoResume
110
+ });
111
+ });
112
+ program // 'status' subcommand: shows one run (statusCommand) or every run (statusAllCommand)
113
+ .command('status')
114
+ .argument('[runId]', 'Run ID (omit with --all to show all runs)')
115
+ .option('--repo <path>', 'Target repo path (default: current directory)', '.')
116
+ .option('--all', 'Show status of all runs', false)
117
+ .action(async (runId, options) => {
118
+ if (options.all) { // --all takes precedence; a runId given alongside it is ignored
119
+ await statusAllCommand({ repo: options.repo });
120
+ }
121
+ else if (runId) {
122
+ await statusCommand({ runId, repo: options.repo });
123
+ }
124
+ else { // neither --all nor a run ID: report the usage error and exit non-zero
125
+ console.error('Error: Run ID required unless using --all');
126
+ process.exit(1);
127
+ }
128
+ });
129
+ program // 'report' subcommand: resolves the run ID (supporting "latest") and calls reportCommand
130
+ .command('report')
131
+ .argument('<runId>', 'Run ID (or "latest")')
132
+ .option('--repo <path>', 'Target repo path (default: current directory)', '.')
133
+ .option('--tail <count>', 'Tail last N events', '50')
134
+ .option('--kpi-only', 'Show compact KPI summary only')
135
+ .action(async (runId, options) => {
136
+ let resolvedRunId = runId;
137
+ if (runId === 'latest') { // the literal "latest" is swapped for whatever findLatestRunId returns for this repo
138
+ const latest = findLatestRunId(options.repo);
139
+ if (!latest) {
140
+ console.error('No runs found');
141
+ process.exit(1); // terminates here, so the assignment below only runs when a run was found
142
+ }
143
+ resolvedRunId = latest;
144
+ }
145
+ await reportCommand({
146
+ runId: resolvedRunId,
147
+ repo: options.repo,
148
+ tail: Number.parseInt(options.tail, 10), // NOTE(review): non-numeric --tail becomes NaN and is forwarded unchecked
149
+ kpiOnly: options.kpiOnly // undefined when --kpi-only is not passed (no default is declared)
150
+ });
151
+ });
152
+ program // 'summarize' subcommand: resolves the run ID (supporting "latest") and calls summarizeCommand
153
+ .command('summarize')
154
+ .description('Generate summary.json from run KPIs')
155
+ .argument('<runId>', 'Run ID (or "latest")')
156
+ .option('--repo <path>', 'Target repo path (default: current directory)', '.')
157
+ .action(async (runId, options) => {
158
+ let resolvedRunId = runId;
159
+ if (runId === 'latest') { // same "latest" resolution as the report command, via findLatestRunId
160
+ const latest = findLatestRunId(options.repo);
161
+ if (!latest) {
162
+ console.error('No runs found');
163
+ process.exit(1); // terminates here, so the assignment below only runs when a run was found
164
+ }
165
+ resolvedRunId = latest;
166
+ }
167
+ await summarizeCommand({ runId: resolvedRunId, repo: options.repo });
168
+ });
169
+ program // 'compare' subcommand: forwards two run IDs and the repo path to compareCommand
170
+ .command('compare')
171
+ .description('Compare KPIs between two runs')
172
+ .argument('<runA>', 'First run ID')
173
+ .argument('<runB>', 'Second run ID')
174
+ .option('--repo <path>', 'Target repo path (default: current directory)', '.')
175
+ .action(async (runA, runB, options) => { // positional arguments arrive in declaration order, followed by the options object
176
+ await compareCommand({ runA, runB, repo: options.repo });
177
+ });
178
+ program // 'doctor' subcommand: forwards repo and config paths to doctorCommand
179
+ .command('doctor')
180
+ .description('Check worker CLI availability and headless mode')
181
+ .option('--repo <path>', 'Target repo path', '.')
182
+ .option('--config <path>', 'Path to runr.config.json (or agent.config.json)')
183
+ .action(async (options) => {
184
+ await doctorCommand({
185
+ repo: options.repo,
186
+ config: options.config // undefined when --config is omitted (no default is declared)
187
+ });
188
+ });
189
+ program // 'paths' subcommand: forwards the repo path and output-format flag to pathsCommand
190
+ .command('paths')
191
+ .description('Display canonical runr directory paths (for scripts and tooling)')
192
+ .option('--repo <path>', 'Target repo path', '.')
193
+ .option('--json', 'Output JSON (default: true)', true) // JSON is the default output format here
194
+ .option('--no-json', 'Output human-readable table') // negated counterpart: sets options.json to false
195
+ .action(async (options) => {
196
+ await pathsCommand({
197
+ repo: options.repo,
198
+ json: options.json
199
+ });
200
+ });
201
+ program
202
+ .command('metrics')
203
+ .description('Show aggregated metrics across all runs and orchestrations')
204
+ .option('--repo <path>', 'Target repo path', '.')
205
+ .option('--days <n>', 'Number of days to aggregate (default: 30)', '30')
206
+ .option('--window <n>', 'Max runs to consider (default: 50 runs, 20 orchestrations)')
207
+ .option('--json', 'Output JSON format', false)
208
+ .action(async (options) => {
209
+ await metricsCommand({
210
+ repo: options.repo,
211
+ days: parseInt(options.days, 10),
212
+ window: options.window ? parseInt(options.window, 10) : undefined,
213
+ json: options.json
214
+ });
215
+ });
216
+ program // 'version' subcommand: forwards the --json flag to versionCommand
217
+ .command('version')
218
+ .description('Show version information')
219
+ .option('--json', 'Output JSON format', false)
220
+ .action(async (options) => {
221
+ await versionCommand({
222
+ json: options.json
223
+ });
224
+ });
225
+ program // 'follow' subcommand: picks a run to follow (explicit ID or best candidate) and calls followCommand
226
+ .command('follow')
227
+ .description('Tail run timeline and exit on termination')
228
+ .argument('[runId]', 'Run ID (or "latest", default: latest running or latest)')
229
+ .option('--repo <path>', 'Target repo path (default: current directory)', '.')
230
+ .action(async (runId, options) => {
231
+ let resolvedRunId;
232
+ if (!runId || runId === 'latest') { // no ID or the literal "latest": let findBestRunToFollow choose
233
+ const best = findBestRunToFollow(options.repo);
234
+ if (!best) {
235
+ console.error('No runs found');
236
+ process.exit(1); // terminates here, so best is non-null below
237
+ }
238
+ resolvedRunId = best.runId;
239
+ if (!best.wasRunning) { // tell the user the chosen run was not an active one
240
+ console.log(`No running runs; following latest (${resolvedRunId})`);
241
+ }
242
+ }
243
+ else {
244
+ resolvedRunId = runId;
245
+ }
246
+ await followCommand({ runId: resolvedRunId, repo: options.repo });
247
+ });
248
+ program // 'gc' subcommand: forwards repo, dry-run flag and age threshold to gcCommand
249
+ .command('gc')
250
+ .description('Clean up old worktree directories to reclaim disk space')
251
+ .option('--repo <path>', 'Target repo path (default: current directory)', '.')
252
+ .option('--dry-run', 'Preview what would be deleted without actually deleting', false)
253
+ .option('--older-than <days>', 'Only delete worktrees older than N days', '7')
254
+ .action(async (options) => {
255
+ await gcCommand({
256
+ repo: options.repo,
257
+ dryRun: options.dryRun,
258
+ olderThan: Number.parseInt(options.olderThan, 10) // NOTE(review): non-numeric input becomes NaN and is forwarded unchecked — confirm gcCommand validates it before deleting
259
+ });
260
+ });
261
+ program // 'wait' subcommand: resolves the run ID (explicit, "latest", or omitted) and calls waitCommand
262
+ .command('wait')
263
+ .description('Block until run reaches terminal state (for meta-agent coordination)')
264
+ .argument('[runId]', 'Run ID (or "latest")')
265
+ .option('--repo <path>', 'Target repo path (default: current directory)', '.')
266
+ .option('--for <condition>', 'Wait condition: terminal, stop, complete', 'terminal')
267
+ .option('--timeout <ms>', 'Timeout in milliseconds')
268
+ .option('--json', 'Output JSON (default: true)', true) // JSON is the default output format here
269
+ .option('--no-json', 'Output human-readable text') // negated counterpart: sets options.json to false
270
+ .action(async (runId, options) => {
271
+ let resolvedRunId;
272
+ if (!runId || runId === 'latest') { // omitted ID is treated the same as "latest"
273
+ const latest = findLatestRunIdForWait(options.repo); // wait.js's own findLatestRunId, imported under an alias
274
+ if (!latest) {
275
+ if (options.json) { // in JSON mode the error is emitted as a JSON object on stdout
276
+ console.log(JSON.stringify({ error: 'no_runs', message: 'No runs found' }));
277
+ }
278
+ else {
279
+ console.error('No runs found');
280
+ }
281
+ process.exit(1); // terminates here, so the assignment below only runs when a run was found
282
+ }
283
+ resolvedRunId = latest;
284
+ }
285
+ else {
286
+ resolvedRunId = runId;
287
+ }
288
+ await waitCommand({
289
+ runId: resolvedRunId,
290
+ repo: options.repo,
291
+ for: options.for, // NOTE(review): the condition string is not validated here — confirm waitCommand rejects unknown values
292
+ timeout: options.timeout ? Number.parseInt(options.timeout, 10) : undefined, // undefined when --timeout is omitted
293
+ json: options.json
294
+ });
295
+ });
296
+ // Orchestrate subcommands: 'orchestrate' is a parent command; run/resume/wait are registered on it below
297
+ const orchestrateCmd = program
298
+ .command('orchestrate')
299
+ .description('Run multiple tracks of tasks in parallel with collision-aware scheduling');
300
+ orchestrateCmd // 'orchestrate run': validates the collision policy, then calls orchestrateCommand
301
+ .command('run')
302
+ .description('Start a new orchestration from config')
303
+ .requiredOption('--config <path>', 'Path to orchestration config file (YAML or JSON)')
304
+ .option('--repo <path>', 'Target repo path (default: current directory)', '.')
305
+ .option('--time <minutes>', 'Time budget per run in minutes', '120')
306
+ .option('--max-ticks <count>', 'Max supervisor ticks per run', '50')
307
+ .option('--collision-policy <policy>', 'Collision policy: serialize, force, fail', 'serialize')
308
+ .option('--allow-deps', 'Allow lockfile changes', false)
309
+ .option('--worktree', 'Create isolated git worktree for each run', false)
310
+ .option('--fast', 'Fast path: skip PLAN and REVIEW phases', false)
311
+ .option('--auto-resume', 'Auto-resume runs on transient failures', false)
312
+ .option('--dry-run', 'Show planned execution without running', false)
313
+ .action(async (options) => {
314
+ const collisionPolicy = options.collisionPolicy;
315
+ if (!['serialize', 'force', 'fail'].includes(collisionPolicy)) { // reject anything outside the three documented policies before starting
316
+ console.error(`Invalid collision policy: ${collisionPolicy}`);
317
+ console.error('Valid values: serialize, force, fail');
318
+ process.exit(1);
319
+ }
320
+ await orchestrateCommand({
321
+ config: options.config,
322
+ repo: options.repo,
323
+ time: Number.parseInt(options.time, 10), // NOTE(review): non-numeric input becomes NaN and is forwarded unchecked
324
+ maxTicks: Number.parseInt(options.maxTicks, 10), // NOTE(review): same NaN caveat as --time
325
+ collisionPolicy,
326
+ allowDeps: options.allowDeps,
327
+ worktree: options.worktree,
328
+ fast: options.fast,
329
+ autoResume: options.autoResume,
330
+ dryRun: options.dryRun
331
+ });
332
+ });
333
+ orchestrateCmd // 'orchestrate resume': forwards the orchestrator ID plus optional policy overrides to resumeOrchestrationCommand
334
+ .command('resume')
335
+ .description('Resume a previously started orchestration')
336
+ .argument('<orchestratorId>', 'Orchestrator ID to resume (or "latest")')
337
+ .option('--repo <path>', 'Target repo path (default: current directory)', '.')
338
+ // Policy override flags (optional, logged if used)
339
+ .option('--time <minutes>', 'Override time budget per run')
340
+ .option('--max-ticks <count>', 'Override max supervisor ticks')
341
+ .option('--fast', 'Override fast mode (skip PLAN/REVIEW)')
342
+ .option('--no-fast', 'Disable fast mode override') // presumably options.fast stays undefined when neither --fast nor --no-fast is given — confirm against commander docs
343
+ .option('--collision-policy <policy>', 'Override collision policy: serialize, force, fail')
344
+ .action(async (orchestratorId, options) => { // "latest" is not resolved here; the raw ID string is passed through
345
+ await resumeOrchestrationCommand({
346
+ orchestratorId,
347
+ repo: options.repo,
348
+ overrides: {
349
+ time: options.time ? Number.parseInt(options.time, 10) : undefined, // undefined means "no override"
350
+ maxTicks: options.maxTicks ? Number.parseInt(options.maxTicks, 10) : undefined,
351
+ fast: options.fast,
352
+ collisionPolicy: options.collisionPolicy // NOTE(review): unlike 'orchestrate run', this value is not checked against serialize/force/fail here — confirm resumeOrchestrationCommand validates it
353
+ }
354
+ });
355
+ });
356
+ orchestrateCmd // 'orchestrate wait': forwards the orchestrator ID and wait settings to waitOrchestrationCommand
357
+ .command('wait')
358
+ .description('Block until orchestration reaches terminal state')
359
+ .argument('<orchestratorId>', 'Orchestrator ID to wait for (or "latest")')
360
+ .option('--repo <path>', 'Target repo path (default: current directory)', '.')
361
+ .option('--for <condition>', 'Wait condition: terminal, stop, complete', 'terminal')
362
+ .option('--timeout <ms>', 'Timeout in milliseconds')
363
+ .option('--json', 'Output JSON (default: true)', true) // JSON is the default output format here
364
+ .option('--no-json', 'Output human-readable text') // negated counterpart: sets options.json to false
365
+ .action(async (orchestratorId, options) => { // "latest" is not resolved here; the raw ID string is passed through
366
+ await waitOrchestrationCommand({
367
+ orchestratorId,
368
+ repo: options.repo,
369
+ for: options.for,
370
+ timeout: options.timeout ? Number.parseInt(options.timeout, 10) : undefined, // undefined when --timeout is omitted
371
+ json: options.json
372
+ });
373
+ });
374
+ // ==========================================
375
+ // Edgy aliases (same commands, different vibe)
376
+ // ==========================================
377
+ // summon → run: exposes a subset of run's flags; every flag not declared here is passed to runCommand as a fixed value
378
+ program
379
+ .command('summon')
380
+ .description('Summon a worker to execute a task (alias for "run")')
381
+ .option('--repo <path>', 'Target repo path', '.')
382
+ .requiredOption('--task <path>', 'Task brief file')
383
+ .option('--time <minutes>', 'Time budget in minutes', '120')
384
+ .option('--config <path>', 'Path to runr.config.json')
385
+ .option('--worktree', 'Create isolated git worktree', false)
386
+ .option('--fast', 'Skip PLAN and REVIEW phases', false)
387
+ .option('--auto-resume', 'Auto-resume on transient failures', false)
388
+ .option('--json', 'Output JSON', false)
389
+ .action(async (options) => {
390
+ await runCommand({
391
+ repo: options.repo,
392
+ task: options.task,
393
+ time: Number.parseInt(options.time, 10), // NOTE(review): non-numeric input becomes NaN and is forwarded unchecked
394
+ config: options.config,
395
+ allowDeps: false, // fixed: not configurable through this alias
396
+ allowDirty: false,
397
+ web: false,
398
+ dryRun: false,
399
+ noBranch: false,
400
+ noWrite: false,
401
+ maxTicks: 50, // same value as the 'run' command's --max-ticks default
402
+ skipDoctor: false,
403
+ freshTarget: false,
404
+ worktree: options.worktree,
405
+ fast: options.fast,
406
+ autoResume: options.autoResume,
407
+ forceParallel: false,
408
+ json: options.json
409
+ });
410
+ });
411
+ // resurrect → resume
412
+ program
413
+ .command('resurrect')
414
+ .description('Resurrect a stopped run from checkpoint (alias for "resume")')
415
+ .argument('<runId>', 'Run ID')
416
+ .option('--repo <path>', 'Target repo path', '.')
417
+ .option('--time <minutes>', 'Time budget in minutes', '120')
418
+ .option('--force', 'Resume despite env mismatch', false)
419
+ .action(async (runId, options) => {
420
+ await resumeCommand({
421
+ runId,
422
+ repo: options.repo,
423
+ time: Number.parseInt(options.time, 10),
424
+ maxTicks: 50,
425
+ allowDeps: false,
426
+ config: options.config,
427
+ force: options.force,
428
+ autoResume: false
429
+ });
430
+ });
431
+ // scry → status: same dispatch as the 'status' command (all runs, one run, or a usage error)
432
+ program
433
+ .command('scry')
434
+ .description('Scry the fate of a run (alias for "status")')
435
+ .argument('[runId]', 'Run ID')
436
+ .option('--repo <path>', 'Target repo path', '.')
437
+ .option('--all', 'Show all runs', false)
438
+ .action(async (runId, options) => {
439
+ if (options.all) { // --all takes precedence; a runId given alongside it is ignored
440
+ await statusAllCommand({ repo: options.repo });
441
+ }
442
+ else if (runId) {
443
+ await statusCommand({ runId, repo: options.repo });
444
+ }
445
+ else { // neither --all nor a run ID: report the usage error and exit non-zero
446
+ console.error('Error: Run ID required unless using --all');
447
+ process.exit(1);
448
+ }
449
+ });
450
+ // banish → gc: same options and the same gcCommand call as the 'gc' command
451
+ program
452
+ .command('banish')
453
+ .description('Banish old worktrees to the void (alias for "gc")')
454
+ .option('--repo <path>', 'Target repo path', '.')
455
+ .option('--dry-run', 'Preview without deleting', false)
456
+ .option('--older-than <days>', 'Only banish worktrees older than N days', '7')
457
+ .action(async (options) => {
458
+ await gcCommand({
459
+ repo: options.repo,
460
+ dryRun: options.dryRun,
461
+ olderThan: Number.parseInt(options.olderThan, 10) // NOTE(review): non-numeric input becomes NaN and is forwarded unchecked — confirm gcCommand validates it before deleting
462
+ });
463
+ });
464
+ program.parseAsync(); // parse the command line and run the matching async action; NOTE(review): the returned promise is neither awaited nor given a .catch, so a rejected action surfaces as an unhandled rejection — confirm that is intended
package/dist/commands/__tests__/report.test.js ADDED
@@ -0,0 +1,202 @@
1
+ import { describe, it, expect } from 'vitest';
2
+ import { computeKpiFromEvents } from '../report.js';
3
+ // Helper to create events with timestamps: returns a plain { type, timestamp, payload } object; payload defaults to {}
4
+ function event(type, timestamp, payload = {}) {
5
+ return { type, timestamp, payload };
6
+ }
7
+ describe('computeKpiFromEvents', () => {
8
+ describe('graceful degradation', () => { // empty or partial event streams must still produce a usable KPI object
9
+ it('never throws on empty events', () => {
10
+ expect(() => computeKpiFromEvents([])).not.toThrow();
11
+ });
12
+ it('returns unknown for workers when no worker_stats event', () => {
13
+ const events = [
14
+ event('run_started', '2025-01-01T00:00:00Z'),
15
+ event('phase_start', '2025-01-01T00:00:01Z', { phase: 'PLAN' }),
16
+ event('stop', '2025-01-01T00:01:00Z', { reason: 'user' })
17
+ ];
18
+ const kpi = computeKpiFromEvents(events);
19
+ expect(kpi.workers.claude).toBe('unknown'); // the string 'unknown' stands in for a missing count
20
+ expect(kpi.workers.codex).toBe('unknown');
21
+ });
22
+ it('returns outcome running when no stop or complete event', () => {
23
+ const events = [
24
+ event('run_started', '2025-01-01T00:00:00Z'),
25
+ event('phase_start', '2025-01-01T00:00:01Z', { phase: 'PLAN' })
26
+ ];
27
+ const kpi = computeKpiFromEvents(events);
28
+ expect(kpi.outcome).toBe('running');
29
+ expect(kpi.total_duration_ms).toBeNull(); // durations are null while the run has no terminal event
30
+ expect(kpi.unattributed_ms).toBeNull();
31
+ });
32
+ it('returns outcome unknown when no run_started event', () => {
33
+ const events = [
34
+ event('phase_start', '2025-01-01T00:00:01Z', { phase: 'PLAN' })
35
+ ];
36
+ const kpi = computeKpiFromEvents(events);
37
+ expect(kpi.outcome).toBe('unknown');
38
+ });
39
+ it('returns empty phases when no phase_start events', () => {
40
+ const events = [
41
+ event('run_started', '2025-01-01T00:00:00Z'),
42
+ event('stop', '2025-01-01T00:01:00Z', { reason: 'user' })
43
+ ];
44
+ const kpi = computeKpiFromEvents(events);
45
+ expect(Object.keys(kpi.phases)).toHaveLength(0);
46
+ });
47
+ });
48
+ describe('old run (no worker_stats)', () => { // event logs without a worker_stats event still yield durations and phases
49
+ it('computes duration and phases without worker counts', () => {
50
+ const events = [
51
+ event('run_started', '2025-01-01T00:00:00Z'),
52
+ event('phase_start', '2025-01-01T00:00:10Z', { phase: 'PLAN' }),
53
+ event('phase_start', '2025-01-01T00:01:00Z', { phase: 'IMPLEMENT' }),
54
+ event('phase_start', '2025-01-01T00:02:00Z', { phase: 'VERIFY' }),
55
+ event('verification', '2025-01-01T00:02:05Z', { tier: 'tier0', ok: true, duration_ms: 5000 }),
56
+ event('stop', '2025-01-01T00:02:10Z', { reason: 'complete' })
57
+ ];
58
+ const kpi = computeKpiFromEvents(events);
59
+ expect(kpi.total_duration_ms).toBe(130000); // 2m10s: run_started (00:00:00) to stop (00:02:10)
60
+ expect(kpi.workers.claude).toBe('unknown');
61
+ expect(kpi.workers.codex).toBe('unknown');
62
+ expect(kpi.phases['PLAN'].duration_ms).toBe(50000); // 50s: 00:00:10 to the next phase_start at 00:01:00
63
+ expect(kpi.phases['IMPLEMENT'].duration_ms).toBe(60000); // 60s
64
+ expect(kpi.phases['VERIFY'].duration_ms).toBe(10000); // 10s: 00:02:00 to stop at 00:02:10
65
+ expect(kpi.verify.attempts).toBe(1);
66
+ });
67
+ });
68
+ describe('multiple phase loops', () => { // repeated IMPLEMENT/VERIFY cycles must add up per phase
69
+ it('accumulates phase durations across multiple iterations', () => {
70
+ const events = [
71
+ event('run_started', '2025-01-01T00:00:00Z'),
72
+ // First IMPLEMENT cycle
73
+ event('phase_start', '2025-01-01T00:00:00Z', { phase: 'IMPLEMENT' }),
74
+ event('phase_start', '2025-01-01T00:01:00Z', { phase: 'VERIFY' }),
75
+ event('verification', '2025-01-01T00:01:05Z', { tier: 'tier0', ok: false }),
76
+ // Retry - second IMPLEMENT cycle
77
+ event('phase_start', '2025-01-01T00:01:10Z', { phase: 'IMPLEMENT' }),
78
+ event('phase_start', '2025-01-01T00:02:00Z', { phase: 'VERIFY' }),
79
+ event('verification', '2025-01-01T00:02:05Z', { tier: 'tier0', ok: false }),
80
+ // Retry - third IMPLEMENT cycle
81
+ event('phase_start', '2025-01-01T00:02:10Z', { phase: 'IMPLEMENT' }),
82
+ event('phase_start', '2025-01-01T00:03:00Z', { phase: 'VERIFY' }),
83
+ event('verification', '2025-01-01T00:03:05Z', { tier: 'tier0', ok: true }),
84
+ event('stop', '2025-01-01T00:03:10Z', { reason: 'complete' })
85
+ ];
86
+ const kpi = computeKpiFromEvents(events);
87
+ expect(kpi.phases['IMPLEMENT'].count).toBe(3);
88
+ expect(kpi.phases['VERIFY'].count).toBe(3);
89
+ // IMPLEMENT: 60s + 50s + 50s = 160s
90
+ expect(kpi.phases['IMPLEMENT'].duration_ms).toBe(160000);
91
+ // VERIFY: 10s + 10s + 10s = 30s
92
+ expect(kpi.phases['VERIFY'].duration_ms).toBe(30000);
93
+ expect(kpi.verify.attempts).toBe(3); // one attempt per 'verification' event in the fixture
94
+ });
95
+ });
96
+ describe('verify retries', () => { // the retry payload field of verification events is summed
97
+ it('counts retry field from verification events', () => {
98
+ const events = [
99
+ event('run_started', '2025-01-01T00:00:00Z'),
100
+ event('phase_start', '2025-01-01T00:00:00Z', { phase: 'VERIFY' }),
101
+ event('verification', '2025-01-01T00:00:05Z', { tier: 'tier0', ok: false, retry: 0 }),
102
+ event('verification', '2025-01-01T00:00:10Z', { tier: 'tier0', ok: false, retry: 1 }),
103
+ event('verification', '2025-01-01T00:00:15Z', { tier: 'tier0', ok: true, retry: 2 }),
104
+ event('stop', '2025-01-01T00:00:20Z', { reason: 'complete' })
105
+ ];
106
+ const kpi = computeKpiFromEvents(events);
107
+ expect(kpi.verify.attempts).toBe(3);
108
+ expect(kpi.verify.retries).toBe(3); // 0 + 1 + 2 = 3 (sum of the retry fields, not a count of retried attempts)
109
+ });
110
+ });
111
+ describe('worker_stats tracking', () => { // per-worker call counts come from the worker_stats event payload
112
+ it('extracts worker call counts from worker_stats event', () => {
113
+ const events = [
114
+ event('run_started', '2025-01-01T00:00:00Z'),
115
+ event('phase_start', '2025-01-01T00:00:00Z', { phase: 'PLAN' }),
116
+ event('worker_stats', '2025-01-01T00:01:00Z', {
117
+ stats: { claude: 5, codex: 3, by_phase: {} }
118
+ }),
119
+ event('stop', '2025-01-01T00:01:00Z', { reason: 'complete' })
120
+ ];
121
+ const kpi = computeKpiFromEvents(events);
122
+ expect(kpi.workers.claude).toBe(5); // matches payload.stats.claude in the fixture
123
+ expect(kpi.workers.codex).toBe(3); // matches payload.stats.codex in the fixture
124
+ });
125
+ });
126
+ describe('milestone tracking', () => { // completed milestones are counted from milestone_complete events
127
+ it('counts milestone_complete events', () => {
128
+ const events = [
129
+ event('run_started', '2025-01-01T00:00:00Z'),
130
+ event('milestone_complete', '2025-01-01T00:01:00Z', { index: 0 }),
131
+ event('milestone_complete', '2025-01-01T00:02:00Z', { index: 1 }),
132
+ event('milestone_complete', '2025-01-01T00:03:00Z', { index: 2 }),
133
+ event('stop', '2025-01-01T00:03:00Z', { reason: 'complete' })
134
+ ];
135
+ const kpi = computeKpiFromEvents(events);
136
+ expect(kpi.milestones.completed).toBe(3); // three milestone_complete events in the fixture
137
+ });
138
+ });
139
+ describe('outcome detection', () => { // the terminal event type decides outcome and stop_reason
140
+ it('sets outcome to stopped with reason', () => {
141
+ const events = [
142
+ event('run_started', '2025-01-01T00:00:00Z'),
143
+ event('stop', '2025-01-01T00:01:00Z', { reason: 'implement_blocked' })
144
+ ];
145
+ const kpi = computeKpiFromEvents(events);
146
+ expect(kpi.outcome).toBe('stopped');
147
+ expect(kpi.stop_reason).toBe('implement_blocked'); // taken from the stop event's payload.reason
148
+ });
149
+ it('sets outcome to complete on run_complete', () => {
150
+ const events = [
151
+ event('run_started', '2025-01-01T00:00:00Z'),
152
+ event('run_complete', '2025-01-01T00:01:00Z')
153
+ ];
154
+ const kpi = computeKpiFromEvents(events);
155
+ expect(kpi.outcome).toBe('complete');
156
+ expect(kpi.stop_reason).toBeNull(); // no stop event, so there is no reason to report
157
+ });
158
+ });
159
+ describe('unattributed time', () => { // unattributed_ms is the part of the total duration not covered by any phase
160
+ it('computes positive unattributed time (preflight, gaps)', () => {
161
+ const events = [
162
+ event('run_started', '2025-01-01T00:00:00Z'),
163
+ // 30s gap before first phase (preflight, etc)
164
+ event('phase_start', '2025-01-01T00:00:30Z', { phase: 'PLAN' }),
165
+ event('phase_start', '2025-01-01T00:01:00Z', { phase: 'IMPLEMENT' }),
166
+ event('stop', '2025-01-01T00:02:00Z', { reason: 'complete' })
167
+ ];
168
+ const kpi = computeKpiFromEvents(events);
169
+ // Total: 2m = 120s
170
+ // PLAN: 30s, IMPLEMENT: 60s
171
+ // Unattributed: 120 - 90 = 30s
172
+ expect(kpi.total_duration_ms).toBe(120000);
173
+ expect(kpi.unattributed_ms).toBe(30000);
174
+ });
175
+ it('handles negative unattributed (resumed runs with gap)', () => {
176
+ // This can happen when run is paused and resumed
177
+ // Total duration doesn't account for pause gap
178
+ const events = [
179
+ event('run_started', '2025-01-01T00:00:00Z'),
180
+ event('phase_start', '2025-01-01T00:00:00Z', { phase: 'IMPLEMENT' }),
181
+ event('stop', '2025-01-01T00:01:00Z', { reason: 'blocked' }),
182
+ // After resume - phase duration exceeds tracked total
183
+ event('run_resumed', '2025-01-01T00:10:00Z'),
184
+ event('phase_start', '2025-01-01T00:10:00Z', { phase: 'IMPLEMENT' }),
185
+ event('stop', '2025-01-01T00:11:00Z', { reason: 'complete' })
186
+ ];
187
+ const kpi = computeKpiFromEvents(events);
188
+ // Wall-clock span in this fixture: last stop at 00:11:00 - run_started at 00:00:00 = 11m
189
+ // IMPLEMENT windows in this fixture: 1m (00:00:00-00:01:00) + 1m (00:10:00-00:11:00) = 2m
190
+ // By that arithmetic unattributed would be 11m - 2m = 9m, i.e. positive here, not negative
191
+ // NOTE(review): the title says "negative" but no duration is asserted; only the outcome is checked — confirm intent
192
+ expect(kpi.outcome).toBe('stopped');
193
+ // The second stop overwrites the first
194
+ });
195
+ });
196
+ describe('version field', () => { // the KPI object carries a version number even for an empty event list
197
+ it('always returns version 1', () => {
198
+ const kpi = computeKpiFromEvents([]);
199
+ expect(kpi.version).toBe(1);
200
+ });
201
+ });
202
+ });