@spacek33z/autoauto 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. package/README.md +197 -0
  2. package/package.json +51 -0
  3. package/src/App.tsx +224 -0
  4. package/src/cli.ts +772 -0
  5. package/src/components/AgentPanel.tsx +254 -0
  6. package/src/components/Chat.test.tsx +71 -0
  7. package/src/components/Chat.tsx +308 -0
  8. package/src/components/CycleField.tsx +23 -0
  9. package/src/components/ModelPicker.tsx +97 -0
  10. package/src/components/PostUpdatePrompt.tsx +46 -0
  11. package/src/components/ResultsTable.tsx +172 -0
  12. package/src/components/RunCompletePrompt.tsx +90 -0
  13. package/src/components/RunSettingsOverlay.tsx +49 -0
  14. package/src/components/RunsTable.tsx +219 -0
  15. package/src/components/StatsHeader.tsx +100 -0
  16. package/src/daemon.ts +264 -0
  17. package/src/index.tsx +8 -0
  18. package/src/lib/agent/agent-provider.test.ts +133 -0
  19. package/src/lib/agent/claude-provider.ts +277 -0
  20. package/src/lib/agent/codex-provider.ts +413 -0
  21. package/src/lib/agent/default-providers.ts +10 -0
  22. package/src/lib/agent/index.ts +32 -0
  23. package/src/lib/agent/mock-provider.ts +61 -0
  24. package/src/lib/agent/opencode-provider.ts +424 -0
  25. package/src/lib/agent/types.ts +73 -0
  26. package/src/lib/auth.ts +11 -0
  27. package/src/lib/config.ts +152 -0
  28. package/src/lib/daemon-callbacks.ts +59 -0
  29. package/src/lib/daemon-client.ts +16 -0
  30. package/src/lib/daemon-lifecycle.ts +368 -0
  31. package/src/lib/daemon-spawn.ts +122 -0
  32. package/src/lib/daemon-status.ts +189 -0
  33. package/src/lib/daemon-watcher.ts +192 -0
  34. package/src/lib/experiment-loop.ts +679 -0
  35. package/src/lib/experiment.ts +356 -0
  36. package/src/lib/finalize.test.ts +143 -0
  37. package/src/lib/finalize.ts +511 -0
  38. package/src/lib/format.test.ts +32 -0
  39. package/src/lib/format.ts +44 -0
  40. package/src/lib/git.ts +176 -0
  41. package/src/lib/ideas-backlog.test.ts +54 -0
  42. package/src/lib/ideas-backlog.ts +109 -0
  43. package/src/lib/measure.ts +472 -0
  44. package/src/lib/model-options.ts +24 -0
  45. package/src/lib/programs.ts +247 -0
  46. package/src/lib/push-stream.ts +48 -0
  47. package/src/lib/run-context.ts +112 -0
  48. package/src/lib/run-setup.ts +34 -0
  49. package/src/lib/run.ts +383 -0
  50. package/src/lib/syntax-theme.ts +39 -0
  51. package/src/lib/system-prompts/experiment.ts +77 -0
  52. package/src/lib/system-prompts/finalize.ts +90 -0
  53. package/src/lib/system-prompts/index.ts +7 -0
  54. package/src/lib/system-prompts/setup.ts +516 -0
  55. package/src/lib/system-prompts/update.ts +188 -0
  56. package/src/lib/tool-events.ts +99 -0
  57. package/src/lib/validate-measurement.ts +326 -0
  58. package/src/lib/worktree.ts +40 -0
  59. package/src/screens/AuthErrorScreen.tsx +31 -0
  60. package/src/screens/ExecutionScreen.tsx +851 -0
  61. package/src/screens/FirstSetupScreen.tsx +168 -0
  62. package/src/screens/HomeScreen.tsx +406 -0
  63. package/src/screens/PreRunScreen.tsx +206 -0
  64. package/src/screens/SettingsScreen.tsx +189 -0
  65. package/src/screens/SetupScreen.tsx +226 -0
  66. package/src/tui.tsx +17 -0
  67. package/tsconfig.json +17 -0
package/src/cli.ts ADDED
@@ -0,0 +1,772 @@
1
+ import { join } from "node:path"
2
+ import {
3
+ listPrograms,
4
+ loadProgramConfig,
5
+ getProgramDir,
6
+ getProjectRoot,
7
+ type ProgramConfig,
8
+ } from "./lib/programs.ts"
9
+ import {
10
+ spawnDaemon,
11
+ getDaemonStatus,
12
+ sendStop,
13
+ sendAbort,
14
+ forceKillDaemon,
15
+ findActiveRun,
16
+ updateMaxExperiments,
17
+ } from "./lib/daemon-client.ts"
18
+ import {
19
+ getLatestRun,
20
+ listRuns,
21
+ readAllResults,
22
+ readState,
23
+ getRunStats,
24
+ type RunState,
25
+ } from "./lib/run.ts"
26
+ import { loadProjectConfig, type ModelSlot, type EffortLevel } from "./lib/config.ts"
27
+ import { streamLogName } from "./lib/daemon-callbacks.ts"
28
+ import { closeProviders, type AgentProviderID } from "./lib/agent/index.ts"
29
+ import { registerDefaultProviders } from "./lib/agent/default-providers.ts"
30
+ import { getDefaultModel } from "./lib/model-options.ts"
31
+ import { formatShellError } from "./lib/git.ts"
32
+
33
+ // --- Arg Parsing ---
34
+
35
interface ParsedArgs {
  command: string
  positional: string[]
  flags: Record<string, string | boolean>
}

/**
 * Flags that are only ever tested for presence by the commands in this file.
 * They never consume the following argument, so `status --json my-slug`
 * keeps `my-slug` as a positional instead of storing it as the value of `--json`.
 */
const BOOLEAN_FLAGS: ReadonlySet<string> = new Set([
  "json",
  "all",
  "abort",
  "no-wait",
  "in-place",
  "ideas-backlog",
  "no-ideas-backlog",
])

/**
 * Splits raw CLI arguments into a command, positionals and `--flags`.
 *
 * - `argv[0]` is the command name (empty string when argv is empty).
 * - `--key=value` stores `value` under `key`.
 * - `--key value` stores `value` under `key`, unless `key` is a known boolean
 *   flag or `value` itself starts with `--`; in those cases `key` is `true`.
 * - Everything else is collected, in order, as a positional argument.
 *
 * @param argv Arguments with the command name first.
 * @returns The parsed command, positionals and flag map.
 */
function parseArgs(argv: string[]): ParsedArgs {
  const command = argv[0] ?? ""
  const positional: string[] = []
  const flags: Record<string, string | boolean> = {}

  for (let i = 1; i < argv.length; i++) {
    const arg = argv[i]
    if (arg === undefined) continue

    if (!arg.startsWith("--")) {
      positional.push(arg)
      continue
    }

    const body = arg.slice(2)

    // Inline form: --key=value
    const eq = body.indexOf("=")
    if (eq > 0) {
      flags[body.slice(0, eq)] = body.slice(eq + 1)
      continue
    }

    // Presence-only flags must not swallow the next argument.
    if (BOOLEAN_FLAGS.has(body)) {
      flags[body] = true
      continue
    }

    const next = argv[i + 1]
    if (next && !next.startsWith("--")) {
      flags[body] = next
      i++
    } else {
      flags[body] = true
    }
  }

  return { command, positional, flags }
}
64
+
65
/**
 * Reads the string value of a flag.
 *
 * @returns The value when the flag was given one; `undefined` when the flag
 *          is absent or was stored as a bare boolean.
 */
function getFlag(flags: Record<string, string | boolean>, key: string): string | undefined {
  const raw = flags[key]
  if (typeof raw !== "string") return undefined
  return raw
}
69
+
70
/**
 * Reports whether a flag was supplied at all, with or without a value.
 *
 * Uses an own-property check: the `in` operator also matches keys inherited
 * from `Object.prototype` (e.g. "toString"), which would report flags that
 * were never passed.
 */
function hasFlag(flags: Record<string, string | boolean>, key: string): boolean {
  return Object.prototype.hasOwnProperty.call(flags, key)
}
73
+
74
+ // --- Output Helpers ---
75
+
76
/** Writes one line of text to stdout, appending a trailing newline. */
function out(text: string) {
  const line = `${text}\n`
  process.stdout.write(line)
}
79
+
80
/** Writes `data` to stdout as JSON indented by two spaces, followed by a newline. */
function outJson(data: unknown) {
  const serialized = JSON.stringify(data, null, 2)
  process.stdout.write(`${serialized}\n`)
}
83
+
84
/**
 * Prints `Error: <message>` to stderr and terminates the process.
 *
 * @param message Text shown after the "Error: " prefix.
 * @param code Process exit code; defaults to 1.
 */
function die(message: string, code = 1): never {
  const line = `Error: ${message}\n`
  process.stderr.write(line)
  process.exit(code)
}
88
+
89
/**
 * Right-pads `str` with spaces until it is `len` characters long.
 * Strings already at least `len` long are returned unchanged (never truncated).
 */
function padRight(str: string, len: number): string {
  return str.padEnd(len, " ")
}
92
+
93
/**
 * Formats the time between two timestamps as `"<m>m"` or `"<h>h <m>m"`.
 *
 * @param startedAt Start timestamp, parsed with `new Date(...)`.
 * @param endedAt Optional end timestamp; the current time is used when omitted.
 * @returns The elapsed time rounded down to whole minutes. Returns "—" when
 *          either timestamp cannot be parsed, and "0m" when the end precedes
 *          the start (e.g. clock skew), instead of printing NaN or negative values.
 */
function formatElapsed(startedAt: string, endedAt?: string): string {
  const start = new Date(startedAt).getTime()
  const end = endedAt ? new Date(endedAt).getTime() : Date.now()
  const ms = end - start

  // An unparseable date yields NaN, which would otherwise render as "NaNh NaNm".
  if (!Number.isFinite(ms)) return "—"

  const mins = Math.floor(Math.max(0, ms) / 60_000)
  if (mins < 60) return `${mins}m`

  const hours = Math.floor(mins / 60)
  const remainingMins = mins % 60
  return `${hours}h ${remainingMins}m`
}
103
+
104
/** Formats a USD amount with two decimals; a missing amount is shown as "$0.00". */
function formatCost(usd: number | undefined): string {
  const amount = usd ?? 0
  return "$" + amount.toFixed(2)
}
108
+
109
/**
 * Formats the change from `original` to `current` as a signed percentage of
 * `|original|`, with one decimal place.
 *
 * For direction "lower" a decrease counts as positive; for any other direction
 * an increase counts as positive. Positive values get a "+" prefix.
 *
 * @returns The formatted percentage, or "—" when `original` is 0.
 */
function formatChangePct(
  original: number,
  current: number,
  direction: ProgramConfig["direction"],
): string {
  if (original === 0) return "—"

  const delta = direction === "lower" ? original - current : current - original
  const pct = (delta / Math.abs(original)) * 100
  const prefix = pct > 0 ? "+" : ""
  return prefix + pct.toFixed(1) + "%"
}
122
+
123
/**
 * Parses a string made only of decimal digits into an integer >= 1.
 *
 * @returns The parsed integer, or `null` when the input has any non-digit
 *          character, is zero, or is too large to be represented exactly
 *          (beyond `Number.MAX_SAFE_INTEGER`).
 */
function parsePositiveInt(value: string): number | null {
  if (!/^\d+$/.test(value)) return null
  const n = parseInt(value, 10)
  // Very long digit strings pass the regex but lose precision once parsed.
  if (!Number.isSafeInteger(n)) return null
  return n >= 1 ? n : null
}
128
+
129
/**
 * Validates a provider name from the command line.
 *
 * @returns The value unchanged when it is "claude", "opencode" or "codex";
 *          otherwise `null` (including when no value was given).
 */
function parseProvider(value: string | undefined): AgentProviderID | null {
  switch (value) {
    case "claude":
    case "opencode":
    case "codex":
      return value
    default:
      return null
  }
}
133
+
134
+ // --- Resolve common context ---
135
+
136
/**
 * Resolves the project root via `getProjectRoot`, starting from the `--cwd`
 * flag value when given and from the process working directory otherwise.
 */
async function resolveRoot(flags: Record<string, string | boolean>): Promise<string> {
  const override = getFlag(flags, "cwd")
  const startDir = override === undefined ? process.cwd() : override
  return getProjectRoot(startDir)
}
140
+
141
/**
 * Picks the run a command should operate on.
 *
 * With `--run <id>` the run directory is `<programDir>/runs/<id>`; the process
 * exits with an error if its state cannot be read. Without the flag, the run
 * returned by `getLatestRun` is used, and the process exits with an error when
 * there is none.
 *
 * @returns The run directory and run id.
 */
async function resolveRunDir(
  programDir: string,
  flags: Record<string, string | boolean>,
): Promise<{ runDir: string; runId: string }> {
  const requested = getFlag(flags, "run")

  if (!requested) {
    const latest = await getLatestRun(programDir)
    if (!latest) die(`No runs found. Start one with: autoauto start <slug>`)
    return { runDir: latest.run_dir, runId: latest.run_id }
  }

  const runDir = join(programDir, "runs", requested)
  try {
    // Reading the state is only used as an existence check here.
    await readState(runDir)
  } catch {
    die(`Run "${requested}" not found.`)
  }
  return { runDir, runId: requested }
}
160
+
161
+ // --- Commands ---
162
+
163
/**
 * `autoauto list`: prints one row per program with its daemon status, latest
 * run id, best metric and goal.
 *
 * With `--json` the rows are emitted as a JSON array (an empty array when no
 * programs exist); otherwise a padded text table is printed.
 */
async function cmdList(args: ParsedArgs) {
  const root = await resolveRoot(args.flags)
  const programs = await listPrograms(root)
  const asJson = hasFlag(args.flags, "json")

  if (programs.length === 0) {
    if (asJson) outJson([])
    else out("No programs found. Create one in the TUI first.")
    return
  }

  type ProgramRow = {
    slug: string
    status: string
    last_run_id: string | null
    best_metric: number | null
    best_metric_change: string | null
    metric_field: string
    direction: string
    goal: string
  }
  const rows: ProgramRow[] = []

  for (const program of programs) {
    const programDir = getProgramDir(root, program.name)

    // A program whose config fails to load is still listed, with "unknown" fields.
    let config: ProgramConfig | null
    try {
      config = await loadProgramConfig(programDir)
    } catch {
      config = null
    }

    const active = await findActiveRun(programDir)
    const latest = await getLatestRun(programDir)

    // Best effort: take the text under the "## Goal" heading of program.md.
    let goal = ""
    try {
      const md = await Bun.file(join(programDir, "program.md")).text()
      const match = md.match(/## Goal\n+([\s\S]*?)(?:\n##|\n*$)/)
      if (match) goal = match[1].trim()
    } catch {}

    let bestMetric: number | null = null
    let bestMetricChange: string | null = null
    if (latest?.state && config) {
      bestMetric = latest.state.best_metric
      const { improvement_pct: pct } = getRunStats(latest.state, config.direction)
      if (pct !== 0) {
        bestMetricChange = `${pct > 0 ? "+" : ""}${pct.toFixed(1)}%`
      }
    }

    rows.push({
      slug: program.name,
      status: active?.daemonAlive ? "running" : "idle",
      last_run_id: latest?.run_id ?? null,
      best_metric: bestMetric,
      best_metric_change: bestMetricChange,
      metric_field: config?.metric_field ?? "unknown",
      direction: config?.direction ?? "unknown",
      goal,
    })
  }

  if (asJson) {
    outJson(rows)
    return
  }

  // Human-readable table; the header is labelled with the first row's metric field.
  const metricLabel = rows.length > 0 ? `Best (${rows[0].metric_field})` : "Best"
  out(`${padRight("Program", 20)} ${padRight("Status", 10)} ${padRight("Last Run", 18)} ${metricLabel}`)

  for (const row of rows) {
    let metricStr = "—"
    if (row.best_metric != null) {
      const suffix = row.best_metric_change ? ` (${row.best_metric_change})` : ""
      metricStr = `${row.best_metric}${suffix}`
    }
    out(
      `${padRight(row.slug, 20)} ${padRight(row.status, 10)} ${padRight(row.last_run_id ?? "—", 18)} ${metricStr}`,
    )
  }
}
252
+
253
/**
 * `autoauto start <slug>`: resolves the model/provider settings, spawns the
 * run daemon and, unless `--no-wait` is given, polls once per second until a
 * baseline row (experiment #0) appears in the results or the daemon stops.
 *
 * Flags read here: `--json`, `--no-wait`, `--provider`, `--model`, `--effort`,
 * `--max-experiments`, `--ideas-backlog` / `--no-ideas-backlog`, `--in-place`.
 *
 * Exit codes used: 1 for usage/validation problems and for spawn errors that
 * mention "uncommitted changes" or "already active"; 2 for other spawn errors
 * and for failures while waiting for the baseline.
 */
async function cmdStart(args: ParsedArgs) {
  const slug = args.positional[0]
  if (!slug) die("Usage: autoauto start <program-slug>")

  const { flags } = args
  const root = await resolveRoot(flags)
  const programDir = getProgramDir(root, slug)
  const asJson = hasFlag(flags, "json")
  const detach = hasFlag(flags, "no-wait")

  // The program must exist before anything is spawned.
  let programConfig: ProgramConfig
  try {
    programConfig = await loadProgramConfig(programDir)
  } catch {
    die(`Program "${slug}" not found. Run \`autoauto list\` to see available programs.`)
  }

  // Project-level settings supply every default below.
  const projectConfig = await loadProjectConfig(root)
  const defaults = projectConfig.executionModel

  const providerFlag = getFlag(flags, "provider")
  const parsedProvider = parseProvider(providerFlag)
  if (providerFlag && !parsedProvider) die(`Invalid --provider: "${providerFlag}". Use claude, opencode, or codex.`)

  // An explicit --model without --provider implies the claude provider.
  const explicitModel = getFlag(flags, "model")
  let provider: AgentProviderID
  if (parsedProvider) provider = parsedProvider
  else if (explicitModel) provider = "claude"
  else provider = defaults.provider

  if (provider === "opencode" && hasFlag(flags, "effort")) {
    die("--effort is not supported with --provider opencode yet.")
  }

  let model = explicitModel
  if (!model) {
    if (provider === defaults.provider) {
      model = defaults.model
    } else if (provider === "opencode") {
      model = (await getDefaultModel("opencode", root)) ?? undefined
      if (!model) die("No connected OpenCode models found. Run `opencode auth login` or `/connect` first.")
    } else if (provider === "codex") {
      model = (await getDefaultModel("codex", root)) ?? undefined
      if (!model) die("Could not resolve Codex default model.")
    } else {
      model = "sonnet"
    }
  }
  if (!model) die("Could not resolve model.")

  // --effort is only honoured for providers other than opencode.
  const requestedEffort = getFlag(flags, "effort") as EffortLevel | undefined
  const effort =
    provider !== "opencode" && requestedEffort !== undefined ? requestedEffort : defaults.effort
  const modelConfig: ModelSlot = { provider, model, effort }

  let maxExperiments: number = programConfig.max_experiments ?? 25
  const maxExperimentsStr = getFlag(flags, "max-experiments")
  if (maxExperimentsStr != null) {
    const parsed = parsePositiveInt(maxExperimentsStr)
    if (parsed == null) die(`Invalid --max-experiments: "${maxExperimentsStr}". Must be a positive integer.`)
    maxExperiments = parsed
  }

  // --no-ideas-backlog wins over --ideas-backlog; otherwise use the project setting.
  let ideasBacklogEnabled = projectConfig.ideasBacklogEnabled
  if (hasFlag(flags, "no-ideas-backlog")) ideasBacklogEnabled = false
  else if (hasFlag(flags, "ideas-backlog")) ideasBacklogEnabled = true

  const useWorktree = !hasFlag(flags, "in-place")

  // Spawn daemon
  let result: { runId: string; runDir: string; worktreePath: string | null; pid: number }
  try {
    result = await spawnDaemon(root, slug, modelConfig, maxExperiments, ideasBacklogEnabled, useWorktree)
  } catch (err) {
    const msg = formatShellError(err)
    const userFixable = msg.includes("uncommitted changes") || msg.includes("already active")
    die(msg, userFixable ? 1 : 2)
  }

  if (detach) {
    if (asJson) {
      outJson({ run_id: result.runId, daemon_pid: result.pid, status: "started" })
      return
    }
    out(`Started run ${result.runId} for ${slug}`)
    out(`Daemon PID: ${result.pid}`)
    out("")
    out("The daemon is running baseline measurement in the background.")
    out("")
    out("Next steps:")
    out(` autoauto status ${slug} # Check progress (baseline first, then experiments)`)
    out(` autoauto results ${slug} # View experiment results table`)
    out(` autoauto stop ${slug} # Stop after current experiment`)
    return
  }

  // Block until baseline completes (or fails).
  // Baseline completion is detected by a row for experiment #0 in the results.
  // No hard timeout — baselines can be legitimately slow. Daemon death is the exit condition.
  if (!asJson) out(`Starting run ${result.runId} for ${slug}... waiting for baseline`)

  const pollIntervalMs = 1000

  for (;;) {
    await new Promise((resolve) => setTimeout(resolve, pollIntervalMs))

    // 1. Is the daemon still alive (or still starting)?
    const daemon = await getDaemonStatus(result.runDir)
    if (!daemon.alive && !daemon.starting) {
      // Daemon died — prefer the error recorded in the run state, if readable.
      try {
        const state = await readState(result.runDir)
        if (state.error) die(`Baseline failed: ${state.error}`, 2)
        if (state.phase === "crashed") die("Daemon crashed during baseline.", 2)
      } catch {}
      die("Daemon exited unexpectedly during baseline.", 2)
    }

    // 2. Has the baseline row been written?
    try {
      const rows = await readAllResults(result.runDir)
      const baselineRow = rows.find((row) => row.experiment_number === 0)
      if (baselineRow) {
        if (asJson) {
          outJson({
            run_id: result.runId,
            daemon_pid: result.pid,
            baseline_metric: baselineRow.metric_value,
            status: "running",
          })
          return
        }
        out(`Started run ${result.runId} for ${slug}`)
        out(`Baseline ${programConfig.metric_field}: ${baselineRow.metric_value} (${programConfig.repeats} measurements)`)
        out("")
        out("Run is now executing experiments in the background.")
        out("")
        out("Next steps:")
        out(` autoauto status ${slug} # Check current progress`)
        out(` autoauto results ${slug} # View experiment results table`)
        out(` autoauto stop ${slug} # Stop after current experiment`)
        return
      }
    } catch {
      // Results not readable yet — keep waiting.
    }

    // 3. Did the run end (crashed or complete) before a baseline row appeared?
    try {
      const state = await readState(result.runDir)
      if (state.phase === "crashed" || state.phase === "complete") {
        if (state.error) die(`Baseline failed: ${state.error}`, 2)
        die("Run ended before completing baseline.", 2)
      }
    } catch {
      // State not readable yet — keep waiting.
    }
  }
}
415
+
416
/**
 * `autoauto status <slug>`: shows the state of one run, or of every run with `--all`.
 *
 * The run is chosen by `resolveRunDir` (`--run <id>` or the latest run).
 * `--json` emits the raw state plus derived fields; otherwise a short
 * human-readable summary is printed.
 */
async function cmdStatus(args: ParsedArgs) {
  const slug = args.positional[0]
  if (!slug) die("Usage: autoauto status <program-slug>")

  const root = await resolveRoot(args.flags)
  const programDir = getProgramDir(root, slug)
  const asJson = hasFlag(args.flags, "json")
  const listAll = hasFlag(args.flags, "all")

  // Validate program exists
  let programConfig: ProgramConfig
  try {
    programConfig = await loadProgramConfig(programDir)
  } catch {
    die(`Program "${slug}" not found.`)
  }
  const direction = programConfig.direction
  const metricField = programConfig.metric_field

  if (listAll) {
    const runs = await listRuns(programDir)
    if (runs.length === 0) die(`No runs found for "${slug}". Start one with: autoauto start ${slug}`)

    if (asJson) {
      const summaries = runs.map(({ run_id, state }) => ({
        run_id,
        status: state?.phase ?? "unknown",
        experiment_number: state?.experiment_number ?? 0,
        best_metric: state?.best_metric ?? null,
        best_metric_change: state
          ? formatChangePct(state.original_baseline, state.best_metric, direction)
          : null,
      }))
      outJson(summaries)
      return
    }

    out(`${padRight("Run", 18)} ${padRight("Status", 12)} ${padRight("Experiments", 13)} Best (${metricField})`)
    for (const { run_id, state } of runs) {
      const phase = state?.phase ?? "unknown"
      let experiments = "0"
      let best = "—"
      if (state) {
        experiments = String(state.total_keeps + state.total_discards + state.total_crashes)
        // A best metric of exactly 0 is displayed as "—".
        if (state.best_metric !== 0) {
          best = `${state.best_metric} (${formatChangePct(state.original_baseline, state.best_metric, direction)})`
        }
      }
      out(`${padRight(run_id, 18)} ${padRight(phase, 12)} ${padRight(experiments, 13)} ${best}`)
    }
    return
  }

  const { runDir, runId } = await resolveRunDir(programDir, args.flags)
  let state: RunState
  try {
    state = await readState(runDir)
  } catch {
    die(`Could not read state for run "${runId}".`)
  }

  const stats = getRunStats(state, direction)
  const active = await findActiveRun(programDir)
  // The daemon only counts as alive for this run when the active run is this one.
  const daemonAlive = active?.runId === runId && active.daemonAlive
  const isComplete = state.phase === "complete" || state.phase === "crashed"

  if (asJson) {
    outJson({
      ...state,
      daemon_alive: daemonAlive,
      elapsed: formatElapsed(state.started_at, isComplete ? state.updated_at : undefined),
      improvement_pct: stats.improvement_pct,
      keep_rate: stats.keep_rate,
      metric_field: metricField,
      direction,
    })
    return
  }

  const dirLabel = direction === "lower" ? "lower is better" : "higher is better"
  out(`Program: ${slug} (${metricField}, ${dirLabel})`)
  out(`Run: ${runId}`)

  const change = formatChangePct(state.original_baseline, state.best_metric, direction)
  const tally = `Keeps: ${stats.total_keeps} | Discards: ${stats.total_discards} | Crashes: ${stats.total_crashes}`

  if (!isComplete) {
    const inExperiment = state.phase === "agent_running" || state.phase === "measuring"
    const phaseDetail = inExperiment ? ` (experiment #${state.experiment_number})` : ""
    // "stale": the state says the run is unfinished but no live daemon owns it.
    out(`Status: ${daemonAlive ? "running" : "stale"} ${state.phase}${phaseDetail}`)
    out(`Baseline: ${state.original_baseline} → Current best: ${state.best_metric} (${change})`)
    out(tally)
    out(`Cost: ${formatCost(state.total_cost_usd)} | Elapsed: ${formatElapsed(state.started_at)}`)
    return
  }

  // Termination reason takes precedence; the phase is only a fallback.
  let reason: string
  if (state.termination_reason === "aborted") {
    reason = "aborted"
  } else if (state.termination_reason === "max_experiments") {
    reason = `reached max experiments (${state.experiment_number})`
  } else if (state.termination_reason === "stagnation") {
    reason = `stagnation (${state.total_discards} consecutive discards)`
  } else if (state.termination_reason === "stopped") {
    reason = "stopped by user"
  } else if (state.phase === "crashed") {
    reason = "crashed"
  } else {
    reason = "finished"
  }

  out(`Status: ${state.phase} (${reason})`)
  out(`Baseline: ${state.original_baseline} → Final best: ${state.best_metric} (${change})`)
  out(tally)
  out(`Cost: ${formatCost(state.total_cost_usd)} | Duration: ${formatElapsed(state.started_at, state.updated_at)}`)
  if (state.error) out(`Error: ${state.error}`)
}
531
+
532
/**
 * `autoauto results <slug>`: prints the experiment results of a run.
 *
 * The run is chosen by `resolveRunDir` (`--run <id>` or the latest run).
 *
 * - `--detail <n|latest>` shows a single experiment: with `--json` its result
 *   fields plus the stream log, otherwise only the stream log text.
 * - `--limit <n>` keeps only the last `n` rows.
 * - `--json` emits rows as JSON with an added `change_pct` field.
 *
 * Change percentages are always relative to the baseline row (experiment #0),
 * falling back to the first row when no baseline row exists.
 */
async function cmdResults(args: ParsedArgs) {
  const slug = args.positional[0]
  if (!slug) die("Usage: autoauto results <program-slug>")

  const root = await resolveRoot(args.flags)
  const programDir = getProgramDir(root, slug)
  const json = hasFlag(args.flags, "json")
  const detail = getFlag(args.flags, "detail")
  const limit = getFlag(args.flags, "limit")

  let programConfig: ProgramConfig
  try {
    programConfig = await loadProgramConfig(programDir)
  } catch {
    die(`Program "${slug}" not found.`)
  }

  const { runDir, runId } = await resolveRunDir(programDir, args.flags)
  const allResults = await readAllResults(runDir)

  if (allResults.length === 0) {
    die("No result rows yet. Run may still be in baseline phase.")
  }

  // Always compute baseline from full results before any slicing
  const originalBaseline =
    allResults.find((r) => r.experiment_number === 0)?.metric_value ?? allResults[0].metric_value

  // Baseline rows have no change percentage.
  const changeFor = (experimentNumber: number, metricValue: number): string | null =>
    experimentNumber === 0
      ? null
      : formatChangePct(originalBaseline, metricValue, programConfig.direction)

  // Handle --detail
  if (detail != null) {
    let expNum: number
    if (detail === "latest") {
      expNum = allResults[allResults.length - 1].experiment_number
    } else {
      // Require digits only: parseInt alone would accept inputs such as "3abc"
      // and silently show experiment 3. Experiment 0 (the baseline) is valid here.
      if (!/^\d+$/.test(detail)) die(`Invalid experiment number: "${detail}"`)
      expNum = parseInt(detail, 10)
    }

    const result = allResults.find((r) => r.experiment_number === expNum)
    if (!result) die(`Experiment #${expNum} not found in run ${runId}.`)

    // The stream log is optional; a missing file is reported inline, not as an error.
    const logFile = join(runDir, streamLogName(expNum))
    let logContent = ""
    try {
      logContent = await Bun.file(logFile).text()
    } catch {
      logContent = "(no stream log found)"
    }

    if (json) {
      outJson({
        experiment_number: result.experiment_number,
        status: result.status,
        metric_value: result.metric_value,
        change_pct: changeFor(result.experiment_number, result.metric_value),
        description: result.description,
        log: logContent,
      })
    } else {
      out(logContent)
    }
    return
  }

  // Apply --limit (after baseline computation)
  let results = allResults
  if (limit != null) {
    const n = parsePositiveInt(limit)
    if (n == null) die(`Invalid limit: "${limit}"`)
    results = allResults.slice(-n)
  }

  if (json) {
    outJson(
      results.map((r) => ({
        ...r,
        change_pct: changeFor(r.experiment_number, r.metric_value),
      })),
    )
    return
  }

  // Human-readable table
  const metricField = programConfig.metric_field
  const header = `${padRight("#", 5)} ${padRight("Status", 22)} ${padRight(metricField, 14)} ${padRight("Change", 10)} ${padRight("Commit", 10)} Description`
  out(header)

  for (const r of results) {
    const change = changeFor(r.experiment_number, r.metric_value) ?? "—"
    const num = String(r.experiment_number)
    out(
      `${padRight(num, 5)} ${padRight(r.status, 22)} ${padRight(String(r.metric_value), 14)} ${padRight(change, 10)} ${padRight(r.commit.slice(0, 7), 10)} ${r.description}`,
    )
  }
}
634
+
635
/**
 * `autoauto stop <slug>`: asks the daemon of the active run (or of `--run <id>`)
 * to stop.
 *
 * Without `--abort` a stop request is sent and the command returns immediately.
 * With `--abort` an abort request is sent, the daemon is polled every 500 ms
 * for up to 10 seconds, and it is force-killed if it is still alive afterwards.
 */
async function cmdStop(args: ParsedArgs) {
  const slug = args.positional[0]
  if (!slug) die("Usage: autoauto stop <program-slug>")

  const root = await resolveRoot(args.flags)
  const programDir = getProgramDir(root, slug)
  const asJson = hasFlag(args.flags, "json")
  const abortRequested = hasFlag(args.flags, "abort")

  // Target: an explicit --run id, or whatever run is currently active.
  let runDir: string
  let runId: string
  const runIdOverride = getFlag(args.flags, "run")

  if (!runIdOverride) {
    const active = await findActiveRun(programDir)
    if (!active) die(`No active run for "${slug}".`)
    if (!active.daemonAlive) die("Daemon is not running. Run may have already completed.")
    runDir = active.runDir
    runId = active.runId
  } else {
    runId = runIdOverride
    runDir = join(programDir, "runs", runIdOverride)
    const daemon = await getDaemonStatus(runDir)
    if (!daemon.alive) die("Daemon is not running. Run may have already completed.")
  }

  if (!abortRequested) {
    await sendStop(runDir)

    // Best effort: the experiment number is only used in the message below.
    let experimentNum = 0
    try {
      experimentNum = (await readState(runDir)).experiment_number
    } catch {}

    if (asJson) {
      outJson({ action: "stop", run_id: runId, status: "stopping" })
      return
    }
    out(`Stopping ${slug} run ${runId}...`)
    out(`The current experiment (#${experimentNum}) will finish, then the run will stop.`)
    out(`Use \`autoauto status ${slug}\` to check when it's done.`)
    return
  }

  await sendAbort(runDir)

  // Wait briefly for daemon to exit
  const deadline = Date.now() + 10_000
  while (Date.now() < deadline) {
    await new Promise((resolve) => setTimeout(resolve, 500))
    const daemon = await getDaemonStatus(runDir)
    if (!daemon.alive) break
  }

  // Force kill if still alive
  const finalStatus = await getDaemonStatus(runDir)
  if (finalStatus.alive) await forceKillDaemon(runDir)

  if (asJson) {
    outJson({ action: "abort", run_id: runId, status: "aborted" })
    return
  }
  out(`Aborting ${slug} run ${runId}...`)
  out("Run aborted. Current experiment recorded as crash.")
}
704
+
705
/**
 * `autoauto limit <slug> <n>`: updates the experiment cap of the active run.
 *
 * Requires an active run with a live daemon; `<n>` must be a positive integer.
 * Prints a confirmation, or a JSON object with `--json`.
 */
async function cmdLimit(args: ParsedArgs) {
  const [slug, valueStr] = args.positional
  if (!slug || valueStr == null) die("Usage: autoauto limit <program-slug> <n>")

  const root = await resolveRoot(args.flags)
  const programDir = getProgramDir(root, slug)
  const asJson = hasFlag(args.flags, "json")

  // The active run is checked before the value is validated.
  const active = await findActiveRun(programDir)
  if (!active) die(`No active run for "${slug}".`)
  if (!active.daemonAlive) die("Daemon is not running. Run may have already completed.")

  const maxExperiments = parsePositiveInt(valueStr)
  if (maxExperiments == null) die(`Invalid value: "${valueStr}". Must be a positive integer.`)

  await updateMaxExperiments(active.runDir, maxExperiments)

  if (asJson) {
    outJson({ run_id: active.runId, max_experiments: maxExperiments })
    return
  }
  out(`Updated ${slug} run ${active.runId}: max experiments set to ${maxExperiments}.`)
}
730
+
731
+ // --- Main Router ---
732
+
733
/** Maps each CLI command name to the function that implements it. */
const COMMANDS: Record<string, (args: ParsedArgs) => Promise<void>> = {
  list: cmdList,
  start: cmdStart,
  status: cmdStatus,
  results: cmdResults,
  stop: cmdStop,
  limit: cmdLimit,
}

/**
 * CLI entry point: registers the default providers, parses `argv`, and runs
 * the matching command.
 *
 * An unknown or missing command prints the usage text and exits with code 1.
 * An error thrown by a command is reported through `die` with exit code 2,
 * after the providers have been closed.
 *
 * Note: commands that call `die` themselves exit the process on the spot, so
 * the provider cleanup below does not run in that case.
 *
 * @param argv Arguments with the command name first.
 */
export async function run(argv: string[]) {
  registerDefaultProviders()
  const args = parseArgs(argv)

  // Own-property lookup: a plain index would also resolve inherited names such
  // as "toString" or "constructor" and try to run them as commands.
  const handler = Object.prototype.hasOwnProperty.call(COMMANDS, args.command)
    ? COMMANDS[args.command]
    : undefined

  if (!handler) {
    out("Usage: autoauto <command> [options]")
    out("")
    out("Commands:")
    out(" list List all programs")
    out(" start <slug> Start an experiment run")
    out(" status <slug> Show run status")
    out(" results <slug> Show experiment results")
    out(" stop <slug> Stop the active run")
    // cmdLimit only accepts a positive integer, so "none" is not advertised.
    out(" limit <slug> <n> Update experiment cap on active run")
    out("")
    out("Global flags:")
    out(" --json Output as JSON")
    out(" --cwd <path> Override working directory")
    out(" --provider <claude|opencode|codex> Agent provider for start")
    process.exit(1)
  }

  // Record the failure instead of exiting inside `catch`: process.exit() ends
  // the process immediately, which would skip the cleanup in `finally`.
  let failure: { error: unknown } | null = null
  try {
    await handler(args)
  } catch (err) {
    failure = { error: err }
  } finally {
    try {
      await closeProviders()
    } catch (closeErr) {
      // Keep the command's own error as the one reported to the user.
      if (failure === null) throw closeErr
    }
  }

  if (failure !== null) die(formatShellError(failure.error), 2)
}