@spacek33z/autoauto 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +197 -0
- package/package.json +51 -0
- package/src/App.tsx +224 -0
- package/src/cli.ts +772 -0
- package/src/components/AgentPanel.tsx +254 -0
- package/src/components/Chat.test.tsx +71 -0
- package/src/components/Chat.tsx +308 -0
- package/src/components/CycleField.tsx +23 -0
- package/src/components/ModelPicker.tsx +97 -0
- package/src/components/PostUpdatePrompt.tsx +46 -0
- package/src/components/ResultsTable.tsx +172 -0
- package/src/components/RunCompletePrompt.tsx +90 -0
- package/src/components/RunSettingsOverlay.tsx +49 -0
- package/src/components/RunsTable.tsx +219 -0
- package/src/components/StatsHeader.tsx +100 -0
- package/src/daemon.ts +264 -0
- package/src/index.tsx +8 -0
- package/src/lib/agent/agent-provider.test.ts +133 -0
- package/src/lib/agent/claude-provider.ts +277 -0
- package/src/lib/agent/codex-provider.ts +413 -0
- package/src/lib/agent/default-providers.ts +10 -0
- package/src/lib/agent/index.ts +32 -0
- package/src/lib/agent/mock-provider.ts +61 -0
- package/src/lib/agent/opencode-provider.ts +424 -0
- package/src/lib/agent/types.ts +73 -0
- package/src/lib/auth.ts +11 -0
- package/src/lib/config.ts +152 -0
- package/src/lib/daemon-callbacks.ts +59 -0
- package/src/lib/daemon-client.ts +16 -0
- package/src/lib/daemon-lifecycle.ts +368 -0
- package/src/lib/daemon-spawn.ts +122 -0
- package/src/lib/daemon-status.ts +189 -0
- package/src/lib/daemon-watcher.ts +192 -0
- package/src/lib/experiment-loop.ts +679 -0
- package/src/lib/experiment.ts +356 -0
- package/src/lib/finalize.test.ts +143 -0
- package/src/lib/finalize.ts +511 -0
- package/src/lib/format.test.ts +32 -0
- package/src/lib/format.ts +44 -0
- package/src/lib/git.ts +176 -0
- package/src/lib/ideas-backlog.test.ts +54 -0
- package/src/lib/ideas-backlog.ts +109 -0
- package/src/lib/measure.ts +472 -0
- package/src/lib/model-options.ts +24 -0
- package/src/lib/programs.ts +247 -0
- package/src/lib/push-stream.ts +48 -0
- package/src/lib/run-context.ts +112 -0
- package/src/lib/run-setup.ts +34 -0
- package/src/lib/run.ts +383 -0
- package/src/lib/syntax-theme.ts +39 -0
- package/src/lib/system-prompts/experiment.ts +77 -0
- package/src/lib/system-prompts/finalize.ts +90 -0
- package/src/lib/system-prompts/index.ts +7 -0
- package/src/lib/system-prompts/setup.ts +516 -0
- package/src/lib/system-prompts/update.ts +188 -0
- package/src/lib/tool-events.ts +99 -0
- package/src/lib/validate-measurement.ts +326 -0
- package/src/lib/worktree.ts +40 -0
- package/src/screens/AuthErrorScreen.tsx +31 -0
- package/src/screens/ExecutionScreen.tsx +851 -0
- package/src/screens/FirstSetupScreen.tsx +168 -0
- package/src/screens/HomeScreen.tsx +406 -0
- package/src/screens/PreRunScreen.tsx +206 -0
- package/src/screens/SettingsScreen.tsx +189 -0
- package/src/screens/SetupScreen.tsx +226 -0
- package/src/tui.tsx +17 -0
- package/tsconfig.json +17 -0
package/src/cli.ts
ADDED
|
@@ -0,0 +1,772 @@
|
|
|
1
|
+
import { join } from "node:path"
|
|
2
|
+
import {
|
|
3
|
+
listPrograms,
|
|
4
|
+
loadProgramConfig,
|
|
5
|
+
getProgramDir,
|
|
6
|
+
getProjectRoot,
|
|
7
|
+
type ProgramConfig,
|
|
8
|
+
} from "./lib/programs.ts"
|
|
9
|
+
import {
|
|
10
|
+
spawnDaemon,
|
|
11
|
+
getDaemonStatus,
|
|
12
|
+
sendStop,
|
|
13
|
+
sendAbort,
|
|
14
|
+
forceKillDaemon,
|
|
15
|
+
findActiveRun,
|
|
16
|
+
updateMaxExperiments,
|
|
17
|
+
} from "./lib/daemon-client.ts"
|
|
18
|
+
import {
|
|
19
|
+
getLatestRun,
|
|
20
|
+
listRuns,
|
|
21
|
+
readAllResults,
|
|
22
|
+
readState,
|
|
23
|
+
getRunStats,
|
|
24
|
+
type RunState,
|
|
25
|
+
} from "./lib/run.ts"
|
|
26
|
+
import { loadProjectConfig, type ModelSlot, type EffortLevel } from "./lib/config.ts"
|
|
27
|
+
import { streamLogName } from "./lib/daemon-callbacks.ts"
|
|
28
|
+
import { closeProviders, type AgentProviderID } from "./lib/agent/index.ts"
|
|
29
|
+
import { registerDefaultProviders } from "./lib/agent/default-providers.ts"
|
|
30
|
+
import { getDefaultModel } from "./lib/model-options.ts"
|
|
31
|
+
import { formatShellError } from "./lib/git.ts"
|
|
32
|
+
|
|
33
|
+
// --- Arg Parsing ---
|
|
34
|
+
|
|
35
|
+
/** Result of splitting a raw argv array into a command, positionals and `--flags`. */
interface ParsedArgs {
  /** First argv entry; the empty string when argv is empty. */
  command: string
  /** Arguments after the command that are not flags or flag values, in order. */
  positional: string[]
  /** `--key value` and `--key=value` map to strings; a bare `--key` maps to `true`. */
  flags: Record<string, string | boolean>
}

/**
 * Flags that never take a value. Knowing them up front lets
 * `status --json my-program` keep `my-program` as a positional instead of
 * consuming it as the value of `--json`.
 */
const BOOLEAN_FLAGS: ReadonlySet<string> = new Set([
  "json",
  "no-wait",
  "all",
  "abort",
  "in-place",
  "ideas-backlog",
  "no-ideas-backlog",
])

/**
 * Parses CLI arguments (without the runtime/script prefix).
 *
 * @param argv Arguments where `argv[0]` is the sub-command.
 * @returns The command, the positional arguments and the parsed flags.
 */
function parseArgs(argv: string[]): ParsedArgs {
  // Fall back to "" so `command` really is a string even for an empty argv.
  const command = argv[0] ?? ""
  const positional: string[] = []
  const flags: Record<string, string | boolean> = {}

  for (let i = 1; i < argv.length; i++) {
    const arg = argv[i]
    if (!arg.startsWith("--")) {
      positional.push(arg)
      continue
    }

    const body = arg.slice(2)

    // `--key=value` form: everything after the first "=" is the value.
    const eq = body.indexOf("=")
    if (eq > 0) {
      flags[body.slice(0, eq)] = body.slice(eq + 1)
      continue
    }

    // Known switches never swallow the argument that follows them.
    if (BOOLEAN_FLAGS.has(body)) {
      flags[body] = true
      continue
    }

    // `--key value` form; a missing or flag-looking successor means a bare switch.
    const next = argv[i + 1]
    if (next && !next.startsWith("--")) {
      flags[body] = next
      i++
    } else {
      flags[body] = true
    }
  }

  return { command, positional, flags }
}
|
|
64
|
+
|
|
65
|
+
/**
 * Looks up a flag that carries a value.
 *
 * @returns The flag's string value, or `undefined` when the flag is absent
 *          or was given as a bare switch (stored as a boolean).
 */
function getFlag(flags: Record<string, string | boolean>, key: string): string | undefined {
  const raw = flags[key]
  if (typeof raw !== "string") return undefined
  return raw
}
|
|
69
|
+
|
|
70
|
+
/** Reports whether `key` is present on the parsed flags object, whatever its value. */
function hasFlag(flags: Record<string, string | boolean>, key: string): boolean {
  // Reflect.has is the function form of the `in` operator.
  return Reflect.has(flags, key)
}
|
|
73
|
+
|
|
74
|
+
// --- Output Helpers ---
|
|
75
|
+
|
|
76
|
+
/** Writes `text` to stdout followed by a single newline. */
function out(text: string) {
  process.stdout.write(`${text}\n`)
}
|
|
79
|
+
|
|
80
|
+
/** Writes `data` to stdout as JSON with 2-space indentation, followed by a newline. */
function outJson(data: unknown) {
  const serialized = JSON.stringify(data, null, 2)
  process.stdout.write(`${serialized}\n`)
}
|
|
83
|
+
|
|
84
|
+
/**
 * Prints `Error: <message>` to stderr and terminates the process.
 *
 * @param message Text shown after the `Error: ` prefix.
 * @param code    Process exit code (defaults to 1).
 */
function die(message: string, code = 1): never {
  const line = "Error: " + message + "\n"
  process.stderr.write(line)
  process.exit(code)
}
|
|
88
|
+
|
|
89
|
+
/**
 * Right-pads `str` with spaces up to `len` characters.
 * Strings that are already `len` characters or longer are returned unchanged.
 */
function padRight(str: string, len: number): string {
  return str.padEnd(len, " ")
}
|
|
92
|
+
|
|
93
|
+
/**
 * Formats the time between two timestamps as `"<m>m"` or `"<h>h <m>m"`.
 *
 * @param startedAt Start timestamp, parsed with `new Date(...)`.
 * @param endedAt   End timestamp; when omitted (or empty) the current time is used.
 * @returns The elapsed time rounded down to whole minutes. Returns `"—"` when
 *          either timestamp cannot be parsed, and `"0m"` when the end precedes
 *          the start.
 */
function formatElapsed(startedAt: string, endedAt?: string): string {
  const start = new Date(startedAt).getTime()
  const end = endedAt ? new Date(endedAt).getTime() : Date.now()
  const ms = end - start

  // An invalid date yields NaN; show a dash instead of "NaNh NaNm".
  if (!Number.isFinite(ms)) return "—"

  // Clamp so an end before the start never prints negative minutes.
  const mins = Math.max(0, Math.floor(ms / 60_000))
  if (mins < 60) return `${mins}m`

  const hours = Math.floor(mins / 60)
  const remainingMins = mins % 60
  return `${hours}h ${remainingMins}m`
}
|
|
103
|
+
|
|
104
|
+
/** Formats a USD amount with two decimals; a missing amount is shown as `$0.00`. */
function formatCost(usd: number | undefined): string {
  const amount = usd ?? 0
  return "$" + amount.toFixed(2)
}
|
|
108
|
+
|
|
109
|
+
/**
 * Formats how far `current` has moved from `original` as a signed percentage
 * with one decimal, where a positive value always means "better".
 *
 * @param original  Reference value; `0` yields `"—"` because no ratio exists.
 * @param current   Value being compared against the reference.
 * @param direction `"lower"` treats a decrease as an improvement; any other
 *                  value treats an increase as an improvement.
 */
function formatChangePct(
  original: number,
  current: number,
  direction: ProgramConfig["direction"],
): string {
  if (original === 0) return "—"

  // Orient the delta so that an improvement is positive for either direction.
  const delta = direction === "lower" ? original - current : current - original
  const pct = (delta / Math.abs(original)) * 100

  const prefix = pct > 0 ? "+" : ""
  return prefix + pct.toFixed(1) + "%"
}
|
|
122
|
+
|
|
123
|
+
/**
 * Parses a strictly positive decimal integer.
 *
 * @param value Raw text; only ASCII digits are accepted (no sign, whitespace,
 *              decimal point or exponent).
 * @returns The parsed number, or `null` when the text is not all digits, is
 *          zero, or is too large to be represented exactly as a JS integer.
 */
function parsePositiveInt(value: string): number | null {
  if (!/^\d+$/.test(value)) return null
  const n = Number.parseInt(value, 10)
  // Very long digit strings parse to an imprecise float or Infinity; reject them.
  return n >= 1 && Number.isSafeInteger(n) ? n : null
}
|
|
128
|
+
|
|
129
|
+
/**
 * Validates a `--provider` value.
 *
 * @returns The value itself when it is one of `claude`, `opencode` or `codex`;
 *          `null` for anything else, including `undefined`.
 */
function parseProvider(value: string | undefined): AgentProviderID | null {
  switch (value) {
    case "claude":
    case "opencode":
    case "codex":
      return value
    default:
      return null
  }
}
|
|
133
|
+
|
|
134
|
+
// --- Resolve common context ---
|
|
135
|
+
|
|
136
|
+
/**
 * Resolves the project root for this invocation: the `--cwd` flag value when
 * given, otherwise the process working directory, passed through
 * `getProjectRoot`.
 */
async function resolveRoot(flags: Record<string, string | boolean>): Promise<string> {
  const override = getFlag(flags, "cwd")
  const startDir = override ?? process.cwd()
  return getProjectRoot(startDir)
}
|
|
140
|
+
|
|
141
|
+
/**
 * Picks the run a command should operate on.
 *
 * With `--run <id>` the run directory is `<programDir>/runs/<id>`; the process
 * exits with an error when its state cannot be read. Without the flag, the
 * run returned by `getLatestRun` is used, and the process exits when there is
 * none.
 *
 * @returns The run's directory and id.
 */
async function resolveRunDir(
  programDir: string,
  flags: Record<string, string | boolean>,
): Promise<{ runDir: string; runId: string }> {
  const requestedId = getFlag(flags, "run")

  // No explicit run requested: fall back to the most recent one.
  if (!requestedId) {
    const newest = await getLatestRun(programDir)
    if (!newest) die(`No runs found. Start one with: autoauto start <slug>`)
    return { runDir: newest.run_dir, runId: newest.run_id }
  }

  const requestedDir = join(programDir, "runs", requestedId)
  try {
    // A readable state file is what counts as "the run exists".
    await readState(requestedDir)
  } catch {
    die(`Run "${requestedId}" not found.`)
  }
  return { runDir: requestedDir, runId: requestedId }
}
|
|
160
|
+
|
|
161
|
+
// --- Commands ---
|
|
162
|
+
|
|
163
|
+
/**
 * `autoauto list` — prints one row per program with its run status, latest run
 * id and best metric.
 *
 * With `--json` the rows are emitted as a JSON array (an empty array when
 * there are no programs). Otherwise a fixed-width table is printed. The "Best"
 * column header names the metric field only when every listed program shares
 * it; with mixed metrics each row names its own field instead.
 */
async function cmdList(args: ParsedArgs) {
  const root = await resolveRoot(args.flags)
  const programs = await listPrograms(root)
  const json = hasFlag(args.flags, "json")

  if (programs.length === 0) {
    if (json) {
      outJson([])
    } else {
      out("No programs found. Create one in the TUI first.")
    }
    return
  }

  const rows: Array<{
    slug: string
    status: string
    last_run_id: string | null
    best_metric: number | null
    best_metric_change: string | null
    metric_field: string
    direction: string
    goal: string
  }> = []

  for (const program of programs) {
    const programDir = getProgramDir(root, program.name)

    // A broken config does not hide the program; its metric columns stay "unknown".
    let config: ProgramConfig | null = null
    try {
      config = await loadProgramConfig(programDir)
    } catch {
      // Leave config as null.
    }

    const active = await findActiveRun(programDir)
    const latest = await getLatestRun(programDir)
    const status = active?.daemonAlive ? "running" : "idle"

    // Goal is the text under the "## Goal" heading of program.md, if readable.
    let goal = ""
    try {
      const md = await Bun.file(join(programDir, "program.md")).text()
      const match = md.match(/## Goal\n+([\s\S]*?)(?:\n##|\n*$)/)
      if (match) goal = match[1].trim()
    } catch {
      // Missing or unreadable program.md: keep the empty goal.
    }

    let best_metric: number | null = null
    let best_metric_change: string | null = null

    if (latest?.state && config) {
      best_metric = latest.state.best_metric
      const stats = getRunStats(latest.state, config.direction)
      if (stats.improvement_pct !== 0) {
        const sign = stats.improvement_pct > 0 ? "+" : ""
        best_metric_change = `${sign}${stats.improvement_pct.toFixed(1)}%`
      }
    }

    rows.push({
      slug: program.name,
      status,
      last_run_id: latest?.run_id ?? null,
      best_metric,
      best_metric_change,
      metric_field: config?.metric_field ?? "unknown",
      direction: config?.direction ?? "unknown",
      goal,
    })
  }

  if (json) {
    outJson(rows)
    return
  }

  // Human-readable table.
  // Only name the metric in the header when it applies to every row; labelling
  // the column with the first program's metric is misleading otherwise.
  const distinctFields = new Set(rows.map((row) => row.metric_field))
  const sharedField = distinctFields.size === 1 ? rows[0].metric_field : null
  const metricLabel = sharedField != null ? `Best (${sharedField})` : "Best"
  const header = `${padRight("Program", 20)} ${padRight("Status", 10)} ${padRight("Last Run", 18)} ${metricLabel}`
  out(header)

  for (const row of rows) {
    let metricStr = "—"
    if (row.best_metric != null) {
      const fieldSuffix = sharedField != null ? "" : ` ${row.metric_field}`
      const changeSuffix = row.best_metric_change ? ` (${row.best_metric_change})` : ""
      metricStr = `${row.best_metric}${fieldSuffix}${changeSuffix}`
    }
    out(
      `${padRight(row.slug, 20)} ${padRight(row.status, 10)} ${padRight(row.last_run_id ?? "—", 18)} ${metricStr}`,
    )
  }
}
|
|
252
|
+
|
|
253
|
+
/**
 * `autoauto start <slug>` — spawns the run daemon for a program and, unless
 * `--no-wait` is given, blocks until the baseline row (experiment #0) appears
 * in the run's results.
 *
 * Flags read here: `--provider`, `--model`, `--effort`, `--max-experiments`,
 * `--ideas-backlog` / `--no-ideas-backlog`, `--in-place`, `--no-wait`, `--json`.
 *
 * Exit codes: 1 for usage and validation errors and for the "uncommitted
 * changes" / "already active" spawn failures; 2 for any other spawn failure
 * and for a daemon that ends before the baseline is recorded.
 */
async function cmdStart(args: ParsedArgs) {
  const slug = args.positional[0]
  if (!slug) die("Usage: autoauto start <program-slug>")

  // A value flag written without a value is stored as boolean `true`, which
  // getFlag() reports as undefined. Reject it rather than silently falling
  // back to the defaults.
  for (const name of ["provider", "model", "effort", "max-experiments"]) {
    if (hasFlag(args.flags, name) && getFlag(args.flags, name) === undefined) {
      die(`--${name} requires a value.`)
    }
  }

  const root = await resolveRoot(args.flags)
  const programDir = getProgramDir(root, slug)
  const json = hasFlag(args.flags, "json")
  const noWait = hasFlag(args.flags, "no-wait")

  // Validate program exists
  let programConfig: ProgramConfig
  try {
    programConfig = await loadProgramConfig(programDir)
  } catch {
    die(`Program "${slug}" not found. Run \`autoauto list\` to see available programs.`)
  }

  // Load project config for defaults
  const projectConfig = await loadProjectConfig(root)

  // Build model config from flags or defaults
  const providerFlag = getFlag(args.flags, "provider")
  const parsedProvider = parseProvider(providerFlag)
  if (providerFlag && !parsedProvider) die(`Invalid --provider: "${providerFlag}". Use claude, opencode, or codex.`)

  const explicitModel = getFlag(args.flags, "model")
  // An explicit --model without --provider is taken to be a Claude model.
  const provider: AgentProviderID = parsedProvider ?? (explicitModel ? "claude" : projectConfig.executionModel.provider)
  if (provider === "opencode" && hasFlag(args.flags, "effort")) {
    die("--effort is not supported with --provider opencode yet.")
  }

  let model = explicitModel
  if (!model) {
    if (provider === projectConfig.executionModel.provider) {
      // Same provider as the project default: reuse the project's model.
      model = projectConfig.executionModel.model
    } else if (provider === "opencode") {
      model = await getDefaultModel("opencode", root) ?? undefined
      if (!model) die("No connected OpenCode models found. Run `opencode auth login` or `/connect` first.")
    } else if (provider === "codex") {
      model = await getDefaultModel("codex", root) ?? undefined
      if (!model) die("Could not resolve Codex default model.")
    } else {
      model = "sonnet"
    }
  }
  if (!model) die("Could not resolve model.")

  // NOTE(review): the --effort value is cast, not validated against EffortLevel.
  const modelConfig: ModelSlot = {
    provider,
    model,
    effort: provider !== "opencode"
      ? ((getFlag(args.flags, "effort") as EffortLevel) ?? projectConfig.executionModel.effort)
      : projectConfig.executionModel.effort,
  }

  const maxExperimentsStr = getFlag(args.flags, "max-experiments")
  let maxExperiments: number = programConfig.max_experiments ?? 25
  if (maxExperimentsStr != null) {
    const parsed = parsePositiveInt(maxExperimentsStr)
    if (parsed == null) die(`Invalid --max-experiments: "${maxExperimentsStr}". Must be a positive integer.`)
    maxExperiments = parsed
  }

  // --no-ideas-backlog wins over --ideas-backlog; with neither, use the project setting.
  const ideasBacklogEnabled = hasFlag(args.flags, "no-ideas-backlog")
    ? false
    : hasFlag(args.flags, "ideas-backlog")
      ? true
      : projectConfig.ideasBacklogEnabled

  const useWorktree = !hasFlag(args.flags, "in-place")

  // Spawn daemon
  let result: { runId: string; runDir: string; worktreePath: string | null; pid: number }
  try {
    result = await spawnDaemon(root, slug, modelConfig, maxExperiments, ideasBacklogEnabled, useWorktree)
  } catch (err) {
    const msg = formatShellError(err)
    // These two failures are user-correctable and exit 1; everything else exits 2.
    if (msg.includes("uncommitted changes")) die(msg)
    if (msg.includes("already active")) die(msg)
    die(msg, 2)
  }

  if (noWait) {
    if (json) {
      outJson({ run_id: result.runId, daemon_pid: result.pid, status: "started" })
    } else {
      out(`Started run ${result.runId} for ${slug}`)
      out(`Daemon PID: ${result.pid}`)
      out("")
      out("The daemon is running baseline measurement in the background.")
      out("")
      out("Next steps:")
      out(` autoauto status ${slug} # Check progress (baseline first, then experiments)`)
      out(` autoauto results ${slug} # View experiment results table`)
      out(` autoauto stop ${slug} # Stop after current experiment`)
    }
    return
  }

  // Block until baseline completes (or fails).
  // Detect baseline completion by checking results.tsv for a baseline row (experiment #0).
  // No hard timeout — baselines can be legitimately slow. Daemon death is the exit condition.
  if (!json) out(`Starting run ${result.runId} for ${slug}... waiting for baseline`)

  const pollInterval = 1000

  // NOTE(review): daemon liveness is checked before the baseline row, so a
  // daemon that records the baseline and exits within one poll interval is
  // reported as a baseline failure — confirm whether that is intended.
  while (true) {
    await new Promise((r) => setTimeout(r, pollInterval))

    // Check if daemon is still alive
    const status = await getDaemonStatus(result.runDir)
    if (!status.alive && !status.starting) {
      // Daemon died — try to read state for error info
      try {
        const state = await readState(result.runDir)
        if (state.error) die(`Baseline failed: ${state.error}`, 2)
        if (state.phase === "crashed") die("Daemon crashed during baseline.", 2)
      } catch {
        // State unreadable: fall through to the generic message.
      }
      die("Daemon exited unexpectedly during baseline.", 2)
    }

    // Check for baseline row in results.tsv
    try {
      const results = await readAllResults(result.runDir)
      const baselineRow = results.find((r) => r.experiment_number === 0)
      if (baselineRow) {
        if (json) {
          outJson({
            run_id: result.runId,
            daemon_pid: result.pid,
            baseline_metric: baselineRow.metric_value,
            status: "running",
          })
        } else {
          out(`Started run ${result.runId} for ${slug}`)
          out(`Baseline ${programConfig.metric_field}: ${baselineRow.metric_value} (${programConfig.repeats} measurements)`)
          out("")
          out("Run is now executing experiments in the background.")
          out("")
          out("Next steps:")
          out(` autoauto status ${slug} # Check current progress`)
          out(` autoauto results ${slug} # View experiment results table`)
          out(` autoauto stop ${slug} # Stop after current experiment`)
        }
        return
      }
    } catch {
      // results.tsv not written yet or only header — keep waiting
    }

    // Check if it crashed during baseline
    try {
      const state = await readState(result.runDir)
      if (state.phase === "crashed" || state.phase === "complete") {
        if (state.error) die(`Baseline failed: ${state.error}`, 2)
        die("Run ended before completing baseline.", 2)
      }
    } catch {
      // state.json not written yet — keep waiting
    }
  }
}
|
|
415
|
+
|
|
416
|
+
/**
 * `autoauto status <slug>` — shows the state of one run (the `--run <id>` run,
 * or the latest one), or a summary of every run with `--all`.
 *
 * `--json` switches both modes to JSON output. The process exits with an
 * error when the program, the run, or its state cannot be loaded.
 */
async function cmdStatus(args: ParsedArgs) {
  const [slug] = args.positional
  if (!slug) die("Usage: autoauto status <program-slug>")

  const root = await resolveRoot(args.flags)
  const programDir = getProgramDir(root, slug)
  const json = hasFlag(args.flags, "json")
  const showAll = hasFlag(args.flags, "all")

  // Loading the config doubles as the "does this program exist" check.
  let programConfig: ProgramConfig
  try {
    programConfig = await loadProgramConfig(programDir)
  } catch {
    die(`Program "${slug}" not found.`)
  }

  // ----- --all: one line per run -----
  if (showAll) {
    const runs = await listRuns(programDir)
    if (runs.length === 0) die(`No runs found for "${slug}". Start one with: autoauto start ${slug}`)

    if (json) {
      const summaries = runs.map((run) => {
        const st = run.state
        return {
          run_id: run.run_id,
          status: st?.phase ?? "unknown",
          experiment_number: st?.experiment_number ?? 0,
          best_metric: st?.best_metric ?? null,
          best_metric_change: st
            ? formatChangePct(st.original_baseline, st.best_metric, programConfig.direction)
            : null,
        }
      })
      outJson(summaries)
      return
    }

    out(
      `${padRight("Run", 18)} ${padRight("Status", 12)} ${padRight("Experiments", 13)} Best (${programConfig.metric_field})`,
    )
    for (const run of runs) {
      const st = run.state
      const phase = st?.phase ?? "unknown"

      let experimentCount = "0"
      let bestCell = "—"
      if (st) {
        experimentCount = String(st.total_keeps + st.total_discards + st.total_crashes)
        // A best metric of exactly 0 is shown as a dash.
        if (st.best_metric !== 0) {
          const change = formatChangePct(st.original_baseline, st.best_metric, programConfig.direction)
          bestCell = `${st.best_metric} (${change})`
        }
      }

      out(`${padRight(run.run_id, 18)} ${padRight(phase, 12)} ${padRight(experimentCount, 13)} ${bestCell}`)
    }
    return
  }

  // ----- single run -----
  const { runDir, runId } = await resolveRunDir(programDir, args.flags)
  let state: RunState
  try {
    state = await readState(runDir)
  } catch {
    die(`Could not read state for run "${runId}".`)
  }

  const stats = getRunStats(state, programConfig.direction)
  const active = await findActiveRun(programDir)
  // The daemon only counts as alive for this run if the active run is this one.
  const daemonAlive = active?.runId === runId && active.daemonAlive
  const isComplete = state.phase === "complete" || state.phase === "crashed"

  if (json) {
    // For a finished run, elapsed is frozen at the last state update.
    const elapsedEnd = isComplete ? state.updated_at : undefined
    outJson({
      ...state,
      daemon_alive: daemonAlive,
      elapsed: formatElapsed(state.started_at, elapsedEnd),
      improvement_pct: stats.improvement_pct,
      keep_rate: stats.keep_rate,
      metric_field: programConfig.metric_field,
      direction: programConfig.direction,
    })
    return
  }

  const dirLabel = programConfig.direction === "lower" ? "lower is better" : "higher is better"
  out(`Program: ${slug} (${programConfig.metric_field}, ${dirLabel})`)
  out(`Run: ${runId}`)

  const changeText = formatChangePct(state.original_baseline, state.best_metric, programConfig.direction)
  const tallyLine = `Keeps: ${stats.total_keeps} | Discards: ${stats.total_discards} | Crashes: ${stats.total_crashes}`

  if (!isComplete) {
    const inExperiment = state.phase === "agent_running" || state.phase === "measuring"
    const phaseDetail = inExperiment ? ` (experiment #${state.experiment_number})` : ""
    out(`Status: ${daemonAlive ? "running" : "stale"} ${state.phase}${phaseDetail}`)
    out(`Baseline: ${state.original_baseline} → Current best: ${state.best_metric} (${changeText})`)
    out(tallyLine)
    out(`Cost: ${formatCost(state.total_cost_usd)} | Elapsed: ${formatElapsed(state.started_at)}`)
    return
  }

  // An explicit termination reason takes precedence over the phase.
  let reason: string
  if (state.termination_reason === "aborted") {
    reason = "aborted"
  } else if (state.termination_reason === "max_experiments") {
    reason = `reached max experiments (${state.experiment_number})`
  } else if (state.termination_reason === "stagnation") {
    reason = `stagnation (${state.total_discards} consecutive discards)`
  } else if (state.termination_reason === "stopped") {
    reason = "stopped by user"
  } else if (state.phase === "crashed") {
    reason = "crashed"
  } else {
    reason = "finished"
  }

  out(`Status: ${state.phase} (${reason})`)
  out(`Baseline: ${state.original_baseline} → Final best: ${state.best_metric} (${changeText})`)
  out(tallyLine)
  out(`Cost: ${formatCost(state.total_cost_usd)} | Duration: ${formatElapsed(state.started_at, state.updated_at)}`)
  if (state.error) out(`Error: ${state.error}`)
}
|
|
531
|
+
|
|
532
|
+
/**
 * `autoauto results <slug>` — prints the experiment rows of a run (the
 * `--run <id>` run, or the latest one).
 *
 * - `--detail <n|latest>` shows a single experiment: its stream log as text,
 *   or the row plus log with `--json`.
 * - `--limit <n>` keeps only the last `n` rows.
 * - `--json` emits JSON instead of the fixed-width table.
 *
 * Change percentages are always relative to the baseline row (experiment #0),
 * falling back to the first row when there is no experiment #0.
 */
async function cmdResults(args: ParsedArgs) {
  const slug = args.positional[0]
  if (!slug) die("Usage: autoauto results <program-slug>")

  const root = await resolveRoot(args.flags)
  const programDir = getProgramDir(root, slug)
  const json = hasFlag(args.flags, "json")
  const detail = getFlag(args.flags, "detail")
  const limit = getFlag(args.flags, "limit")

  // A bare `--detail` / `--limit` is stored as boolean `true` and would
  // otherwise be ignored silently.
  if (detail === undefined && hasFlag(args.flags, "detail")) {
    die(`--detail requires an experiment number or "latest".`)
  }
  if (limit === undefined && hasFlag(args.flags, "limit")) {
    die("--limit requires a positive integer.")
  }

  let programConfig: ProgramConfig
  try {
    programConfig = await loadProgramConfig(programDir)
  } catch {
    die(`Program "${slug}" not found.`)
  }

  const { runDir, runId } = await resolveRunDir(programDir, args.flags)
  const allResults = await readAllResults(runDir)

  if (allResults.length === 0) {
    die("No result rows yet. Run may still be in baseline phase.")
  }

  // Always compute baseline from full results before any slicing
  const originalBaseline = allResults.find((r) => r.experiment_number === 0)?.metric_value ?? allResults[0].metric_value

  // Handle --detail
  if (detail != null) {
    let expNum: number
    if (detail === "latest") {
      expNum = allResults[allResults.length - 1].experiment_number
    } else {
      // Digits only: parseInt alone would accept trailing junk such as "3abc".
      if (!/^\d+$/.test(detail)) die(`Invalid experiment number: "${detail}"`)
      expNum = Number.parseInt(detail, 10)
    }

    const result = allResults.find((r) => r.experiment_number === expNum)
    if (!result) die(`Experiment #${expNum} not found in run ${runId}.`)

    const logFile = join(runDir, streamLogName(expNum))
    let logContent = ""
    try {
      logContent = await Bun.file(logFile).text()
    } catch {
      // A missing log is not an error; show a placeholder instead.
      logContent = "(no stream log found)"
    }

    if (json) {
      outJson({
        experiment_number: result.experiment_number,
        status: result.status,
        metric_value: result.metric_value,
        // The baseline row has no change relative to itself.
        change_pct: result.experiment_number === 0
          ? null
          : formatChangePct(originalBaseline, result.metric_value, programConfig.direction),
        description: result.description,
        log: logContent,
      })
    } else {
      out(logContent)
    }
    return
  }

  // Apply --limit (after baseline computation)
  let results = allResults
  if (limit != null) {
    const n = parsePositiveInt(limit)
    if (n == null) die(`Invalid limit: "${limit}"`)
    results = allResults.slice(-n)
  }

  if (json) {
    outJson(
      results.map((r) => ({
        ...r,
        change_pct:
          r.experiment_number === 0
            ? null
            : formatChangePct(originalBaseline, r.metric_value, programConfig.direction),
      })),
    )
    return
  }

  // Human-readable table
  const metricField = programConfig.metric_field
  const header = `${padRight("#", 5)} ${padRight("Status", 22)} ${padRight(metricField, 14)} ${padRight("Change", 10)} ${padRight("Commit", 10)} Description`
  out(header)

  for (const r of results) {
    const change =
      r.experiment_number === 0
        ? "—"
        : formatChangePct(originalBaseline, r.metric_value, programConfig.direction)
    const num = String(r.experiment_number)
    out(
      `${padRight(num, 5)} ${padRight(r.status, 22)} ${padRight(String(r.metric_value), 14)} ${padRight(change, 10)} ${padRight(r.commit.slice(0, 7), 10)} ${r.description}`,
    )
  }
}
|
|
634
|
+
|
|
635
|
+
/**
 * `autoauto stop <slug>` — asks the daemon of a run to stop.
 *
 * By default a stop is requested via `sendStop`; the message printed tells the
 * user the current experiment will finish first. With `--abort`, `sendAbort`
 * is sent instead, the daemon is given up to 10 seconds to exit, and it is
 * force-killed if it is still alive after that.
 *
 * The target is the `--run <id>` run when given, otherwise the program's
 * active run. The process exits with an error when that daemon is not running.
 */
async function cmdStop(args: ParsedArgs) {
  const slug = args.positional[0]
  if (!slug) die("Usage: autoauto stop <program-slug>")

  const root = await resolveRoot(args.flags)
  const programDir = getProgramDir(root, slug)
  const json = hasFlag(args.flags, "json")
  const abort = hasFlag(args.flags, "abort")

  // Find active run (lock-based)
  const runIdOverride = getFlag(args.flags, "run")
  // A bare `--run` must not silently fall back to stopping the active run.
  if (runIdOverride === undefined && hasFlag(args.flags, "run")) {
    die("--run requires a run id.")
  }

  let runDir: string
  let runId: string

  if (runIdOverride) {
    runDir = join(programDir, "runs", runIdOverride)
    runId = runIdOverride
    const status = await getDaemonStatus(runDir)
    if (!status.alive) die("Daemon is not running. Run may have already completed.")
  } else {
    const active = await findActiveRun(programDir)
    if (!active) die(`No active run for "${slug}".`)
    if (!active.daemonAlive) die("Daemon is not running. Run may have already completed.")
    runDir = active.runDir
    runId = active.runId
  }

  if (abort) {
    // Announce before the wait below (up to 10 s) so the terminal is not silent.
    if (!json) out(`Aborting ${slug} run ${runId}...`)

    await sendAbort(runDir)

    // Wait briefly for daemon to exit
    const timeout = 10_000
    const start = Date.now()
    while (Date.now() - start < timeout) {
      await new Promise((r) => setTimeout(r, 500))
      const status = await getDaemonStatus(runDir)
      if (!status.alive) break
    }

    // Force kill if still alive
    const finalStatus = await getDaemonStatus(runDir)
    if (finalStatus.alive) {
      await forceKillDaemon(runDir)
    }

    if (json) {
      outJson({ action: "abort", run_id: runId, status: "aborted" })
    } else {
      out("Run aborted. Current experiment recorded as crash.")
    }
  } else {
    await sendStop(runDir)

    // Best effort: the experiment number is only used in the message below.
    let experimentNum = 0
    try {
      const state = await readState(runDir)
      experimentNum = state.experiment_number
    } catch {
      // State unreadable: keep 0.
    }

    if (json) {
      outJson({ action: "stop", run_id: runId, status: "stopping" })
    } else {
      out(`Stopping ${slug} run ${runId}...`)
      out(`The current experiment (#${experimentNum}) will finish, then the run will stop.`)
      out(`Use \`autoauto status ${slug}\` to check when it's done.`)
    }
  }
}
|
|
704
|
+
|
|
705
|
+
/**
 * `autoauto limit <slug> <n>` — changes the experiment cap of the program's
 * active run via `updateMaxExperiments`.
 *
 * The process exits with an error when there is no active run, its daemon is
 * not alive, or `<n>` is not a positive integer. Prints a confirmation line,
 * or `{ run_id, max_experiments }` with `--json`.
 */
async function cmdLimit(args: ParsedArgs) {
  const [slug, valueStr] = args.positional
  if (!slug || valueStr == null) die("Usage: autoauto limit <program-slug> <n>")

  const root = await resolveRoot(args.flags)
  const programDir = getProgramDir(root, slug)
  const wantsJson = hasFlag(args.flags, "json")

  const activeRun = await findActiveRun(programDir)
  if (!activeRun) die(`No active run for "${slug}".`)
  if (!activeRun.daemonAlive) die("Daemon is not running. Run may have already completed.")

  const maxExperiments = parsePositiveInt(valueStr)
  if (maxExperiments == null) die(`Invalid value: "${valueStr}". Must be a positive integer.`)

  await updateMaxExperiments(activeRun.runDir, maxExperiments)

  if (!wantsJson) {
    out(`Updated ${slug} run ${activeRun.runId}: max experiments set to ${maxExperiments}.`)
    return
  }
  outJson({ run_id: activeRun.runId, max_experiments: maxExperiments })
}
|
|
730
|
+
|
|
731
|
+
// --- Main Router ---
|
|
732
|
+
|
|
733
|
+
/** Signature shared by every sub-command implementation in this file. */
type CommandHandler = (args: ParsedArgs) => Promise<void>

/** Dispatch table: sub-command name as typed on the command line → handler. */
const COMMANDS: Record<string, CommandHandler> = {
  list: cmdList,
  start: cmdStart,
  status: cmdStatus,
  results: cmdResults,
  stop: cmdStop,
  limit: cmdLimit,
}
|
|
741
|
+
|
|
742
|
+
/**
 * CLI entry point: registers the default agent providers, parses `argv` and
 * dispatches to the matching sub-command.
 *
 * An unknown or missing command prints the usage text and exits with code 1.
 * A handler failure is reported through `die` with exit code 2. Providers are
 * closed on both the success and the failure path.
 *
 * @param argv Arguments where `argv[0]` is the sub-command name.
 */
export async function run(argv: string[]) {
  registerDefaultProviders()
  const args = parseArgs(argv)

  // Own-property lookup only: a plain index would also find inherited members
  // such as `toString` and treat them as commands.
  const handler = Object.prototype.hasOwnProperty.call(COMMANDS, args.command)
    ? COMMANDS[args.command]
    : undefined

  if (!handler) {
    out("Usage: autoauto <command> [options]")
    out("")
    out("Commands:")
    out(" list List all programs")
    out(" start <slug> Start an experiment run")
    out(" status <slug> Show run status")
    out(" results <slug> Show experiment results")
    out(" stop <slug> Stop the active run")
    // The limit command only accepts a positive integer, so only <n> is advertised.
    out(" limit <slug> <n> Update experiment cap on active run")
    out("")
    out("Global flags:")
    out(" --json Output as JSON")
    out(" --cwd <path> Override working directory")
    out(" --provider <claude|opencode|codex> Agent provider for start")
    process.exit(1)
  }

  try {
    await handler(args)
  } catch (err) {
    // die() exits synchronously and would skip a `finally`, so close the
    // providers here first. A close failure must not mask the original error.
    try {
      await closeProviders()
    } catch {
      // Best effort only.
    }
    die(formatShellError(err), 2)
  }

  await closeProviders()
}
|