@spacek33z/autoauto 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. package/README.md +197 -0
  2. package/package.json +51 -0
  3. package/src/App.tsx +224 -0
  4. package/src/cli.ts +772 -0
  5. package/src/components/AgentPanel.tsx +254 -0
  6. package/src/components/Chat.test.tsx +71 -0
  7. package/src/components/Chat.tsx +308 -0
  8. package/src/components/CycleField.tsx +23 -0
  9. package/src/components/ModelPicker.tsx +97 -0
  10. package/src/components/PostUpdatePrompt.tsx +46 -0
  11. package/src/components/ResultsTable.tsx +172 -0
  12. package/src/components/RunCompletePrompt.tsx +90 -0
  13. package/src/components/RunSettingsOverlay.tsx +49 -0
  14. package/src/components/RunsTable.tsx +219 -0
  15. package/src/components/StatsHeader.tsx +100 -0
  16. package/src/daemon.ts +264 -0
  17. package/src/index.tsx +8 -0
  18. package/src/lib/agent/agent-provider.test.ts +133 -0
  19. package/src/lib/agent/claude-provider.ts +277 -0
  20. package/src/lib/agent/codex-provider.ts +413 -0
  21. package/src/lib/agent/default-providers.ts +10 -0
  22. package/src/lib/agent/index.ts +32 -0
  23. package/src/lib/agent/mock-provider.ts +61 -0
  24. package/src/lib/agent/opencode-provider.ts +424 -0
  25. package/src/lib/agent/types.ts +73 -0
  26. package/src/lib/auth.ts +11 -0
  27. package/src/lib/config.ts +152 -0
  28. package/src/lib/daemon-callbacks.ts +59 -0
  29. package/src/lib/daemon-client.ts +16 -0
  30. package/src/lib/daemon-lifecycle.ts +368 -0
  31. package/src/lib/daemon-spawn.ts +122 -0
  32. package/src/lib/daemon-status.ts +189 -0
  33. package/src/lib/daemon-watcher.ts +192 -0
  34. package/src/lib/experiment-loop.ts +679 -0
  35. package/src/lib/experiment.ts +356 -0
  36. package/src/lib/finalize.test.ts +143 -0
  37. package/src/lib/finalize.ts +511 -0
  38. package/src/lib/format.test.ts +32 -0
  39. package/src/lib/format.ts +44 -0
  40. package/src/lib/git.ts +176 -0
  41. package/src/lib/ideas-backlog.test.ts +54 -0
  42. package/src/lib/ideas-backlog.ts +109 -0
  43. package/src/lib/measure.ts +472 -0
  44. package/src/lib/model-options.ts +24 -0
  45. package/src/lib/programs.ts +247 -0
  46. package/src/lib/push-stream.ts +48 -0
  47. package/src/lib/run-context.ts +112 -0
  48. package/src/lib/run-setup.ts +34 -0
  49. package/src/lib/run.ts +383 -0
  50. package/src/lib/syntax-theme.ts +39 -0
  51. package/src/lib/system-prompts/experiment.ts +77 -0
  52. package/src/lib/system-prompts/finalize.ts +90 -0
  53. package/src/lib/system-prompts/index.ts +7 -0
  54. package/src/lib/system-prompts/setup.ts +516 -0
  55. package/src/lib/system-prompts/update.ts +188 -0
  56. package/src/lib/tool-events.ts +99 -0
  57. package/src/lib/validate-measurement.ts +326 -0
  58. package/src/lib/worktree.ts +40 -0
  59. package/src/screens/AuthErrorScreen.tsx +31 -0
  60. package/src/screens/ExecutionScreen.tsx +851 -0
  61. package/src/screens/FirstSetupScreen.tsx +168 -0
  62. package/src/screens/HomeScreen.tsx +406 -0
  63. package/src/screens/PreRunScreen.tsx +206 -0
  64. package/src/screens/SettingsScreen.tsx +189 -0
  65. package/src/screens/SetupScreen.tsx +226 -0
  66. package/src/tui.tsx +17 -0
  67. package/tsconfig.json +17 -0
@@ -0,0 +1,100 @@
1
/** Inputs for the three-row run statistics header rendered by `StatsHeader`. */
interface StatsHeaderProps {
  // --- Layout ---

  /** Available width; the first row is laid out in `width - 4` columns (never below 0). */
  width: number

  // --- First row: counters, cost, progress, model ---

  /** Count shown after the "kept" label. */
  totalKeeps: number
  /** Count shown after the "disc" label. */
  totalDiscards: number
  /** Count shown after the "crash" label. */
  totalCrashes: number
  /** Accumulated cost; shown after "$" with two decimals. */
  totalCostUsd: number
  /** Shown as the left-hand side of "#<experimentNumber>/<maxExperiments>". */
  experimentNumber: number
  /** Shown as the right-hand side of "#<experimentNumber>/<maxExperiments>". */
  maxExperiments: number
  /** Label shown in dim text opposite the counters on the first row. */
  modelLabel: string

  // --- Second row: metric summary ---

  /** Value shown after the "baseline" label. */
  currentBaseline: number
  /** Value shown after the "best" label. */
  bestMetric: number
  /** Shown as a signed percentage with one decimal; hidden when it is 0. */
  improvementPct: number
  /** Series drawn as a sparkline at the end of the second row. */
  metricHistory: number[]
  /** Which way is "better"; "lower" flips the sparkline so lower values draw taller bars. */
  direction: "lower" | "higher"

  // --- Third row ---

  /** Text shown after the "> " prompt marker. */
  currentPhaseLabel: string

  // --- Accepted but not rendered by StatsHeader itself ---

  originalBaseline: number
  metricField: string
}
19
+
20
/** Sparkline glyphs ordered from the shortest bar to the full-height bar. */
const BLOCKS = "▁▂▃▄▅▆▇█"

/**
 * Renders a metric series as a one-line sparkline built from block glyphs.
 *
 * Only the most recent 50 finite samples are drawn. Values are scaled between
 * the minimum and maximum of that window; a flat window renders as a row of
 * mid-height bars.
 *
 * @param values Metric samples, oldest first.
 * @param direction Which way is "better". With "lower" the scale is inverted
 *   so that improvements (lower values) render as taller bars.
 * @returns One glyph per drawn sample, or "" when there is nothing to draw.
 */
function renderSparkline(values: number[], direction: "lower" | "higher"): string {
  // A single NaN/Infinity would turn min/max (and therefore every glyph index)
  // into NaN and blank the whole line, so non-finite samples are skipped.
  const finite = values.filter((v) => Number.isFinite(v))
  if (finite.length === 0) return ""

  // Cap to last 50 values
  const recent = finite.length > 50 ? finite.slice(-50) : finite

  const min = Math.min(...recent)
  const max = Math.max(...recent)

  // No spread to scale against: draw a flat mid-height line.
  if (min === max) return BLOCKS[4].repeat(recent.length)

  const topIndex = BLOCKS.length - 1
  return recent
    .map((v) => {
      let normalized = (v - min) / (max - min)
      // For "lower" direction, invert so improvements (lower values) render as higher blocks
      if (direction === "lower") normalized = 1 - normalized
      return BLOCKS[Math.round(normalized * topIndex)]
    })
    .join("")
}
43
+
44
/**
 * Formats an improvement percentage for display.
 *
 * @param pct Improvement in percent (e.g. 12.34 for 12.34%).
 * @returns The value with one decimal, a "%" suffix and an explicit "+" for
 *   positive numbers; "" when `pct` is 0 or not a finite number, so callers
 *   can hide the label instead of showing "NaN%" or "+Infinity%".
 */
function formatImprovementPct(pct: number): string {
  if (pct === 0 || !Number.isFinite(pct)) return ""
  const sign = pct > 0 ? "+" : ""
  return `${sign}${pct.toFixed(1)}%`
}
48
+
49
/**
 * Three-row status header for a run.
 *
 * Row 1: kept/disc/crash counters, cost and experiment progress, with the
 *        model label on the opposite side.
 * Row 2: current baseline, best metric, and — when non-empty — the formatted
 *        improvement percentage and the metric sparkline.
 * Row 3: "> " followed by the current phase label.
 */
export function StatsHeader(props: StatsHeaderProps) {
  const {
    width,
    modelLabel,
    totalKeeps,
    totalDiscards,
    totalCrashes,
    totalCostUsd,
    experimentNumber,
    maxExperiments,
    currentBaseline,
    bestMetric,
    currentPhaseLabel,
  } = props

  const pctLabel = formatImprovementPct(props.improvementPct)
  const trend = renderSparkline(props.metricHistory, props.direction)
  // The first row is 4 columns narrower than the full width, clamped at 0.
  const rowWidth = Math.max(width - 4, 0)

  // Optional trailing pieces of the metric row; each is omitted when its text is empty.
  const pctNode = pctLabel ? (
    <>
      {" "}
      <span fg="#e0af68">{pctLabel}</span>
    </>
  ) : null
  const trendNode = trend ? (
    <>
      {" "}
      <span fg="#7aa2f7">{trend}</span>
    </>
  ) : null

  return (
    <box paddingX={1} flexDirection="column">
      <box width={rowWidth} flexDirection="row" justifyContent="space-between">
        <text selectable>
          <span fg="#9ece6a"><strong>kept {totalKeeps}</strong></span>
          {" "}
          <span fg="#ff5555">disc {totalDiscards}</span>
          {" "}
          <span fg="#ffffff">crash {totalCrashes}</span>
          {" "}
          <span fg="#ffffff">$</span>
          <span fg="#ffffff">{totalCostUsd.toFixed(2)}</span>
          {" "}
          <span fg="#ffffff">#{experimentNumber}/{maxExperiments}</span>
        </text>
        <text fg="#666666" selectable>{modelLabel}</text>
      </box>
      <box>
        <text selectable>
          <span fg="#ffffff">baseline </span>
          <span fg="#7aa2f7">{currentBaseline}</span>
          {" "}
          <span fg="#ffffff">{"best "}</span>
          <span fg="#9ece6a">{bestMetric}</span>
          {pctNode}
          {trendNode}
        </text>
      </box>
      <box>
        <text selectable>
          <span fg="#ffffff">{"> "}</span>
          <span fg="#ffffff">{currentPhaseLabel}</span>
        </text>
      </box>
    </box>
  )
}
package/src/daemon.ts ADDED
@@ -0,0 +1,264 @@
1
+ /**
2
+ * AutoAuto Daemon — background experiment loop runner.
3
+ *
4
+ * Spawned by the TUI as a detached process. Runs the experiment loop
5
+ * inside a git worktree, writes state to files in the main .autoauto/ dir.
6
+ *
7
+ * Usage:
8
+ * bun <path>/daemon.ts --program <slug> --run-id <id> --main-root <path> --worktree <path> --daemon-id <id> [--in-place]
9
+ */
10
+
11
+ import { join } from "node:path"
12
+ import { closeProviders } from "./lib/agent/index.ts"
13
+ import { registerDefaultProviders } from "./lib/agent/default-providers.ts"
14
+ import { loadProgramConfig } from "./lib/programs.ts"
15
+ import { readState, writeState, appendResult, serializeSecondaryValues } from "./lib/run.ts"
16
+ import { lockMeasurement, unlockMeasurement } from "./lib/run-setup.ts"
17
+ import type { RunState } from "./lib/run.ts"
18
+ import { runExperimentLoop } from "./lib/experiment-loop.ts"
19
+ import { runMeasurementSeries } from "./lib/measure.ts"
20
+ import { getFullSha, getCurrentBranch, formatShellError } from "./lib/git.ts"
21
+ import { createFileCallbacks } from "./lib/daemon-callbacks.ts"
22
+ import {
23
+ writeDaemonJson,
24
+ startHeartbeat,
25
+ readRunConfig,
26
+ runConfigToModelSlot,
27
+ readControl,
28
+ releaseLock,
29
+ recoverFromCrash,
30
+ waitForDaemonStub,
31
+ killChildProcessTree,
32
+ } from "./lib/daemon-lifecycle.ts"
33
+
34
+ // --- Parse CLI args ---
35
+
36
/**
 * Parses the daemon's command-line options.
 *
 * Recognizes `--program`, `--run-id`, `--main-root`, `--worktree` and
 * `--daemon-id` (each followed by a value) plus the boolean `--in-place`.
 * Options are scanned flag by flag, so an unknown flag or a flag that is
 * missing its value is skipped without shifting the pairs that follow it.
 *
 * If any required option is missing, a usage line is written to stderr and
 * the process exits with status 1.
 *
 * @param argv Arguments to parse; defaults to `process.argv.slice(2)`.
 * @returns The parsed option values.
 */
function parseArgs(argv: string[] = process.argv.slice(2)): { programSlug: string; runId: string; mainRoot: string; worktreePath: string; daemonId: string; inPlace: boolean } {
  const inPlace = argv.includes("--in-place")
  const options = new Map<string, string>()

  for (let i = 0; i < argv.length; i++) {
    const token = argv[i]
    // --in-place is a boolean flag; anything not starting with "--" is a stray value.
    if (token === "--in-place" || !token.startsWith("--")) continue

    const value = argv[i + 1]
    // A flag directly followed by another flag (or by nothing) carries no value.
    if (value === undefined || value.startsWith("--")) continue

    options.set(token.slice(2), value)
    i++ // the value has been consumed
  }

  const programSlug = options.get("program")
  const runId = options.get("run-id")
  const mainRoot = options.get("main-root")
  const worktreePath = options.get("worktree")
  const daemonId = options.get("daemon-id")

  if (!programSlug || !runId || !mainRoot || !worktreePath || !daemonId) {
    process.stderr.write("Usage: daemon.ts --program <slug> --run-id <id> --main-root <path> --worktree <path> --daemon-id <id> [--in-place]\n")
    process.exit(1)
  }

  return { programSlug, runId, mainRoot, worktreePath, daemonId, inPlace }
}
62
+
63
+ // --- Main ---
64
+
65
/**
 * Daemon entry point.
 *
 * Writes daemon.json and starts the heartbeat, then either starts a fresh run
 * (initial state, baseline measurement, experiment loop) or — when crash
 * recovery returns a state — resumes the experiment loop.
 *
 * Everything after the heartbeat starts runs inside one try/finally, so the
 * heartbeat is stopped and providers, the lock and the measurement lock are
 * released on every exit path, including a missing or invalid run config.
 */
async function main() {
  registerDefaultProviders()
  const { programSlug, runId, mainRoot, worktreePath, daemonId, inPlace } = parseArgs()
  const programDir = join(mainRoot, ".autoauto", "programs", programSlug)
  const runDir = join(programDir, "runs", runId)

  // 1. Write daemon.json with daemon_id + heartbeat
  await waitForDaemonStub(runDir, daemonId)
  await writeDaemonJson(runDir, runId, worktreePath, daemonId)
  const heartbeatInterval = startHeartbeat(runDir, daemonId)

  try {
    // 2. Read per-run config
    const runConfig = await readRunConfig(runDir)
    const modelConfig = runConfig ? runConfigToModelSlot(runConfig) : { provider: "claude" as const, model: "sonnet", effort: "high" as const }
    if (!runConfig?.max_experiments) throw new Error("run-config.json must specify max_experiments")
    const maxExperiments = runConfig.max_experiments
    const ideasBacklogEnabled = runConfig?.ideas_backlog_enabled ?? true

    // 3. Stop/abort signals
    let stopRequested = false
    const abortController = new AbortController()

    process.on("SIGTERM", async () => {
      // The handler's promise is not awaited by anyone, so a failed control
      // read must not escape as an unhandled rejection. If the control file
      // cannot be read, fall back to the default (graceful stop).
      let abortRequested = false
      try {
        const control = await readControl(runDir)
        abortRequested = control?.action === "abort"
      } catch {
        // Unreadable control file — treat as a plain stop request
      }

      if (abortRequested) {
        abortController.abort()
        // Give the abort signal 3s to take effect, then kill the child process tree.
        // unref() keeps this timer from holding the process open.
        setTimeout(() => {
          killChildProcessTree(process.pid).catch(() => {})
        }, 3_000).unref()
      } else {
        // Default: stop after current experiment
        stopRequested = true
      }
    })

    // Loop options shared by the fresh-run and resume paths.
    const loopOptions = {
      maxExperiments,
      signal: abortController.signal,
      stopRequested: () => stopRequested,
      ideasBacklogEnabled,
    }

    // 4. Crash recovery
    const recoveredState = await recoverFromCrash(runDir, worktreePath)

    if (recoveredState === null) {
      // Either first run (no state.json yet) or crashed during baseline.
      // Check if state.json exists and is in crashed state
      let existingState: RunState | null = null
      try {
        existingState = await readState(runDir)
      } catch {
        // No state.json — first run
      }

      if (existingState?.phase === "crashed") {
        // Baseline crash — nothing we can do
        process.stderr.write(`Run crashed during ${existingState.error_phase}: ${existingState.error}\n`)
        return
      }

      // 5. Fresh run: write initial state with phase: "baseline"
      const config = await loadProgramConfig(programDir)
      const originalBranch = await getCurrentBranch(mainRoot)
      const now = new Date().toISOString()

      const baselineState: RunState = {
        run_id: runId,
        program_slug: programSlug,
        phase: "baseline",
        experiment_number: 0,
        original_baseline: 0,
        current_baseline: 0,
        best_metric: 0,
        best_experiment: 0,
        total_keeps: 0,
        total_discards: 0,
        total_crashes: 0,
        branch_name: `autoauto-${programSlug}-${runId}`,
        original_baseline_sha: "",
        last_known_good_sha: "",
        candidate_sha: null,
        started_at: now,
        updated_at: now,
        model: modelConfig.model,
        provider: modelConfig.provider,
        effort: modelConfig.effort,
        total_tokens: 0,
        total_cost_usd: 0,
        termination_reason: null,
        original_branch: originalBranch,
        worktree_path: worktreePath,
        in_place: inPlace || undefined,
        error: null,
        error_phase: null,
      }
      await writeState(runDir, baselineState)

      // Records a baseline failure as a crashed run and releases both locks.
      const failBaseline = async (error: string): Promise<void> => {
        const errorState: RunState = {
          ...baselineState,
          phase: "crashed",
          error,
          error_phase: "baseline",
          updated_at: new Date().toISOString(),
        }
        await writeState(runDir, errorState)
        await unlockMeasurement(programDir)
        await releaseLock(programDir)
      }

      // 6. Lock measurement files + run baseline
      await lockMeasurement(programDir)

      const measureShPath = join(programDir, "measure.sh")
      const buildShPath = join(programDir, "build.sh")
      const baseline = await runMeasurementSeries(measureShPath, worktreePath, config, abortController.signal, buildShPath)

      if (!baseline.success) {
        await failBaseline(`Baseline measurement failed: ${baseline.failure_reason ?? "unknown error"}`)
        return
      }

      if (!baseline.quality_gates_passed) {
        await failBaseline(`Baseline quality gates failed: ${baseline.gate_violations.join(", ")}`)
        return
      }

      const fullSha = await getFullSha(worktreePath)

      // The baseline is recorded as experiment 0 with status "keep".
      await appendResult(runDir, {
        experiment_number: 0,
        commit: fullSha.slice(0, 7),
        metric_value: baseline.median_metric,
        secondary_values: serializeSecondaryValues(baseline.median_quality_gates, baseline.median_secondary_metrics),
        status: "keep",
        description: "baseline",
        measurement_duration_ms: baseline.duration_ms,
      })

      const readyState: RunState = {
        ...baselineState,
        phase: "idle",
        original_baseline: baseline.median_metric,
        current_baseline: baseline.median_metric,
        best_metric: baseline.median_metric,
        original_baseline_sha: fullSha,
        last_known_good_sha: fullSha,
        updated_at: new Date().toISOString(),
      }
      await writeState(runDir, readyState)

      // 7. Run the experiment loop
      const callbacks = createFileCallbacks(runDir)
      await runExperimentLoop(
        worktreePath,
        programDir,
        runDir,
        config,
        modelConfig,
        callbacks,
        { ...loopOptions, baselineDiagnostics: baseline.diagnostics },
      )
    } else {
      // Recovered from crash — resume the loop
      const config = await loadProgramConfig(programDir)
      const callbacks = createFileCallbacks(runDir)
      await runExperimentLoop(
        worktreePath,
        programDir,
        runDir,
        config,
        modelConfig,
        callbacks,
        loopOptions,
      )
    }
  } finally {
    // Cleanup. The steps are nested so that a failure in one of them never
    // prevents the following ones (in particular the lock release) from running.
    clearInterval(heartbeatInterval)
    try {
      await closeProviders()
    } finally {
      try {
        await releaseLock(programDir)
      } finally {
        await unlockMeasurement(programDir).catch(() => {})
      }
    }
  }
}
260
+
261
// Last-resort handler: report the failure on stderr and exit with status 1.
main().catch((fatal) => {
  const reason = formatShellError(fatal)
  process.stderr.write(`Daemon fatal error: ${reason}\n`)
  process.exit(1)
})
package/src/index.tsx ADDED
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env bun
2
+
3
// Dispatch on whether any command-line arguments were given: with none, load
// the TUI module; otherwise hand the arguments to the CLI's run().
const cliArgs = process.argv.slice(2)

if (cliArgs.length === 0) {
  await import("./tui.tsx")
} else {
  const { run } = await import("./cli.ts")
  await run(cliArgs)
}
@@ -0,0 +1,133 @@
1
+ import { describe, test, expect } from "bun:test"
2
+ import { MockProvider } from "./mock-provider.ts"
3
+ import { setProvider, getProvider } from "./index.ts"
4
+ import type { AgentEvent } from "./types.ts"
5
+
6
// Contract tests for the agent provider interface, exercised through MockProvider.
describe("AgentProvider contract", () => {
  /** Drains a session to completion and returns every event it yielded, in order. */
  async function collectEvents(session: AsyncIterable<AgentEvent>): Promise<AgentEvent[]> {
    const received: AgentEvent[] = []
    for await (const event of session) {
      received.push(event)
    }
    return received
  }

  test("one-shot session: runOnce yields events and ends with result", async () => {
    const events: AgentEvent[] = [
      { type: "text_delta", text: "Hello " },
      { type: "text_delta", text: "world" },
      { type: "tool_use", tool: "Read", input: { file_path: "/tmp/test.ts" } },
      { type: "assistant_complete", text: "Hello world" },
      { type: "result", success: true, cost: {
        total_cost_usd: 0.01,
        duration_ms: 1000,
        duration_api_ms: 800,
        num_turns: 1,
        input_tokens: 100,
        output_tokens: 50,
      }},
    ]

    const provider = new MockProvider(events)
    const session = provider.runOnce("Do something", { tools: ["Read"] })

    const received = await collectEvents(session)

    expect(received).toEqual(events)
    expect(received.at(-1)?.type).toBe("result")
  })

  test("multi-turn session: pushMessage accepts messages over time", async () => {
    const events: AgentEvent[] = [
      { type: "assistant_complete", text: "Got it" },
      { type: "result", success: true },
    ]

    const provider = new MockProvider(events)
    const session = provider.createSession({ systemPrompt: "Be helpful" })

    // Push a message (should not throw)
    session.pushMessage("First message")
    session.pushMessage("Second message")

    const received = await collectEvents(session)

    expect(received).toHaveLength(2)
    expect(received[0].type).toBe("assistant_complete")
  })

  test("auth check: returns success", async () => {
    const provider = new MockProvider()
    const result = await provider.checkAuth()

    expect(result.authenticated).toBe(true)
    if (result.authenticated) {
      expect(result.account.email).toBe("test@example.com")
    }
  })

  test("auth check: returns failure", async () => {
    const provider = new MockProvider([], {
      authenticated: false,
      error: "Invalid API key",
    })
    const result = await provider.checkAuth()

    expect(result.authenticated).toBe(false)
    if (!result.authenticated) {
      expect(result.error).toBe("Invalid API key")
    }
  })

  test("error during stream: provider emits error event", async () => {
    const events: AgentEvent[] = [
      { type: "text_delta", text: "Starting..." },
      { type: "error", error: "Rate limit exceeded", retriable: true },
      { type: "result", success: false, error: "Rate limit exceeded" },
    ]

    const provider = new MockProvider(events)
    const session = provider.runOnce("Do something", {})

    const received = await collectEvents(session)

    const errorEvent = received.find((e) => e.type === "error")
    expect(errorEvent).toBeDefined()
    if (errorEvent?.type === "error") {
      expect(errorEvent.retriable).toBe(true)
      expect(errorEvent.error).toBe("Rate limit exceeded")
    }
  })

  test("abort mid-stream: close() stops iteration", async () => {
    const events: AgentEvent[] = [
      { type: "text_delta", text: "First" },
      { type: "text_delta", text: "Second" },
      { type: "text_delta", text: "Third" },
      { type: "result", success: true },
    ]

    const provider = new MockProvider(events)
    const session = provider.createSession({})
    session.pushMessage("Go")

    // Iterated by hand (not via collectEvents) because close() has to be
    // called from inside the loop, right after the first event arrives.
    const received: AgentEvent[] = []
    for await (const event of session) {
      received.push(event)
      if (received.length === 1) {
        session.close()
      }
    }

    // Should have stopped after close() — got at most 1 event
    expect(received.length).toBeLessThanOrEqual(1)
  })

  test("registry: getProvider returns the provider registered with setProvider", () => {
    // Only the set-then-get round trip is covered here. Checking that
    // getProvider throws before any setProvider call would need a fresh
    // import context, which isn't easy to create.
    const provider = new MockProvider()
    setProvider("claude", provider)
    expect(getProvider("claude")).toBe(provider)
  })
})