@spacek33z/autoauto 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. package/README.md +197 -0
  2. package/package.json +51 -0
  3. package/src/App.tsx +224 -0
  4. package/src/cli.ts +772 -0
  5. package/src/components/AgentPanel.tsx +254 -0
  6. package/src/components/Chat.test.tsx +71 -0
  7. package/src/components/Chat.tsx +308 -0
  8. package/src/components/CycleField.tsx +23 -0
  9. package/src/components/ModelPicker.tsx +97 -0
  10. package/src/components/PostUpdatePrompt.tsx +46 -0
  11. package/src/components/ResultsTable.tsx +172 -0
  12. package/src/components/RunCompletePrompt.tsx +90 -0
  13. package/src/components/RunSettingsOverlay.tsx +49 -0
  14. package/src/components/RunsTable.tsx +219 -0
  15. package/src/components/StatsHeader.tsx +100 -0
  16. package/src/daemon.ts +264 -0
  17. package/src/index.tsx +8 -0
  18. package/src/lib/agent/agent-provider.test.ts +133 -0
  19. package/src/lib/agent/claude-provider.ts +277 -0
  20. package/src/lib/agent/codex-provider.ts +413 -0
  21. package/src/lib/agent/default-providers.ts +10 -0
  22. package/src/lib/agent/index.ts +32 -0
  23. package/src/lib/agent/mock-provider.ts +61 -0
  24. package/src/lib/agent/opencode-provider.ts +424 -0
  25. package/src/lib/agent/types.ts +73 -0
  26. package/src/lib/auth.ts +11 -0
  27. package/src/lib/config.ts +152 -0
  28. package/src/lib/daemon-callbacks.ts +59 -0
  29. package/src/lib/daemon-client.ts +16 -0
  30. package/src/lib/daemon-lifecycle.ts +368 -0
  31. package/src/lib/daemon-spawn.ts +122 -0
  32. package/src/lib/daemon-status.ts +189 -0
  33. package/src/lib/daemon-watcher.ts +192 -0
  34. package/src/lib/experiment-loop.ts +679 -0
  35. package/src/lib/experiment.ts +356 -0
  36. package/src/lib/finalize.test.ts +143 -0
  37. package/src/lib/finalize.ts +511 -0
  38. package/src/lib/format.test.ts +32 -0
  39. package/src/lib/format.ts +44 -0
  40. package/src/lib/git.ts +176 -0
  41. package/src/lib/ideas-backlog.test.ts +54 -0
  42. package/src/lib/ideas-backlog.ts +109 -0
  43. package/src/lib/measure.ts +472 -0
  44. package/src/lib/model-options.ts +24 -0
  45. package/src/lib/programs.ts +247 -0
  46. package/src/lib/push-stream.ts +48 -0
  47. package/src/lib/run-context.ts +112 -0
  48. package/src/lib/run-setup.ts +34 -0
  49. package/src/lib/run.ts +383 -0
  50. package/src/lib/syntax-theme.ts +39 -0
  51. package/src/lib/system-prompts/experiment.ts +77 -0
  52. package/src/lib/system-prompts/finalize.ts +90 -0
  53. package/src/lib/system-prompts/index.ts +7 -0
  54. package/src/lib/system-prompts/setup.ts +516 -0
  55. package/src/lib/system-prompts/update.ts +188 -0
  56. package/src/lib/tool-events.ts +99 -0
  57. package/src/lib/validate-measurement.ts +326 -0
  58. package/src/lib/worktree.ts +40 -0
  59. package/src/screens/AuthErrorScreen.tsx +31 -0
  60. package/src/screens/ExecutionScreen.tsx +851 -0
  61. package/src/screens/FirstSetupScreen.tsx +168 -0
  62. package/src/screens/HomeScreen.tsx +406 -0
  63. package/src/screens/PreRunScreen.tsx +206 -0
  64. package/src/screens/SettingsScreen.tsx +189 -0
  65. package/src/screens/SetupScreen.tsx +226 -0
  66. package/src/tui.tsx +17 -0
  67. package/tsconfig.json +17 -0
@@ -0,0 +1,247 @@
1
+ import { readdir, mkdir } from "node:fs/promises"
2
+ import { join } from "node:path"
3
+ import { $ } from "bun"
4
+
5
/** A program discovered on disk: one sub-directory of the programs directory. */
export interface Program {
  /** Name of the program's directory. */
  name: string
  /** Path where the program's `config.json` is expected (its existence is not checked when listing). */
  configPath: string
}
9
+
10
/**
 * Allowed range for one gated field.
 * Validation requires at least one bound, both finite, and `min <= max` when both are set.
 */
export interface QualityGate {
  /** Lower bound. */
  min?: number
  /** Upper bound. */
  max?: number
}

/** A metric tracked next to the primary one. */
export interface SecondaryMetric {
  /** Presumably which direction counts as an improvement — confirm against the consumer. */
  direction: "lower" | "higher"
}

/** Shape of a program's `config.json` once it has passed `validateProgramConfig`. */
export interface ProgramConfig {
  /** Non-empty name of the primary metric field. */
  metric_field: string
  /** Direction of the primary metric. */
  direction: "lower" | "higher"
  /** Finite number, strictly positive. */
  noise_threshold: number
  /** Integer >= 1. */
  repeats: number
  /** Gates keyed by field name (may be an empty object). */
  quality_gates: Record<string, QualityGate>
  /** Optional extra metrics; keys may not collide with `metric_field` or a gate name. */
  secondary_metrics?: Record<string, SecondaryMetric>
  /** Integer >= 1. */
  max_experiments: number
  /** Optional, integer >= 1. */
  max_consecutive_discards?: number
  /** Optional, integer >= 1. */
  max_turns?: number
}
30
+
31
/** Identifiers of the TUI screens. */
export type Screen =
  | "home"
  | "setup"
  | "settings"
  | "program-detail"
  | "pre-run"
  | "execution"
  | "first-setup"

/** Directory (relative to the project root) that holds all AutoAuto state. */
export const AUTOAUTO_DIR = ".autoauto"

/** Memoized result of `getProjectRoot`; stays `undefined` until the first successful lookup. */
let cachedRoot: string | undefined
36
+
37
/** True for non-null, non-array objects — the only values accepted where a JSON object is required. */
function isJsonObject(value: unknown): value is Record<string, unknown> {
  return typeof value === "object" && value !== null && !Array.isArray(value)
}

/** Throws unless `value` is a finite number (NaN, ±Infinity and non-numbers are rejected). */
function assertFiniteNumber(value: unknown, path: string): asserts value is number {
  if (typeof value !== "number" || !Number.isFinite(value)) {
    throw new Error(`config.json: ${path} must be a finite number`)
  }
}

/** Throws unless `value` is an integer >= 1. */
function assertPositiveInteger(value: unknown, path: string): void {
  if (typeof value !== "number" || !Number.isInteger(value) || value < 1) {
    throw new Error(`config.json: ${path} must be an integer >= 1`)
  }
}

/**
 * Validates a parsed `config.json` and returns it typed as {@link ProgramConfig}.
 *
 * Checks run in a fixed order and the first violation is reported, so the
 * error a caller sees for a given bad config is stable.
 *
 * @param raw - Parsed JSON value of unknown shape.
 * @returns The same object, typed as `ProgramConfig` (unknown extra keys are kept).
 * @throws Error with a `config.json: …` message describing the first invalid field.
 */
export function validateProgramConfig(raw: unknown): ProgramConfig {
  if (!isJsonObject(raw)) {
    throw new Error("config.json: must be a JSON object")
  }
  const config = raw

  if (!config.metric_field || typeof config.metric_field !== "string") {
    throw new Error("config.json: metric_field must be a non-empty string")
  }
  if (config.direction !== "lower" && config.direction !== "higher") {
    throw new Error('config.json: direction must be "lower" or "higher"')
  }

  assertFiniteNumber(config.noise_threshold, "noise_threshold")
  if (config.noise_threshold <= 0) {
    throw new Error("config.json: noise_threshold must be positive")
  }

  assertPositiveInteger(config.repeats, "repeats")
  assertPositiveInteger(config.max_experiments, "max_experiments")
  if (config.max_consecutive_discards !== undefined) {
    assertPositiveInteger(config.max_consecutive_discards, "max_consecutive_discards")
  }
  if (config.max_turns !== undefined) {
    assertPositiveInteger(config.max_turns, "max_turns")
  }

  const qualityGates = config.quality_gates
  if (!isJsonObject(qualityGates)) {
    throw new Error("config.json: quality_gates must be an object")
  }

  for (const [field, gate] of Object.entries(qualityGates)) {
    if (!isJsonObject(gate)) {
      throw new Error(`config.json: quality_gates.${field} must be an object`)
    }

    const hasMin = gate.min !== undefined
    const hasMax = gate.max !== undefined
    if (!hasMin && !hasMax) {
      throw new Error(`config.json: quality_gates.${field} must define min or max`)
    }
    if (hasMin) assertFiniteNumber(gate.min, `quality_gates.${field}.min`)
    if (hasMax) assertFiniteNumber(gate.max, `quality_gates.${field}.max`)
    if (typeof gate.min === "number" && typeof gate.max === "number" && gate.min > gate.max) {
      throw new Error(`config.json: quality_gates.${field}.min must be <= max`)
    }
  }

  const secondaryMetrics = config.secondary_metrics
  if (secondaryMetrics !== undefined) {
    if (!isJsonObject(secondaryMetrics)) {
      throw new Error("config.json: secondary_metrics must be an object")
    }

    for (const [field, metric] of Object.entries(secondaryMetrics)) {
      if (!isJsonObject(metric)) {
        throw new Error(`config.json: secondary_metrics.${field} must be an object`)
      }
      if (metric.direction !== "lower" && metric.direction !== "higher") {
        throw new Error(`config.json: secondary_metrics.${field}.direction must be "lower" or "higher"`)
      }

      // A secondary metric may not reuse the primary metric's name or a gate's name.
      if (field === config.metric_field) {
        throw new Error(`config.json: secondary_metrics.${field} overlaps with metric_field`)
      }
      // Own-property check on purpose: the `in` operator also matches inherited
      // members, so names such as "toString" or "constructor" used to be
      // reported as overlapping with a gate that does not exist.
      if (Object.prototype.hasOwnProperty.call(qualityGates, field)) {
        throw new Error(`config.json: secondary_metrics.${field} overlaps with quality_gates`)
      }
    }
  }

  // Every field ProgramConfig declares has been checked above; the double
  // assertion only bridges the index-signature type to the interface.
  return config as unknown as ProgramConfig
}
135
+
136
/**
 * Resolves the git root for `cwd` and memoizes it in `cachedRoot`.
 *
 * Git is first asked for the superproject working tree; a non-empty answer
 * wins. Otherwise the result of `git rev-parse --show-toplevel` is used.
 * The cache is not keyed by `cwd`: after the first successful lookup every
 * call returns the same value, whatever directory is passed.
 *
 * NOTE(review): the previous comment said this resolves "through worktrees",
 * but `--show-superproject-working-tree` is documented for submodules —
 * confirm that linked worktrees resolve to the intended root.
 *
 * The first probe is `nothrow()`; the `--show-toplevel` call is not, so a
 * failure there (presumably e.g. outside a repository) rejects the promise.
 */
export async function getProjectRoot(cwd: string): Promise<string> {
  if (cachedRoot) return cachedRoot

  const probe = await $`git rev-parse --show-superproject-working-tree`.cwd(cwd).nothrow().quiet()
  let root = probe.stdout.toString().trim()
  if (!root) {
    root = (await $`git rev-parse --show-toplevel`.cwd(cwd).text()).trim()
  }

  cachedRoot = root
  return root
}
149
+
150
/**
 * Lists every program directory under `<project root>/.autoauto/programs`.
 *
 * Only directories are returned. `configPath` is derived from the directory
 * name; the file itself is not checked. Any failure while reading the
 * programs directory (for instance when it does not exist) yields `[]`.
 * Errors from resolving the project root are not caught.
 */
export async function listPrograms(cwd: string): Promise<Program[]> {
  const programsDir = join(await getProjectRoot(cwd), AUTOAUTO_DIR, "programs")

  try {
    const programs: Program[] = []
    for (const entry of await readdir(programsDir, { withFileTypes: true })) {
      if (!entry.isDirectory()) continue
      programs.push({
        name: entry.name,
        configPath: join(programsDir, entry.name, "config.json"),
      })
    }
    return programs
  } catch {
    return []
  }
}
165
+
166
/** Per-program metadata shown on the home screen. */
export interface ProgramInfo {
  /** Program name. */
  name: string
  /** Number of runs recorded for the program. */
  totalRuns: number
  /** Date of the most recent run, or `null` when there is none. */
  lastRunDate: string | null
  /** Whether a run is currently active. */
  hasActiveRun: boolean
}
173
+
174
/**
 * Builds the path of the programs directory below `cwd`.
 * The result is absolute only when `cwd` is; `cwd` is used as given and is
 * not resolved to the project root.
 */
export function getProgramsDir(cwd: string): string {
  const segments = [cwd, AUTOAUTO_DIR, "programs"]
  return join(...segments)
}
178
+
179
/**
 * Builds the path of one program's directory below `cwd`.
 * The result is absolute only when `cwd` is.
 */
export function getProgramDir(cwd: string, slug: string): string {
  return join(getProgramsDir(cwd), slug)
}
183
+
184
/**
 * Builds the path of a single run's directory (`…/programs/<slug>/runs/<runId>`) below `cwd`.
 * The result is absolute only when `cwd` is.
 */
export function getRunDir(cwd: string, slug: string, runId: string): string {
  return join(getProgramDir(cwd, slug), "runs", runId)
}
188
+
189
/**
 * Loads `<programDir>/config.json`, parses it as JSON and validates it.
 *
 * @throws when the file cannot be read or parsed, or when `validateProgramConfig` rejects its contents.
 */
export async function loadProgramConfig(programDir: string): Promise<ProgramConfig> {
  const configFile = Bun.file(join(programDir, "config.json"))
  const parsed: unknown = await configFile.json()
  return validateProgramConfig(parsed)
}
194
+
195
/** Short description of an existing program, used to spot duplicates during setup. */
export interface ProgramSummary {
  /** Directory name of the program. */
  slug: string
  /** Text of the `## Goal` section of `program.md`, or a placeholder when it is missing or unreadable. */
  goal: string
}
200
+
201
/**
 * Collects a `{ slug, goal }` summary for every program directory.
 *
 * The goal is the text after the `## Goal` heading in `program.md`, up to the
 * next line starting with `##` or the end of the file. Programs without that
 * section get "(no goal defined)"; programs whose `program.md` cannot be read
 * get "(could not read program.md)". When the programs directory cannot be
 * listed the result is `[]`. Files are read concurrently.
 */
export async function loadProgramSummaries(cwd: string): Promise<ProgramSummary[]> {
  const programsDir = join(await getProjectRoot(cwd), AUTOAUTO_DIR, "programs")

  let programDirs: import("node:fs").Dirent[]
  try {
    const listing = await readdir(programsDir, { withFileTypes: true })
    programDirs = listing.filter((entry) => entry.isDirectory())
  } catch {
    return []
  }

  const summarize = async (slug: string): Promise<ProgramSummary> => {
    try {
      const markdown = await Bun.file(join(programsDir, slug, "program.md")).text()
      const match = markdown.match(/## Goal\n+([\s\S]*?)(?:\n##|\n*$)/)
      if (match === null) {
        return { slug, goal: "(no goal defined)" }
      }
      return { slug, goal: match[1].trim() }
    } catch {
      return { slug, goal: "(could not read program.md)" }
    }
  }

  return Promise.all(programDirs.map((entry) => summarize(entry.name)))
}
225
+
226
/**
 * Creates `<project root>/.autoauto` if needed and makes sure `.gitignore` mentions it.
 *
 * - No `.gitignore`: one is created containing `.autoauto/`.
 * - Existing `.gitignore` without the text `.autoauto` anywhere in it: `.autoauto/` is
 *   appended after trimming trailing whitespace.
 * - Otherwise the file is left untouched.
 *
 * Whenever `.gitignore` was written it is also staged with `git add`.
 */
export async function ensureAutoAutoDir(cwd: string): Promise<void> {
  const root = await getProjectRoot(cwd)
  await mkdir(join(root, AUTOAUTO_DIR), { recursive: true })

  const gitignorePath = join(root, ".gitignore")
  const gitignoreFile = Bun.file(gitignorePath)

  // `undefined` means "nothing to write".
  let nextContents: string | undefined
  if (!(await gitignoreFile.exists())) {
    nextContents = `${AUTOAUTO_DIR}/\n`
  } else {
    const current = await gitignoreFile.text()
    if (!current.includes(AUTOAUTO_DIR)) {
      nextContents = current.trimEnd() + `\n${AUTOAUTO_DIR}/\n`
    }
  }

  if (nextContents === undefined) return

  await Bun.write(gitignorePath, nextContents)
  await $`git add .gitignore`.cwd(root).quiet()
}
@@ -0,0 +1,48 @@
1
/** An async-iterable queue that producers feed imperatively. */
export interface PushStream<T> extends AsyncIterable<T> {
  /** Delivers `value` to a pending `next()` if there is one, otherwise buffers it. Ignored after `end()`. */
  push(value: T): void
  /** Closes the stream: buffered values are still delivered, then iteration completes. */
  end(): void
}

/**
 * Creates an unbounded, in-memory {@link PushStream}.
 *
 * Values pushed while nobody is waiting are buffered in FIFO order. Calls to
 * `next()` that arrive while the buffer is empty are parked in FIFO order as
 * well, so several outstanding `next()` calls are each settled — by later
 * pushes in order, or with `done: true` once `end()` is called. All iterators
 * obtained from one stream share the same buffer.
 */
export function createPushStream<T>(): PushStream<T> {
  const buffered: T[] = []
  // One resolver per next() call that found the buffer empty. This must be a
  // queue: with a single slot, a second next() would overwrite the first
  // resolver and leave that promise pending forever.
  const waiters: Array<(result: IteratorResult<T>) => void> = []
  let ended = false

  const finished = (): IteratorResult<T> => ({ value: undefined, done: true })

  return {
    push(value: T) {
      if (ended) return
      const waiter = waiters.shift()
      if (waiter) {
        waiter({ value, done: false })
      } else {
        buffered.push(value)
      }
    },

    end() {
      ended = true
      // Release every parked consumer, not just one.
      for (const waiter of waiters.splice(0)) {
        waiter(finished())
      }
    },

    [Symbol.asyncIterator]() {
      return {
        next(): Promise<IteratorResult<T>> {
          if (buffered.length > 0) {
            // The length check guarantees an element; T itself may include undefined.
            return Promise.resolve({ value: buffered.shift() as T, done: false })
          }
          if (ended) {
            return Promise.resolve(finished())
          }
          return new Promise<IteratorResult<T>>((resolve) => {
            waiters.push(resolve)
          })
        },
      }
    },
  }
}
@@ -0,0 +1,112 @@
1
+ import { readdir } from "node:fs/promises"
2
+ import { join } from "node:path"
3
+ import { getLatestRun, readAllResults, getRunStats } from "./run.ts"
4
+ import { loadProgramConfig } from "./programs.ts"
5
+ import { streamLogName } from "./daemon-callbacks.ts"
6
+
7
/** Upper bound on how many trailing lines of the last stream log are inlined into the message. */
const MAX_LOG_LINES = 500

/**
 * Composes the opening message handed to the update agent.
 *
 * The message summarizes the program's latest run: phase, counts and keep
 * rate, baseline vs. best metric, the last five rows of the results file, the
 * tail of the last experiment's stream log, and the paths of every
 * `stream-*.log` in the run directory. If there is no latest run (or it has
 * no state) a short prompt asking the user what to change is returned instead.
 *
 * A config that fails to load is tolerated (direction falls back to "lower");
 * an unreadable log file or run directory simply omits that section.
 */
export async function buildUpdateRunContext(programDir: string): Promise<string> {
  const latest = await getLatestRun(programDir)
  if (!latest || !latest.state) {
    return "No previous runs found for this program. Please describe what you'd like to change."
  }
  const runDir = latest.run_dir
  const state = latest.state

  const [config, results] = await Promise.all([
    loadProgramConfig(programDir).catch(() => null),
    readAllResults(runDir),
  ])
  const stats = getRunStats(state, config?.direction ?? "lower")

  const out: string[] = []

  // --- Run summary ---
  out.push(
    "Here are the results from the latest run of this program:",
    "",
    "## Run Summary",
    `- Phase: ${state.phase}`,
  )
  if (state.termination_reason) {
    out.push(`- Termination reason: ${state.termination_reason}`)
  }
  out.push(
    `- Experiments: ${stats.total_experiments} total (${stats.total_keeps} kept, ${stats.total_discards} discarded, ${stats.total_crashes} crashed)`,
  )
  if (stats.total_experiments > 0) {
    const sign = stats.improvement_pct >= 0 ? "+" : ""
    out.push(
      `- Keep rate: ${(stats.keep_rate * 100).toFixed(0)}%`,
      `- Original baseline: ${state.original_baseline} → Best: ${state.best_metric} (${sign}${stats.improvement_pct.toFixed(1)}%)`,
    )
  }
  if (state.error) {
    out.push(`- Error: ${state.error}`)
  }

  // --- Last five rows of the results file ---
  if (results.length > 0) {
    out.push("", "## Recent Experiment Results")
    for (const row of results.slice(-5)) {
      // Upper-casing covers "keep" -> "KEEP" and "discard" -> "DISCARD" as well as every other status.
      const tag = row.status.toUpperCase()
      out.push(`- #${row.experiment_number} [${tag}] metric=${row.metric_value} — ${row.description}`)
    }
  }

  // --- Tail of the last experiment's stream log ---
  const lastExperiment = state.experiment_number
  if (lastExperiment > 0) {
    const logPath = join(runDir, streamLogName(lastExperiment))
    try {
      const allLines = (await Bun.file(logPath).text()).split("\n")
      const overLimit = allLines.length > MAX_LOG_LINES
      const shown = overLimit ? allLines.slice(-MAX_LOG_LINES) : allLines
      const suffix = overLimit ? ` (last ${MAX_LOG_LINES} lines)` : ""
      out.push(
        "",
        `## Last Experiment (#${lastExperiment}) Stream Log${suffix}`,
        "```",
        shown.join("\n"),
        "```",
      )
    } catch {
      // No readable log for that experiment — leave the section out.
    }
  }

  // --- Paths of all stream logs ---
  try {
    const names = await readdir(runDir)
    // Default string sort. NOTE(review): this is lexicographic, so numeric
    // order depends on how streamLogName pads experiment numbers — confirm.
    const logNames = names.filter((name) => name.startsWith("stream-") && name.endsWith(".log")).toSorted()
    if (logNames.length > 0) {
      out.push(
        "",
        "## Additional Logs",
        "The following log files are available if you need more context (use the Read tool):",
        ...logNames.map((name) => `- ${join(runDir, name)}`),
      )
    }
  } catch {
    // Run directory could not be listed — leave the section out.
  }

  out.push(
    "",
    "Please analyze these results and suggest what should be fixed or improved in the program configuration.",
  )

  return out.join("\n")
}
@@ -0,0 +1,34 @@
1
import { chmod, mkdir, stat } from "node:fs/promises"
import { join } from "node:path"
3
+
4
+ // --- Measurement Locking ---
5
+
6
/** Files protected from agent modification during experiment runs. */
export const MEASUREMENT_FILES = ["measure.sh", "config.json", "build.sh"] as const

/** Permission bits (rwx for user/group/other) within a stat mode. */
const PERMISSION_BITS = 0o777
/** Write bits for user, group and other. */
const ALL_WRITE_BITS = 0o222
/** Write bit for the owner only. */
const OWNER_WRITE_BIT = 0o200

/** Reads the file's current permission bits and replaces them with `remap(bits)`. */
async function remapPermissions(path: string, remap: (bits: number) => number): Promise<void> {
  const { mode } = await stat(path)
  await chmod(path, remap(mode & PERMISSION_BITS))
}

/**
 * Makes the measurement files read-only. #1 safeguard against metric gaming.
 *
 * Only the write bits are cleared; read and execute bits are left as they
 * were, so an executable script stays executable (0o755 -> 0o555, 0o644 ->
 * 0o444) and a private file is not widened to world-readable.
 * Best effort: a file that is missing or cannot be changed is skipped silently.
 */
export async function lockMeasurement(programDir: string): Promise<void> {
  await Promise.all(
    MEASUREMENT_FILES.map((f) =>
      remapPermissions(join(programDir, f), (bits) => bits & ~ALL_WRITE_BITS).catch(() => {}),
    ),
  )
}

/**
 * Makes the measurement files writable by their owner again.
 *
 * Only the owner write bit is added, so the execute bits that
 * `lockMeasurement` preserved survive a lock/unlock cycle (0o555 -> 0o755,
 * 0o444 -> 0o644).
 * Best effort: a file that is missing or cannot be changed is skipped silently.
 */
export async function unlockMeasurement(programDir: string): Promise<void> {
  await Promise.all(
    MEASUREMENT_FILES.map((f) =>
      remapPermissions(join(programDir, f), (bits) => bits | OWNER_WRITE_BIT).catch(() => {}),
    ),
  )
}
21
+
22
+ // --- Run Directory ---
23
+
24
/**
 * Creates `<programDir>/runs/<runId>` (including missing parents) and writes a
 * `results.tsv` that contains only the tab-separated header row. Bun.write
 * overwrites an existing file by default, so a prior results.tsv at that path
 * is presumably replaced — confirm if a runId can ever be reused.
 *
 * @returns The path of the run directory.
 */
export async function initRunDir(programDir: string, runId: string): Promise<string> {
  const runDir = join(programDir, "runs", runId)
  const resultsPath = join(runDir, "results.tsv")
  const header =
    "experiment#\tcommit\tmetric_value\tsecondary_values\tstatus\tdescription\tmeasurement_duration_ms\tdiff_stats\n"

  await mkdir(runDir, { recursive: true })
  await Bun.write(resultsPath, header)

  return runDir
}