@spacek33z/autoauto 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. package/README.md +197 -0
  2. package/package.json +51 -0
  3. package/src/App.tsx +224 -0
  4. package/src/cli.ts +772 -0
  5. package/src/components/AgentPanel.tsx +254 -0
  6. package/src/components/Chat.test.tsx +71 -0
  7. package/src/components/Chat.tsx +308 -0
  8. package/src/components/CycleField.tsx +23 -0
  9. package/src/components/ModelPicker.tsx +97 -0
  10. package/src/components/PostUpdatePrompt.tsx +46 -0
  11. package/src/components/ResultsTable.tsx +172 -0
  12. package/src/components/RunCompletePrompt.tsx +90 -0
  13. package/src/components/RunSettingsOverlay.tsx +49 -0
  14. package/src/components/RunsTable.tsx +219 -0
  15. package/src/components/StatsHeader.tsx +100 -0
  16. package/src/daemon.ts +264 -0
  17. package/src/index.tsx +8 -0
  18. package/src/lib/agent/agent-provider.test.ts +133 -0
  19. package/src/lib/agent/claude-provider.ts +277 -0
  20. package/src/lib/agent/codex-provider.ts +413 -0
  21. package/src/lib/agent/default-providers.ts +10 -0
  22. package/src/lib/agent/index.ts +32 -0
  23. package/src/lib/agent/mock-provider.ts +61 -0
  24. package/src/lib/agent/opencode-provider.ts +424 -0
  25. package/src/lib/agent/types.ts +73 -0
  26. package/src/lib/auth.ts +11 -0
  27. package/src/lib/config.ts +152 -0
  28. package/src/lib/daemon-callbacks.ts +59 -0
  29. package/src/lib/daemon-client.ts +16 -0
  30. package/src/lib/daemon-lifecycle.ts +368 -0
  31. package/src/lib/daemon-spawn.ts +122 -0
  32. package/src/lib/daemon-status.ts +189 -0
  33. package/src/lib/daemon-watcher.ts +192 -0
  34. package/src/lib/experiment-loop.ts +679 -0
  35. package/src/lib/experiment.ts +356 -0
  36. package/src/lib/finalize.test.ts +143 -0
  37. package/src/lib/finalize.ts +511 -0
  38. package/src/lib/format.test.ts +32 -0
  39. package/src/lib/format.ts +44 -0
  40. package/src/lib/git.ts +176 -0
  41. package/src/lib/ideas-backlog.test.ts +54 -0
  42. package/src/lib/ideas-backlog.ts +109 -0
  43. package/src/lib/measure.ts +472 -0
  44. package/src/lib/model-options.ts +24 -0
  45. package/src/lib/programs.ts +247 -0
  46. package/src/lib/push-stream.ts +48 -0
  47. package/src/lib/run-context.ts +112 -0
  48. package/src/lib/run-setup.ts +34 -0
  49. package/src/lib/run.ts +383 -0
  50. package/src/lib/syntax-theme.ts +39 -0
  51. package/src/lib/system-prompts/experiment.ts +77 -0
  52. package/src/lib/system-prompts/finalize.ts +90 -0
  53. package/src/lib/system-prompts/index.ts +7 -0
  54. package/src/lib/system-prompts/setup.ts +516 -0
  55. package/src/lib/system-prompts/update.ts +188 -0
  56. package/src/lib/tool-events.ts +99 -0
  57. package/src/lib/validate-measurement.ts +326 -0
  58. package/src/lib/worktree.ts +40 -0
  59. package/src/screens/AuthErrorScreen.tsx +31 -0
  60. package/src/screens/ExecutionScreen.tsx +851 -0
  61. package/src/screens/FirstSetupScreen.tsx +168 -0
  62. package/src/screens/HomeScreen.tsx +406 -0
  63. package/src/screens/PreRunScreen.tsx +206 -0
  64. package/src/screens/SettingsScreen.tsx +189 -0
  65. package/src/screens/SetupScreen.tsx +226 -0
  66. package/src/tui.tsx +17 -0
  67. package/tsconfig.json +17 -0
@@ -0,0 +1,472 @@
1
+ import { spawn, type ChildProcess } from "node:child_process"
2
+ import { join } from "node:path"
3
+ import { unlink } from "node:fs/promises"
4
+ import type { ProgramConfig } from "./programs.ts"
5
+
6
+ // --- Helpers ---
7
+
8
/**
 * Sends `signal` to a detached child's entire process group, falling back to
 * signalling only the child itself when the group cannot be signalled.
 *
 * The call is a no-op when the child never received a pid (spawn failed).
 *
 * Note: `proc.killed` is deliberately NOT consulted. It only records that an
 * earlier `proc.kill()` call delivered a signal, not that the child has exited,
 * so guarding on it would silently drop a follow-up signal (e.g. SIGKILL after
 * a SIGTERM that the child ignored).
 *
 * @param proc   Child spawned with `detached: true` (so its pid is also its process group id).
 * @param signal Signal to deliver; defaults to SIGTERM.
 */
function killProcessGroup(proc: ChildProcess, signal: NodeJS.Signals = "SIGTERM"): void {
  if (!proc.pid) return
  try {
    // A negative pid targets the whole process group led by the child.
    process.kill(-proc.pid, signal)
  } catch {
    // Group signalling failed (e.g. the group does not exist) — target the child directly.
    proc.kill(signal)
  }
}
17
+
18
+ // --- Types ---
19
+
20
/** Outcome of a single measure.sh invocation, discriminated on `success`. */
export type MeasurementResult =
  | {
      /** The script exited with code 0 and printed a JSON object on stdout. */
      success: true
      /** The parsed JSON object; field values are not validated at this stage. */
      output: Record<string, unknown>
      /** Wall-clock duration of the run in milliseconds. */
      duration_ms: number
      /** Trimmed contents of the diagnostics sidecar file, when one was written. */
      diagnostics?: string
    }
  | {
      /** The run was aborted, timed out, exited non-zero, or produced unusable stdout. */
      success: false
      /** Human-readable failure description. */
      error: string
      /** Wall-clock duration of the run in milliseconds. */
      duration_ms: number
    }
23
+
24
/** Aggregated outcome of a full series of repeated measurements. */
export interface MeasurementSeriesResult {
  /** True only when every configured repeat ran and produced valid output. */
  success: boolean
  /** Median of the primary metric across runs (0 when the series failed). */
  median_metric: number
  /** Per-field medians of the quality gate values (empty when the series failed). */
  median_quality_gates: Record<string, number>
  /** Per-field medians of the secondary metrics (empty when the series failed). */
  median_secondary_metrics: Record<string, number>
  /** Whether the median gate values satisfied every configured threshold. */
  quality_gates_passed: boolean
  /** One message per violated gate threshold. */
  gate_violations: string[]
  /** Raw result of each individual run, in execution order. */
  individual_runs: MeasurementResult[]
  /** Total wall-clock duration of the series in milliseconds. */
  duration_ms: number
  /** Why the series failed; set only on failure. */
  failure_reason?: string
  /** Diagnostics captured from the last run of a successful series. */
  diagnostics?: string
}
36
+
37
+ // --- Helpers ---
38
+
39
/**
 * Returns the median of `values` without mutating the input.
 * For an even count the two middle values are averaged; an empty input yields NaN.
 */
function median(values: number[]): number {
  const ascending = values.slice().sort((left, right) => left - right)
  const count = ascending.length
  const upperMiddle = Math.floor(count / 2)
  if (count % 2 !== 0) {
    return ascending[upperMiddle]
  }
  return (ascending[upperMiddle - 1] + ascending[upperMiddle]) / 2
}
44
+
45
/**
 * Appends each finite numeric value found in `output` under one of `fields`
 * to the matching list in `target`, creating the list on first use.
 * Missing, non-numeric, NaN and infinite values are skipped.
 */
function collectFiniteValues(
  output: Record<string, unknown>,
  fields: string[],
  target: Record<string, number[]>,
): void {
  fields.forEach((name) => {
    const candidate = output[name]
    if (typeof candidate !== "number" || !Number.isFinite(candidate)) return
    const bucket = target[name] || (target[name] = [])
    bucket.push(candidate)
  })
}
58
+
59
/** Maps every field in `fieldValues` to the median of its collected values. */
function computeMedians(fieldValues: Record<string, number[]>): Record<string, number> {
  const medians: Record<string, number> = {}
  Object.keys(fieldValues).forEach((name) => {
    medians[name] = median(fieldValues[name])
  })
  return medians
}
66
+
67
+ // --- Diagnostics Sidecar ---
68
+
69
/** Name of the sidecar file, looked up in the measurement working directory. */
const DIAGNOSTICS_FILENAME = ".autoauto-diagnostics"

/**
 * Reads the diagnostics sidecar from `cwd` and then deletes it.
 *
 * @returns The trimmed file contents, or `undefined` when the file cannot be
 *          read or is empty after trimming. Never rejects.
 */
async function readAndCleanDiagnostics(cwd: string): Promise<string | undefined> {
  const sidecarPath = join(cwd, DIAGNOSTICS_FILENAME)

  let raw: string
  try {
    raw = await Bun.file(sidecarPath).text()
  } catch {
    // Unreadable (typically: not written) — nothing to report or remove.
    return undefined
  }

  // Best-effort removal; a failed delete must not fail the measurement.
  await unlink(sidecarPath).catch(() => {})

  const trimmed = raw.trim()
  return trimmed ? trimmed : undefined
}
81
+
82
+ // --- Measurement Execution ---
83
+
84
/**
 * Runs measure.sh once and returns parsed output.
 * Uses Node spawn with timeout (matching validate-measurement.ts pattern).
 *
 * The script runs under `bash` in `cwd`, inherits the current environment and
 * is spawned detached so that it and its descendants can be signalled as one
 * process group. On timeout or abort the group receives SIGTERM, followed by
 * SIGKILL if it has not closed after a short grace period — otherwise a script
 * that traps or ignores SIGTERM would keep this promise pending forever.
 *
 * @param measureShPath Path to the measurement script.
 * @param cwd           Working directory for the script (also where the diagnostics sidecar is read).
 * @param timeoutMs     Time limit in milliseconds; defaults to 60 000.
 * @param signal        Optional abort signal; aborting terminates the script.
 * @returns A success result with the parsed JSON object (plus diagnostics when
 *          present), or a failure result when the run was aborted, timed out,
 *          exited non-zero, or did not print a JSON object. Never rejects.
 */
export async function runMeasurement(
  measureShPath: string,
  cwd: string,
  timeoutMs?: number,
  signal?: AbortSignal,
): Promise<MeasurementResult> {
  if (signal?.aborted) {
    return { success: false, error: "aborted", duration_ms: 0 }
  }

  // How long the process group gets to exit after SIGTERM before SIGKILL is sent.
  const killGraceMs = 5_000
  const timeoutLimit = timeoutMs ?? 60_000
  const start = performance.now()

  return new Promise((resolve) => {
    const proc = spawn("bash", [measureShPath], {
      cwd,
      env: { ...process.env },
      stdio: ["ignore", "pipe", "pipe"],
      detached: true,
    })

    let timedOut = false
    let forceKillTimer: ReturnType<typeof setTimeout> | undefined

    // SIGTERM first; escalate to SIGKILL once if the group is still alive after the grace period.
    const terminate = () => {
      killProcessGroup(proc)
      if (forceKillTimer === undefined) {
        forceKillTimer = setTimeout(() => killProcessGroup(proc, "SIGKILL"), killGraceMs)
      }
    }

    const timeout = setTimeout(() => {
      timedOut = true
      terminate()
    }, timeoutLimit)

    const onAbort = () => {
      terminate()
    }
    signal?.addEventListener("abort", onAbort, { once: true })

    const releaseWatchers = () => {
      clearTimeout(timeout)
      clearTimeout(forceKillTimer)
      signal?.removeEventListener("abort", onAbort)
    }

    proc.stdout!.setEncoding("utf-8")
    proc.stderr!.setEncoding("utf-8")

    let stdout = ""
    let stderr = ""

    proc.stdout!.on("data", (chunk: string) => {
      stdout += chunk
    })
    proc.stderr!.on("data", (chunk: string) => {
      stderr += chunk
    })

    proc.on("close", (exitCode) => {
      releaseWatchers()
      const duration_ms = Math.round(performance.now() - start)

      // Failure paths: fire-and-forget cleanup so the sidecar doesn't leak
      // as an untracked file, but resolve immediately without blocking on I/O.
      // (readAndCleanDiagnostics never rejects, so `void` is safe here.)
      const fail = (error: string): void => {
        void readAndCleanDiagnostics(cwd)
        resolve({ success: false, error, duration_ms })
      }

      if (signal?.aborted) {
        fail("aborted")
        return
      }

      if (timedOut) {
        fail(`Measurement timed out after ${timeoutLimit}ms`)
        return
      }

      if (exitCode !== 0) {
        fail(`exit code ${exitCode}${stderr ? `: ${stderr.trim().slice(0, 200)}` : ""}`)
        return
      }

      let parsed: unknown
      try {
        parsed = JSON.parse(stdout.trim())
      } catch {
        fail(`invalid JSON on stdout: ${stdout.trim().slice(0, 200)}`)
        return
      }

      if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) {
        fail(`stdout must be a JSON object, got ${Array.isArray(parsed) ? "array" : typeof parsed}`)
        return
      }

      // Success path: await diagnostics before resolving
      const output = parsed as Record<string, unknown>
      readAndCleanDiagnostics(cwd).then(
        (diagnostics) => {
          resolve({ success: true, output, duration_ms, diagnostics })
        },
        () => {
          resolve({ success: true, output, duration_ms })
        },
      )
    })

    // Emitted e.g. when the process could not be spawned.
    proc.on("error", (err) => {
      releaseWatchers()
      const duration_ms = Math.round(performance.now() - start)
      resolve({ success: false, error: err.message, duration_ms })
    })
  })
}
199
+
200
+ // --- Build Step ---
201
+
202
/** Outcome of the optional build step. */
export interface BuildResult {
  /** True when build.sh exited with code 0, or when there was no build.sh to run. */
  success: boolean
  /** Failure description; set only when `success` is false. */
  error?: string
  /** Wall-clock duration of the build in milliseconds (0 when nothing was run). */
  duration_ms: number
}
207
+
208
/**
 * Runs build.sh once if it exists. Returns success immediately if the file is missing.
 *
 * The script runs under `bash` in `cwd`, inherits the current environment and
 * is spawned detached so the whole process group can be signalled. The build
 * is limited to 120 000 ms; on timeout or abort the group receives SIGTERM,
 * followed by SIGKILL if it has not closed after a short grace period.
 *
 * @param buildShPath Path to the build script.
 * @param cwd         Working directory for the script.
 * @param signal      Optional abort signal; aborting terminates the build.
 * @returns The build outcome; failures carry the exit code and up to 200
 *          characters of stderr. Never rejects on script failure.
 */
export async function runBuild(
  buildShPath: string,
  cwd: string,
  signal?: AbortSignal,
): Promise<BuildResult> {
  if (!await Bun.file(buildShPath).exists()) {
    return { success: true, duration_ms: 0 }
  }

  // An already-aborted signal never fires "abort" again, so without this check
  // the build would run to completion before being reported as aborted.
  if (signal?.aborted) {
    return { success: false, error: "aborted", duration_ms: 0 }
  }

  // How long the process group gets to exit after SIGTERM before SIGKILL is sent.
  const killGraceMs = 5_000
  const start = performance.now()

  return new Promise((resolve) => {
    const proc = spawn("bash", [buildShPath], {
      cwd,
      env: { ...process.env },
      // stdout is not used, so discard it instead of piping it: an unread pipe
      // fills up and blocks a chatty build until the timeout kills it.
      stdio: ["ignore", "ignore", "pipe"],
      detached: true,
    })

    let timedOut = false
    let forceKillTimer: ReturnType<typeof setTimeout> | undefined

    // SIGTERM first; escalate to SIGKILL once if the group is still alive after the grace period.
    const terminate = () => {
      killProcessGroup(proc)
      if (forceKillTimer === undefined) {
        forceKillTimer = setTimeout(() => killProcessGroup(proc, "SIGKILL"), killGraceMs)
      }
    }

    const timeout = setTimeout(() => {
      timedOut = true
      terminate()
    }, 120_000)

    const onAbort = () => {
      terminate()
    }
    signal?.addEventListener("abort", onAbort, { once: true })

    const releaseWatchers = () => {
      clearTimeout(timeout)
      clearTimeout(forceKillTimer)
      signal?.removeEventListener("abort", onAbort)
    }

    let stderr = ""
    proc.stderr!.setEncoding("utf-8")
    proc.stderr!.on("data", (chunk: string) => {
      stderr += chunk
    })

    proc.on("close", (exitCode) => {
      releaseWatchers()
      const duration_ms = Math.round(performance.now() - start)

      if (signal?.aborted) {
        resolve({ success: false, error: "aborted", duration_ms })
        return
      }

      if (timedOut) {
        resolve({ success: false, error: "Build timed out after 120000ms", duration_ms })
        return
      }

      if (exitCode !== 0) {
        resolve({
          success: false,
          error: `build.sh exit code ${exitCode}${stderr ? `: ${stderr.trim().slice(0, 200)}` : ""}`,
          duration_ms,
        })
        return
      }

      resolve({ success: true, duration_ms })
    })

    // Emitted e.g. when the process could not be spawned.
    proc.on("error", (err) => {
      releaseWatchers()
      const duration_ms = Math.round(performance.now() - start)
      resolve({ success: false, error: err.message, duration_ms })
    })
  })
}
280
+
281
+ // --- Validation ---
282
+
283
/**
 * Checks that each of `fields` is present in `output` as a finite number.
 *
 * @param label Prefix used in the error messages to say what kind of field failed.
 * @returns One message per missing or non-finite field, in `fields` order.
 */
function validateFiniteFields(output: Record<string, unknown>, fields: string[], label: string): string[] {
  return fields.flatMap((name) => {
    const candidate = output[name]
    if (candidate === undefined) {
      return [`${label} "${name}" missing from output`]
    }
    const isFiniteNumber = typeof candidate === "number" && Number.isFinite(candidate)
    return isFiniteNumber ? [] : [`${label} "${name}" is not a finite number: ${candidate}`]
  })
}
295
+
296
/**
 * Validates that a measurement output carries the primary metric, every
 * quality gate field and every secondary metric field as finite numbers.
 *
 * @returns `valid` is true when no problems were found; `errors` lists the
 *          problems in that order (metric, quality gates, secondary metrics).
 */
export function validateMeasurementOutput(
  output: Record<string, unknown>,
  config: ProgramConfig,
): { valid: boolean; errors: string[] } {
  const gateFields = Object.keys(config.quality_gates)
  const secondaryFields = Object.keys(config.secondary_metrics ?? {})

  const errors = validateFiniteFields(output, [config.metric_field], "metric_field").concat(
    validateFiniteFields(output, gateFields, "quality gate field"),
    validateFiniteFields(output, secondaryFields, "secondary metric field"),
  )

  return { valid: errors.length === 0, errors }
}
309
+
310
/**
 * Checks quality gate thresholds (separate from field existence validation).
 * Gates whose field is absent from `output` are skipped rather than reported.
 *
 * @returns `passed` is true when no threshold was violated; `violations`
 *          holds one message per exceeded `max` or undercut `min`.
 */
export function checkQualityGates(
  output: Record<string, number>,
  config: ProgramConfig,
): { passed: boolean; violations: string[] } {
  const violations = Object.entries(config.quality_gates).flatMap(([name, limits]) => {
    const observed = output[name]
    const found: string[] = []
    if (observed !== undefined) {
      if (limits.max !== undefined && observed > limits.max) {
        found.push(`${name}=${observed} exceeds max ${limits.max}`)
      }
      if (limits.min !== undefined && observed < limits.min) {
        found.push(`${name}=${observed} below min ${limits.min}`)
      }
    }
    return found
  })

  return { passed: violations.length === 0, violations }
}
330
+
331
+ // --- Measurement Series ---
332
+
333
/**
 * Runs measure.sh N times (config.repeats), computes median, validates all outputs.
 * Every configured repeat must succeed; partial measurement failures invalidate the series.
 *
 * When `buildShPath` is given, the build step runs once first and a failed
 * build fails the series before any measurement is taken. Measurements run
 * sequentially and the loop stops early once `signal` is aborted.
 *
 * A series that ran no measurements at all (e.g. `config.repeats` is 0) is
 * reported as a failure rather than as an empty "success".
 *
 * @param measureShPath Path to the measurement script.
 * @param cwd           Working directory for the build and measurement scripts.
 * @param config        Program configuration (repeats, metric field, gates, secondary metrics).
 * @param signal        Optional abort signal.
 * @param buildShPath   Optional path to a build script to run once before measuring.
 * @returns The aggregated series result; on failure the medians are zero/empty
 *          and `failure_reason` explains why.
 */
export async function runMeasurementSeries(
  measureShPath: string,
  cwd: string,
  config: ProgramConfig,
  signal?: AbortSignal,
  buildShPath?: string,
): Promise<MeasurementSeriesResult> {
  const totalStart = performance.now()
  const elapsedMs = (): number => Math.round(performance.now() - totalStart)

  // Single source for the failed-series shape shared by every failure path.
  const failedSeries = (
    failure_reason: string,
    individual_runs: MeasurementResult[],
    duration_ms: number,
  ): MeasurementSeriesResult => ({
    success: false,
    median_metric: 0,
    median_quality_gates: {},
    median_secondary_metrics: {},
    quality_gates_passed: false,
    gate_violations: [],
    individual_runs,
    duration_ms,
    failure_reason,
  })

  // Run build step once before measuring
  if (buildShPath) {
    const buildResult = await runBuild(buildShPath, cwd, signal)
    if (!buildResult.success) {
      return failedSeries(buildResult.error ?? "build failed", [], elapsedMs())
    }
  }

  const gateFields = Object.keys(config.quality_gates)
  const secondaryFields = Object.keys(config.secondary_metrics ?? {})

  const runs: MeasurementResult[] = []
  const runErrors: string[] = []
  const validationErrors: string[] = []
  const validMetrics: number[] = []
  const validGateValues: Record<string, number[]> = {}
  const validSecondaryValues: Record<string, number[]> = {}

  for (let i = 0; i < config.repeats; i++) {
    if (signal?.aborted) break
    // eslint-disable-next-line no-await-in-loop -- measurements must run sequentially
    const result = await runMeasurement(measureShPath, cwd, undefined, signal)
    runs.push(result)

    if (!result.success) {
      runErrors.push(result.error)
      continue
    }

    const validation = validateMeasurementOutput(result.output, config)
    if (!validation.valid) {
      validationErrors.push(...validation.errors)
      continue
    }

    validMetrics.push(result.output[config.metric_field] as number)
    collectFiniteValues(result.output, gateFields, validGateValues)
    collectFiniteValues(result.output, secondaryFields, validSecondaryValues)
  }

  const duration_ms = elapsedMs()

  if (signal?.aborted) {
    return failedSeries("aborted", runs, duration_ms)
  }

  const incomplete =
    runs.length === 0 || // nothing was measured, so there is nothing to take a median of
    runs.length !== config.repeats ||
    validMetrics.length !== config.repeats ||
    validationErrors.length > 0

  if (incomplete) {
    // Run failures first, then validation problems — each in execution order.
    const reasons = [...runErrors, ...validationErrors]
    return failedSeries(
      reasons.length > 0 ? reasons.join("; ") : "measurement series incomplete",
      runs,
      duration_ms,
    )
  }

  const medianGates = computeMedians(validGateValues)
  const gateCheck = checkQualityGates(medianGates, config)

  // All runs succeeded at this point (partial failures exit above),
  // so the last run's diagnostics are the most recent.
  const lastRun = runs.at(-1)

  return {
    success: true,
    median_metric: median(validMetrics),
    median_quality_gates: medianGates,
    median_secondary_metrics: computeMedians(validSecondaryValues),
    quality_gates_passed: gateCheck.passed,
    gate_violations: gateCheck.violations,
    individual_runs: runs,
    duration_ms,
    diagnostics: lastRun?.success ? lastRun.diagnostics : undefined,
  }
}
451
+
452
+ // --- Comparison ---
453
+
454
/**
 * Compares measured metric against baseline using noise threshold.
 * noise_threshold is a decimal fraction (e.g. 0.02 for 2%).
 *
 * The change is expressed relative to the magnitude of the baseline, so the
 * verdict keeps its meaning for negative baselines (dividing by a signed
 * baseline would flip improvements into regressions). With a zero baseline no
 * relative change can be formed, so any change in the good direction is "keep",
 * any change in the bad direction is "regressed", and no change is "noise".
 *
 * @param baseline       Reference metric value.
 * @param measured       Newly measured metric value.
 * @param noiseThreshold Relative change (fraction) that must be exceeded to count as a real change.
 * @param direction      Whether "lower" or "higher" values are better.
 * @returns "keep" for an improvement beyond the threshold, "regressed" for a
 *          worsening beyond it, otherwise "noise".
 */
export function compareMetric(
  baseline: number,
  measured: number,
  noiseThreshold: number,
  direction: "lower" | "higher",
): "keep" | "regressed" | "noise" {
  // Positive = improvement in the configured direction.
  const improvement = direction === "lower" ? baseline - measured : measured - baseline

  if (baseline === 0) {
    if (improvement > 0) return "keep"
    if (improvement < 0) return "regressed"
    return "noise"
  }

  const relativeChange = improvement / Math.abs(baseline)

  if (relativeChange > noiseThreshold) return "keep"
  if (relativeChange < -noiseThreshold) return "regressed"
  return "noise"
}
@@ -0,0 +1,24 @@
1
+ import { getProvider, type AgentModelOption, type AgentProviderID } from "./agent/index.ts"
2
+ import type { ModelSlot } from "./config.ts"
3
+
4
/** A provider-listed model, extended with the slot value the picker selects for it. */
export interface ModelPickerOption extends AgentModelOption {
  /** Model slot applied when this option is chosen. */
  value: ModelSlot
}
7
+
8
/** Effort level assigned to every option produced for the picker. */
const DEFAULT_EFFORT = "high"

/**
 * Loads the models offered by a provider and shapes them into picker options.
 *
 * @param providerId   Provider whose models should be listed.
 * @param cwd          Working directory passed through to the provider.
 * @param forceRefresh Passed through to the provider's `listModels`; defaults to false.
 * @returns One option per listed model, each carrying a slot value with the
 *          default effort; an empty list when the provider has no `listModels`
 *          or it returns nothing.
 */
export async function loadModelPickerOptions(
  providerId: AgentProviderID,
  cwd: string,
  forceRefresh = false,
): Promise<ModelPickerOption[]> {
  const provider = getProvider(providerId)
  const listed = await provider.listModels?.(cwd, forceRefresh)
  const models = listed ?? []

  return models.map((model) => {
    const value: ModelSlot = { provider: model.provider, model: model.model, effort: DEFAULT_EFFORT }
    return { ...model, value }
  })
}
21
+
22
/**
 * Asks a provider for its default model.
 *
 * @returns The provider's answer, or `null` when the provider has no
 *          `getDefaultModel` or it yields null/undefined.
 */
export async function getDefaultModel(providerId: AgentProviderID, cwd: string): Promise<string | null> {
  const provider = getProvider(providerId)
  const model = await provider.getDefaultModel?.(cwd)
  return model ?? null
}