@spacek33z/autoauto 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +197 -0
- package/package.json +51 -0
- package/src/App.tsx +224 -0
- package/src/cli.ts +772 -0
- package/src/components/AgentPanel.tsx +254 -0
- package/src/components/Chat.test.tsx +71 -0
- package/src/components/Chat.tsx +308 -0
- package/src/components/CycleField.tsx +23 -0
- package/src/components/ModelPicker.tsx +97 -0
- package/src/components/PostUpdatePrompt.tsx +46 -0
- package/src/components/ResultsTable.tsx +172 -0
- package/src/components/RunCompletePrompt.tsx +90 -0
- package/src/components/RunSettingsOverlay.tsx +49 -0
- package/src/components/RunsTable.tsx +219 -0
- package/src/components/StatsHeader.tsx +100 -0
- package/src/daemon.ts +264 -0
- package/src/index.tsx +8 -0
- package/src/lib/agent/agent-provider.test.ts +133 -0
- package/src/lib/agent/claude-provider.ts +277 -0
- package/src/lib/agent/codex-provider.ts +413 -0
- package/src/lib/agent/default-providers.ts +10 -0
- package/src/lib/agent/index.ts +32 -0
- package/src/lib/agent/mock-provider.ts +61 -0
- package/src/lib/agent/opencode-provider.ts +424 -0
- package/src/lib/agent/types.ts +73 -0
- package/src/lib/auth.ts +11 -0
- package/src/lib/config.ts +152 -0
- package/src/lib/daemon-callbacks.ts +59 -0
- package/src/lib/daemon-client.ts +16 -0
- package/src/lib/daemon-lifecycle.ts +368 -0
- package/src/lib/daemon-spawn.ts +122 -0
- package/src/lib/daemon-status.ts +189 -0
- package/src/lib/daemon-watcher.ts +192 -0
- package/src/lib/experiment-loop.ts +679 -0
- package/src/lib/experiment.ts +356 -0
- package/src/lib/finalize.test.ts +143 -0
- package/src/lib/finalize.ts +511 -0
- package/src/lib/format.test.ts +32 -0
- package/src/lib/format.ts +44 -0
- package/src/lib/git.ts +176 -0
- package/src/lib/ideas-backlog.test.ts +54 -0
- package/src/lib/ideas-backlog.ts +109 -0
- package/src/lib/measure.ts +472 -0
- package/src/lib/model-options.ts +24 -0
- package/src/lib/programs.ts +247 -0
- package/src/lib/push-stream.ts +48 -0
- package/src/lib/run-context.ts +112 -0
- package/src/lib/run-setup.ts +34 -0
- package/src/lib/run.ts +383 -0
- package/src/lib/syntax-theme.ts +39 -0
- package/src/lib/system-prompts/experiment.ts +77 -0
- package/src/lib/system-prompts/finalize.ts +90 -0
- package/src/lib/system-prompts/index.ts +7 -0
- package/src/lib/system-prompts/setup.ts +516 -0
- package/src/lib/system-prompts/update.ts +188 -0
- package/src/lib/tool-events.ts +99 -0
- package/src/lib/validate-measurement.ts +326 -0
- package/src/lib/worktree.ts +40 -0
- package/src/screens/AuthErrorScreen.tsx +31 -0
- package/src/screens/ExecutionScreen.tsx +851 -0
- package/src/screens/FirstSetupScreen.tsx +168 -0
- package/src/screens/HomeScreen.tsx +406 -0
- package/src/screens/PreRunScreen.tsx +206 -0
- package/src/screens/SettingsScreen.tsx +189 -0
- package/src/screens/SetupScreen.tsx +226 -0
- package/src/tui.tsx +17 -0
- package/tsconfig.json +17 -0
|
@@ -0,0 +1,152 @@
|
|
|
1
|
+
import { mkdir } from "node:fs/promises"
|
|
2
|
+
import { join } from "node:path"
|
|
3
|
+
import { getProjectRoot, AUTOAUTO_DIR } from "./programs.ts"
|
|
4
|
+
import type { AgentProviderID } from "./agent/index.ts"
|
|
5
|
+
|
|
6
|
+
/** Reasoning-effort setting for a model slot; "max" appears only in the Opus choices (see EFFORT_CHOICES). */
export type EffortLevel = "low" | "medium" | "high" | "max"

/** One configured agent model: provider, model name, and effort level. */
export interface ModelSlot {
  // Which agent backend runs this slot ("claude" | "codex" | "opencode").
  provider: AgentProviderID
  model: string // 'sonnet' | 'opus' or full model ID
  // Only meaningful for providers where isEffortConfigurable() returns true.
  effort: EffortLevel
}

/** Per-project settings, persisted as config.json under the AUTOAUTO_DIR directory. */
export interface ProjectConfig {
  // Model slot used for the main execution work.
  executionModel: ModelSlot
  // Model slot used for supporting tasks.
  supportModel: ModelSlot
  // Whether the ideas-backlog feature is enabled for this project.
  ideasBacklogEnabled: boolean
}
|
|
19
|
+
|
|
20
|
+
// File name of the project config inside the AUTOAUTO_DIR directory.
const CONFIG_FILE = "config.json"

/** Defaults applied when config.json is missing or fields are absent (see loadProjectConfig). */
export const DEFAULT_CONFIG: ProjectConfig = {
  executionModel: { provider: "claude", model: "sonnet", effort: "high" },
  supportModel: { provider: "claude", model: "sonnet", effort: "high" },
  ideasBacklogEnabled: true,
}
|
|
27
|
+
|
|
28
|
+
/** Effort levels available for each model */
export const EFFORT_CHOICES: Record<string, EffortLevel[]> = {
  sonnet: ["low", "medium", "high"],
  // "max" is offered for Opus only.
  opus: ["low", "medium", "high", "max"],
}

// All selectable agent providers.
export const PROVIDER_CHOICES: AgentProviderID[] = ["claude", "codex", "opencode"]

// Display names for each provider.
export const PROVIDER_LABELS: Record<AgentProviderID, string> = {
  claude: "Claude",
  codex: "Codex",
  opencode: "OpenCode",
}

/** Human-readable labels */
export const MODEL_LABELS: Record<string, string> = {
  sonnet: "Sonnet",
  opus: "Opus",
}

// Display names for each effort level.
export const EFFORT_LABELS: Record<EffortLevel, string> = {
  low: "Low",
  medium: "Medium",
  high: "High",
  max: "Max",
}
|
|
54
|
+
|
|
55
|
+
/** Cycle through a readonly array by direction (-1 or +1), wrapping around */
|
|
56
|
+
export function cycleChoice<T>(choices: readonly T[], current: T, direction: -1 | 1): T {
|
|
57
|
+
const idx = choices.indexOf(current)
|
|
58
|
+
return choices[(idx + direction + choices.length) % choices.length]
|
|
59
|
+
}
|
|
60
|
+
|
|
61
|
+
// One-line description shown for each effort level.
export const EFFORT_DESCRIPTIONS: Record<EffortLevel, string> = {
  low: "Fastest, cheapest — minimal thinking",
  medium: "Balanced speed and quality",
  high: "Deep reasoning (default)",
  max: "Maximum effort (Opus only)",
}
|
|
67
|
+
|
|
68
|
+
function normalizeModelSlot(slot: Partial<ModelSlot> | undefined): ModelSlot {
|
|
69
|
+
return {
|
|
70
|
+
...DEFAULT_CONFIG.executionModel,
|
|
71
|
+
...slot,
|
|
72
|
+
}
|
|
73
|
+
}
|
|
74
|
+
|
|
75
|
+
export function isEffortConfigurable(slot: ModelSlot): boolean {
|
|
76
|
+
return slot.provider === "claude" || slot.provider === "codex"
|
|
77
|
+
}
|
|
78
|
+
|
|
79
|
+
export function getEffortChoicesForSlot(slot: ModelSlot): readonly EffortLevel[] {
|
|
80
|
+
if (!isEffortConfigurable(slot)) return []
|
|
81
|
+
return EFFORT_CHOICES[slot.model] ?? EFFORT_CHOICES.sonnet
|
|
82
|
+
}
|
|
83
|
+
|
|
84
|
+
export function formatModelSlot(slot: ModelSlot, compact = false): string {
|
|
85
|
+
if (slot.provider === "opencode") {
|
|
86
|
+
const modelID = slot.model.includes("/") ? slot.model.slice(slot.model.indexOf("/") + 1) : slot.model
|
|
87
|
+
return compact ? `oc/${modelID}` : `OpenCode / ${slot.model}`
|
|
88
|
+
}
|
|
89
|
+
if (slot.provider === "codex") {
|
|
90
|
+
return compact ? `codex/${slot.model}` : `Codex / ${slot.model}`
|
|
91
|
+
}
|
|
92
|
+
const label = MODEL_LABELS[slot.model] ?? slot.model
|
|
93
|
+
return compact ? `claude/${slot.model}` : `Claude / ${label}`
|
|
94
|
+
}
|
|
95
|
+
|
|
96
|
+
export function formatModelLabel(slot: ModelSlot): string {
|
|
97
|
+
const model = formatModelSlot(slot, true)
|
|
98
|
+
return isEffortConfigurable(slot) ? `${model}/${slot.effort}` : model
|
|
99
|
+
}
|
|
100
|
+
|
|
101
|
+
export function formatEffortSlot(slot: ModelSlot): { label: string; description: string } {
|
|
102
|
+
if (!isEffortConfigurable(slot)) {
|
|
103
|
+
return {
|
|
104
|
+
label: "OpenCode default",
|
|
105
|
+
description: "OpenCode variant config applies",
|
|
106
|
+
}
|
|
107
|
+
}
|
|
108
|
+
return {
|
|
109
|
+
label: EFFORT_LABELS[slot.effort],
|
|
110
|
+
description: EFFORT_DESCRIPTIONS[slot.effort],
|
|
111
|
+
}
|
|
112
|
+
}
|
|
113
|
+
|
|
114
|
+
export function mergeSelectedModelSlot(previous: ModelSlot, selected: ModelSlot): ModelSlot {
|
|
115
|
+
const effort = isEffortConfigurable(selected) && getEffortChoicesForSlot(selected).includes(previous.effort)
|
|
116
|
+
? previous.effort
|
|
117
|
+
: selected.effort
|
|
118
|
+
return { ...selected, effort }
|
|
119
|
+
}
|
|
120
|
+
|
|
121
|
+
export async function configExists(cwd: string): Promise<boolean> {
|
|
122
|
+
const root = await getProjectRoot(cwd)
|
|
123
|
+
return Bun.file(join(root, AUTOAUTO_DIR, CONFIG_FILE)).exists()
|
|
124
|
+
}
|
|
125
|
+
|
|
126
|
+
export async function loadProjectConfig(cwd: string): Promise<ProjectConfig> {
|
|
127
|
+
const root = await getProjectRoot(cwd)
|
|
128
|
+
const configPath = join(root, AUTOAUTO_DIR, CONFIG_FILE)
|
|
129
|
+
try {
|
|
130
|
+
const parsed = await Bun.file(configPath).json() as Partial<ProjectConfig>
|
|
131
|
+
const { executionModel, supportModel, ...rest } = parsed
|
|
132
|
+
return {
|
|
133
|
+
...DEFAULT_CONFIG,
|
|
134
|
+
...rest,
|
|
135
|
+
executionModel: normalizeModelSlot(executionModel),
|
|
136
|
+
supportModel: normalizeModelSlot(supportModel),
|
|
137
|
+
}
|
|
138
|
+
} catch {
|
|
139
|
+
return { ...DEFAULT_CONFIG }
|
|
140
|
+
}
|
|
141
|
+
}
|
|
142
|
+
|
|
143
|
+
export async function saveProjectConfig(
|
|
144
|
+
cwd: string,
|
|
145
|
+
config: ProjectConfig,
|
|
146
|
+
): Promise<void> {
|
|
147
|
+
const root = await getProjectRoot(cwd)
|
|
148
|
+
const dir = join(root, AUTOAUTO_DIR)
|
|
149
|
+
await mkdir(dir, { recursive: true })
|
|
150
|
+
const configPath = join(dir, CONFIG_FILE)
|
|
151
|
+
await Bun.write(configPath, JSON.stringify(config, null, 2) + "\n")
|
|
152
|
+
}
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
import { join } from "node:path"
|
|
2
|
+
import type { LoopCallbacks } from "./experiment-loop.ts"
|
|
3
|
+
|
|
4
|
+
/** Format experiment number as zero-padded 3-digit string: 1 → "001" */
|
|
5
|
+
export function streamLogName(experimentNumber: number): string {
|
|
6
|
+
return `stream-${String(experimentNumber).padStart(3, "0")}.log`
|
|
7
|
+
}
|
|
8
|
+
|
|
9
|
+
/**
 * FileCallbacks: a thin LoopCallbacks implementation for the daemon.
 *
 * Writes agent streaming text to per-experiment log files (stream-001.log, etc.)
 * using Bun's FileSink for buffered, high-throughput appending.
 * All other state persistence is handled by the loop itself (state.json, results.tsv).
 */
export function createFileCallbacks(runDir: string): LoopCallbacks {
  // Experiment whose log the open writer targets; 0 until onExperimentStart fires,
  // so stream events before the first start would land in stream-000.log.
  let currentExperiment = 0
  // Open FileSink for the current experiment's log, or null when closed.
  let writer: ReturnType<ReturnType<typeof Bun.file>["writer"]> | null = null

  // Returns the sink for the given experiment, closing and replacing the previous
  // sink when the experiment number changes or no sink is open.
  // NOTE(review): reopening after onExperimentEnd creates a fresh Bun.file(...).writer()
  // on the same path — confirm FileSink appends rather than truncates if late
  // onAgentStream events can arrive after the end event.
  function getWriter(experimentNumber: number) {
    if (experimentNumber !== currentExperiment || !writer) {
      writer?.end()
      currentExperiment = experimentNumber
      writer = Bun.file(join(runDir, streamLogName(experimentNumber))).writer()
    }
    return writer
  }

  return {
    onPhaseChange: () => {},
    // Opens the experiment's log and stamps the start time.
    onExperimentStart: (num: number) => {
      const w = getWriter(num)
      w.write(`[time:${Date.now()}]\n`)
      w.flush()
    },
    // Flushes and closes the log so the file is complete on disk.
    onExperimentEnd: () => {
      writer?.end()
      writer = null
    },
    onStateUpdate: () => {},
    // Appends raw agent text; flushed per event so tail readers see it promptly.
    onAgentStream: (text: string) => {
      const w = getWriter(currentExperiment)
      w.write(text)
      w.flush()
    },
    // Appends a timestamped tool-use marker line.
    onAgentToolUse: (status: string) => {
      const w = getWriter(currentExperiment)
      w.write(`\n[time:${Date.now()}]\n[tool] ${status}\n`)
      w.flush()
    },
    onError: () => {},
    onExperimentCost: () => {},
    onRebaseline: () => {},
    // Final cleanup — ensures the last log file is closed when the loop ends.
    onLoopComplete: () => {
      writer?.end()
      writer = null
    },
  }
}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
// Barrel re-export — all daemon client functionality
|
|
2
|
+
// Split into: daemon-spawn.ts, daemon-watcher.ts, daemon-status.ts
|
|
3
|
+
|
|
4
|
+
export { spawnDaemon } from "./daemon-spawn.ts"
|
|
5
|
+
export { watchRunDir, type WatchCallbacks, type DaemonWatcher } from "./daemon-watcher.ts"
|
|
6
|
+
export {
|
|
7
|
+
getDaemonStatus,
|
|
8
|
+
reconstructState,
|
|
9
|
+
sendStop,
|
|
10
|
+
sendAbort,
|
|
11
|
+
forceKillDaemon,
|
|
12
|
+
updateMaxExperiments,
|
|
13
|
+
getMaxExperiments,
|
|
14
|
+
findActiveRun,
|
|
15
|
+
type DaemonStatus,
|
|
16
|
+
} from "./daemon-status.ts"
|
|
@@ -0,0 +1,368 @@
|
|
|
1
|
+
import { rename, unlink, open } from "node:fs/promises"
|
|
2
|
+
import { join } from "node:path"
|
|
3
|
+
import { $ } from "bun"
|
|
4
|
+
import type { RunState } from "./run.ts"
|
|
5
|
+
import { writeState, readState, appendResult, readAllResults } from "./run.ts"
|
|
6
|
+
import { resetHard } from "./git.ts"
|
|
7
|
+
import type { ModelSlot } from "./config.ts"
|
|
8
|
+
import type { AgentProviderID } from "./agent/index.ts"
|
|
9
|
+
|
|
10
|
+
// --- Types ---
|
|
11
|
+
|
|
12
|
+
/** Contents of <runDir>/daemon.json — the daemon's identity and liveness record. */
export interface DaemonJson {
  run_id: string
  // PID of the daemon process that wrote this file.
  pid: number
  started_at: string
  worktree_path: string
  // Unique ID of the owning daemon; absent in the TUI's initial stub (see writeDaemonJson).
  daemon_id?: string
  // ISO timestamp refreshed by startHeartbeat; used by isLockStale to detect dead daemons.
  heartbeat_at?: string
}

/** Contents of <runDir>/run-config.json — per-run settings. */
export interface RunConfig {
  // Optional; runConfigToModelSlot defaults this to "claude" when absent.
  provider?: AgentProviderID
  model: string
  effort: string
  max_experiments: number
  ideas_backlog_enabled?: boolean
  in_place?: boolean
}

/** Contents of <runDir>/control.json — a stop/abort request for the daemon. */
export interface ControlAction {
  action: "stop" | "abort"
  timestamp: string
}

/** Contents of <programDir>/run.lock — prevents two daemons from running one program. */
export interface RunLock {
  run_id: string
  daemon_id: string
  pid: number
  worktree_path: string
  created_at: string
}
|
|
42
|
+
|
|
43
|
+
// --- Daemon Identity ---
|
|
44
|
+
|
|
45
|
+
/**
|
|
46
|
+
* Writes full daemon.json with daemon_id + heartbeat, overwriting the TUI's initial stub.
|
|
47
|
+
*/
|
|
48
|
+
export async function writeDaemonJson(
|
|
49
|
+
runDir: string,
|
|
50
|
+
runId: string,
|
|
51
|
+
worktreePath: string,
|
|
52
|
+
daemonId: string,
|
|
53
|
+
): Promise<string> {
|
|
54
|
+
const now = new Date().toISOString()
|
|
55
|
+
const existing = await readDaemonJson(runDir)
|
|
56
|
+
const json: DaemonJson = {
|
|
57
|
+
run_id: runId,
|
|
58
|
+
pid: process.pid,
|
|
59
|
+
started_at: existing?.started_at ?? now,
|
|
60
|
+
worktree_path: worktreePath,
|
|
61
|
+
daemon_id: daemonId,
|
|
62
|
+
heartbeat_at: now,
|
|
63
|
+
}
|
|
64
|
+
const tmpPath = join(runDir, "daemon.json.tmp")
|
|
65
|
+
await Bun.write(tmpPath, JSON.stringify(json, null, 2) + "\n")
|
|
66
|
+
await rename(tmpPath, join(runDir, "daemon.json"))
|
|
67
|
+
return daemonId
|
|
68
|
+
}
|
|
69
|
+
|
|
70
|
+
export async function waitForDaemonStub(runDir: string, daemonId: string, timeoutMs = 5_000): Promise<void> {
|
|
71
|
+
const deadline = Date.now() + timeoutMs
|
|
72
|
+
while (Date.now() < deadline) {
|
|
73
|
+
const daemon = await readDaemonJson(runDir)
|
|
74
|
+
if (daemon?.daemon_id === daemonId) return
|
|
75
|
+
await new Promise((resolve) => setTimeout(resolve, 50))
|
|
76
|
+
}
|
|
77
|
+
}
|
|
78
|
+
|
|
79
|
+
export async function readDaemonJson(runDir: string): Promise<DaemonJson | null> {
|
|
80
|
+
try {
|
|
81
|
+
return await Bun.file(join(runDir, "daemon.json")).json() as DaemonJson
|
|
82
|
+
} catch {
|
|
83
|
+
return null
|
|
84
|
+
}
|
|
85
|
+
}
|
|
86
|
+
|
|
87
|
+
/**
|
|
88
|
+
* Updates heartbeat_at in daemon.json. Called every 10s by the daemon.
|
|
89
|
+
*/
|
|
90
|
+
export async function updateHeartbeat(runDir: string, daemonId: string): Promise<void> {
|
|
91
|
+
const existing = await readDaemonJson(runDir)
|
|
92
|
+
if (!existing || existing.daemon_id !== daemonId) return
|
|
93
|
+
|
|
94
|
+
const updated: DaemonJson = { ...existing, heartbeat_at: new Date().toISOString() }
|
|
95
|
+
const tmpPath = join(runDir, "daemon.json.tmp")
|
|
96
|
+
await Bun.write(tmpPath, JSON.stringify(updated, null, 2) + "\n")
|
|
97
|
+
await rename(tmpPath, join(runDir, "daemon.json"))
|
|
98
|
+
}
|
|
99
|
+
|
|
100
|
+
/**
|
|
101
|
+
* Starts a heartbeat interval. Returns the interval handle for cleanup.
|
|
102
|
+
*/
|
|
103
|
+
export function startHeartbeat(runDir: string, daemonId: string, intervalMs = 10_000): ReturnType<typeof setInterval> {
|
|
104
|
+
return setInterval(() => {
|
|
105
|
+
updateHeartbeat(runDir, daemonId).catch(() => {})
|
|
106
|
+
}, intervalMs)
|
|
107
|
+
}
|
|
108
|
+
|
|
109
|
+
// --- Run Config ---
|
|
110
|
+
|
|
111
|
+
export async function readRunConfig(runDir: string): Promise<RunConfig | null> {
|
|
112
|
+
try {
|
|
113
|
+
return await Bun.file(join(runDir, "run-config.json")).json() as RunConfig
|
|
114
|
+
} catch {
|
|
115
|
+
return null
|
|
116
|
+
}
|
|
117
|
+
}
|
|
118
|
+
|
|
119
|
+
export async function writeRunConfig(runDir: string, config: RunConfig): Promise<void> {
|
|
120
|
+
await Bun.write(join(runDir, "run-config.json"), JSON.stringify(config, null, 2) + "\n")
|
|
121
|
+
}
|
|
122
|
+
|
|
123
|
+
export function runConfigToModelSlot(config: RunConfig): ModelSlot {
|
|
124
|
+
return {
|
|
125
|
+
provider: config.provider ?? "claude",
|
|
126
|
+
model: config.model,
|
|
127
|
+
effort: config.effort as ModelSlot["effort"],
|
|
128
|
+
}
|
|
129
|
+
}
|
|
130
|
+
|
|
131
|
+
// --- Locking ---
|
|
132
|
+
|
|
133
|
+
// Lock file name, created inside the program directory (see acquireLock/releaseLock).
const LOCK_FILE = "run.lock"
|
|
134
|
+
|
|
135
|
+
/**
|
|
136
|
+
* Acquires a per-program lock. Uses exclusive file creation (O_EXCL) for atomicity.
|
|
137
|
+
* Returns true if lock acquired, false if already locked by a live daemon.
|
|
138
|
+
* Handles stale lock detection via daemon_id cross-check + heartbeat staleness.
|
|
139
|
+
*/
|
|
140
|
+
export async function acquireLock(
|
|
141
|
+
programDir: string,
|
|
142
|
+
runId: string,
|
|
143
|
+
daemonId: string,
|
|
144
|
+
pid: number,
|
|
145
|
+
worktreePath: string,
|
|
146
|
+
): Promise<boolean> {
|
|
147
|
+
const lockPath = join(programDir, LOCK_FILE)
|
|
148
|
+
const lock: RunLock = {
|
|
149
|
+
run_id: runId,
|
|
150
|
+
daemon_id: daemonId,
|
|
151
|
+
pid,
|
|
152
|
+
worktree_path: worktreePath,
|
|
153
|
+
created_at: new Date().toISOString(),
|
|
154
|
+
}
|
|
155
|
+
|
|
156
|
+
try {
|
|
157
|
+
// O_EXCL: fail if file already exists
|
|
158
|
+
const fd = await open(lockPath, "wx")
|
|
159
|
+
await fd.writeFile(JSON.stringify(lock, null, 2) + "\n")
|
|
160
|
+
await fd.close()
|
|
161
|
+
return true
|
|
162
|
+
} catch (err: unknown) {
|
|
163
|
+
if ((err as NodeJS.ErrnoException).code !== "EEXIST") throw err
|
|
164
|
+
|
|
165
|
+
// Lock exists — check if stale
|
|
166
|
+
const isStale = await isLockStale(programDir)
|
|
167
|
+
if (isStale) {
|
|
168
|
+
await unlink(lockPath).catch(() => {})
|
|
169
|
+
// Retry once after removing stale lock
|
|
170
|
+
try {
|
|
171
|
+
const fd = await open(lockPath, "wx")
|
|
172
|
+
await fd.writeFile(JSON.stringify(lock, null, 2) + "\n")
|
|
173
|
+
await fd.close()
|
|
174
|
+
return true
|
|
175
|
+
} catch {
|
|
176
|
+
return false
|
|
177
|
+
}
|
|
178
|
+
}
|
|
179
|
+
|
|
180
|
+
return false
|
|
181
|
+
}
|
|
182
|
+
}
|
|
183
|
+
|
|
184
|
+
export async function updateLockPid(programDir: string, runId: string, daemonId: string, pid: number): Promise<void> {
|
|
185
|
+
const lockPath = join(programDir, LOCK_FILE)
|
|
186
|
+
const lock = await readLock(programDir)
|
|
187
|
+
if (!lock || lock.run_id !== runId || lock.daemon_id !== daemonId) return
|
|
188
|
+
const updated: RunLock = { ...lock, pid }
|
|
189
|
+
const tmpPath = `${lockPath}.tmp`
|
|
190
|
+
await Bun.write(tmpPath, JSON.stringify(updated, null, 2) + "\n")
|
|
191
|
+
await rename(tmpPath, lockPath)
|
|
192
|
+
}
|
|
193
|
+
|
|
194
|
+
export async function releaseLock(programDir: string): Promise<void> {
|
|
195
|
+
try {
|
|
196
|
+
await unlink(join(programDir, LOCK_FILE))
|
|
197
|
+
} catch {
|
|
198
|
+
// Lock may already be removed
|
|
199
|
+
}
|
|
200
|
+
}
|
|
201
|
+
|
|
202
|
+
export async function readLock(programDir: string): Promise<RunLock | null> {
|
|
203
|
+
try {
|
|
204
|
+
return await Bun.file(join(programDir, LOCK_FILE)).json() as RunLock
|
|
205
|
+
} catch {
|
|
206
|
+
return null
|
|
207
|
+
}
|
|
208
|
+
}
|
|
209
|
+
|
|
210
|
+
/**
|
|
211
|
+
* Checks if an existing lock is stale. A lock is stale if:
|
|
212
|
+
* - daemon.json doesn't exist in the run dir (daemon never started)
|
|
213
|
+
* - daemon_id in lock doesn't match daemon_id in daemon.json
|
|
214
|
+
* - heartbeat_at is older than 30s (daemon is dead regardless of PID)
|
|
215
|
+
*/
|
|
216
|
+
async function isLockStale(programDir: string): Promise<boolean> {
|
|
217
|
+
const lock = await readLock(programDir)
|
|
218
|
+
if (!lock) return true
|
|
219
|
+
|
|
220
|
+
// Find the run dir from the lock's run_id
|
|
221
|
+
const runDir = join(programDir, "runs", lock.run_id)
|
|
222
|
+
const daemon = await readDaemonJson(runDir)
|
|
223
|
+
const lockAge = Date.now() - new Date(lock.created_at).getTime()
|
|
224
|
+
|
|
225
|
+
if (!daemon) return lockAge > 30_000 // allow daemon startup handshake
|
|
226
|
+
if (daemon.daemon_id !== lock.daemon_id) return true // different daemon
|
|
227
|
+
|
|
228
|
+
if (!daemon.heartbeat_at) return lockAge > 30_000
|
|
229
|
+
|
|
230
|
+
if (daemon.heartbeat_at) {
|
|
231
|
+
const heartbeatAge = Date.now() - new Date(daemon.heartbeat_at).getTime()
|
|
232
|
+
if (heartbeatAge > 30_000) return true // heartbeat stale — daemon is dead
|
|
233
|
+
}
|
|
234
|
+
|
|
235
|
+
return false
|
|
236
|
+
}
|
|
237
|
+
|
|
238
|
+
// --- Control ---
|
|
239
|
+
|
|
240
|
+
export async function readControl(runDir: string): Promise<ControlAction | null> {
|
|
241
|
+
try {
|
|
242
|
+
return await Bun.file(join(runDir, "control.json")).json() as ControlAction
|
|
243
|
+
} catch {
|
|
244
|
+
return null
|
|
245
|
+
}
|
|
246
|
+
}
|
|
247
|
+
|
|
248
|
+
export async function writeControl(runDir: string, action: ControlAction): Promise<void> {
|
|
249
|
+
await Bun.write(join(runDir, "control.json"), JSON.stringify(action, null, 2) + "\n")
|
|
250
|
+
}
|
|
251
|
+
|
|
252
|
+
// --- Crash Recovery ---
|
|
253
|
+
|
|
254
|
+
/**
 * Handles crash recovery on daemon startup. Reads state.json and cleans up
 * any in-flight operation by restoring the worktree to a known-good state.
 *
 * Returns the recovered state, or null if no recovery was needed.
 */
export async function recoverFromCrash(
  runDir: string,
  worktreePath: string,
): Promise<RunState | null> {
  let state: RunState
  try {
    state = await readState(runDir)
  } catch {
    return null // no state.json — first run, no recovery needed
  }

  const phase = state.phase

  // Terminal states — no recovery needed
  if (phase === "complete" || phase === "crashed") return null

  // Clean states — can resume. "stopping" means the previous daemon accepted a stop,
  // so do not silently restart work.
  if (phase === "idle" || phase === "kept") return state
  if (phase === "stopping") {
    // Finish the stop the previous daemon had accepted but never completed.
    const stoppedState: RunState = {
      ...state,
      phase: "complete",
      termination_reason: "stopped",
      updated_at: new Date().toISOString(),
    }
    await writeState(runDir, stoppedState)
    return null
  }

  // In-flight states — need cleanup
  if (phase === "baseline") {
    // Baseline was interrupted. Nothing to recover.
    const crashedState: RunState = {
      ...state,
      phase: "crashed",
      error: "baseline measurement interrupted by crash",
      error_phase: "baseline",
      updated_at: new Date().toISOString(),
    }
    await writeState(runDir, crashedState)
    return null
  }

  // agent_running, measuring, reverting — restore worktree to last known good
  if (state.last_known_good_sha) {
    await resetHard(worktreePath, state.last_known_good_sha)
  }

  // Log a crash row for the interrupted experiment — but only once, in case the
  // previous daemon already recorded it before dying.
  const results = await readAllResults(runDir).catch(() => [])
  const alreadyLogged = results.some((r) => r.experiment_number === state.experiment_number && r.status === "crash")
  if (!alreadyLogged && state.experiment_number > 0) {
    await appendResult(runDir, {
      experiment_number: state.experiment_number,
      // Prefer the candidate commit; fall back to last known good, then empty.
      commit: (state.candidate_sha ?? state.last_known_good_sha ?? "").slice(0, 7),
      metric_value: state.current_baseline,
      secondary_values: "",
      status: "crash",
      description: `daemon recovered interrupted ${phase}`,
      measurement_duration_ms: 0,
    })
  }

  // Return to idle with the candidate cleared; total_crashes is only bumped when
  // this recovery produced a new crash row (mirrors the alreadyLogged check above).
  const recoveredState: RunState = {
    ...state,
    phase: "idle",
    candidate_sha: null,
    total_crashes: alreadyLogged ? state.total_crashes : state.total_crashes + 1,
    error: `recovered interrupted ${phase}`,
    error_phase: phase,
    updated_at: new Date().toISOString(),
  }
  await writeState(runDir, recoveredState)

  return recoveredState
}
|
|
336
|
+
|
|
337
|
+
// --- Child Process Cleanup ---
|
|
338
|
+
|
|
339
|
+
export async function killChildProcessTree(parentPid: number, signal: NodeJS.Signals = "SIGTERM"): Promise<void> {
|
|
340
|
+
const stdout = await $`ps -axo pid=,ppid=`.nothrow().text()
|
|
341
|
+
const childrenByParent = new Map<number, number[]>()
|
|
342
|
+
|
|
343
|
+
for (const line of stdout.split("\n")) {
|
|
344
|
+
const [pidRaw, ppidRaw] = line.trim().split(/\s+/)
|
|
345
|
+
const pid = Number(pidRaw)
|
|
346
|
+
const ppid = Number(ppidRaw)
|
|
347
|
+
if (!Number.isFinite(pid) || !Number.isFinite(ppid)) continue
|
|
348
|
+
const children = childrenByParent.get(ppid) ?? []
|
|
349
|
+
children.push(pid)
|
|
350
|
+
childrenByParent.set(ppid, children)
|
|
351
|
+
}
|
|
352
|
+
|
|
353
|
+
const children: number[] = []
|
|
354
|
+
const stack = [...(childrenByParent.get(parentPid) ?? [])]
|
|
355
|
+
while (stack.length > 0) {
|
|
356
|
+
const pid = stack.pop()!
|
|
357
|
+
children.push(pid)
|
|
358
|
+
stack.push(...(childrenByParent.get(pid) ?? []))
|
|
359
|
+
}
|
|
360
|
+
|
|
361
|
+
for (const pid of children.toReversed()) {
|
|
362
|
+
try {
|
|
363
|
+
process.kill(pid, signal)
|
|
364
|
+
} catch {
|
|
365
|
+
// Already exited.
|
|
366
|
+
}
|
|
367
|
+
}
|
|
368
|
+
}
|