@spacek33z/autoauto 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +197 -0
- package/package.json +51 -0
- package/src/App.tsx +224 -0
- package/src/cli.ts +772 -0
- package/src/components/AgentPanel.tsx +254 -0
- package/src/components/Chat.test.tsx +71 -0
- package/src/components/Chat.tsx +308 -0
- package/src/components/CycleField.tsx +23 -0
- package/src/components/ModelPicker.tsx +97 -0
- package/src/components/PostUpdatePrompt.tsx +46 -0
- package/src/components/ResultsTable.tsx +172 -0
- package/src/components/RunCompletePrompt.tsx +90 -0
- package/src/components/RunSettingsOverlay.tsx +49 -0
- package/src/components/RunsTable.tsx +219 -0
- package/src/components/StatsHeader.tsx +100 -0
- package/src/daemon.ts +264 -0
- package/src/index.tsx +8 -0
- package/src/lib/agent/agent-provider.test.ts +133 -0
- package/src/lib/agent/claude-provider.ts +277 -0
- package/src/lib/agent/codex-provider.ts +413 -0
- package/src/lib/agent/default-providers.ts +10 -0
- package/src/lib/agent/index.ts +32 -0
- package/src/lib/agent/mock-provider.ts +61 -0
- package/src/lib/agent/opencode-provider.ts +424 -0
- package/src/lib/agent/types.ts +73 -0
- package/src/lib/auth.ts +11 -0
- package/src/lib/config.ts +152 -0
- package/src/lib/daemon-callbacks.ts +59 -0
- package/src/lib/daemon-client.ts +16 -0
- package/src/lib/daemon-lifecycle.ts +368 -0
- package/src/lib/daemon-spawn.ts +122 -0
- package/src/lib/daemon-status.ts +189 -0
- package/src/lib/daemon-watcher.ts +192 -0
- package/src/lib/experiment-loop.ts +679 -0
- package/src/lib/experiment.ts +356 -0
- package/src/lib/finalize.test.ts +143 -0
- package/src/lib/finalize.ts +511 -0
- package/src/lib/format.test.ts +32 -0
- package/src/lib/format.ts +44 -0
- package/src/lib/git.ts +176 -0
- package/src/lib/ideas-backlog.test.ts +54 -0
- package/src/lib/ideas-backlog.ts +109 -0
- package/src/lib/measure.ts +472 -0
- package/src/lib/model-options.ts +24 -0
- package/src/lib/programs.ts +247 -0
- package/src/lib/push-stream.ts +48 -0
- package/src/lib/run-context.ts +112 -0
- package/src/lib/run-setup.ts +34 -0
- package/src/lib/run.ts +383 -0
- package/src/lib/syntax-theme.ts +39 -0
- package/src/lib/system-prompts/experiment.ts +77 -0
- package/src/lib/system-prompts/finalize.ts +90 -0
- package/src/lib/system-prompts/index.ts +7 -0
- package/src/lib/system-prompts/setup.ts +516 -0
- package/src/lib/system-prompts/update.ts +188 -0
- package/src/lib/tool-events.ts +99 -0
- package/src/lib/validate-measurement.ts +326 -0
- package/src/lib/worktree.ts +40 -0
- package/src/screens/AuthErrorScreen.tsx +31 -0
- package/src/screens/ExecutionScreen.tsx +851 -0
- package/src/screens/FirstSetupScreen.tsx +168 -0
- package/src/screens/HomeScreen.tsx +406 -0
- package/src/screens/PreRunScreen.tsx +206 -0
- package/src/screens/SettingsScreen.tsx +189 -0
- package/src/screens/SetupScreen.tsx +226 -0
- package/src/tui.tsx +17 -0
- package/tsconfig.json +17 -0
|
@@ -0,0 +1,247 @@
|
|
|
1
|
+
import { readdir, mkdir } from "node:fs/promises"
|
|
2
|
+
import { join } from "node:path"
|
|
3
|
+
import { $ } from "bun"
|
|
4
|
+
|
|
5
|
+
/** A program discovered on disk: its directory name and the path to its config file. */
export interface Program {
  name: string
  configPath: string
}

/**
 * Bounds for a quality-gate field.
 * Validation requires at least one of `min`/`max`, each a finite number, with `min <= max`.
 */
export interface QualityGate {
  min?: number
  max?: number
}

/** A secondary metric; validation only constrains its `direction`. */
export interface SecondaryMetric {
  direction: "lower" | "higher"
}

/** Shape of a program's `config.json` after it has passed `validateProgramConfig`. */
export interface ProgramConfig {
  /** Non-empty name of the primary metric field. */
  metric_field: string
  /** Presumably whether lower or higher metric values count as better — confirm against the consumer. */
  direction: "lower" | "higher"
  /** Finite number, strictly greater than 0. */
  noise_threshold: number
  /** Integer >= 1. */
  repeats: number
  /** Gates keyed by field name; every gate must define `min` or `max`. */
  quality_gates: Record<string, QualityGate>
  /** Optional; keys must not collide with `metric_field` or any `quality_gates` key. */
  secondary_metrics?: Record<string, SecondaryMetric>
  /** Integer >= 1. */
  max_experiments: number
  /** Optional integer >= 1. */
  max_consecutive_discards?: number
  /** Optional integer >= 1. */
  max_turns?: number
}

export type Screen = "home" | "setup" | "settings" | "program-detail" | "pre-run" | "execution" | "first-setup"

/** Name of the tool's working directory inside the project root. */
export const AUTOAUTO_DIR = ".autoauto"

/** Memoized result of `getProjectRoot`; `undefined` until a root has been resolved. */
let cachedRoot: string | undefined

/**
 * Asserts that `value` is a finite number (rejects non-numbers, NaN and ±Infinity).
 *
 * @param value - Candidate value taken from the parsed config.
 * @param path - Config path used in the error message.
 * @throws Error when `value` is not a finite number.
 */
function assertFiniteNumber(value: unknown, path: string): asserts value is number {
  if (typeof value !== "number" || !Number.isFinite(value)) {
    throw new Error(`config.json: ${path} must be a finite number`)
  }
}

/** Asserts that `value` is an integer >= 1, throwing a `config.json:`-prefixed error otherwise. */
function assertPositiveInteger(value: unknown, path: string): asserts value is number {
  if (typeof value !== "number" || !Number.isInteger(value) || value < 1) {
    throw new Error(`config.json: ${path} must be an integer >= 1`)
  }
}

/** True for non-null objects that are not arrays. */
function isJsonObject(value: unknown): value is Record<string, unknown> {
  return typeof value === "object" && value !== null && !Array.isArray(value)
}

/** Validates every entry of `quality_gates`: object shape, at least one bound, finite bounds, min <= max. */
function validateQualityGates(gates: Record<string, unknown>): void {
  for (const [field, gate] of Object.entries(gates)) {
    if (!isJsonObject(gate)) {
      throw new Error(`config.json: quality_gates.${field} must be an object`)
    }

    const { min, max } = gate
    if (min === undefined && max === undefined) {
      throw new Error(`config.json: quality_gates.${field} must define min or max`)
    }
    if (min !== undefined) assertFiniteNumber(min, `quality_gates.${field}.min`)
    if (max !== undefined) assertFiniteNumber(max, `quality_gates.${field}.max`)
    if (typeof min === "number" && typeof max === "number" && min > max) {
      throw new Error(`config.json: quality_gates.${field}.min must be <= max`)
    }
  }
}

/** Validates every entry of `secondary_metrics` and rejects keys that collide with the primary metric or a gate. */
function validateSecondaryMetrics(
  metrics: Record<string, unknown>,
  metricField: string,
  qualityGates: Record<string, unknown>,
): void {
  for (const [field, metric] of Object.entries(metrics)) {
    if (!isJsonObject(metric)) {
      throw new Error(`config.json: secondary_metrics.${field} must be an object`)
    }
    if (metric.direction !== "lower" && metric.direction !== "higher") {
      throw new Error(`config.json: secondary_metrics.${field}.direction must be "lower" or "higher"`)
    }

    // Prevent overlap with primary metric and quality gates
    if (field === metricField) {
      throw new Error(`config.json: secondary_metrics.${field} overlaps with metric_field`)
    }
    // Own-property check: `in` would also match inherited names such as "toString"
    // or "constructor" and reject perfectly valid metric names.
    if (Object.prototype.hasOwnProperty.call(qualityGates, field)) {
      throw new Error(`config.json: secondary_metrics.${field} overlaps with quality_gates`)
    }
  }
}

/**
 * Validates a parsed `config.json` value and returns it typed as `ProgramConfig`.
 *
 * Checks run in a fixed order and the first failure is thrown. The input object is
 * returned as-is (not copied), so unknown extra keys are preserved.
 *
 * @param raw - Result of parsing `config.json`.
 * @returns The same object, typed as `ProgramConfig`.
 * @throws Error with a `config.json:`-prefixed message describing the first invalid field.
 */
export function validateProgramConfig(raw: unknown): ProgramConfig {
  if (!isJsonObject(raw)) {
    throw new Error("config.json: must be a JSON object")
  }
  const config = raw

  const metricField = config.metric_field
  if (!metricField || typeof metricField !== "string") {
    throw new Error("config.json: metric_field must be a non-empty string")
  }
  if (config.direction !== "lower" && config.direction !== "higher") {
    throw new Error('config.json: direction must be "lower" or "higher"')
  }

  const noiseThreshold = config.noise_threshold
  assertFiniteNumber(noiseThreshold, "noise_threshold")
  if (noiseThreshold <= 0) {
    throw new Error("config.json: noise_threshold must be positive")
  }

  assertPositiveInteger(config.repeats, "repeats")
  assertPositiveInteger(config.max_experiments, "max_experiments")
  if (config.max_consecutive_discards !== undefined) {
    assertPositiveInteger(config.max_consecutive_discards, "max_consecutive_discards")
  }
  if (config.max_turns !== undefined) {
    assertPositiveInteger(config.max_turns, "max_turns")
  }

  const qualityGates = config.quality_gates
  if (!isJsonObject(qualityGates)) {
    throw new Error("config.json: quality_gates must be an object")
  }
  validateQualityGates(qualityGates)

  const secondaryMetrics = config.secondary_metrics
  if (secondaryMetrics !== undefined) {
    if (!isJsonObject(secondaryMetrics)) {
      throw new Error("config.json: secondary_metrics must be an object")
    }
    validateSecondaryMetrics(secondaryMetrics, metricField, qualityGates)
  }

  // Every field has been checked above; the double assertion bridges the index-signature type.
  return config as unknown as ProgramConfig
}
|
|
135
|
+
|
|
136
|
+
/**
 * Resolves the git project root for `cwd` and memoizes it in `cachedRoot`.
 *
 * The first successful resolution wins: later calls return the cached value
 * regardless of the `cwd` they pass. The superproject working tree is preferred
 * when git reports one; otherwise the repository top level is used.
 *
 * NOTE(review): the original comment says this resolves through worktrees, but
 * `--show-superproject-working-tree` is git's submodule query — confirm that linked
 * worktrees are actually resolved to the main repo here.
 *
 * @param cwd - Directory in which the git commands are run.
 * @returns The resolved root path (trimmed git output).
 */
export async function getProjectRoot(cwd: string): Promise<string> {
  if (cachedRoot) return cachedRoot

  // Failures are tolerated here (nothrow): an empty result just means "no superproject".
  const superprojectQuery = await $`git rev-parse --show-superproject-working-tree`.cwd(cwd).nothrow().quiet()
  let root = superprojectQuery.stdout.toString().trim()

  if (!root) {
    const toplevelOutput = await $`git rev-parse --show-toplevel`.cwd(cwd).text()
    root = toplevelOutput.trim()
  }

  cachedRoot = root
  return root
}
|
|
149
|
+
|
|
150
|
+
/**
 * Lists the programs found under `<project root>/.autoauto/programs`.
 *
 * Every sub-directory is reported as a program; `configPath` is computed, not
 * checked for existence. Any failure while reading the directory yields an empty list.
 *
 * @param cwd - Directory used to resolve the project root.
 * @returns One `Program` per sub-directory, or `[]` when the directory cannot be read.
 */
export async function listPrograms(cwd: string): Promise<Program[]> {
  const projectRoot = await getProjectRoot(cwd)
  const programsDir = join(projectRoot, AUTOAUTO_DIR, "programs")

  try {
    const programs: Program[] = []
    for (const dirent of await readdir(programsDir, { withFileTypes: true })) {
      if (!dirent.isDirectory()) continue
      programs.push({
        name: dirent.name,
        configPath: join(programsDir, dirent.name, "config.json"),
      })
    }
    return programs
  } catch {
    return []
  }
}
|
|
165
|
+
|
|
166
|
+
/**
 * Enriched program metadata for the home screen.
 *
 * NOTE(review): nothing in this file populates the type; the field notes below
 * are inferred from names and types only — confirm against the producer.
 */
export interface ProgramInfo {
  /** Program identifier (presumably the program directory name). */
  name: string
  /** Number of runs recorded for the program. */
  totalRuns: number
  /** Date of the most recent run as a string, or `null` when there is none. */
  lastRunDate: string | null
  /** Whether a run is currently in progress. */
  hasActiveRun: boolean
}
|
|
173
|
+
|
|
174
|
+
/**
 * Returns the programs directory located under `cwd`.
 * The result is only absolute when `cwd` itself is absolute; `cwd` is used as given
 * (no project-root resolution happens here).
 */
export function getProgramsDir(cwd: string): string {
  const segments = [cwd, AUTOAUTO_DIR, "programs"]
  return join(...segments)
}
|
|
178
|
+
|
|
179
|
+
/**
 * Returns the directory of the program `slug` under `cwd`.
 * The result is only absolute when `cwd` itself is absolute.
 */
export function getProgramDir(cwd: string, slug: string): string {
  const segments = [cwd, AUTOAUTO_DIR, "programs", slug]
  return join(...segments)
}
|
|
183
|
+
|
|
184
|
+
/**
 * Returns the directory of run `runId` for the program `slug` under `cwd`.
 * The result is only absolute when `cwd` itself is absolute.
 */
export function getRunDir(cwd: string, slug: string, runId: string): string {
  const segments = [cwd, AUTOAUTO_DIR, "programs", slug, "runs", runId]
  return join(...segments)
}
|
|
188
|
+
|
|
189
|
+
/**
 * Reads `config.json` from `programDir`, parses it as JSON and validates it.
 *
 * @param programDir - Directory that contains the program's `config.json`.
 * @returns The validated configuration.
 * @throws When the file cannot be read or parsed, or when `validateProgramConfig` rejects it.
 */
export async function loadProgramConfig(programDir: string): Promise<ProgramConfig> {
  const configFile = Bun.file(join(programDir, "config.json"))
  const parsed = await configFile.json()
  return validateProgramConfig(parsed)
}
|
|
194
|
+
|
|
195
|
+
/** Summary of an existing program for duplicate detection during setup. */
export interface ProgramSummary {
  /** Program directory name. */
  slug: string
  /** Text of the `## Goal` section of program.md, or a parenthesised placeholder when unavailable. */
  goal: string
}

/**
 * Loads a summary (slug + goal text from program.md) for every existing program.
 *
 * The goal is the trimmed text between the `## Goal` heading and the next `##`
 * heading (or the end of the file). Programs whose program.md cannot be read or has
 * no goal section still get an entry, with a placeholder goal.
 *
 * @param cwd - Directory used to resolve the project root.
 * @returns One summary per program directory, or `[]` when the programs directory cannot be read.
 */
export async function loadProgramSummaries(cwd: string): Promise<ProgramSummary[]> {
  const projectRoot = await getProjectRoot(cwd)
  const programsDir = join(projectRoot, AUTOAUTO_DIR, "programs")

  let programDirs: import("node:fs").Dirent[]
  try {
    const dirents = await readdir(programsDir, { withFileTypes: true })
    programDirs = dirents.filter((dirent) => dirent.isDirectory())
  } catch {
    return []
  }

  // Summarises one program; never rejects, so Promise.all below cannot fail on a single bad program.
  const summarize = async (slug: string): Promise<ProgramSummary> => {
    try {
      const markdown = await Bun.file(join(programsDir, slug, "program.md")).text()
      const match = markdown.match(/## Goal\n+([\s\S]*?)(?:\n##|\n*$)/)
      return { slug, goal: match ? match[1].trim() : "(no goal defined)" }
    } catch {
      return { slug, goal: "(could not read program.md)" }
    }
  }

  return Promise.all(programDirs.map((dirent) => summarize(dirent.name)))
}
|
|
225
|
+
|
|
226
|
+
/**
 * Ensures `<project root>/.autoauto` exists and is listed in the root `.gitignore`.
 *
 * When `.gitignore` is created or extended, the file is staged with `git add`.
 * An existing `.gitignore` is left untouched if it already contains the directory
 * name anywhere in its text.
 *
 * NOTE(review): the "already ignored" test is a plain substring match, so a comment
 * or an unrelated entry containing ".autoauto" also counts — confirm this is acceptable.
 *
 * @param cwd - Directory used to resolve the project root.
 */
export async function ensureAutoAutoDir(cwd: string): Promise<void> {
  const root = await getProjectRoot(cwd)
  await mkdir(join(root, AUTOAUTO_DIR), { recursive: true })

  const gitignorePath = join(root, ".gitignore")
  const gitignore = Bun.file(gitignorePath)
  const ignoreEntry = `${AUTOAUTO_DIR}/\n`

  // `null` means "nothing to write"; otherwise the full new file content.
  let updatedContent: string | null = null
  if (!(await gitignore.exists())) {
    updatedContent = ignoreEntry
  } else {
    const current = await gitignore.text()
    if (!current.includes(AUTOAUTO_DIR)) {
      updatedContent = `${current.trimEnd()}\n${ignoreEntry}`
    }
  }

  if (updatedContent === null) return

  await Bun.write(gitignorePath, updatedContent)
  await $`git add .gitignore`.cwd(root).quiet()
}
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
/** An async-iterable queue that producers feed imperatively. */
export interface PushStream<T> extends AsyncIterable<T> {
  /** Delivers `value` to a waiting consumer or buffers it; ignored after `end()`. */
  push(value: T): void
  /** Marks the stream as finished; already buffered values can still be consumed. */
  end(): void
}

/**
 * Creates an unbounded, in-memory push stream.
 *
 * Values pushed before a consumer asks for them are buffered in order. Consumers
 * that call `next()` while the buffer is empty wait in FIFO order, so several
 * outstanding `next()` calls are each settled exactly once — by later pushes or by
 * `end()`. All iterators obtained from one stream share the same buffer.
 *
 * @returns A stream object exposing `push`, `end` and the async-iterator protocol.
 */
export function createPushStream<T>(): PushStream<T> {
  const buffered: T[] = []
  // Resolvers of next() calls that arrived while the buffer was empty.
  // Invariant: `pending` is non-empty only while `buffered` is empty.
  const pending: Array<(result: IteratorResult<T>) => void> = []
  let ended = false

  const yieldResult = (value: T): IteratorResult<T> => ({ value, done: false })
  const doneResult = (): IteratorResult<T> => ({ value: undefined, done: true })

  return {
    push(value: T): void {
      if (ended) return
      const resolve = pending.shift()
      if (resolve) {
        resolve(yieldResult(value))
      } else {
        buffered.push(value)
      }
    },

    end(): void {
      ended = true
      // Release every waiting consumer, not just the most recent one.
      for (const resolve of pending.splice(0)) {
        resolve(doneResult())
      }
    },

    [Symbol.asyncIterator](): AsyncIterator<T> {
      return {
        next(): Promise<IteratorResult<T>> {
          if (buffered.length > 0) {
            // Drain buffered values first, even after end().
            return Promise.resolve(yieldResult(buffered.shift() as T))
          }
          if (ended) {
            return Promise.resolve(doneResult())
          }
          return new Promise<IteratorResult<T>>((resolve) => {
            pending.push(resolve)
          })
        },
      }
    },
  }
}
|
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
import { readdir } from "node:fs/promises"
|
|
2
|
+
import { join } from "node:path"
|
|
3
|
+
import { getLatestRun, readAllResults, getRunStats } from "./run.ts"
|
|
4
|
+
import { loadProgramConfig } from "./programs.ts"
|
|
5
|
+
import { streamLogName } from "./daemon-callbacks.ts"
|
|
6
|
+
|
|
7
|
+
/** Maximum number of trailing stream-log lines embedded in the message. */
const MAX_LOG_LINES = 500

/**
 * Builds the initial auto-analysis message for the update agent.
 *
 * The message is assembled from the latest run of the program: a summary of its
 * state and stats, the last five rows of results, the tail of the last experiment's
 * stream log, and the paths of all `stream-*.log` files in the run directory
 * (sorted lexicographically by file name). A missing log file or an unreadable run
 * directory simply omits the corresponding section.
 *
 * @param programDir - Directory of the program whose latest run is analysed.
 * @returns The message text, or a fixed prompt when there is no run with state.
 */
export async function buildUpdateRunContext(programDir: string): Promise<string> {
  const latestRun = await getLatestRun(programDir)
  if (!latestRun || !latestRun.state) {
    return "No previous runs found for this program. Please describe what you'd like to change."
  }

  const { run_dir: runDir, state } = latestRun

  // An invalid or missing config must not block the analysis; the direction then defaults to "lower".
  const [programConfig, allResults] = await Promise.all([
    loadProgramConfig(programDir).catch(() => null),
    readAllResults(runDir),
  ])
  const stats = getRunStats(state, programConfig?.direction ?? "lower")

  // Build run summary
  const message: string[] = []
  message.push(
    "Here are the results from the latest run of this program:",
    "",
    "## Run Summary",
    `- Phase: ${state.phase}`,
  )
  if (state.termination_reason) {
    message.push(`- Termination reason: ${state.termination_reason}`)
  }
  message.push(
    `- Experiments: ${stats.total_experiments} total (${stats.total_keeps} kept, ${stats.total_discards} discarded, ${stats.total_crashes} crashed)`,
  )
  if (stats.total_experiments > 0) {
    const sign = stats.improvement_pct >= 0 ? "+" : ""
    message.push(
      `- Keep rate: ${(stats.keep_rate * 100).toFixed(0)}%`,
      `- Original baseline: ${state.original_baseline} → Best: ${state.best_metric} (${sign}${stats.improvement_pct.toFixed(1)}%)`,
    )
  }
  if (state.error) {
    message.push(`- Error: ${state.error}`)
  }

  // Last few results from results.tsv
  if (allResults.length > 0) {
    message.push("", "## Recent Experiment Results")
    for (const result of allResults.slice(-5)) {
      // The tag is the upper-cased status ("keep" -> "KEEP", "discard" -> "DISCARD", ...).
      const tag = result.status.toUpperCase()
      message.push(`- #${result.experiment_number} [${tag}] metric=${result.metric_value} — ${result.description}`)
    }
  }

  // Read last experiment stream log
  const lastExperiment = state.experiment_number
  if (lastExperiment > 0) {
    const logPath = join(runDir, streamLogName(lastExperiment))
    try {
      const logText = await Bun.file(logPath).text()
      const allLines = logText.split("\n")
      const wasTruncated = allLines.length > MAX_LOG_LINES
      const shownLines = wasTruncated ? allLines.slice(-MAX_LOG_LINES) : allLines
      const headingSuffix = wasTruncated ? ` (last ${MAX_LOG_LINES} lines)` : ""
      message.push(
        "",
        `## Last Experiment (#${lastExperiment}) Stream Log${headingSuffix}`,
        "```",
        shownLines.join("\n"),
        "```",
      )
    } catch {
      // Log file doesn't exist
    }
  }

  // List all available stream logs
  try {
    const fileNames = await readdir(runDir)
    const streamLogs = fileNames
      .filter((fileName) => fileName.startsWith("stream-") && fileName.endsWith(".log"))
      .toSorted()
    if (streamLogs.length > 0) {
      message.push(
        "",
        "## Additional Logs",
        "The following log files are available if you need more context (use the Read tool):",
        ...streamLogs.map((fileName) => `- ${join(runDir, fileName)}`),
      )
    }
  } catch {
    // Can't list directory
  }

  message.push(
    "",
    "Please analyze these results and suggest what should be fixed or improved in the program configuration.",
  )

  return message.join("\n")
}
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
import { mkdir, chmod } from "node:fs/promises"
|
|
2
|
+
import { join } from "node:path"
|
|
3
|
+
|
|
4
|
+
// --- Measurement Locking ---
|
|
5
|
+
|
|
6
|
+
/** Files protected from agent modification during experiment runs. */
export const MEASUREMENT_FILES = ["measure.sh", "config.json", "build.sh"] as const

/**
 * Applies `mode` to every measurement file in `programDir`, in parallel.
 * Best effort: a chmod failure on any file (for example because it does not exist) is ignored.
 */
async function chmodMeasurementFiles(programDir: string, mode: number): Promise<void> {
  const pending = MEASUREMENT_FILES.map(async (fileName) => {
    const target = join(programDir, fileName)
    try {
      await chmod(target, mode)
    } catch {
      // Deliberately ignored: not every program has every measurement file.
    }
  })
  await Promise.all(pending)
}

/** Makes measurement files read-only (chmod 444). #1 safeguard against metric gaming. */
export async function lockMeasurement(programDir: string): Promise<void> {
  await chmodMeasurementFiles(programDir, 0o444)
}

/** Restores measurement files to owner-writable (chmod 644), undoing `lockMeasurement`. */
export async function unlockMeasurement(programDir: string): Promise<void> {
  await chmodMeasurementFiles(programDir, 0o644)
}
|
|
21
|
+
|
|
22
|
+
// --- Run Directory ---
|
|
23
|
+
|
|
24
|
+
/**
 * Creates the directory for run `runId` under `<programDir>/runs` and writes a
 * `results.tsv` containing only the tab-separated header row.
 *
 * @param programDir - Directory of the program the run belongs to.
 * @param runId - Identifier used as the run directory name.
 * @returns The path of the run directory.
 */
export async function initRunDir(programDir: string, runId: string): Promise<string> {
  const resultsHeader =
    "experiment#\tcommit\tmetric_value\tsecondary_values\tstatus\tdescription\tmeasurement_duration_ms\tdiff_stats\n"

  const runDir = join(programDir, "runs", runId)
  await mkdir(runDir, { recursive: true })

  const resultsPath = join(runDir, "results.tsv")
  await Bun.write(resultsPath, resultsHeader)

  return runDir
}
|