@tritard/waterbrother 0.9.1 → 0.9.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@tritard/waterbrother",
3
- "version": "0.9.1",
3
+ "version": "0.9.3",
4
4
  "description": "Waterbrother: Grok-powered coding CLI with local tools, sessions, operator modes, and approval controls",
5
5
  "type": "module",
6
6
  "bin": {
package/src/agent.js CHANGED
@@ -74,7 +74,8 @@ When you use tools:
74
74
  - avoid hype such as "premium", "luxurious", "studio-grade", or "improved!"
75
75
  - Explain what you changed and why.
76
76
  - Never claim you ran commands you did not run.
77
- - If a tool fails, show the failure and recover.`;
77
+ - If a tool fails, show the failure and recover.
78
+ - You are a coding tool for real software engineering work. If a request is clearly a joke, hypothetical, non-technical, or not related to actual software development, respond conversationally WITHOUT using any tools. Do not create files, write scripts, or make edits for non-engineering requests. Examples of things you should NOT build: personality generators, dating advice scripts, joke apps, horoscope generators, or any request that is clearly not serious engineering work.`;
78
79
 
79
80
  const COMPACTION_SYSTEM_PROMPT = `You summarize coding assistant transcripts for context compaction.
80
81
  Output concise markdown with these sections:
package/src/cli.js CHANGED
@@ -34,7 +34,7 @@ import { createPanelRenderer, buildPanelState } from "./panel.js";
34
34
  import { deriveTaskNameFromPrompt, nextActionsForState, routeNaturalInput } from "./router.js";
35
35
  import { compressEpisode, saveEpisode, loadRecentEpisodes, findRelevantEpisodes, buildEpisodicMemoryBlock, buildReminderBlock } from "./episodic.js";
36
36
  import { formatPlanForDisplay } from "./planner.js";
37
- import { parseCharterFromGoal, runExperimentLoop, formatExperimentSummary } from "./experiment.js";
37
+ import { parseCharterFromGoal, runExperimentLoop, formatExperimentSummary, gitReturnToBranch } from "./experiment.js";
38
38
 
39
39
  const execFileAsync = promisify(execFile);
40
40
  const PACKAGE_ROOT = path.resolve(path.dirname(fileURLToPath(import.meta.url)), "..");
@@ -6144,7 +6144,7 @@ async function promptLoop(agent, session, context) {
6144
6144
  // Build charter
6145
6145
  const charter = parseCharterFromGoal(goalArg);
6146
6146
 
6147
- // Ask for metric command if not inferable
6147
+ // Ask for metric command
6148
6148
  if (!charter.metric.command) {
6149
6149
  try {
6150
6150
  const metricCmd = await promptLine("metric command (e.g. npm test, python bench.py): ", { input: process.stdin, output: process.stdout });
@@ -6159,49 +6159,69 @@ async function promptLoop(agent, session, context) {
6159
6159
  }
6160
6160
  }
6161
6161
 
6162
- // Ask for attempt count
6162
+ // Ask for attempt budget (0 = infinite, runs until Ctrl+C or time limit)
6163
6163
  try {
6164
- const attemptsStr = await promptLine(`max attempts [${charter.budget.maxAttempts}]: `, { input: process.stdin, output: process.stdout });
6164
+ const attemptsStr = await promptLine("max attempts (0 = run until interrupted) [0]: ", { input: process.stdin, output: process.stdout });
6165
6165
  const parsed = parseInt(attemptsStr.trim(), 10);
6166
- if (parsed > 0) charter.budget.maxAttempts = Math.min(parsed, 20);
6166
+ if (parsed > 0) charter.budget.maxAttempts = parsed;
6167
6167
  } catch {}
6168
6168
 
6169
+ // Ask for time budget
6170
+ try {
6171
+ const timeStr = await promptLine(`time limit in minutes [${charter.budget.maxMinutes}]: `, { input: process.stdin, output: process.stdout });
6172
+ const parsed = parseInt(timeStr.trim(), 10);
6173
+ if (parsed > 0) charter.budget.maxMinutes = parsed;
6174
+ } catch {}
6175
+
6176
+ const isInfinite = !charter.budget.maxAttempts || charter.budget.maxAttempts <= 0;
6169
6177
  console.log(`────────────────────────────────────────────────────────────`);
6170
6178
  console.log(`experiment: ${charter.goal}`);
6171
6179
  console.log(`metric: ${charter.metric.command} (${charter.metric.direction} is better)`);
6172
- console.log(`budget: ${charter.budget.maxAttempts} attempts, ${charter.budget.maxMinutes}m max`);
6173
- if (charter.constraints.length > 0) {
6174
- for (const c of charter.constraints) console.log(` constraint: ${c}`);
6175
- }
6180
+ console.log(`budget: ${isInfinite ? "∞ attempts" : `${charter.budget.maxAttempts} attempts`}, ${charter.budget.maxMinutes}m max`);
6181
+ console.log(`simplicity bias: ${charter.simplicityBias ? "on" : "off"}`);
6176
6182
  console.log(`────────────────────────────────────────────────────────────`);
6177
- console.log("measuring baseline...");
6183
+ console.log("creating experiment branch and measuring baseline...");
6178
6184
 
6179
6185
  const spinner = createProgressSpinner("running experiment...");
6186
+ let interrupted = false;
6187
+
6188
+ const abortController = typeof AbortController === "function" ? new AbortController() : null;
6189
+ const detachInterrupt = createInterruptListener(() => {
6190
+ interrupted = true;
6191
+ spinner.setLabel("stopping after current attempt...");
6192
+ }, { enableEsc: process.stdin.isTTY, shouldIgnoreEsc: () => approvalPromptActive });
6180
6193
 
6181
6194
  try {
6182
6195
  const results = await runExperimentLoop({
6183
6196
  charter,
6184
6197
  cwd: context.cwd,
6198
+ tag: goalArg.toLowerCase().replace(/[^a-z0-9]+/g, "-").slice(0, 30),
6185
6199
  handlers: {
6186
6200
  onBaseline(value) {
6187
6201
  spinner.stop();
6188
6202
  console.log(`baseline: ${value}`);
6189
6203
  console.log(`────────────────────────────────────────────────────────────`);
6190
6204
  },
6191
- async planChange({ goal, constraints, metric, previousAttempts }) {
6192
- // Use planner model if available, otherwise main model
6205
+ onInterrupted() {
6206
+ return interrupted;
6207
+ },
6208
+ async planChange({ goal, constraints, metric, previousAttempts, simplicityBias }) {
6193
6209
  const model = context.runtime.plannerModel || agent.getModel();
6194
6210
  const previousSummary = previousAttempts.map((a) =>
6195
- `attempt ${a.number}: ${a.hypothesis} → ${a.promoted ? "kept" : "reverted"} (${a.value})`
6211
+ `attempt ${a.number}: ${a.hypothesis} → ${a.status} (${a.value})`
6196
6212
  ).join("\n");
6197
6213
 
6214
+ const simplicityNote = simplicityBias
6215
+ ? "\n\nSimplicity criterion: prefer simpler changes. A small improvement from deleting code is better than a large improvement from adding complexity. If improvement is ~0 but code is simpler, that's a win."
6216
+ : "";
6217
+
6198
6218
  const prompt = [
6199
6219
  `Goal: ${goal}`,
6200
6220
  `Metric: ${metric.command} (${metric.direction} is better, current best: ${metric.currentBest})`,
6201
6221
  constraints.length > 0 ? `Constraints: ${constraints.join("; ")}` : "",
6202
6222
  previousSummary ? `Previous attempts:\n${previousSummary}` : "",
6203
- "Propose ONE specific code change that could improve the metric. Be concrete about what file and what to change.",
6204
- "Respond with a one-line hypothesis and a detailed prompt for the executor."
6223
+ "Propose ONE specific code change. Be concrete: which file, which function, what change.",
6224
+ `Respond with JSON: { "hypothesis": "one-line summary", "prompt": "detailed executor instructions" }${simplicityNote}`
6205
6225
  ].filter(Boolean).join("\n\n");
6206
6226
 
6207
6227
  try {
@@ -6211,32 +6231,47 @@ async function promptLoop(agent, session, context) {
6211
6231
  baseUrl: context.runtime.baseUrl,
6212
6232
  model,
6213
6233
  messages: [
6214
- { role: "system", content: "You are an optimization expert. Propose one concrete code change to improve a metric. Respond with JSON: { \"hypothesis\": \"one-line summary\", \"prompt\": \"detailed instructions for the code editor\" }" },
6234
+ { role: "system", content: "You are an autonomous researcher optimizing code. Each attempt must try something different from previous attempts. Learn from kept vs discarded results. Respond with JSON only." },
6215
6235
  { role: "user", content: prompt }
6216
6236
  ],
6217
- temperature: 0.5
6237
+ temperature: 0.6
6218
6238
  });
6219
- return completion.json || { hypothesis: `attempt`, prompt: goal };
6239
+ return completion.json || { hypothesis: "attempt", prompt: goal };
6220
6240
  } catch {
6221
- return { hypothesis: `attempt`, prompt: goal };
6241
+ return { hypothesis: "attempt", prompt: goal };
6222
6242
  }
6223
6243
  },
6224
6244
  async executeChange({ prompt }) {
6225
6245
  spinner.setLabel("implementing change...");
6226
- // Use the executor model to make the change
6227
6246
  await agent.runTurn(prompt, {
6228
6247
  onAssistantDelta() {},
6229
6248
  onToolStart() { spinner.setLabel("editing..."); },
6230
- onToolEnd() { spinner.setLabel("implementing change..."); }
6249
+ onToolEnd() { spinner.setLabel("implementing..."); }
6231
6250
  });
6232
6251
  await agent.toolRuntime.completeTurn({});
6233
6252
  },
6253
+ async fixCrash({ error, prompt }) {
6254
+ // Try to fix the crash — give the agent the error and ask it to fix
6255
+ spinner.setLabel("fixing crash...");
6256
+ try {
6257
+ await agent.runTurn(
6258
+ `The previous change crashed with this error:\n${error}\n\nFix the issue. The original goal was: ${prompt}`,
6259
+ { onAssistantDelta() {}, onToolStart() {}, onToolEnd() {} }
6260
+ );
6261
+ await agent.toolRuntime.completeTurn({});
6262
+ return true;
6263
+ } catch {
6264
+ return false;
6265
+ }
6266
+ },
6234
6267
  onAttemptStart(number, hypothesis) {
6235
6268
  spinner.setLabel(`attempt ${number}: ${hypothesis}`);
6236
6269
  console.log(`\n ${number}. trying: ${hypothesis}`);
6237
6270
  },
6238
6271
  onAttemptEnd(attempt) {
6239
- const icon = attempt.promoted ? green("✓ kept") : red("✗ reverted");
6272
+ const icon = attempt.status === "keep" ? green("✓ kept")
6273
+ : attempt.status === "crash" ? yellow("💥 crash")
6274
+ : red("✗ reverted");
6240
6275
  const value = attempt.value !== null ? ` → ${attempt.value}` : " → failed";
6241
6276
  console.log(` ${icon}${value}`);
6242
6277
  },
@@ -6246,8 +6281,24 @@ async function promptLoop(agent, session, context) {
6246
6281
  }
6247
6282
  });
6248
6283
 
6284
+ detachInterrupt();
6249
6285
  console.log("\n" + formatExperimentSummary(results));
6286
+
6287
+ // Offer to return to original branch or stay
6288
+ if (results.originalBranch && results.branch) {
6289
+ console.log(`\nexperiment branch: ${results.branch}`);
6290
+ console.log(`original branch: ${results.originalBranch}`);
6291
+ try {
6292
+ const stay = await promptYesNo("Stay on experiment branch?", { input: process.stdin, output: process.stdout });
6293
+ if (!stay) {
6294
+ const { gitReturnToBranch } = await import("./experiment.js");
6295
+ await gitReturnToBranch({ cwd: context.cwd, branch: results.originalBranch });
6296
+ console.log(`returned to ${results.originalBranch}`);
6297
+ }
6298
+ } catch {}
6299
+ }
6250
6300
  } catch (error) {
6301
+ detachInterrupt();
6251
6302
  spinner.stop();
6252
6303
  console.log(`experiment failed: ${error instanceof Error ? error.message : String(error)}`);
6253
6304
  }
package/src/experiment.js CHANGED
@@ -1,10 +1,16 @@
1
1
  import { execFile } from "node:child_process";
2
+ import fs from "node:fs/promises";
3
+ import path from "node:path";
2
4
  import { promisify } from "node:util";
3
5
 
4
6
  const execFileAsync = promisify(execFile);
5
7
 
8
+ const MAX_CRASH_RETRIES = 2;
9
+ const RESULTS_FILENAME = "results.tsv";
10
+ const TSV_HEADER = "commit\tvalue\tstatus\tdescription";
11
+
6
12
  /**
7
- * Charter: the experiment's rules.
13
+ * Charter: the experiment's rules (inspired by Karpathy's program.md).
8
14
  * {
9
15
  * goal: "speed up auth middleware",
10
16
  * metric: {
@@ -13,15 +19,15 @@ const execFileAsync = promisify(execFile);
13
19
  * direction: "lower" // "lower" | "higher"
14
20
  * },
15
21
  * budget: {
16
- * maxAttempts: 5,
17
- * maxMinutes: 30
22
+ * maxAttempts: 0, // 0 = infinite (run until interrupted)
23
+ * maxMinutes: 60
18
24
  * },
19
- * constraints: ["do not change the public API", "keep all existing tests passing"]
25
+ * constraints: ["do not change the public API", "keep all existing tests passing"],
26
+ * simplicityBias: true // prefer simpler changes over complex ones
20
27
  * }
21
28
  */
22
29
 
23
30
  export function parseCharterFromGoal(goalText) {
24
- // Extract metric hints from natural language
25
31
  const lower = goalText.toLowerCase();
26
32
  let direction = "lower";
27
33
  let extract = "duration";
@@ -43,8 +49,9 @@ export function parseCharterFromGoal(goalText) {
43
49
  return {
44
50
  goal: goalText,
45
51
  metric: { command: "", extract, direction },
46
- budget: { maxAttempts: 5, maxMinutes: 30 },
47
- constraints: []
52
+ budget: { maxAttempts: 0, maxMinutes: 60 },
53
+ constraints: [],
54
+ simplicityBias: true
48
55
  };
49
56
  }
50
57
 
@@ -52,20 +59,18 @@ export async function runMetric({ command, extract, cwd }) {
52
59
  const startTime = Date.now();
53
60
  try {
54
61
  const isWin = process.platform === "win32";
55
- const execOpts = { cwd, env: process.env, maxBuffer: 8 * 1024 * 1024, timeout: 120000 };
56
- let stdout, stderr, exitCode;
62
+ const execOpts = { cwd, env: process.env, maxBuffer: 8 * 1024 * 1024, timeout: 600000 };
63
+ let stdout, stderr;
57
64
 
58
65
  if (isWin) {
59
66
  const result = await execFileAsync(command, [], { ...execOpts, shell: true });
60
67
  stdout = String(result.stdout || "");
61
68
  stderr = String(result.stderr || "");
62
- exitCode = 0;
63
69
  } else {
64
70
  const parts = command.split(/\s+/);
65
71
  const result = await execFileAsync(parts[0], parts.slice(1), execOpts);
66
72
  stdout = String(result.stdout || "");
67
73
  stderr = String(result.stderr || "");
68
- exitCode = 0;
69
74
  }
70
75
 
71
76
  const elapsed = Date.now() - startTime;
@@ -74,7 +79,7 @@ export async function runMetric({ command, extract, cwd }) {
74
79
  return { ok: true, value: elapsed, raw: stdout, elapsed };
75
80
  }
76
81
  if (extract === "exit_code") {
77
- return { ok: true, value: exitCode, raw: stdout, elapsed };
82
+ return { ok: true, value: 0, raw: stdout, elapsed };
78
83
  }
79
84
  if (extract === "stdout_number") {
80
85
  const match = (stdout + stderr).match(/(\d+\.?\d*)/);
@@ -90,18 +95,36 @@ export async function runMetric({ command, extract, cwd }) {
90
95
  return {
91
96
  ok: false,
92
97
  value: null,
93
- raw: error.stderr || error.message || String(error),
98
+ raw: String(error.stderr || error.message || error).slice(0, 2000),
94
99
  elapsed,
95
100
  exitCode: error.code || 1
96
101
  };
97
102
  }
98
103
  }
99
104
 
105
+ // --- Git operations ---
106
+
107
+ export async function gitCreateBranch({ cwd, tag }) {
108
+ const branch = `experiment/${tag}`;
109
+ try {
110
+ await execFileAsync("git", ["checkout", "-b", branch], { cwd });
111
+ return { ok: true, branch };
112
+ } catch {
113
+ // Branch might already exist
114
+ try {
115
+ await execFileAsync("git", ["checkout", branch], { cwd });
116
+ return { ok: true, branch };
117
+ } catch {
118
+ return { ok: false, branch };
119
+ }
120
+ }
121
+ }
122
+
100
123
  export async function gitCheckpoint({ cwd, label }) {
101
124
  try {
102
125
  await execFileAsync("git", ["add", "-A"], { cwd });
103
126
  await execFileAsync("git", ["commit", "-m", `[experiment] ${label}`, "--allow-empty"], { cwd });
104
- const { stdout } = await execFileAsync("git", ["rev-parse", "HEAD"], { cwd });
127
+ const { stdout } = await execFileAsync("git", ["rev-parse", "--short", "HEAD"], { cwd });
105
128
  return { ok: true, sha: stdout.trim() };
106
129
  } catch {
107
130
  return { ok: false, sha: null };
@@ -117,6 +140,69 @@ export async function gitRevert({ cwd, sha }) {
117
140
  }
118
141
  }
119
142
 
143
+ export async function gitReturnToBranch({ cwd, branch }) {
144
+ try {
145
+ await execFileAsync("git", ["checkout", branch], { cwd });
146
+ return { ok: true };
147
+ } catch {
148
+ return { ok: false };
149
+ }
150
+ }
151
+
152
+ async function gitGetCurrentBranch({ cwd }) {
153
+ try {
154
+ const { stdout } = await execFileAsync("git", ["rev-parse", "--abbrev-ref", "HEAD"], { cwd });
155
+ return stdout.trim();
156
+ } catch {
157
+ return null;
158
+ }
159
+ }
160
+
161
+ // --- Results TSV (persistent scoreboard, untracked by git) ---
162
+
163
+ async function ensureResultsTsv(cwd) {
164
+ const tsvPath = path.join(cwd, RESULTS_FILENAME);
165
+ try {
166
+ await fs.access(tsvPath);
167
+ } catch {
168
+ await fs.writeFile(tsvPath, `${TSV_HEADER}\n`, "utf8");
169
+ }
170
+ // Add to .gitignore if not already there
171
+ try {
172
+ const gitignorePath = path.join(cwd, ".gitignore");
173
+ let gitignore = "";
174
+ try { gitignore = await fs.readFile(gitignorePath, "utf8"); } catch {}
175
+ if (!gitignore.includes(RESULTS_FILENAME)) {
176
+ await fs.writeFile(gitignorePath, `${gitignore.trimEnd()}\n${RESULTS_FILENAME}\n`, "utf8");
177
+ }
178
+ } catch {}
179
+ return tsvPath;
180
+ }
181
+
182
+ export async function appendResult({ cwd, commit, value, status, description }) {
183
+ const tsvPath = await ensureResultsTsv(cwd);
184
+ const valueStr = value !== null && value !== undefined ? String(value) : "0";
185
+ const desc = String(description || "").replace(/\t/g, " ").replace(/\n/g, " ").slice(0, 200);
186
+ const line = `${commit || "-------"}\t${valueStr}\t${status}\t${desc}\n`;
187
+ await fs.appendFile(tsvPath, line, "utf8");
188
+ }
189
+
190
+ export async function readResults(cwd) {
191
+ const tsvPath = path.join(cwd, RESULTS_FILENAME);
192
+ try {
193
+ const raw = await fs.readFile(tsvPath, "utf8");
194
+ const lines = raw.trim().split("\n").slice(1); // skip header
195
+ return lines.map((line) => {
196
+ const [commit, value, status, description] = line.split("\t");
197
+ return { commit, value: parseFloat(value) || 0, status, description: description || "" };
198
+ });
199
+ } catch {
200
+ return [];
201
+ }
202
+ }
203
+
204
+ // --- Core logic ---
205
+
120
206
  export function isBetter(newValue, oldValue, direction) {
121
207
  if (newValue === null || oldValue === null) return false;
122
208
  if (direction === "lower") return newValue < oldValue;
@@ -125,20 +211,23 @@ export function isBetter(newValue, oldValue, direction) {
125
211
  }
126
212
 
127
213
  export function formatAttemptResult(attempt) {
128
- const status = attempt.promoted ? "✓ kept" : "✗ reverted";
214
+ const icon = attempt.status === "keep" ? "✓" : attempt.status === "crash" ? "💥" : "✗";
215
+ const label = attempt.status === "keep" ? "kept" : attempt.status === "crash" ? "crash" : "reverted";
129
216
  const delta = attempt.baseline !== null && attempt.value !== null
130
217
  ? ` (${attempt.value > attempt.baseline ? "+" : ""}${(attempt.value - attempt.baseline).toFixed(2)})`
131
218
  : "";
132
- return ` ${attempt.number}. ${status} ${attempt.value !== null ? attempt.value : "failed"}${delta} — ${attempt.hypothesis}`;
219
+ const valueStr = attempt.value !== null ? String(attempt.value) : "failed";
220
+ return ` ${attempt.number}. ${icon} ${label} ${valueStr}${delta} — ${attempt.hypothesis}`;
133
221
  }
134
222
 
135
223
  export function formatExperimentSummary(results) {
136
224
  const lines = [];
137
- const { charter, baseline, attempts, bestValue, bestAttempt, totalElapsed } = results;
225
+ const { charter, baseline, attempts, bestValue, bestAttempt, totalElapsed, branch } = results;
138
226
 
139
227
  lines.push(`────────────────────────────────────────────────────────────`);
140
228
  lines.push(`experiment: ${charter.goal}`);
141
229
  lines.push(`metric: ${charter.metric.command} (${charter.metric.direction} is better)`);
230
+ if (branch) lines.push(`branch: ${branch}`);
142
231
  lines.push(`baseline: ${baseline}`);
143
232
  lines.push(`────────────────────────────────────────────────────────────`);
144
233
 
@@ -157,97 +246,169 @@ export function formatExperimentSummary(results) {
157
246
  }
158
247
  const mins = (totalElapsed / 60000).toFixed(1);
159
248
  lines.push(`${attempts.length} attempts in ${mins}m`);
249
+ lines.push(`results logged to ${RESULTS_FILENAME}`);
160
250
  lines.push(`────────────────────────────────────────────────────────────`);
161
251
 
162
252
  return lines.join("\n");
163
253
  }
164
254
 
165
255
  /**
166
- * Run the experiment loop.
256
+ * Run the experiment loop (autoresearch-style).
257
+ *
258
+ * Key differences from v1:
259
+ * - Runs on a dedicated branch (experiment/<tag>)
260
+ * - Logs every attempt to results.tsv (persistent, git-ignored)
261
+ * - maxAttempts=0 means infinite (run until interrupted or time budget)
262
+ * - Crash recovery: retries up to MAX_CRASH_RETRIES before giving up on an idea
263
+ * - Simplicity bias: planner is told to prefer simpler changes
264
+ * - Output redirected: metric output goes to log, not context
167
265
  *
168
266
  * handlers: {
169
- * onBaseline(value) — baseline measured
170
- * onAttemptStart(number, hypothesis) — attempt starting
171
- * onAttemptEnd(attempt) — attempt finished (kept or reverted)
172
- * onDone(results) — experiment complete
173
- * planChange({ goal, constraints, metric, previousAttempts, cwd }) → { hypothesis, prompt }
267
+ * onBaseline(value)
268
+ * onAttemptStart(number, hypothesis)
269
+ * onAttemptEnd(attempt)
270
+ * onDone(results)
271
+ * onInterrupted() → boolean (check if user pressed Ctrl+C)
272
+ * planChange({ goal, constraints, metric, previousAttempts, cwd, simplicityBias }) → { hypothesis, prompt }
174
273
  * executeChange({ prompt, cwd }) → void
274
+ * fixCrash({ error, prompt, cwd }) → boolean (true if fixed, false to give up)
175
275
  * }
176
276
  */
177
- export async function runExperimentLoop({ charter, cwd, handlers = {} }) {
277
+ export async function runExperimentLoop({ charter, cwd, tag, handlers = {} }) {
178
278
  const startTime = Date.now();
179
279
  const { metric, budget } = charter;
180
280
  const attempts = [];
281
+ const isInfinite = !budget.maxAttempts || budget.maxAttempts <= 0;
282
+
283
+ // Create dedicated branch
284
+ const originalBranch = await gitGetCurrentBranch({ cwd });
285
+ const branchTag = tag || `exp-${Date.now().toString(36)}`;
286
+ const { branch } = await gitCreateBranch({ cwd, tag: branchTag });
287
+
288
+ // Ensure results.tsv exists
289
+ await ensureResultsTsv(cwd);
181
290
 
182
291
  // Measure baseline
183
292
  const baselineResult = await runMetric({ command: metric.command, extract: metric.extract, cwd });
184
293
  if (!baselineResult.ok) {
294
+ // Return to original branch on failure
295
+ if (originalBranch) await gitReturnToBranch({ cwd, branch: originalBranch });
185
296
  throw new Error(`Baseline metric failed: ${baselineResult.raw}`);
186
297
  }
187
298
  const baseline = baselineResult.value;
188
299
  if (handlers.onBaseline) handlers.onBaseline(baseline);
189
300
 
190
- // Checkpoint baseline state
301
+ // Checkpoint baseline
191
302
  const baselineCheckpoint = await gitCheckpoint({ cwd, label: `baseline (${baseline})` });
303
+ await appendResult({ cwd, commit: baselineCheckpoint.sha, value: baseline, status: "keep", description: "baseline" });
304
+
192
305
  let currentBest = baseline;
193
306
  let bestAttempt = null;
194
307
  let lastGoodSha = baselineCheckpoint.sha;
308
+ let attemptNum = 0;
195
309
 
196
- for (let i = 0; i < budget.maxAttempts; i++) {
310
+ while (true) {
311
+ attemptNum++;
312
+
313
+ // Check budget
197
314
  const elapsed = (Date.now() - startTime) / 60000;
198
315
  if (elapsed >= budget.maxMinutes) break;
316
+ if (!isInfinite && attemptNum > budget.maxAttempts) break;
317
+
318
+ // Check for interruption
319
+ if (handlers.onInterrupted && handlers.onInterrupted()) break;
199
320
 
200
- // Ask planner for a hypothesis
201
- let hypothesis = `attempt ${i + 1}`;
321
+ // Plan the change
322
+ let hypothesis = `attempt ${attemptNum}`;
202
323
  let changePrompt = charter.goal;
203
324
  if (handlers.planChange) {
204
- const plan = await handlers.planChange({
205
- goal: charter.goal,
206
- constraints: charter.constraints,
207
- metric: { command: metric.command, direction: metric.direction, currentBest },
208
- previousAttempts: attempts,
209
- cwd
210
- });
211
- hypothesis = plan.hypothesis || hypothesis;
212
- changePrompt = plan.prompt || changePrompt;
325
+ try {
326
+ const plan = await handlers.planChange({
327
+ goal: charter.goal,
328
+ constraints: charter.constraints,
329
+ metric: { command: metric.command, direction: metric.direction, currentBest },
330
+ previousAttempts: attempts,
331
+ simplicityBias: charter.simplicityBias !== false,
332
+ cwd
333
+ });
334
+ hypothesis = plan.hypothesis || hypothesis;
335
+ changePrompt = plan.prompt || changePrompt;
336
+ } catch {
337
+ // Planner failed — use generic prompt
338
+ }
213
339
  }
214
340
 
215
- if (handlers.onAttemptStart) handlers.onAttemptStart(i + 1, hypothesis);
341
+ if (handlers.onAttemptStart) handlers.onAttemptStart(attemptNum, hypothesis);
216
342
 
217
- // Execute the change
218
- if (handlers.executeChange) {
343
+ // Execute the change (with crash recovery)
344
+ let executed = false;
345
+ let crashError = null;
346
+ for (let retry = 0; retry <= MAX_CRASH_RETRIES; retry++) {
219
347
  try {
220
- await handlers.executeChange({ prompt: changePrompt, cwd });
221
- } catch {
222
- // Execution failed — revert and continue
223
- await gitRevert({ cwd, sha: lastGoodSha });
224
- attempts.push({ number: i + 1, hypothesis, value: null, baseline: currentBest, promoted: false, error: true });
225
- if (handlers.onAttemptEnd) handlers.onAttemptEnd(attempts[attempts.length - 1]);
226
- continue;
348
+ if (handlers.executeChange) {
349
+ await handlers.executeChange({ prompt: changePrompt, cwd });
350
+ }
351
+ executed = true;
352
+ break;
353
+ } catch (err) {
354
+ crashError = err;
355
+ // Try to fix the crash if handler is available
356
+ if (retry < MAX_CRASH_RETRIES && handlers.fixCrash) {
357
+ const fixed = await handlers.fixCrash({
358
+ error: err instanceof Error ? err.message : String(err),
359
+ prompt: changePrompt,
360
+ cwd
361
+ });
362
+ if (!fixed) break;
363
+ // Fixed — retry execution
364
+ }
227
365
  }
228
366
  }
229
367
 
368
+ if (!executed) {
369
+ // Crash — revert and log
370
+ await gitRevert({ cwd, sha: lastGoodSha });
371
+ const attempt = { number: attemptNum, hypothesis, value: null, baseline: currentBest, status: "crash", error: true };
372
+ attempts.push(attempt);
373
+ await appendResult({ cwd, commit: "-------", value: 0, status: "crash", description: hypothesis });
374
+ if (handlers.onAttemptEnd) handlers.onAttemptEnd(attempt);
375
+ continue;
376
+ }
377
+
230
378
  // Measure result
231
379
  const result = await runMetric({ command: metric.command, extract: metric.extract, cwd });
380
+
381
+ if (!result.ok) {
382
+ // Metric failed (runtime crash) — revert
383
+ await gitRevert({ cwd, sha: lastGoodSha });
384
+ const attempt = { number: attemptNum, hypothesis, value: null, baseline: currentBest, status: "crash", error: true };
385
+ attempts.push(attempt);
386
+ await appendResult({ cwd, commit: "-------", value: 0, status: "crash", description: `${hypothesis} (metric failed)` });
387
+ if (handlers.onAttemptEnd) handlers.onAttemptEnd(attempt);
388
+ continue;
389
+ }
390
+
232
391
  const attempt = {
233
- number: i + 1,
392
+ number: attemptNum,
234
393
  hypothesis,
235
- value: result.ok ? result.value : null,
394
+ value: result.value,
236
395
  baseline: currentBest,
237
- promoted: false,
238
- error: !result.ok
396
+ status: "discard",
397
+ error: false
239
398
  };
240
399
 
241
- if (result.ok && isBetter(result.value, currentBest, metric.direction)) {
242
- // Keep — checkpoint the improvement
243
- attempt.promoted = true;
400
+ if (isBetter(result.value, currentBest, metric.direction)) {
401
+ // Keep — advance the branch
402
+ attempt.status = "keep";
244
403
  currentBest = result.value;
245
404
  bestAttempt = attempt;
246
- const cp = await gitCheckpoint({ cwd, label: `attempt ${i + 1}: ${hypothesis} (${result.value})` });
405
+ const cp = await gitCheckpoint({ cwd, label: `attempt ${attemptNum}: ${hypothesis} (${result.value})` });
247
406
  lastGoodSha = cp.sha || lastGoodSha;
407
+ await appendResult({ cwd, commit: cp.sha, value: result.value, status: "keep", description: hypothesis });
248
408
  } else {
249
- // Revert — go back to last good state
409
+ // Discard — revert to last good state
250
410
  await gitRevert({ cwd, sha: lastGoodSha });
411
+ await appendResult({ cwd, commit: lastGoodSha, value: result.value, status: "discard", description: hypothesis });
251
412
  }
252
413
 
253
414
  attempts.push(attempt);
@@ -260,7 +421,9 @@ export async function runExperimentLoop({ charter, cwd, handlers = {} }) {
260
421
  attempts,
261
422
  bestValue: currentBest,
262
423
  bestAttempt,
263
- totalElapsed: Date.now() - startTime
424
+ totalElapsed: Date.now() - startTime,
425
+ branch,
426
+ originalBranch
264
427
  };
265
428
 
266
429
  if (handlers.onDone) handlers.onDone(results);