npm - @tritard/waterbrother - Versions diffs - 0.9.1 → 0.9.3 - Mend

@tritard/waterbrother 0.9.1 → 0.9.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (4)

package/package.json (changed)

@@ -1,6 +1,6 @@
 {
   "name": "@tritard/waterbrother",
-  "version": "0.9.1",
+  "version": "0.9.3",
   "description": "Waterbrother: Grok-powered coding CLI with local tools, sessions, operator modes, and approval controls",
   "type": "module",
   "bin": {

package/src/agent.js (changed)

@@ -74,7 +74,8 @@ When you use tools:
   - avoid hype such as "premium", "luxurious", "studio-grade", or "improved!"
 - Explain what you changed and why.
 - Never claim you ran commands you did not run.
-- If a tool fails, show the failure and recover.`;
+- If a tool fails, show the failure and recover.
+- You are a coding tool for real software engineering work. If a request is clearly a joke, hypothetical, non-technical, or not related to actual software development, respond conversationally WITHOUT using any tools. Do not create files, write scripts, or make edits for non-engineering requests. Examples of things you should NOT build: personality generators, dating advice scripts, joke apps, horoscope generators, or any request that is clearly not serious engineering work.`;
 const COMPACTION_SYSTEM_PROMPT = `You summarize coding assistant transcripts for context compaction.
 Output concise markdown with these sections:

package/src/cli.js (changed)

@@ -34,7 +34,7 @@ import { createPanelRenderer, buildPanelState } from "./panel.js";
 import { deriveTaskNameFromPrompt, nextActionsForState, routeNaturalInput } from "./router.js";
 import { compressEpisode, saveEpisode, loadRecentEpisodes, findRelevantEpisodes, buildEpisodicMemoryBlock, buildReminderBlock } from "./episodic.js";
 import { formatPlanForDisplay } from "./planner.js";
-import { parseCharterFromGoal, runExperimentLoop, formatExperimentSummary } from "./experiment.js";
+import { parseCharterFromGoal, runExperimentLoop, formatExperimentSummary, gitReturnToBranch } from "./experiment.js";
 const execFileAsync = promisify(execFile);
 const PACKAGE_ROOT = path.resolve(path.dirname(fileURLToPath(import.meta.url)), "..");
@@ -6144,7 +6144,7 @@ async function promptLoop(agent, session, context) {
       // Build charter
       const charter = parseCharterFromGoal(goalArg);
-      // Ask for metric command if not inferable
+      // Ask for metric command
       if (!charter.metric.command) {
         try {
           const metricCmd = await promptLine("metric command (e.g. npm test, python bench.py): ", { input: process.stdin, output: process.stdout });
@@ -6159,49 +6159,69 @@ async function promptLoop(agent, session, context) {
         }
       }
-      // Ask for attempt count
+      // Ask for attempt budget (0 = infinite, runs until Ctrl+C or time limit)
       try {
-        const attemptsStr = await promptLine(`max attempts [${charter.budget.maxAttempts}]: `, { input: process.stdin, output: process.stdout });
+        const attemptsStr = await promptLine("max attempts (0 = run until interrupted) [0]: ", { input: process.stdin, output: process.stdout });
         const parsed = parseInt(attemptsStr.trim(), 10);
-        if (parsed > 0) charter.budget.maxAttempts = Math.min(parsed, 20);
+        if (parsed > 0) charter.budget.maxAttempts = parsed;
       } catch {}
+      // Ask for time budget
+      try {
+        const timeStr = await promptLine(`time limit in minutes [${charter.budget.maxMinutes}]: `, { input: process.stdin, output: process.stdout });
+        const parsed = parseInt(timeStr.trim(), 10);
+        if (parsed > 0) charter.budget.maxMinutes = parsed;
+      } catch {}
+      const isInfinite = !charter.budget.maxAttempts || charter.budget.maxAttempts <= 0;
       console.log(`────────────────────────────────────────────────────────────`);
       console.log(`experiment: ${charter.goal}`);
       console.log(`metric: ${charter.metric.command} (${charter.metric.direction} is better)`);
-      console.log(`budget: ${charter.budget.maxAttempts} attempts, ${charter.budget.maxMinutes}m max`);
-      if (charter.constraints.length > 0) {
-        for (const c of charter.constraints) console.log(`  constraint: ${c}`);
-      }
+      console.log(`budget: ${isInfinite ? "∞ attempts" : `${charter.budget.maxAttempts} attempts`}, ${charter.budget.maxMinutes}m max`);
+      console.log(`simplicity bias: ${charter.simplicityBias ? "on" : "off"}`);
       console.log(`────────────────────────────────────────────────────────────`);
-      console.log("measuring baseline...");
+      console.log("creating experiment branch and measuring baseline...");
       const spinner = createProgressSpinner("running experiment...");
+      let interrupted = false;
+      const abortController = typeof AbortController === "function" ? new AbortController() : null;
+      const detachInterrupt = createInterruptListener(() => {
+        interrupted = true;
+        spinner.setLabel("stopping after current attempt...");
+      }, { enableEsc: process.stdin.isTTY, shouldIgnoreEsc: () => approvalPromptActive });
       try {
         const results = await runExperimentLoop({
           charter,
           cwd: context.cwd,
+          tag: goalArg.toLowerCase().replace(/[^a-z0-9]+/g, "-").slice(0, 30),
           handlers: {
             onBaseline(value) {
               spinner.stop();
               console.log(`baseline: ${value}`);
               console.log(`────────────────────────────────────────────────────────────`);
             },
-            async planChange({ goal, constraints, metric, previousAttempts }) {
-              // Use planner model if available, otherwise main model
+            onInterrupted() {
+              return interrupted;
+            },
+            async planChange({ goal, constraints, metric, previousAttempts, simplicityBias }) {
               const model = context.runtime.plannerModel || agent.getModel();
               const previousSummary = previousAttempts.map((a) =>
-                `attempt ${a.number}: ${a.hypothesis} → ${a.promoted ? "kept" : "reverted"} (${a.value})`
+                `attempt ${a.number}: ${a.hypothesis} → ${a.status} (${a.value})`
               ).join("\n");
+              const simplicityNote = simplicityBias
+                ? "\n\nSimplicity criterion: prefer simpler changes. A small improvement from deleting code is better than a large improvement from adding complexity. If improvement is ~0 but code is simpler, that's a win."
+                : "";
               const prompt = [
                 `Goal: ${goal}`,
                 `Metric: ${metric.command} (${metric.direction} is better, current best: ${metric.currentBest})`,
                 constraints.length > 0 ? `Constraints: ${constraints.join("; ")}` : "",
                 previousSummary ? `Previous attempts:\n${previousSummary}` : "",
-                "Propose ONE specific code change that could improve the metric. Be concrete about what file and what to change.",
-                "Respond with a one-line hypothesis and a detailed prompt for the executor."
+                "Propose ONE specific code change. Be concrete: which file, which function, what change.",
+                `Respond with JSON: { "hypothesis": "one-line summary", "prompt": "detailed executor instructions" }${simplicityNote}`
               ].filter(Boolean).join("\n\n");
               try {
@@ -6211,32 +6231,47 @@ async function promptLoop(agent, session, context) {
                   baseUrl: context.runtime.baseUrl,
                   model,
                   messages: [
-                    { role: "system", content: "You are an optimization expert. Propose one concrete code change to improve a metric. Respond with JSON: { \"hypothesis\": \"one-line summary\", \"prompt\": \"detailed instructions for the code editor\" }" },
+                    { role: "system", content: "You are an autonomous researcher optimizing code. Each attempt must try something different from previous attempts. Learn from kept vs discarded results. Respond with JSON only." },
                     { role: "user", content: prompt }
                   ],
-                  temperature: 0.5
+                  temperature: 0.6
                 });
-                return completion.json || { hypothesis: `attempt`, prompt: goal };
+                return completion.json || { hypothesis: "attempt", prompt: goal };
               } catch {
-                return { hypothesis: `attempt`, prompt: goal };
+                return { hypothesis: "attempt", prompt: goal };
               }
             },
             async executeChange({ prompt }) {
               spinner.setLabel("implementing change...");
-              // Use the executor model to make the change
               await agent.runTurn(prompt, {
                 onAssistantDelta() {},
                 onToolStart() { spinner.setLabel("editing..."); },
-                onToolEnd() { spinner.setLabel("implementing change..."); }
+                onToolEnd() { spinner.setLabel("implementing..."); }
               });
               await agent.toolRuntime.completeTurn({});
             },
+            async fixCrash({ error, prompt }) {
+              // Try to fix the crash — give the agent the error and ask it to fix
+              spinner.setLabel("fixing crash...");
+              try {
+                await agent.runTurn(
+                  `The previous change crashed with this error:\n${error}\n\nFix the issue. The original goal was: ${prompt}`,
+                  { onAssistantDelta() {}, onToolStart() {}, onToolEnd() {} }
+                );
+                await agent.toolRuntime.completeTurn({});
+                return true;
+              } catch {
+                return false;
+              }
+            },
             onAttemptStart(number, hypothesis) {
               spinner.setLabel(`attempt ${number}: ${hypothesis}`);
               console.log(`\n  ${number}. trying: ${hypothesis}`);
             },
             onAttemptEnd(attempt) {
-              const icon = attempt.promoted ? green("✓ kept") : red("✗ reverted");
+              const icon = attempt.status === "keep" ? green("✓ kept")
+                : attempt.status === "crash" ? yellow("💥 crash")
+                : red("✗ reverted");
               const value = attempt.value !== null ? ` → ${attempt.value}` : " → failed";
               console.log(`     ${icon}${value}`);
             },
@@ -6246,8 +6281,24 @@ async function promptLoop(agent, session, context) {
           }
         });
+        detachInterrupt();
         console.log("\n" + formatExperimentSummary(results));
+        // Offer to return to original branch or stay
+        if (results.originalBranch && results.branch) {
+          console.log(`\nexperiment branch: ${results.branch}`);
+          console.log(`original branch: ${results.originalBranch}`);
+          try {
+            const stay = await promptYesNo("Stay on experiment branch?", { input: process.stdin, output: process.stdout });
+            if (!stay) {
+              const { gitReturnToBranch } = await import("./experiment.js");
+              await gitReturnToBranch({ cwd: context.cwd, branch: results.originalBranch });
+              console.log(`returned to ${results.originalBranch}`);
+            }
+          } catch {}
+        }
       } catch (error) {
+        detachInterrupt();
         spinner.stop();
         console.log(`experiment failed: ${error instanceof Error ? error.message : String(error)}`);
       }

package/src/experiment.js (changed)

@@ -1,10 +1,16 @@
 import { execFile } from "node:child_process";
+import fs from "node:fs/promises";
+import path from "node:path";
 import { promisify } from "node:util";
 const execFileAsync = promisify(execFile);
+const MAX_CRASH_RETRIES = 2;
+const RESULTS_FILENAME = "results.tsv";
+const TSV_HEADER = "commit\tvalue\tstatus\tdescription";
 /**
- * Charter: the experiment's rules.
+ * Charter: the experiment's rules (inspired by Karpathy's program.md).
  * {
  *   goal: "speed up auth middleware",
  *   metric: {
@@ -13,15 +19,15 @@ const execFileAsync = promisify(execFile);
  *     direction: "lower"           // "lower" | "higher"
  *   },
  *   budget: {
- *     maxAttempts: 5,
- *     maxMinutes: 30
+ *     maxAttempts: 0,              // 0 = infinite (run until interrupted)
+ *     maxMinutes: 60
  *   },
- *   constraints: ["do not change the public API", "keep all existing tests passing"]
+ *   constraints: ["do not change the public API", "keep all existing tests passing"],
+ *   simplicityBias: true           // prefer simpler changes over complex ones
  * }
  */
 export function parseCharterFromGoal(goalText) {
-  // Extract metric hints from natural language
   const lower = goalText.toLowerCase();
   let direction = "lower";
   let extract = "duration";
@@ -43,8 +49,9 @@ export function parseCharterFromGoal(goalText) {
   return {
     goal: goalText,
     metric: { command: "", extract, direction },
-    budget: { maxAttempts: 5, maxMinutes: 30 },
-    constraints: []
+    budget: { maxAttempts: 0, maxMinutes: 60 },
+    constraints: [],
+    simplicityBias: true
   };
 }
@@ -52,20 +59,18 @@ export async function runMetric({ command, extract, cwd }) {
   const startTime = Date.now();
   try {
     const isWin = process.platform === "win32";
-    const execOpts = { cwd, env: process.env, maxBuffer: 8 * 1024 * 1024, timeout: 120000 };
-    let stdout, stderr, exitCode;
+    const execOpts = { cwd, env: process.env, maxBuffer: 8 * 1024 * 1024, timeout: 600000 };
+    let stdout, stderr;
     if (isWin) {
       const result = await execFileAsync(command, [], { ...execOpts, shell: true });
       stdout = String(result.stdout || "");
       stderr = String(result.stderr || "");
-      exitCode = 0;
     } else {
       const parts = command.split(/\s+/);
       const result = await execFileAsync(parts[0], parts.slice(1), execOpts);
       stdout = String(result.stdout || "");
       stderr = String(result.stderr || "");
-      exitCode = 0;
     }
     const elapsed = Date.now() - startTime;
@@ -74,7 +79,7 @@ export async function runMetric({ command, extract, cwd }) {
       return { ok: true, value: elapsed, raw: stdout, elapsed };
     }
     if (extract === "exit_code") {
-      return { ok: true, value: exitCode, raw: stdout, elapsed };
+      return { ok: true, value: 0, raw: stdout, elapsed };
     }
     if (extract === "stdout_number") {
       const match = (stdout + stderr).match(/(\d+\.?\d*)/);
@@ -90,18 +95,36 @@ export async function runMetric({ command, extract, cwd }) {
     return {
       ok: false,
       value: null,
-      raw: error.stderr || error.message || String(error),
+      raw: String(error.stderr || error.message || error).slice(0, 2000),
       elapsed,
       exitCode: error.code || 1
     };
   }
 }
+// --- Git operations ---
+export async function gitCreateBranch({ cwd, tag }) {
+  const branch = `experiment/${tag}`;
+  try {
+    await execFileAsync("git", ["checkout", "-b", branch], { cwd });
+    return { ok: true, branch };
+  } catch {
+    // Branch might already exist
+    try {
+      await execFileAsync("git", ["checkout", branch], { cwd });
+      return { ok: true, branch };
+    } catch {
+      return { ok: false, branch };
+    }
+  }
+}
 export async function gitCheckpoint({ cwd, label }) {
   try {
     await execFileAsync("git", ["add", "-A"], { cwd });
     await execFileAsync("git", ["commit", "-m", `[experiment] ${label}`, "--allow-empty"], { cwd });
-    const { stdout } = await execFileAsync("git", ["rev-parse", "HEAD"], { cwd });
+    const { stdout } = await execFileAsync("git", ["rev-parse", "--short", "HEAD"], { cwd });
     return { ok: true, sha: stdout.trim() };
   } catch {
     return { ok: false, sha: null };
@@ -117,6 +140,69 @@ export async function gitRevert({ cwd, sha }) {
   }
 }
+export async function gitReturnToBranch({ cwd, branch }) {
+  try {
+    await execFileAsync("git", ["checkout", branch], { cwd });
+    return { ok: true };
+  } catch {
+    return { ok: false };
+  }
+}
+async function gitGetCurrentBranch({ cwd }) {
+  try {
+    const { stdout } = await execFileAsync("git", ["rev-parse", "--abbrev-ref", "HEAD"], { cwd });
+    return stdout.trim();
+  } catch {
+    return null;
+  }
+}
+// --- Results TSV (persistent scoreboard, untracked by git) ---
+async function ensureResultsTsv(cwd) {
+  const tsvPath = path.join(cwd, RESULTS_FILENAME);
+  try {
+    await fs.access(tsvPath);
+  } catch {
+    await fs.writeFile(tsvPath, `${TSV_HEADER}\n`, "utf8");
+  }
+  // Add to .gitignore if not already there
+  try {
+    const gitignorePath = path.join(cwd, ".gitignore");
+    let gitignore = "";
+    try { gitignore = await fs.readFile(gitignorePath, "utf8"); } catch {}
+    if (!gitignore.includes(RESULTS_FILENAME)) {
+      await fs.writeFile(gitignorePath, `${gitignore.trimEnd()}\n${RESULTS_FILENAME}\n`, "utf8");
+    }
+  } catch {}
+  return tsvPath;
+}
+export async function appendResult({ cwd, commit, value, status, description }) {
+  const tsvPath = await ensureResultsTsv(cwd);
+  const valueStr = value !== null && value !== undefined ? String(value) : "0";
+  const desc = String(description || "").replace(/\t/g, " ").replace(/\n/g, " ").slice(0, 200);
+  const line = `${commit || "-------"}\t${valueStr}\t${status}\t${desc}\n`;
+  await fs.appendFile(tsvPath, line, "utf8");
+}
+export async function readResults(cwd) {
+  const tsvPath = path.join(cwd, RESULTS_FILENAME);
+  try {
+    const raw = await fs.readFile(tsvPath, "utf8");
+    const lines = raw.trim().split("\n").slice(1); // skip header
+    return lines.map((line) => {
+      const [commit, value, status, description] = line.split("\t");
+      return { commit, value: parseFloat(value) || 0, status, description: description || "" };
+    });
+  } catch {
+    return [];
+  }
+}
+// --- Core logic ---
 export function isBetter(newValue, oldValue, direction) {
   if (newValue === null || oldValue === null) return false;
   if (direction === "lower") return newValue < oldValue;
@@ -125,20 +211,23 @@ export function isBetter(newValue, oldValue, direction) {
 }
 export function formatAttemptResult(attempt) {
-  const status = attempt.promoted ? "✓ kept" : "✗ reverted";
+  const icon = attempt.status === "keep" ? "✓" : attempt.status === "crash" ? "💥" : "✗";
+  const label = attempt.status === "keep" ? "kept" : attempt.status === "crash" ? "crash" : "reverted";
   const delta = attempt.baseline !== null && attempt.value !== null
     ? ` (${attempt.value > attempt.baseline ? "+" : ""}${(attempt.value - attempt.baseline).toFixed(2)})`
     : "";
-  return `  ${attempt.number}. ${status}  ${attempt.value !== null ? attempt.value : "failed"}${delta}  — ${attempt.hypothesis}`;
+  const valueStr = attempt.value !== null ? String(attempt.value) : "failed";
+  return `  ${attempt.number}. ${icon} ${label}  ${valueStr}${delta}  — ${attempt.hypothesis}`;
 }
 export function formatExperimentSummary(results) {
   const lines = [];
-  const { charter, baseline, attempts, bestValue, bestAttempt, totalElapsed } = results;
+  const { charter, baseline, attempts, bestValue, bestAttempt, totalElapsed, branch } = results;
   lines.push(`────────────────────────────────────────────────────────────`);
   lines.push(`experiment: ${charter.goal}`);
   lines.push(`metric: ${charter.metric.command} (${charter.metric.direction} is better)`);
+  if (branch) lines.push(`branch: ${branch}`);
   lines.push(`baseline: ${baseline}`);
   lines.push(`────────────────────────────────────────────────────────────`);
@@ -157,97 +246,169 @@ export function formatExperimentSummary(results) {
   }
   const mins = (totalElapsed / 60000).toFixed(1);
   lines.push(`${attempts.length} attempts in ${mins}m`);
+  lines.push(`results logged to ${RESULTS_FILENAME}`);
   lines.push(`────────────────────────────────────────────────────────────`);
   return lines.join("\n");
 }
 /**
- * Run the experiment loop.
+ * Run the experiment loop (autoresearch-style).
+ *
+ * Key differences from v1:
+ * - Runs on a dedicated branch (experiment/<tag>)
+ * - Logs every attempt to results.tsv (persistent, git-ignored)
+ * - maxAttempts=0 means infinite (run until interrupted or time budget)
+ * - Crash recovery: retries up to MAX_CRASH_RETRIES before giving up on an idea
+ * - Simplicity bias: planner is told to prefer simpler changes
+ * - Output redirected: metric output goes to log, not context
  *
  * handlers: {
- *   onBaseline(value) — baseline measured
- *   onAttemptStart(number, hypothesis) — attempt starting
- *   onAttemptEnd(attempt) — attempt finished (kept or reverted)
- *   onDone(results) — experiment complete
- *   planChange({ goal, constraints, metric, previousAttempts, cwd }) → { hypothesis, prompt }
+ *   onBaseline(value)
+ *   onAttemptStart(number, hypothesis)
+ *   onAttemptEnd(attempt)
+ *   onDone(results)
+ *   onInterrupted() → boolean  (check if user pressed Ctrl+C)
+ *   planChange({ goal, constraints, metric, previousAttempts, cwd, simplicityBias }) → { hypothesis, prompt }
  *   executeChange({ prompt, cwd }) → void
+ *   fixCrash({ error, prompt, cwd }) → boolean (true if fixed, false to give up)
  * }
  */
-export async function runExperimentLoop({ charter, cwd, handlers = {} }) {
+export async function runExperimentLoop({ charter, cwd, tag, handlers = {} }) {
   const startTime = Date.now();
   const { metric, budget } = charter;
   const attempts = [];
+  const isInfinite = !budget.maxAttempts || budget.maxAttempts <= 0;
+  // Create dedicated branch
+  const originalBranch = await gitGetCurrentBranch({ cwd });
+  const branchTag = tag || `exp-${Date.now().toString(36)}`;
+  const { branch } = await gitCreateBranch({ cwd, tag: branchTag });
+  // Ensure results.tsv exists
+  await ensureResultsTsv(cwd);
   // Measure baseline
   const baselineResult = await runMetric({ command: metric.command, extract: metric.extract, cwd });
   if (!baselineResult.ok) {
+    // Return to original branch on failure
+    if (originalBranch) await gitReturnToBranch({ cwd, branch: originalBranch });
     throw new Error(`Baseline metric failed: ${baselineResult.raw}`);
   }
   const baseline = baselineResult.value;
   if (handlers.onBaseline) handlers.onBaseline(baseline);
-  // Checkpoint baseline state
+  // Checkpoint baseline
   const baselineCheckpoint = await gitCheckpoint({ cwd, label: `baseline (${baseline})` });
+  await appendResult({ cwd, commit: baselineCheckpoint.sha, value: baseline, status: "keep", description: "baseline" });
   let currentBest = baseline;
   let bestAttempt = null;
   let lastGoodSha = baselineCheckpoint.sha;
+  let attemptNum = 0;
-  for (let i = 0; i < budget.maxAttempts; i++) {
+  while (true) {
+    attemptNum++;
+    // Check budget
     const elapsed = (Date.now() - startTime) / 60000;
     if (elapsed >= budget.maxMinutes) break;
+    if (!isInfinite && attemptNum > budget.maxAttempts) break;
+    // Check for interruption
+    if (handlers.onInterrupted && handlers.onInterrupted()) break;
-    // Ask planner for a hypothesis
-    let hypothesis = `attempt ${i + 1}`;
+    // Plan the change
+    let hypothesis = `attempt ${attemptNum}`;
     let changePrompt = charter.goal;
     if (handlers.planChange) {
-      const plan = await handlers.planChange({
-        goal: charter.goal,
-        constraints: charter.constraints,
-        metric: { command: metric.command, direction: metric.direction, currentBest },
-        previousAttempts: attempts,
-        cwd
-      });
-      hypothesis = plan.hypothesis || hypothesis;
-      changePrompt = plan.prompt || changePrompt;
+      try {
+        const plan = await handlers.planChange({
+          goal: charter.goal,
+          constraints: charter.constraints,
+          metric: { command: metric.command, direction: metric.direction, currentBest },
+          previousAttempts: attempts,
+          simplicityBias: charter.simplicityBias !== false,
+          cwd
+        });
+        hypothesis = plan.hypothesis || hypothesis;
+        changePrompt = plan.prompt || changePrompt;
+      } catch {
+        // Planner failed — use generic prompt
+      }
     }
-    if (handlers.onAttemptStart) handlers.onAttemptStart(i + 1, hypothesis);
+    if (handlers.onAttemptStart) handlers.onAttemptStart(attemptNum, hypothesis);
-    // Execute the change
-    if (handlers.executeChange) {
+    // Execute the change (with crash recovery)
+    let executed = false;
+    let crashError = null;
+    for (let retry = 0; retry <= MAX_CRASH_RETRIES; retry++) {
       try {
-        await handlers.executeChange({ prompt: changePrompt, cwd });
-      } catch {
-        // Execution failed — revert and continue
-        await gitRevert({ cwd, sha: lastGoodSha });
-        attempts.push({ number: i + 1, hypothesis, value: null, baseline: currentBest, promoted: false, error: true });
-        if (handlers.onAttemptEnd) handlers.onAttemptEnd(attempts[attempts.length - 1]);
-        continue;
+        if (handlers.executeChange) {
+          await handlers.executeChange({ prompt: changePrompt, cwd });
+        }
+        executed = true;
+        break;
+      } catch (err) {
+        crashError = err;
+        // Try to fix the crash if handler is available
+        if (retry < MAX_CRASH_RETRIES && handlers.fixCrash) {
+          const fixed = await handlers.fixCrash({
+            error: err instanceof Error ? err.message : String(err),
+            prompt: changePrompt,
+            cwd
+          });
+          if (!fixed) break;
+          // Fixed — retry execution
+        }
       }
     }
+    if (!executed) {
+      // Crash — revert and log
+      await gitRevert({ cwd, sha: lastGoodSha });
+      const attempt = { number: attemptNum, hypothesis, value: null, baseline: currentBest, status: "crash", error: true };
+      attempts.push(attempt);
+      await appendResult({ cwd, commit: "-------", value: 0, status: "crash", description: hypothesis });
+      if (handlers.onAttemptEnd) handlers.onAttemptEnd(attempt);
+      continue;
+    }
     // Measure result
     const result = await runMetric({ command: metric.command, extract: metric.extract, cwd });
+    if (!result.ok) {
+      // Metric failed (runtime crash) — revert
+      await gitRevert({ cwd, sha: lastGoodSha });
+      const attempt = { number: attemptNum, hypothesis, value: null, baseline: currentBest, status: "crash", error: true };
+      attempts.push(attempt);
+      await appendResult({ cwd, commit: "-------", value: 0, status: "crash", description: `${hypothesis} (metric failed)` });
+      if (handlers.onAttemptEnd) handlers.onAttemptEnd(attempt);
+      continue;
+    }
     const attempt = {
-      number: i + 1,
+      number: attemptNum,
       hypothesis,
-      value: result.ok ? result.value : null,
+      value: result.value,
       baseline: currentBest,
-      promoted: false,
-      error: !result.ok
+      status: "discard",
+      error: false
     };
-    if (result.ok && isBetter(result.value, currentBest, metric.direction)) {
-      // Keep — checkpoint the improvement
-      attempt.promoted = true;
+    if (isBetter(result.value, currentBest, metric.direction)) {
+      // Keep — advance the branch
+      attempt.status = "keep";
       currentBest = result.value;
       bestAttempt = attempt;
-      const cp = await gitCheckpoint({ cwd, label: `attempt ${i + 1}: ${hypothesis} (${result.value})` });
+      const cp = await gitCheckpoint({ cwd, label: `attempt ${attemptNum}: ${hypothesis} (${result.value})` });
       lastGoodSha = cp.sha || lastGoodSha;
+      await appendResult({ cwd, commit: cp.sha, value: result.value, status: "keep", description: hypothesis });
     } else {
-      // Revert — go back to last good state
+      // Discard — revert to last good state
       await gitRevert({ cwd, sha: lastGoodSha });
+      await appendResult({ cwd, commit: lastGoodSha, value: result.value, status: "discard", description: hypothesis });
     }
     attempts.push(attempt);
@@ -260,7 +421,9 @@ export async function runExperimentLoop({ charter, cwd, handlers = {} }) {
     attempts,
     bestValue: currentBest,
     bestAttempt,
-    totalElapsed: Date.now() - startTime
+    totalElapsed: Date.now() - startTime,
+    branch,
+    originalBranch
   };
   if (handlers.onDone) handlers.onDone(results);