npm - @tritard/waterbrother - Versions diffs - 0.14.0 → 0.14.1 - Mend

@tritard/waterbrother 0.14.0 → 0.14.1

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (3)

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@tritard/waterbrother",
-  "version": "0.14.0",
+  "version": "0.14.1",
   "description": "Waterbrother: Grok-powered coding CLI with local tools, sessions, operator modes, and approval controls",
   "type": "module",
   "bin": {

package/src/cli.js CHANGED Viewed

@@ -6706,11 +6706,32 @@ Be concrete about surfaces — name actual pages/flows. Choose the best stack fo
             onInterrupted() {
               return interrupted;
             },
-            async planChange({ goal, constraints, metric, previousAttempts, simplicityBias }) {
+            async readTargetFile({ cwd, goal }) {
+              // Extract file path from goal (e.g. "reduce lines in router.js" → "src/router.js")
+              const fileMatch = goal.match(/\b([\w./\\-]+\.\w{1,5})\b/);
+              if (!fileMatch) return null;
+              const targetFile = fileMatch[1];
+              try {
+                const candidates = [targetFile, `src/${targetFile}`, `lib/${targetFile}`, `app/${targetFile}`];
+                for (const candidate of candidates) {
+                  try {
+                    const content = await import("node:fs/promises").then((fs) => fs.readFile(`${cwd}/${candidate}`, "utf8"));
+                    return `--- ${candidate} ---\n${content.slice(0, 4000)}`;
+                  } catch {}
+                }
+              } catch {}
+              return null;
+            },
+            async planChange({ goal, constraints, metric, previousAttempts, simplicityBias, targetFileContent }) {
               const model = context.runtime.plannerModel || agent.getModel();
-              const previousSummary = previousAttempts.map((a) =>
-                `attempt ${a.number}: ${a.hypothesis} → ${a.status} (${a.value})`
-              ).join("\n");
+              // Rich attempt history — include WHY things failed
+              const previousSummary = previousAttempts.map((a) => {
+                let line = `attempt ${a.number}: ${a.hypothesis} → ${a.status} (${a.value})`;
+                if (a.errorDetail) line += ` | error: ${a.errorDetail}`;
+                if (a.status === "discard" && a.metricOutput) line += ` | output: ${a.metricOutput.slice(0, 80)}`;
+                return line;
+              }).join("\n");
               const simplicityNote = simplicityBias
                 ? "\n\nSimplicity criterion: prefer simpler changes. A small improvement from deleting code is better than a large improvement from adding complexity. If improvement is ~0 but code is simpler, that's a win."
@@ -6719,10 +6740,11 @@ Be concrete about surfaces — name actual pages/flows. Choose the best stack fo
               const prompt = [
                 `Goal: ${goal}`,
                 `Metric: ${metric.command} (${metric.direction} is better, current best: ${metric.currentBest})`,
+                targetFileContent ? `Current file content:\n${targetFileContent}` : "",
                 constraints.length > 0 ? `Constraints: ${constraints.join("; ")}` : "",
-                previousSummary ? `Previous attempts:\n${previousSummary}` : "",
-                "Propose ONE specific code change. Be concrete: which file, which function, what change.",
-                `Respond with JSON: { "hypothesis": "one-line summary", "prompt": "detailed executor instructions" }${simplicityNote}`
+                previousSummary ? `Previous attempts (learn from these — do NOT repeat failed ideas):\n${previousSummary}` : "",
+                "Propose ONE specific code change. Reference exact line numbers or function names from the file above. Be concrete.",
+                `Respond with JSON: { "hypothesis": "one-line summary", "prompt": "detailed executor instructions referencing specific lines/functions" }${simplicityNote}`
               ].filter(Boolean).join("\n\n");
               try {
@@ -6732,7 +6754,7 @@ Be concrete about surfaces — name actual pages/flows. Choose the best stack fo
                   baseUrl: context.runtime.baseUrl,
                   model,
                   messages: [
-                    { role: "system", content: "You are an autonomous researcher optimizing code. Each attempt must try something different from previous attempts. Learn from kept vs discarded results. Respond with JSON only." },
+                    { role: "system", content: "You are an autonomous researcher optimizing code. You can see the actual file content. Each attempt MUST try something fundamentally different from previous attempts. If an approach was discarded, do NOT try a variant of it — try a completely different strategy. Learn from error details and metric output. Respond with JSON only." },
                     { role: "user", content: prompt }
                   ],
                   temperature: 0.6
@@ -6776,6 +6798,17 @@ Be concrete about surfaces — name actual pages/flows. Choose the best stack fo
               const value = attempt.value !== null ? ` → ${attempt.value}` : " → failed";
               console.log(`     ${icon}${value}`);
             },
+            onScorecard({ attempt, metric: metricInfo, baseline: bl, currentBest: cb }) {
+              try {
+                const { computeScorecard: compSc, saveScorecard: saveSc } = require("./scorecard.js");
+                const sc = compSc({
+                  task: { id: `exp-${attempt.number}`, name: `experiment attempt ${attempt.number}`, chosenOption: attempt.hypothesis },
+                  receipt: { changedFiles: [], verification: [{ ok: attempt.status === "keep", command: metricInfo.command }], review: { verdict: attempt.status === "keep" ? "ship" : "block", concerns: [] }, mutated: attempt.status !== "crash" },
+                  userAction: attempt.status === "keep" ? "accepted" : "redo"
+                });
+                saveSc({ cwd: context.cwd, scorecard: sc });
+              } catch {}
+            },
             onDone() {
               spinner.stop();
             }

package/src/experiment.js CHANGED Viewed

@@ -269,9 +269,11 @@ export function formatExperimentSummary(results) {
  *   onAttemptEnd(attempt)
  *   onDone(results)
  *   onInterrupted() → boolean  (check if user pressed Ctrl+C)
- *   planChange({ goal, constraints, metric, previousAttempts, cwd, simplicityBias }) → { hypothesis, prompt }
+ *   planChange({ goal, constraints, metric, previousAttempts, cwd, simplicityBias, targetFileContent }) → { hypothesis, prompt }
  *   executeChange({ prompt, cwd }) → void
  *   fixCrash({ error, prompt, cwd }) → boolean (true if fixed, false to give up)
+ *   readTargetFile({ cwd, goal }) → string|null  (read the file being optimized)
+ *   onScorecard(scorecard) → void
  * }
  */
 export async function runExperimentLoop({ charter, cwd, tag, handlers = {} }) {
@@ -318,6 +320,14 @@ export async function runExperimentLoop({ charter, cwd, tag, handlers = {} }) {
     // Check for interruption
     if (handlers.onInterrupted && handlers.onInterrupted()) break;
+    // Read target file before planning (so planner sees actual code)
+    let targetFileContent = null;
+    if (handlers.readTargetFile) {
+      try {
+        targetFileContent = await handlers.readTargetFile({ cwd, goal: charter.goal });
+      } catch {}
+    }
     // Plan the change
     let hypothesis = `attempt ${attemptNum}`;
     let changePrompt = charter.goal;
@@ -329,6 +339,7 @@ export async function runExperimentLoop({ charter, cwd, tag, handlers = {} }) {
           metric: { command: metric.command, direction: metric.direction, currentBest },
           previousAttempts: attempts,
           simplicityBias: charter.simplicityBias !== false,
+          targetFileContent,
           cwd
         });
         hypothesis = plan.hypothesis || hypothesis;
@@ -366,11 +377,12 @@ export async function runExperimentLoop({ charter, cwd, tag, handlers = {} }) {
     }
     if (!executed) {
-      // Crash — revert and log
+      // Crash — revert and log with error details
       await gitRevert({ cwd, sha: lastGoodSha });
-      const attempt = { number: attemptNum, hypothesis, value: null, baseline: currentBest, status: "crash", error: true };
+      const errorMsg = crashError instanceof Error ? crashError.message : String(crashError || "unknown");
+      const attempt = { number: attemptNum, hypothesis, value: null, baseline: currentBest, status: "crash", error: true, errorDetail: errorMsg.slice(0, 200) };
       attempts.push(attempt);
-      await appendResult({ cwd, commit: "-------", value: 0, status: "crash", description: hypothesis });
+      await appendResult({ cwd, commit: "-------", value: 0, status: "crash", description: `${hypothesis} | error: ${errorMsg.slice(0, 100)}` });
       if (handlers.onAttemptEnd) handlers.onAttemptEnd(attempt);
       continue;
     }
@@ -379,11 +391,11 @@ export async function runExperimentLoop({ charter, cwd, tag, handlers = {} }) {
     const result = await runMetric({ command: metric.command, extract: metric.extract, cwd });
     if (!result.ok) {
-      // Metric failed (runtime crash) — revert
+      // Metric failed (runtime crash) — revert with error output
       await gitRevert({ cwd, sha: lastGoodSha });
-      const attempt = { number: attemptNum, hypothesis, value: null, baseline: currentBest, status: "crash", error: true };
+      const attempt = { number: attemptNum, hypothesis, value: null, baseline: currentBest, status: "crash", error: true, errorDetail: result.raw?.slice(0, 200) || "metric failed" };
       attempts.push(attempt);
-      await appendResult({ cwd, commit: "-------", value: 0, status: "crash", description: `${hypothesis} (metric failed)` });
+      await appendResult({ cwd, commit: "-------", value: 0, status: "crash", description: `${hypothesis} | metric: ${result.raw?.slice(0, 80) || "failed"}` });
       if (handlers.onAttemptEnd) handlers.onAttemptEnd(attempt);
       continue;
     }
@@ -394,7 +406,8 @@ export async function runExperimentLoop({ charter, cwd, tag, handlers = {} }) {
       value: result.value,
       baseline: currentBest,
       status: "discard",
-      error: false
+      error: false,
+      metricOutput: result.raw?.slice(0, 200) || null
     };
     if (isBetter(result.value, currentBest, metric.direction)) {
@@ -408,7 +421,12 @@ export async function runExperimentLoop({ charter, cwd, tag, handlers = {} }) {
     } else {
       // Discard — revert to last good state
       await gitRevert({ cwd, sha: lastGoodSha });
-      await appendResult({ cwd, commit: lastGoodSha, value: result.value, status: "discard", description: hypothesis });
+      await appendResult({ cwd, commit: lastGoodSha, value: result.value, status: "discard", description: `${hypothesis} | no improvement (${result.value} vs ${currentBest})` });
+    }
+    // Emit scorecard for this attempt
+    if (handlers.onScorecard) {
+      try { handlers.onScorecard({ attempt, metric: { command: metric.command, direction: metric.direction }, baseline, currentBest }); } catch {}
     }
     attempts.push(attempt);