@tritard/waterbrother 0.14.0 → 0.14.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@tritard/waterbrother",
3
- "version": "0.14.0",
3
+ "version": "0.14.2",
4
4
  "description": "Waterbrother: Grok-powered coding CLI with local tools, sessions, operator modes, and approval controls",
5
5
  "type": "module",
6
6
  "bin": {
package/src/cli.js CHANGED
@@ -4704,29 +4704,74 @@ async function readInteractiveLine(options = {}) {
4704
4704
  }
4705
4705
  }
4706
4706
 
4707
+ // Track whether keypress handler processed the last data chunk
4708
+ let keypressHandled = false;
4709
+ const origOnKeypress = onKeypress;
4710
+ onKeypress = function (str, key) {
4711
+ keypressHandled = true;
4712
+ origOnKeypress(str, key);
4713
+ };
4714
+
4707
4715
  function onData(chunk) {
4708
4716
  if (settled) return;
4709
4717
  const text = String(chunk || "");
4710
4718
  if (!text) return;
4711
- const normalized = normalizePastedChunk(text);
4712
- const looksLikeBracketedPaste = text.includes("\x1b[200~") || text.includes("\x1b[201~");
4713
- const looksLikePastedBlock =
4714
- looksLikeBracketedPaste ||
4715
- (normalized.length > 1 && normalized.includes("\n") && /[^\n]/.test(normalized));
4716
-
4717
- if (!looksLikePastedBlock) {
4718
- return;
4719
- }
4720
4719
 
4721
- if (normalized) {
4722
- buffer += normalized;
4723
- selectedIndex = 0;
4724
- render();
4725
- }
4720
+ // Give keypress handler a chance to fire first (it's synchronous)
4721
+ keypressHandled = false;
4722
+ // readline.emitKeypressEvents will fire onKeypress synchronously before onData returns
4723
+ // on some platforms. Set a microtask to check if keypress handled it.
4724
+ Promise.resolve().then(() => {
4725
+ if (keypressHandled || settled) return;
4726
+
4727
+ // Keypress didn't fire — handle raw data as fallback
4728
+ const normalized = normalizePastedChunk(text);
4729
+ const looksLikeBracketedPaste = text.includes("\x1b[200~") || text.includes("\x1b[201~");
4730
+ const looksLikePastedBlock =
4731
+ looksLikeBracketedPaste ||
4732
+ (normalized.length > 1 && normalized.includes("\n") && /[^\n]/.test(normalized));
4733
+
4734
+ if (looksLikePastedBlock) {
4735
+ if (normalized) {
4736
+ buffer += normalized;
4737
+ selectedIndex = 0;
4738
+ render();
4739
+ }
4740
+ ignoredPastePrintable += [...normalized].filter((char) => char !== "\n").length;
4741
+ ignoredPasteEnters += (normalized.match(/\n/g) || []).length;
4742
+ pasteSuppressUntil = Date.now() + 300;
4743
+ return;
4744
+ }
4726
4745
 
4727
- ignoredPastePrintable += [...normalized].filter((char) => char !== "\n").length;
4728
- ignoredPasteEnters += (normalized.match(/\n/g) || []).length;
4729
- pasteSuppressUntil = Date.now() + 300;
4746
+ // Single-character fallback: the keypress emitter failed to fire
4747
+ for (const ch of text) {
4748
+ if (ch === "\r" || ch === "\n") {
4749
+ handleSubmit();
4750
+ return;
4751
+ }
4752
+ if (ch === "\u0003") {
4753
+ if (settled) return;
4754
+ settled = true;
4755
+ cleanup();
4756
+ output.write("\n");
4757
+ reject(new Error("Interrupted"));
4758
+ return;
4759
+ }
4760
+ if (ch === "\u007f" || ch === "\b") {
4761
+ if (buffer.length > 0) {
4762
+ buffer = buffer.slice(0, -1);
4763
+ selectedIndex = 0;
4764
+ render();
4765
+ }
4766
+ continue;
4767
+ }
4768
+ if (ch.charCodeAt(0) < 32 || ch.charCodeAt(0) === 127) continue;
4769
+ if (ch.includes("\x1b")) continue;
4770
+ buffer += ch;
4771
+ selectedIndex = 0;
4772
+ render();
4773
+ }
4774
+ });
4730
4775
  }
4731
4776
 
4732
4777
  // Ensure stdin is in a clean state before attaching listeners.
@@ -6706,11 +6751,32 @@ Be concrete about surfaces — name actual pages/flows. Choose the best stack fo
6706
6751
  onInterrupted() {
6707
6752
  return interrupted;
6708
6753
  },
6709
- async planChange({ goal, constraints, metric, previousAttempts, simplicityBias }) {
6754
+ async readTargetFile({ cwd, goal }) {
6755
+ // Extract file path from goal (e.g. "reduce lines in router.js" → "src/router.js")
6756
+ const fileMatch = goal.match(/\b([\w./\\-]+\.\w{1,5})\b/);
6757
+ if (!fileMatch) return null;
6758
+ const targetFile = fileMatch[1];
6759
+ try {
6760
+ const candidates = [targetFile, `src/${targetFile}`, `lib/${targetFile}`, `app/${targetFile}`];
6761
+ for (const candidate of candidates) {
6762
+ try {
6763
+ const content = await import("node:fs/promises").then((fs) => fs.readFile(`${cwd}/${candidate}`, "utf8"));
6764
+ return `--- ${candidate} ---\n${content.slice(0, 4000)}`;
6765
+ } catch {}
6766
+ }
6767
+ } catch {}
6768
+ return null;
6769
+ },
6770
+ async planChange({ goal, constraints, metric, previousAttempts, simplicityBias, targetFileContent }) {
6710
6771
  const model = context.runtime.plannerModel || agent.getModel();
6711
- const previousSummary = previousAttempts.map((a) =>
6712
- `attempt ${a.number}: ${a.hypothesis} ${a.status} (${a.value})`
6713
- ).join("\n");
6772
+
6773
+ // Rich attempt history: include WHY things failed
6774
+ const previousSummary = previousAttempts.map((a) => {
6775
+ let line = `attempt ${a.number}: ${a.hypothesis} → ${a.status} (${a.value})`;
6776
+ if (a.errorDetail) line += ` | error: ${a.errorDetail}`;
6777
+ if (a.status === "discard" && a.metricOutput) line += ` | output: ${a.metricOutput.slice(0, 80)}`;
6778
+ return line;
6779
+ }).join("\n");
6714
6780
 
6715
6781
  const simplicityNote = simplicityBias
6716
6782
  ? "\n\nSimplicity criterion: prefer simpler changes. A small improvement from deleting code is better than a large improvement from adding complexity. If improvement is ~0 but code is simpler, that's a win."
@@ -6719,10 +6785,11 @@ Be concrete about surfaces — name actual pages/flows. Choose the best stack fo
6719
6785
  const prompt = [
6720
6786
  `Goal: ${goal}`,
6721
6787
  `Metric: ${metric.command} (${metric.direction} is better, current best: ${metric.currentBest})`,
6788
+ targetFileContent ? `Current file content:\n${targetFileContent}` : "",
6722
6789
  constraints.length > 0 ? `Constraints: ${constraints.join("; ")}` : "",
6723
- previousSummary ? `Previous attempts:\n${previousSummary}` : "",
6724
- "Propose ONE specific code change. Be concrete: which file, which function, what change.",
6725
- `Respond with JSON: { "hypothesis": "one-line summary", "prompt": "detailed executor instructions" }${simplicityNote}`
6790
+ previousSummary ? `Previous attempts (learn from these — do NOT repeat failed ideas):\n${previousSummary}` : "",
6791
+ "Propose ONE specific code change. Reference exact line numbers or function names from the file above. Be concrete.",
6792
+ `Respond with JSON: { "hypothesis": "one-line summary", "prompt": "detailed executor instructions referencing specific lines/functions" }${simplicityNote}`
6726
6793
  ].filter(Boolean).join("\n\n");
6727
6794
 
6728
6795
  try {
@@ -6732,7 +6799,7 @@ Be concrete about surfaces — name actual pages/flows. Choose the best stack fo
6732
6799
  baseUrl: context.runtime.baseUrl,
6733
6800
  model,
6734
6801
  messages: [
6735
- { role: "system", content: "You are an autonomous researcher optimizing code. Each attempt must try something different from previous attempts. Learn from kept vs discarded results. Respond with JSON only." },
6802
+ { role: "system", content: "You are an autonomous researcher optimizing code. You can see the actual file content. Each attempt MUST try something fundamentally different from previous attempts. If an approach was discarded, do NOT try a variant of it — try a completely different strategy. Learn from error details and metric output. Respond with JSON only." },
6736
6803
  { role: "user", content: prompt }
6737
6804
  ],
6738
6805
  temperature: 0.6
@@ -6776,6 +6843,17 @@ Be concrete about surfaces — name actual pages/flows. Choose the best stack fo
6776
6843
  const value = attempt.value !== null ? ` → ${attempt.value}` : " → failed";
6777
6844
  console.log(` ${icon}${value}`);
6778
6845
  },
6846
+ onScorecard({ attempt, metric: metricInfo, baseline: bl, currentBest: cb }) {
6847
+ try {
6848
+ const { computeScorecard: compSc, saveScorecard: saveSc } = require("./scorecard.js");
6849
+ const sc = compSc({
6850
+ task: { id: `exp-${attempt.number}`, name: `experiment attempt ${attempt.number}`, chosenOption: attempt.hypothesis },
6851
+ receipt: { changedFiles: [], verification: [{ ok: attempt.status === "keep", command: metricInfo.command }], review: { verdict: attempt.status === "keep" ? "ship" : "block", concerns: [] }, mutated: attempt.status !== "crash" },
6852
+ userAction: attempt.status === "keep" ? "accepted" : "redo"
6853
+ });
6854
+ saveSc({ cwd: context.cwd, scorecard: sc });
6855
+ } catch {}
6856
+ },
6779
6857
  onDone() {
6780
6858
  spinner.stop();
6781
6859
  }
package/src/experiment.js CHANGED
@@ -269,9 +269,11 @@ export function formatExperimentSummary(results) {
269
269
  * onAttemptEnd(attempt)
270
270
  * onDone(results)
271
271
  * onInterrupted() → boolean (check if user pressed Ctrl+C)
272
- * planChange({ goal, constraints, metric, previousAttempts, cwd, simplicityBias }) → { hypothesis, prompt }
272
+ * planChange({ goal, constraints, metric, previousAttempts, cwd, simplicityBias, targetFileContent }) → { hypothesis, prompt }
273
273
  * executeChange({ prompt, cwd }) → void
274
274
  * fixCrash({ error, prompt, cwd }) → boolean (true if fixed, false to give up)
275
+ * readTargetFile({ cwd, goal }) → string|null (read the file being optimized)
276
+ * onScorecard(scorecard) → void
275
277
  * }
276
278
  */
277
279
  export async function runExperimentLoop({ charter, cwd, tag, handlers = {} }) {
@@ -318,6 +320,14 @@ export async function runExperimentLoop({ charter, cwd, tag, handlers = {} }) {
318
320
  // Check for interruption
319
321
  if (handlers.onInterrupted && handlers.onInterrupted()) break;
320
322
 
323
+ // Read target file before planning (so planner sees actual code)
324
+ let targetFileContent = null;
325
+ if (handlers.readTargetFile) {
326
+ try {
327
+ targetFileContent = await handlers.readTargetFile({ cwd, goal: charter.goal });
328
+ } catch {}
329
+ }
330
+
321
331
  // Plan the change
322
332
  let hypothesis = `attempt ${attemptNum}`;
323
333
  let changePrompt = charter.goal;
@@ -329,6 +339,7 @@ export async function runExperimentLoop({ charter, cwd, tag, handlers = {} }) {
329
339
  metric: { command: metric.command, direction: metric.direction, currentBest },
330
340
  previousAttempts: attempts,
331
341
  simplicityBias: charter.simplicityBias !== false,
342
+ targetFileContent,
332
343
  cwd
333
344
  });
334
345
  hypothesis = plan.hypothesis || hypothesis;
@@ -366,11 +377,12 @@ export async function runExperimentLoop({ charter, cwd, tag, handlers = {} }) {
366
377
  }
367
378
 
368
379
  if (!executed) {
369
- // Crash — revert and log
380
+ // Crash — revert and log with error details
370
381
  await gitRevert({ cwd, sha: lastGoodSha });
371
- const attempt = { number: attemptNum, hypothesis, value: null, baseline: currentBest, status: "crash", error: true };
382
+ const errorMsg = crashError instanceof Error ? crashError.message : String(crashError || "unknown");
383
+ const attempt = { number: attemptNum, hypothesis, value: null, baseline: currentBest, status: "crash", error: true, errorDetail: errorMsg.slice(0, 200) };
372
384
  attempts.push(attempt);
373
- await appendResult({ cwd, commit: "-------", value: 0, status: "crash", description: hypothesis });
385
+ await appendResult({ cwd, commit: "-------", value: 0, status: "crash", description: `${hypothesis} | error: ${errorMsg.slice(0, 100)}` });
374
386
  if (handlers.onAttemptEnd) handlers.onAttemptEnd(attempt);
375
387
  continue;
376
388
  }
@@ -379,11 +391,11 @@ export async function runExperimentLoop({ charter, cwd, tag, handlers = {} }) {
379
391
  const result = await runMetric({ command: metric.command, extract: metric.extract, cwd });
380
392
 
381
393
  if (!result.ok) {
382
- // Metric failed (runtime crash) — revert
394
+ // Metric failed (runtime crash) — revert with error output
383
395
  await gitRevert({ cwd, sha: lastGoodSha });
384
- const attempt = { number: attemptNum, hypothesis, value: null, baseline: currentBest, status: "crash", error: true };
396
+ const attempt = { number: attemptNum, hypothesis, value: null, baseline: currentBest, status: "crash", error: true, errorDetail: result.raw?.slice(0, 200) || "metric failed" };
385
397
  attempts.push(attempt);
386
- await appendResult({ cwd, commit: "-------", value: 0, status: "crash", description: `${hypothesis} (metric failed)` });
398
+ await appendResult({ cwd, commit: "-------", value: 0, status: "crash", description: `${hypothesis} | metric: ${result.raw?.slice(0, 80) || "failed"}` });
387
399
  if (handlers.onAttemptEnd) handlers.onAttemptEnd(attempt);
388
400
  continue;
389
401
  }
@@ -394,7 +406,8 @@ export async function runExperimentLoop({ charter, cwd, tag, handlers = {} }) {
394
406
  value: result.value,
395
407
  baseline: currentBest,
396
408
  status: "discard",
397
- error: false
409
+ error: false,
410
+ metricOutput: result.raw?.slice(0, 200) || null
398
411
  };
399
412
 
400
413
  if (isBetter(result.value, currentBest, metric.direction)) {
@@ -408,7 +421,12 @@ export async function runExperimentLoop({ charter, cwd, tag, handlers = {} }) {
408
421
  } else {
409
422
  // Discard — revert to last good state
410
423
  await gitRevert({ cwd, sha: lastGoodSha });
411
- await appendResult({ cwd, commit: lastGoodSha, value: result.value, status: "discard", description: hypothesis });
424
+ await appendResult({ cwd, commit: lastGoodSha, value: result.value, status: "discard", description: `${hypothesis} | no improvement (${result.value} vs ${currentBest})` });
425
+ }
426
+
427
+ // Emit scorecard for this attempt
428
+ if (handlers.onScorecard) {
429
+ try { handlers.onScorecard({ attempt, metric: { command: metric.command, direction: metric.direction }, baseline, currentBest }); } catch {}
412
430
  }
413
431
 
414
432
  attempts.push(attempt);