@tritard/waterbrother 0.14.0 → 0.14.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/cli.js +103 -25
- package/src/experiment.js +27 -9
package/package.json
CHANGED
package/src/cli.js
CHANGED
|
@@ -4704,29 +4704,74 @@ async function readInteractiveLine(options = {}) {
|
|
|
4704
4704
|
}
|
|
4705
4705
|
}
|
|
4706
4706
|
|
|
4707
|
+
// Flag raised by the keypress path so the raw "data" listener can tell
// whether the most recent chunk was already consumed as keypress events.
let keypressHandled = false;

// Keep a reference to the existing handler and swap in a thin wrapper that
// records the call before delegating with the same arguments.
const origOnKeypress = onKeypress;
onKeypress = (str, key) => {
  keypressHandled = true;
  origOnKeypress(str, key);
};
|
|
4714
|
+
|
|
4707
4715
|
/**
 * Raw "data" listener used as a fallback when the keypress path did not
 * handle a chunk of input.
 *
 * The decision is deferred to a microtask so that every synchronous listener
 * for the same chunk (including the wrapped `onKeypress`, which sets
 * `keypressHandled`) has already run by the time the flag is inspected.
 *
 * @param {Buffer|string} chunk - Raw data received from the input stream.
 * @returns {void}
 */
function onData(chunk) {
  if (settled) return;
  const text = String(chunk || "");
  if (!text) return;

  Promise.resolve().then(() => {
    // Consume the flag here instead of clearing it on entry. Clearing on
    // entry erased the mark whenever the keypress listener ran *before* this
    // listener for the same chunk, so the input was processed twice. Reading
    // and resetting in the microtask works for either listener order.
    const handledByKeypress = keypressHandled;
    keypressHandled = false;
    if (handledByKeypress || settled) return;

    // Keypress didn't fire — handle raw data as fallback.
    const normalized = normalizePastedChunk(text);
    const looksLikeBracketedPaste = text.includes("\x1b[200~") || text.includes("\x1b[201~");
    const looksLikePastedBlock =
      looksLikeBracketedPaste ||
      (normalized.length > 1 && normalized.includes("\n") && /[^\n]/.test(normalized));

    if (looksLikePastedBlock) {
      if (normalized) {
        buffer += normalized;
        selectedIndex = 0;
        render();
      }
      // NOTE(review): these counters and the deadline are only written here;
      // presumably the keypress path reads them to skip the echoed paste —
      // confirm against the rest of readInteractiveLine.
      ignoredPastePrintable += [...normalized].filter((char) => char !== "\n").length;
      ignoredPasteEnters += (normalized.match(/\n/g) || []).length;
      pasteSuppressUntil = Date.now() + 300;
      return;
    }

    // Drop complete terminal escape sequences (CSI "ESC [ ... final" and SS3
    // "ESC O x") before the per-character pass. Skipping only the ESC byte
    // would append the rest of e.g. an arrow key ("[A") to the buffer.
    const typed = text.replace(/\x1b(?:\[[0-?]*[ -\/]*[@-~]|O[ -~])/g, "");

    // Single character fallback — keypress emitter failed to fire.
    for (const ch of typed) {
      if (ch === "\r" || ch === "\n") {
        handleSubmit();
        return;
      }
      if (ch === "\u0003") {
        // Ctrl+C: settle exactly once, restore the terminal, reject the read.
        if (settled) return;
        settled = true;
        cleanup();
        output.write("\n");
        reject(new Error("Interrupted"));
        return;
      }
      if (ch === "\u007f" || ch === "\b") {
        // DEL / backspace: remove the last buffered character, if any.
        if (buffer.length > 0) {
          buffer = buffer.slice(0, -1);
          selectedIndex = 0;
          render();
        }
        continue;
      }
      // Ignore remaining control characters (this also covers a stray ESC,
      // code 27, so no separate "\x1b" check is needed).
      if (ch.charCodeAt(0) < 32) continue;
      buffer += ch;
      selectedIndex = 0;
      render();
    }
  });
}
|
|
4731
4776
|
|
|
4732
4777
|
// Ensure stdin is in a clean state before attaching listeners.
|
|
@@ -6706,11 +6751,32 @@ Be concrete about surfaces — name actual pages/flows. Choose the best stack fo
|
|
|
6706
6751
|
onInterrupted() {
|
|
6707
6752
|
return interrupted;
|
|
6708
6753
|
},
|
|
6709
|
-
async
|
|
6754
|
+
async readTargetFile({ cwd, goal }) {
|
|
6755
|
+
// Extract file path from goal (e.g. "reduce lines in router.js" → "src/router.js")
|
|
6756
|
+
const fileMatch = goal.match(/\b([\w./\\-]+\.\w{1,5})\b/);
|
|
6757
|
+
if (!fileMatch) return null;
|
|
6758
|
+
const targetFile = fileMatch[1];
|
|
6759
|
+
try {
|
|
6760
|
+
const candidates = [targetFile, `src/${targetFile}`, `lib/${targetFile}`, `app/${targetFile}`];
|
|
6761
|
+
for (const candidate of candidates) {
|
|
6762
|
+
try {
|
|
6763
|
+
const content = await import("node:fs/promises").then((fs) => fs.readFile(`${cwd}/${candidate}`, "utf8"));
|
|
6764
|
+
return `--- ${candidate} ---\n${content.slice(0, 4000)}`;
|
|
6765
|
+
} catch {}
|
|
6766
|
+
}
|
|
6767
|
+
} catch {}
|
|
6768
|
+
return null;
|
|
6769
|
+
},
|
|
6770
|
+
async planChange({ goal, constraints, metric, previousAttempts, simplicityBias, targetFileContent }) {
|
|
6710
6771
|
const model = context.runtime.plannerModel || agent.getModel();
|
|
6711
|
-
|
|
6712
|
-
|
|
6713
|
-
|
|
6772
|
+
|
|
6773
|
+
// Rich attempt history — include WHY things failed
|
|
6774
|
+
const previousSummary = previousAttempts.map((a) => {
|
|
6775
|
+
let line = `attempt ${a.number}: ${a.hypothesis} → ${a.status} (${a.value})`;
|
|
6776
|
+
if (a.errorDetail) line += ` | error: ${a.errorDetail}`;
|
|
6777
|
+
if (a.status === "discard" && a.metricOutput) line += ` | output: ${a.metricOutput.slice(0, 80)}`;
|
|
6778
|
+
return line;
|
|
6779
|
+
}).join("\n");
|
|
6714
6780
|
|
|
6715
6781
|
const simplicityNote = simplicityBias
|
|
6716
6782
|
? "\n\nSimplicity criterion: prefer simpler changes. A small improvement from deleting code is better than a large improvement from adding complexity. If improvement is ~0 but code is simpler, that's a win."
|
|
@@ -6719,10 +6785,11 @@ Be concrete about surfaces — name actual pages/flows. Choose the best stack fo
|
|
|
6719
6785
|
const prompt = [
|
|
6720
6786
|
`Goal: ${goal}`,
|
|
6721
6787
|
`Metric: ${metric.command} (${metric.direction} is better, current best: ${metric.currentBest})`,
|
|
6788
|
+
targetFileContent ? `Current file content:\n${targetFileContent}` : "",
|
|
6722
6789
|
constraints.length > 0 ? `Constraints: ${constraints.join("; ")}` : "",
|
|
6723
|
-
previousSummary ? `Previous attempts:\n${previousSummary}` : "",
|
|
6724
|
-
"Propose ONE specific code change.
|
|
6725
|
-
`Respond with JSON: { "hypothesis": "one-line summary", "prompt": "detailed executor instructions" }${simplicityNote}`
|
|
6790
|
+
previousSummary ? `Previous attempts (learn from these — do NOT repeat failed ideas):\n${previousSummary}` : "",
|
|
6791
|
+
"Propose ONE specific code change. Reference exact line numbers or function names from the file above. Be concrete.",
|
|
6792
|
+
`Respond with JSON: { "hypothesis": "one-line summary", "prompt": "detailed executor instructions referencing specific lines/functions" }${simplicityNote}`
|
|
6726
6793
|
].filter(Boolean).join("\n\n");
|
|
6727
6794
|
|
|
6728
6795
|
try {
|
|
@@ -6732,7 +6799,7 @@ Be concrete about surfaces — name actual pages/flows. Choose the best stack fo
|
|
|
6732
6799
|
baseUrl: context.runtime.baseUrl,
|
|
6733
6800
|
model,
|
|
6734
6801
|
messages: [
|
|
6735
|
-
{ role: "system", content: "You are an autonomous researcher optimizing code. Each attempt
|
|
6802
|
+
{ role: "system", content: "You are an autonomous researcher optimizing code. You can see the actual file content. Each attempt MUST try something fundamentally different from previous attempts. If an approach was discarded, do NOT try a variant of it — try a completely different strategy. Learn from error details and metric output. Respond with JSON only." },
|
|
6736
6803
|
{ role: "user", content: prompt }
|
|
6737
6804
|
],
|
|
6738
6805
|
temperature: 0.6
|
|
@@ -6776,6 +6843,17 @@ Be concrete about surfaces — name actual pages/flows. Choose the best stack fo
|
|
|
6776
6843
|
const value = attempt.value !== null ? ` → ${attempt.value}` : " → failed";
|
|
6777
6844
|
console.log(` ${icon}${value}`);
|
|
6778
6845
|
},
|
|
6846
|
+
onScorecard({ attempt, metric: metricInfo, baseline: bl, currentBest: cb }) {
|
|
6847
|
+
try {
|
|
6848
|
+
const { computeScorecard: compSc, saveScorecard: saveSc } = require("./scorecard.js");
|
|
6849
|
+
const sc = compSc({
|
|
6850
|
+
task: { id: `exp-${attempt.number}`, name: `experiment attempt ${attempt.number}`, chosenOption: attempt.hypothesis },
|
|
6851
|
+
receipt: { changedFiles: [], verification: [{ ok: attempt.status === "keep", command: metricInfo.command }], review: { verdict: attempt.status === "keep" ? "ship" : "block", concerns: [] }, mutated: attempt.status !== "crash" },
|
|
6852
|
+
userAction: attempt.status === "keep" ? "accepted" : "redo"
|
|
6853
|
+
});
|
|
6854
|
+
saveSc({ cwd: context.cwd, scorecard: sc });
|
|
6855
|
+
} catch {}
|
|
6856
|
+
},
|
|
6779
6857
|
onDone() {
|
|
6780
6858
|
spinner.stop();
|
|
6781
6859
|
}
|
package/src/experiment.js
CHANGED
|
@@ -269,9 +269,11 @@ export function formatExperimentSummary(results) {
|
|
|
269
269
|
* onAttemptEnd(attempt)
|
|
270
270
|
* onDone(results)
|
|
271
271
|
* onInterrupted() → boolean (check if user pressed Ctrl+C)
|
|
272
|
-
* planChange({ goal, constraints, metric, previousAttempts, cwd, simplicityBias }) → { hypothesis, prompt }
|
|
272
|
+
* planChange({ goal, constraints, metric, previousAttempts, cwd, simplicityBias, targetFileContent }) → { hypothesis, prompt }
|
|
273
273
|
* executeChange({ prompt, cwd }) → void
|
|
274
274
|
* fixCrash({ error, prompt, cwd }) → boolean (true if fixed, false to give up)
|
|
275
|
+
* readTargetFile({ cwd, goal }) → string|null (read the file being optimized)
|
|
276
|
+
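* onScorecard({ attempt, metric, baseline, currentBest }) → void (receives the raw attempt data; the handler builds the scorecard itself)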
|
|
275
277
|
* }
|
|
276
278
|
*/
|
|
277
279
|
export async function runExperimentLoop({ charter, cwd, tag, handlers = {} }) {
|
|
@@ -318,6 +320,14 @@ export async function runExperimentLoop({ charter, cwd, tag, handlers = {} }) {
|
|
|
318
320
|
// Check for interruption
|
|
319
321
|
if (handlers.onInterrupted && handlers.onInterrupted()) break;
|
|
320
322
|
|
|
323
|
+
// Read target file before planning (so planner sees actual code)
|
|
324
|
+
let targetFileContent = null;
|
|
325
|
+
if (handlers.readTargetFile) {
|
|
326
|
+
try {
|
|
327
|
+
targetFileContent = await handlers.readTargetFile({ cwd, goal: charter.goal });
|
|
328
|
+
} catch {}
|
|
329
|
+
}
|
|
330
|
+
|
|
321
331
|
// Plan the change
|
|
322
332
|
let hypothesis = `attempt ${attemptNum}`;
|
|
323
333
|
let changePrompt = charter.goal;
|
|
@@ -329,6 +339,7 @@ export async function runExperimentLoop({ charter, cwd, tag, handlers = {} }) {
|
|
|
329
339
|
metric: { command: metric.command, direction: metric.direction, currentBest },
|
|
330
340
|
previousAttempts: attempts,
|
|
331
341
|
simplicityBias: charter.simplicityBias !== false,
|
|
342
|
+
targetFileContent,
|
|
332
343
|
cwd
|
|
333
344
|
});
|
|
334
345
|
hypothesis = plan.hypothesis || hypothesis;
|
|
@@ -366,11 +377,12 @@ export async function runExperimentLoop({ charter, cwd, tag, handlers = {} }) {
|
|
|
366
377
|
}
|
|
367
378
|
|
|
368
379
|
if (!executed) {
|
|
369
|
-
// Crash — revert and log
|
|
380
|
+
// Crash — revert and log with error details
|
|
370
381
|
await gitRevert({ cwd, sha: lastGoodSha });
|
|
371
|
-
const
|
|
382
|
+
const errorMsg = crashError instanceof Error ? crashError.message : String(crashError || "unknown");
|
|
383
|
+
const attempt = { number: attemptNum, hypothesis, value: null, baseline: currentBest, status: "crash", error: true, errorDetail: errorMsg.slice(0, 200) };
|
|
372
384
|
attempts.push(attempt);
|
|
373
|
-
await appendResult({ cwd, commit: "-------", value: 0, status: "crash", description: hypothesis });
|
|
385
|
+
await appendResult({ cwd, commit: "-------", value: 0, status: "crash", description: `${hypothesis} | error: ${errorMsg.slice(0, 100)}` });
|
|
374
386
|
if (handlers.onAttemptEnd) handlers.onAttemptEnd(attempt);
|
|
375
387
|
continue;
|
|
376
388
|
}
|
|
@@ -379,11 +391,11 @@ export async function runExperimentLoop({ charter, cwd, tag, handlers = {} }) {
|
|
|
379
391
|
const result = await runMetric({ command: metric.command, extract: metric.extract, cwd });
|
|
380
392
|
|
|
381
393
|
if (!result.ok) {
|
|
382
|
-
// Metric failed (runtime crash) — revert
|
|
394
|
+
// Metric failed (runtime crash) — revert with error output
|
|
383
395
|
await gitRevert({ cwd, sha: lastGoodSha });
|
|
384
|
-
const attempt = { number: attemptNum, hypothesis, value: null, baseline: currentBest, status: "crash", error: true };
|
|
396
|
+
const attempt = { number: attemptNum, hypothesis, value: null, baseline: currentBest, status: "crash", error: true, errorDetail: result.raw?.slice(0, 200) || "metric failed" };
|
|
385
397
|
attempts.push(attempt);
|
|
386
|
-
await appendResult({ cwd, commit: "-------", value: 0, status: "crash", description: `${hypothesis}
|
|
398
|
+
await appendResult({ cwd, commit: "-------", value: 0, status: "crash", description: `${hypothesis} | metric: ${result.raw?.slice(0, 80) || "failed"}` });
|
|
387
399
|
if (handlers.onAttemptEnd) handlers.onAttemptEnd(attempt);
|
|
388
400
|
continue;
|
|
389
401
|
}
|
|
@@ -394,7 +406,8 @@ export async function runExperimentLoop({ charter, cwd, tag, handlers = {} }) {
|
|
|
394
406
|
value: result.value,
|
|
395
407
|
baseline: currentBest,
|
|
396
408
|
status: "discard",
|
|
397
|
-
error: false
|
|
409
|
+
error: false,
|
|
410
|
+
metricOutput: result.raw?.slice(0, 200) || null
|
|
398
411
|
};
|
|
399
412
|
|
|
400
413
|
if (isBetter(result.value, currentBest, metric.direction)) {
|
|
@@ -408,7 +421,12 @@ export async function runExperimentLoop({ charter, cwd, tag, handlers = {} }) {
|
|
|
408
421
|
} else {
|
|
409
422
|
// Discard — revert to last good state
|
|
410
423
|
await gitRevert({ cwd, sha: lastGoodSha });
|
|
411
|
-
await appendResult({ cwd, commit: lastGoodSha, value: result.value, status: "discard", description: hypothesis });
|
|
424
|
+
await appendResult({ cwd, commit: lastGoodSha, value: result.value, status: "discard", description: `${hypothesis} | no improvement (${result.value} vs ${currentBest})` });
|
|
425
|
+
}
|
|
426
|
+
|
|
427
|
+
// Emit scorecard for this attempt
|
|
428
|
+
if (handlers.onScorecard) {
|
|
429
|
+
try { handlers.onScorecard({ attempt, metric: { command: metric.command, direction: metric.direction }, baseline, currentBest }); } catch {}
|
|
412
430
|
}
|
|
413
431
|
|
|
414
432
|
attempts.push(attempt);
|