@tritard/waterbrother 0.14.0 → 0.14.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/cli.js +41 -8
- package/src/experiment.js +27 -9
package/package.json
CHANGED
package/src/cli.js
CHANGED
|
@@ -6706,11 +6706,32 @@ Be concrete about surfaces — name actual pages/flows. Choose the best stack fo
|
|
|
6706
6706
|
onInterrupted() {
|
|
6707
6707
|
return interrupted;
|
|
6708
6708
|
},
|
|
6709
|
-
async
|
|
6709
|
+
async readTargetFile({ cwd, goal }) {
|
|
6710
|
+
// Extract a file-like token from the goal (e.g. "reduce lines in router.js" → "router.js"); src/, lib/ and app/ prefixes are tried below
|
|
6711
|
+
const fileMatch = goal.match(/\b([\w./\\-]+\.\w{1,5})\b/);
|
|
6712
|
+
if (!fileMatch) return null;
|
|
6713
|
+
const targetFile = fileMatch[1];
|
|
6714
|
+
try {
|
|
6715
|
+
const candidates = [targetFile, `src/${targetFile}`, `lib/${targetFile}`, `app/${targetFile}`];
|
|
6716
|
+
for (const candidate of candidates) {
|
|
6717
|
+
try {
|
|
6718
|
+
const content = await import("node:fs/promises").then((fs) => fs.readFile(`${cwd}/${candidate}`, "utf8"));
|
|
6719
|
+
return `--- ${candidate} ---\n${content.slice(0, 4000)}`;
|
|
6720
|
+
} catch {}
|
|
6721
|
+
}
|
|
6722
|
+
} catch {}
|
|
6723
|
+
return null;
|
|
6724
|
+
},
|
|
6725
|
+
async planChange({ goal, constraints, metric, previousAttempts, simplicityBias, targetFileContent }) {
|
|
6710
6726
|
const model = context.runtime.plannerModel || agent.getModel();
|
|
6711
|
-
|
|
6712
|
-
|
|
6713
|
-
|
|
6727
|
+
|
|
6728
|
+
// Rich attempt history — include WHY things failed
|
|
6729
|
+
const previousSummary = previousAttempts.map((a) => {
|
|
6730
|
+
let line = `attempt ${a.number}: ${a.hypothesis} → ${a.status} (${a.value})`;
|
|
6731
|
+
if (a.errorDetail) line += ` | error: ${a.errorDetail}`;
|
|
6732
|
+
if (a.status === "discard" && a.metricOutput) line += ` | output: ${a.metricOutput.slice(0, 80)}`;
|
|
6733
|
+
return line;
|
|
6734
|
+
}).join("\n");
|
|
6714
6735
|
|
|
6715
6736
|
const simplicityNote = simplicityBias
|
|
6716
6737
|
? "\n\nSimplicity criterion: prefer simpler changes. A small improvement from deleting code is better than a large improvement from adding complexity. If improvement is ~0 but code is simpler, that's a win."
|
|
@@ -6719,10 +6740,11 @@ Be concrete about surfaces — name actual pages/flows. Choose the best stack fo
|
|
|
6719
6740
|
const prompt = [
|
|
6720
6741
|
`Goal: ${goal}`,
|
|
6721
6742
|
`Metric: ${metric.command} (${metric.direction} is better, current best: ${metric.currentBest})`,
|
|
6743
|
+
targetFileContent ? `Current file content:\n${targetFileContent}` : "",
|
|
6722
6744
|
constraints.length > 0 ? `Constraints: ${constraints.join("; ")}` : "",
|
|
6723
|
-
previousSummary ? `Previous attempts:\n${previousSummary}` : "",
|
|
6724
|
-
"Propose ONE specific code change.
|
|
6725
|
-
`Respond with JSON: { "hypothesis": "one-line summary", "prompt": "detailed executor instructions" }${simplicityNote}`
|
|
6745
|
+
previousSummary ? `Previous attempts (learn from these — do NOT repeat failed ideas):\n${previousSummary}` : "",
|
|
6746
|
+
"Propose ONE specific code change. Reference exact line numbers or function names from the file above. Be concrete.",
|
|
6747
|
+
`Respond with JSON: { "hypothesis": "one-line summary", "prompt": "detailed executor instructions referencing specific lines/functions" }${simplicityNote}`
|
|
6726
6748
|
].filter(Boolean).join("\n\n");
|
|
6727
6749
|
|
|
6728
6750
|
try {
|
|
@@ -6732,7 +6754,7 @@ Be concrete about surfaces — name actual pages/flows. Choose the best stack fo
|
|
|
6732
6754
|
baseUrl: context.runtime.baseUrl,
|
|
6733
6755
|
model,
|
|
6734
6756
|
messages: [
|
|
6735
|
-
{ role: "system", content: "You are an autonomous researcher optimizing code. Each attempt
|
|
6757
|
+
{ role: "system", content: "You are an autonomous researcher optimizing code. You can see the actual file content. Each attempt MUST try something fundamentally different from previous attempts. If an approach was discarded, do NOT try a variant of it — try a completely different strategy. Learn from error details and metric output. Respond with JSON only." },
|
|
6736
6758
|
{ role: "user", content: prompt }
|
|
6737
6759
|
],
|
|
6738
6760
|
temperature: 0.6
|
|
@@ -6776,6 +6798,17 @@ Be concrete about surfaces — name actual pages/flows. Choose the best stack fo
|
|
|
6776
6798
|
const value = attempt.value !== null ? ` → ${attempt.value}` : " → failed";
|
|
6777
6799
|
console.log(` ${icon}${value}`);
|
|
6778
6800
|
},
|
|
6801
|
+
onScorecard({ attempt, metric: metricInfo, baseline: bl, currentBest: cb }) {
|
|
6802
|
+
try {
|
|
6803
|
+
const { computeScorecard: compSc, saveScorecard: saveSc } = require("./scorecard.js");
|
|
6804
|
+
const sc = compSc({
|
|
6805
|
+
task: { id: `exp-${attempt.number}`, name: `experiment attempt ${attempt.number}`, chosenOption: attempt.hypothesis },
|
|
6806
|
+
receipt: { changedFiles: [], verification: [{ ok: attempt.status === "keep", command: metricInfo.command }], review: { verdict: attempt.status === "keep" ? "ship" : "block", concerns: [] }, mutated: attempt.status !== "crash" },
|
|
6807
|
+
userAction: attempt.status === "keep" ? "accepted" : "redo"
|
|
6808
|
+
});
|
|
6809
|
+
saveSc({ cwd: context.cwd, scorecard: sc });
|
|
6810
|
+
} catch {}
|
|
6811
|
+
},
|
|
6779
6812
|
onDone() {
|
|
6780
6813
|
spinner.stop();
|
|
6781
6814
|
}
|
package/src/experiment.js
CHANGED
|
@@ -269,9 +269,11 @@ export function formatExperimentSummary(results) {
|
|
|
269
269
|
* onAttemptEnd(attempt)
|
|
270
270
|
* onDone(results)
|
|
271
271
|
* onInterrupted() → boolean (check if user pressed Ctrl+C)
|
|
272
|
-
* planChange({ goal, constraints, metric, previousAttempts, cwd, simplicityBias }) → { hypothesis, prompt }
|
|
272
|
+
* planChange({ goal, constraints, metric, previousAttempts, cwd, simplicityBias, targetFileContent }) → { hypothesis, prompt }
|
|
273
273
|
* executeChange({ prompt, cwd }) → void
|
|
274
274
|
* fixCrash({ error, prompt, cwd }) → boolean (true if fixed, false to give up)
|
|
275
|
+
* readTargetFile({ cwd, goal }) → string|null (read the file being optimized)
|
|
276
|
+
* onScorecard({ attempt, metric, baseline, currentBest }) → void
|
|
275
277
|
* }
|
|
276
278
|
*/
|
|
277
279
|
export async function runExperimentLoop({ charter, cwd, tag, handlers = {} }) {
|
|
@@ -318,6 +320,14 @@ export async function runExperimentLoop({ charter, cwd, tag, handlers = {} }) {
|
|
|
318
320
|
// Check for interruption
|
|
319
321
|
if (handlers.onInterrupted && handlers.onInterrupted()) break;
|
|
320
322
|
|
|
323
|
+
// Read target file before planning (so planner sees actual code)
|
|
324
|
+
let targetFileContent = null;
|
|
325
|
+
if (handlers.readTargetFile) {
|
|
326
|
+
try {
|
|
327
|
+
targetFileContent = await handlers.readTargetFile({ cwd, goal: charter.goal });
|
|
328
|
+
} catch {}
|
|
329
|
+
}
|
|
330
|
+
|
|
321
331
|
// Plan the change
|
|
322
332
|
let hypothesis = `attempt ${attemptNum}`;
|
|
323
333
|
let changePrompt = charter.goal;
|
|
@@ -329,6 +339,7 @@ export async function runExperimentLoop({ charter, cwd, tag, handlers = {} }) {
|
|
|
329
339
|
metric: { command: metric.command, direction: metric.direction, currentBest },
|
|
330
340
|
previousAttempts: attempts,
|
|
331
341
|
simplicityBias: charter.simplicityBias !== false,
|
|
342
|
+
targetFileContent,
|
|
332
343
|
cwd
|
|
333
344
|
});
|
|
334
345
|
hypothesis = plan.hypothesis || hypothesis;
|
|
@@ -366,11 +377,12 @@ export async function runExperimentLoop({ charter, cwd, tag, handlers = {} }) {
|
|
|
366
377
|
}
|
|
367
378
|
|
|
368
379
|
if (!executed) {
|
|
369
|
-
// Crash — revert and log
|
|
380
|
+
// Crash — revert and log with error details
|
|
370
381
|
await gitRevert({ cwd, sha: lastGoodSha });
|
|
371
|
-
const
|
|
382
|
+
const errorMsg = crashError instanceof Error ? crashError.message : String(crashError || "unknown");
|
|
383
|
+
const attempt = { number: attemptNum, hypothesis, value: null, baseline: currentBest, status: "crash", error: true, errorDetail: errorMsg.slice(0, 200) };
|
|
372
384
|
attempts.push(attempt);
|
|
373
|
-
await appendResult({ cwd, commit: "-------", value: 0, status: "crash", description: hypothesis });
|
|
385
|
+
await appendResult({ cwd, commit: "-------", value: 0, status: "crash", description: `${hypothesis} | error: ${errorMsg.slice(0, 100)}` });
|
|
374
386
|
if (handlers.onAttemptEnd) handlers.onAttemptEnd(attempt);
|
|
375
387
|
continue;
|
|
376
388
|
}
|
|
@@ -379,11 +391,11 @@ export async function runExperimentLoop({ charter, cwd, tag, handlers = {} }) {
|
|
|
379
391
|
const result = await runMetric({ command: metric.command, extract: metric.extract, cwd });
|
|
380
392
|
|
|
381
393
|
if (!result.ok) {
|
|
382
|
-
// Metric failed (runtime crash) — revert
|
|
394
|
+
// Metric failed (runtime crash) — revert with error output
|
|
383
395
|
await gitRevert({ cwd, sha: lastGoodSha });
|
|
384
|
-
const attempt = { number: attemptNum, hypothesis, value: null, baseline: currentBest, status: "crash", error: true };
|
|
396
|
+
const attempt = { number: attemptNum, hypothesis, value: null, baseline: currentBest, status: "crash", error: true, errorDetail: result.raw?.slice(0, 200) || "metric failed" };
|
|
385
397
|
attempts.push(attempt);
|
|
386
|
-
await appendResult({ cwd, commit: "-------", value: 0, status: "crash", description: `${hypothesis}
|
|
398
|
+
await appendResult({ cwd, commit: "-------", value: 0, status: "crash", description: `${hypothesis} | metric: ${result.raw?.slice(0, 80) || "failed"}` });
|
|
387
399
|
if (handlers.onAttemptEnd) handlers.onAttemptEnd(attempt);
|
|
388
400
|
continue;
|
|
389
401
|
}
|
|
@@ -394,7 +406,8 @@ export async function runExperimentLoop({ charter, cwd, tag, handlers = {} }) {
|
|
|
394
406
|
value: result.value,
|
|
395
407
|
baseline: currentBest,
|
|
396
408
|
status: "discard",
|
|
397
|
-
error: false
|
|
409
|
+
error: false,
|
|
410
|
+
metricOutput: result.raw?.slice(0, 200) || null
|
|
398
411
|
};
|
|
399
412
|
|
|
400
413
|
if (isBetter(result.value, currentBest, metric.direction)) {
|
|
@@ -408,7 +421,12 @@ export async function runExperimentLoop({ charter, cwd, tag, handlers = {} }) {
|
|
|
408
421
|
} else {
|
|
409
422
|
// Discard — revert to last good state
|
|
410
423
|
await gitRevert({ cwd, sha: lastGoodSha });
|
|
411
|
-
await appendResult({ cwd, commit: lastGoodSha, value: result.value, status: "discard", description: hypothesis });
|
|
424
|
+
await appendResult({ cwd, commit: lastGoodSha, value: result.value, status: "discard", description: `${hypothesis} | no improvement (${result.value} vs ${currentBest})` });
|
|
425
|
+
}
|
|
426
|
+
|
|
427
|
+
// Emit scorecard for this attempt
|
|
428
|
+
if (handlers.onScorecard) {
|
|
429
|
+
try { handlers.onScorecard({ attempt, metric: { command: metric.command, direction: metric.direction }, baseline, currentBest }); } catch {}
|
|
412
430
|
}
|
|
413
431
|
|
|
414
432
|
attempts.push(attempt);
|