npm - @tritard/waterbrother - Versions diffs - 0.12.8 → 0.13.0 - Mend

@tritard/waterbrother 0.12.8 → 0.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5) hide show

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@tritard/waterbrother",
-  "version": "0.12.8",
+  "version": "0.13.0",
   "description": "Waterbrother: Grok-powered coding CLI with local tools, sessions, operator modes, and approval controls",
   "type": "module",
   "bin": {

package/src/agent.js CHANGED Viewed

@@ -136,6 +136,7 @@ function buildSystemPrompt(profile, experienceMode = "standard", autonomyMode =
         ctxLines.push(`Benchmark site-type rules:\n- ${frontend.benchmarkSiteTypeRules.join("\n- ")}`);
       }
     }
+    if (executionContext.calibration) ctxLines.push(`Scope calibration (from scored build history):\n${executionContext.calibration}`);
     if (executionContext.reminders) ctxLines.push(`Scope reminders:\n${executionContext.reminders}`);
     if (ctxLines.length > 0) base += `\n\nExecution context:\n${ctxLines.join("\n")}`;
   }

package/src/cli.js CHANGED Viewed

@@ -33,6 +33,7 @@ import { runBuildWorkflow, startFeatureTask, runChallengeWorkflow } from "./work
 import { createPanelRenderer, buildPanelState } from "./panel.js";
 import { deriveTaskNameFromPrompt, nextActionsForState, routeNaturalInput } from "./router.js";
 import { compressEpisode, compressSessionEpisode, saveEpisode, loadRecentEpisodes, findRelevantEpisodes, buildEpisodicMemoryBlock, buildReminderBlock } from "./episodic.js";
+import { formatScorecardSummary } from "./scorecard.js";
 import { createProduct, loadProduct, saveProduct, hasProduct, generateBlueprint, buildProductContext, detectProductRequest, parseProductIntent, addSurface, createCampaign, getActiveCampaign, matchTemplate, applyTemplate, startPreview, killPreview } from "./product.js";
 import { runQualityChecks, formatQualityFindings, buildQualityFixPrompt } from "./quality.js";
 import { scanForInitiatives, formatInitiatives, buildInitiativeFixPrompt } from "./initiative.js";
@@ -5244,7 +5245,7 @@ Be concrete about surfaces — name actual pages/flows. Choose the best stack fo
       }
       // Refine: natural language changes to a built product
-      if (/^(fix these|fix quality|fix initiatives|fix product)$/.test(lower) && !context.runtime.activeTask) {
+      if (/^(fix these|fix quality|fix initiatives|fix product|fix issues|fix the issues|fix it|fix everything|fix all)$/.test(lower) && !context.runtime.activeTask) {
         // Only handle product fixes when no task is active — otherwise let router handle "fix these" for task reviews
         if (context.runtime.pendingInitiatives?.length > 0) {
           const spinner = createProgressSpinner("fixing product gaps...");
@@ -5270,7 +5271,7 @@ Be concrete about surfaces — name actual pages/flows. Choose the best stack fo
         }
       }
-      if (/^(fix these|fix quality)$/.test(lower) && !context.runtime.activeTask) {
+      if (/^(fix these|fix quality|fix issues|fix the issues|fix it|fix everything|fix all)$/.test(lower) && !context.runtime.activeTask) {
         const spinner = createProgressSpinner("fixing quality issues...");
         try {
           const findings = await runQualityChecks({ cwd: context.cwd });
@@ -6446,6 +6447,11 @@ Be concrete about surfaces — name actual pages/flows. Choose the best stack fo
           lines.push(`${dim("impact:")} ${parts.join(", ")}`);
         }
+        // Scorecard
+        if (buildResult.scorecard) {
+          lines.push(`${dim("score:")} ${formatScorecardSummary(buildResult.scorecard)}`);
+        }
         // Sentinel verdict
         if (buildResult.review) {
           const v = buildResult.review.verdict;

package/src/scorecard.js ADDED Viewed

@@ -0,0 +1,321 @@
+import fs from "node:fs/promises";
+import path from "node:path";
+import crypto from "node:crypto";
+// Cap on index.json entries; saveScorecard truncates the index to this length.
+const MAX_INDEX_ENTRIES = 200;
+// Character budget that buildCalibrationBlock checks before appending a line.
+const MAX_CALIBRATION_CHARS = 1500;
+/**
+ * Directory under which scorecards for the project at `cwd` are stored.
+ * @param {string} cwd - Project root.
+ * @returns {string} `<cwd>/.waterbrother/memory/scorecards`.
+ */
+function scorecardsDir(cwd) {
+  const segments = [cwd, ".waterbrother", "memory", "scorecards"];
+  return path.join(...segments);
+}
+/**
+ * Location of the scorecard index file inside the scorecards directory.
+ * @param {string} cwd - Project root.
+ * @returns {string} Path ending in `index.json`.
+ */
+function indexPath(cwd) {
+  const baseDir = scorecardsDir(cwd);
+  return path.join(baseDir, "index.json");
+}
+/**
+ * Location of the JSON file that stores a single scorecard.
+ * @param {string} cwd - Project root.
+ * @param {string} id - Scorecard id; used verbatim as the file's base name.
+ * @returns {string} Path ending in `<id>.json`.
+ */
+function scorecardPath(cwd, id) {
+  const fileName = `${id}.json`;
+  return path.join(scorecardsDir(cwd), fileName);
+}
+/**
+ * Build a scorecard id of the form `sc_<slug>-<hex>`, or `sc_<hex>` when the
+ * name produces no slug. The slug is the lower-cased name with every run of
+ * non-alphanumerics collapsed to a dash, capped at 40 characters.
+ * @param {*} name - Task name or id; coerced with String(), falsy becomes "".
+ * @returns {string} Id ending in 6 random hex characters.
+ */
+function makeId(name) {
+  const slug = String(name || "")
+    .toLowerCase()
+    .replace(/[^a-z0-9]+/g, "-")
+    .replace(/^-|-$/g, "")
+    .slice(0, 40)
+    // Truncating can cut right after a separator; trim again so the id never
+    // contains a doubled dash before the random suffix.
+    .replace(/-$/, "");
+  const rand = crypto.randomBytes(3).toString("hex");
+  return slug ? `sc_${slug}-${rand}` : `sc_${rand}`;
+}
+// --- Score computation ---
+/**
+ * Fraction of verification checks that passed.
+ * @param {Array<{pass?: boolean, ok?: boolean}>} verification - Check results;
+ *   an entry counts as passed when either `pass` or `ok` is truthy.
+ * @returns {number|null} Value in [0, 1], or null for a missing/empty list.
+ */
+function computeVerificationScore(verification) {
+  const hasChecks = Array.isArray(verification) && verification.length > 0;
+  if (!hasChecks) return null;
+  const passedCount = verification.reduce(
+    (count, check) => (check.pass || check.ok ? count + 1 : count),
+    0
+  );
+  return passedCount / verification.length;
+}
+/**
+ * Map a sentinel review verdict onto a numeric score.
+ * @param {string} verdict - "ship", "caution" or "block".
+ * @returns {number|null} 1.0, 0.5 or 0.0 respectively; null for anything else.
+ */
+function computeSentinelScore(verdict) {
+  switch (verdict) {
+    case "ship":
+      return 1.0;
+    case "caution":
+      return 0.5;
+    case "block":
+      return 0.0;
+    default:
+      return null;
+  }
+}
+/**
+ * Convert a quality-warning count into a score that falls linearly from 1
+ * (no warnings) to 0 (ten or more warnings).
+ * @param {number|null|undefined} warningCount - Number of warnings found.
+ * @returns {number|null} Score clamped at 0 from below; null when no count was given.
+ */
+function computeQualityScore(warningCount) {
+  if (warningCount == null) return null;
+  const warningCeiling = 10;
+  const penalty = warningCount / warningCeiling;
+  return Math.max(0, 1 - penalty);
+}
+/**
+ * Map the user's follow-up action on a build onto a numeric score.
+ * Note that "redo" maps to a negative value.
+ * @param {string} action - User action label.
+ * @returns {number|null} Score for a recognized action, otherwise null.
+ */
+function computeUserScore(action) {
+  switch (action) {
+    case "accepted":
+    case "ship it":
+      return 1.0;
+    case "fix-these":
+    case "fix these":
+      return 0.0;
+    case "redo":
+      return -0.5;
+    case "challenge":
+      return 0.25;
+    default:
+      return null;
+  }
+}
+/**
+ * Combine the component scores into one composite, rounded to two decimals.
+ * Each available component carries an equal weight of 0.25; the weights are
+ * renormalized over the components that are present.
+ * @param {object} scores
+ * @param {number|null} [scores.verificationScore]
+ * @param {number|null} [scores.sentinelScore]
+ * @param {number|null} [scores.qualityScore]
+ * @param {number|null} [scores.userScore]
+ * @returns {number|null} Weighted mean, or null when no component is available.
+ */
+function computeComposite({ verificationScore, sentinelScore, qualityScore, userScore }) {
+  const weighted = [
+    [verificationScore, 0.25],
+    [sentinelScore, 0.25],
+    [qualityScore, 0.25],
+    [userScore, 0.25]
+  ];
+  let total = 0;
+  let weightSum = 0;
+  for (const [score, weight] of weighted) {
+    // Skip anything that is not a real number (null, undefined, NaN) so a
+    // missing field cannot poison the composite with NaN.
+    if (!Number.isFinite(score)) continue;
+    total += score * weight;
+    weightSum += weight;
+  }
+  return weightSum > 0 ? Math.round((total / weightSum) * 100) / 100 : null;
+}
+// --- Brier score (Layer 3) ---
+/** Squared error between a forecast probability and a 0/1 outcome, rounded to three decimals. */
+function squaredError(forecast, actual) {
+  return Math.round(Math.pow(forecast - actual, 2) * 1000) / 1000;
+}
+/**
+ * Score each prediction against what actually happened.
+ * A prediction is scored only when it is a finite number and the matching
+ * outcome was actually observed.
+ * @param {{testPass?: number, sentinelShip?: number, userAcceptFirstTry?: number}|null} predictions
+ * @param {{verification?: Array, sentinel?: {verdict?: string}, userAction?: string}|null} outcomes
+ * @returns {object|null} Per-prediction squared errors, or null when nothing could be scored.
+ */
+export function computeBrierScores(predictions, outcomes) {
+  if (!predictions || !outcomes) return null;
+  const scores = {};
+  const checks = outcomes.verification;
+  // An empty check list is "no data", not "all passed": `[].every()` is
+  // vacuously true and would otherwise score the prediction against a pass.
+  if (Number.isFinite(predictions.testPass) && Array.isArray(checks) && checks.length > 0) {
+    const allPassed = checks.every((v) => v.pass || v.ok);
+    scores.testPass = squaredError(predictions.testPass, allPassed ? 1 : 0);
+  }
+  if (Number.isFinite(predictions.sentinelShip) && outcomes.sentinel?.verdict) {
+    const shipped = outcomes.sentinel.verdict === "ship";
+    scores.sentinelShip = squaredError(predictions.sentinelShip, shipped ? 1 : 0);
+  }
+  if (Number.isFinite(predictions.userAcceptFirstTry) && outcomes.userAction) {
+    const accepted = outcomes.userAction === "accepted" || outcomes.userAction === "ship it";
+    scores.userAcceptFirstTry = squaredError(predictions.userAcceptFirstTry, accepted ? 1 : 0);
+  }
+  return Object.keys(scores).length > 0 ? scores : null;
+}
+// --- Scorecard creation (Layer 1: passive scoring) ---
+/**
+ * Assemble a scorecard record for one build from its task and receipt.
+ * `predictions` and `brierScores` are always emitted as null here.
+ * @param {object} args
+ * @param {object} [args.task] - Reads `name`, `id` and `chosenOption`.
+ * @param {object} [args.receipt] - Reads `verification`, `review`, `challenge`,
+ *   `changedFiles` and `designReview`.
+ * @param {Array|null} [args.qualityFindings] - Only findings with severity "warning" are counted.
+ * @param {string|null} [args.userAction] - User follow-up action, if known.
+ * @returns {object} Scorecard with `outcomes` and `scores` sections.
+ */
+export function computeScorecard({ task, receipt, qualityFindings, userAction }) {
+  const id = makeId(task?.name || task?.id || "build");
+  // Outcomes pulled from the receipt
+  const rawChecks = receipt?.verification || [];
+  const verification = rawChecks.map((check) => {
+    const command = check.command || check.label || "check";
+    return { command, pass: Boolean(check.ok) };
+  });
+  let sentinel = null;
+  if (receipt?.review) {
+    sentinel = { verdict: receipt.review.verdict, concerns: receipt.review.concerns || [] };
+  }
+  let challenge = null;
+  if (receipt?.challenge) {
+    challenge = { concerns: receipt.challenge.concerns || [] };
+  }
+  const hasFindings = Array.isArray(qualityFindings);
+  const warningFindings = hasFindings
+    ? qualityFindings.filter((finding) => finding.severity === "warning")
+    : [];
+  // Stays null (not 0) without findings so the quality score is omitted.
+  const warningCount = hasFindings ? warningFindings.length : null;
+  const quality = {
+    warnings: warningCount || 0,
+    findings: warningFindings.map((finding) => finding.message).slice(0, 5)
+  };
+  // Component scores
+  const verificationScore = computeVerificationScore(verification);
+  const sentinelScore = computeSentinelScore(sentinel?.verdict);
+  const qualityScore = computeQualityScore(warningCount);
+  const userScoreVal = computeUserScore(userAction);
+  const composite = computeComposite({ verificationScore, sentinelScore, qualityScore, userScore: userScoreVal });
+  // Scope: one `<dir>/**` glob per distinct parent directory of a changed file;
+  // files with no parent directory contribute nothing.
+  const scopeGlobs = new Set();
+  if (receipt?.changedFiles?.length) {
+    for (const file of receipt.changedFiles) {
+      const segments = file.replace(/\\/g, "/").split("/");
+      segments.pop();
+      const parentDir = segments.join("/");
+      if (parentDir) scopeGlobs.add(`${parentDir}/**`);
+    }
+  }
+  const scope = [...scopeGlobs];
+  const designReview = receipt?.designReview ? { verdict: receipt.designReview.verdict } : null;
+  return {
+    id,
+    taskId: task?.id || null,
+    taskName: task?.name || null,
+    scope,
+    approach: task?.chosenOption || null,
+    timestamp: new Date().toISOString(),
+    predictions: null,
+    outcomes: {
+      verification,
+      sentinel,
+      challenge,
+      quality,
+      designReview,
+      userAction: userAction || null,
+      experimentDelta: null
+    },
+    scores: {
+      verificationScore,
+      sentinelScore,
+      qualityScore,
+      userScore: userScoreVal,
+      composite
+    },
+    brierScores: null
+  };
+}
+// --- Storage ---
+/**
+ * Load the scorecard index for a project.
+ * Best-effort: a missing, unreadable or malformed index yields an empty list.
+ * @param {string} cwd - Project root.
+ * @returns {Promise<Array>} Index entries.
+ */
+async function readIndex(cwd) {
+  try {
+    const raw = await fs.readFile(indexPath(cwd), "utf8");
+    const parsed = JSON.parse(raw);
+    // Valid JSON that is not an array (e.g. `{}`) would break callers that
+    // call unshift/slice on the result, so treat it like a missing index.
+    return Array.isArray(parsed) ? parsed : [];
+  } catch {
+    return [];
+  }
+}
+/**
+ * Persist the scorecard index as pretty-printed JSON with a trailing newline,
+ * creating the scorecards directory first if needed.
+ * @param {string} cwd - Project root.
+ * @param {Array} index - Index entries to write.
+ * @returns {Promise<void>}
+ */
+async function writeIndex(cwd, index) {
+  const targetDir = scorecardsDir(cwd);
+  await fs.mkdir(targetDir, { recursive: true });
+  const target = indexPath(cwd);
+  const payload = `${JSON.stringify(index, null, 2)}\n`;
+  await fs.writeFile(target, payload, "utf8");
+}
+/**
+ * Write a scorecard to its own JSON file and record a summary entry at the
+ * front of the index, keeping at most MAX_INDEX_ENTRIES entries.
+ * @param {object} args
+ * @param {string} args.cwd - Project root.
+ * @param {object} args.scorecard - Scorecard to persist.
+ * @returns {Promise<void>}
+ */
+export async function saveScorecard({ cwd, scorecard }) {
+  const targetDir = scorecardsDir(cwd);
+  await fs.mkdir(targetDir, { recursive: true });
+  const cardFile = scorecardPath(cwd, scorecard.id);
+  await fs.writeFile(cardFile, `${JSON.stringify(scorecard, null, 2)}\n`, "utf8");
+  const index = await readIndex(cwd);
+  const summary = {
+    id: scorecard.id,
+    taskName: scorecard.taskName,
+    scope: scorecard.scope,
+    approach: scorecard.approach,
+    composite: scorecard.scores.composite,
+    timestamp: scorecard.timestamp
+  };
+  // Newest first; everything past the cap is dropped from the tail.
+  index.unshift(summary);
+  if (index.length > MAX_INDEX_ENTRIES) {
+    index.splice(MAX_INDEX_ENTRIES);
+  }
+  await writeIndex(cwd, index);
+}
+/** Reduce a scope glob to a lower-cased, forward-slash directory without glob suffix or trailing slash. */
+function normalizeScopeDir(pattern) {
+  return String(pattern)
+    // Normalize separators first so a Windows-style `src\**` loses its suffix too.
+    .replace(/\\/g, "/")
+    .replace(/\/?\*\*$/, "")
+    .replace(/\/+$/, "")
+    .toLowerCase();
+}
+/** True when the directories are equal or one contains the other on a path-segment boundary. */
+function dirsOverlap(a, b) {
+  // An empty directory (from a bare `**` pattern) overlaps everything.
+  if (a === b || a === "" || b === "") return true;
+  return a.startsWith(`${b}/`) || b.startsWith(`${a}/`);
+}
+/**
+ * Load the scorecards whose recorded scope overlaps the given file patterns.
+ * Each overlapping scope entry adds 3 to an index entry's relevance; results
+ * are ordered by relevance, then by newest timestamp.
+ * @param {object} args
+ * @param {string} args.cwd - Project root.
+ * @param {string[]} [args.filePatterns] - Directory globs such as `src/ui/**`.
+ * @param {number} [args.limit=10] - Maximum number of index entries to load.
+ * @returns {Promise<object[]>} Parsed scorecards; entries whose file cannot be read are skipped.
+ */
+export async function findRelevantScorecards({ cwd, filePatterns = [], limit = 10 }) {
+  const index = await readIndex(cwd);
+  if (index.length === 0) return [];
+  const queryDirs = filePatterns.map(normalizeScopeDir);
+  const scored = [];
+  for (const entry of index) {
+    let relevance = 0;
+    if (queryDirs.length > 0 && Array.isArray(entry.scope)) {
+      for (const s of entry.scope) {
+        const sDir = normalizeScopeDir(s);
+        // Boundary-aware match: `src/app` must not match `src/application`.
+        if (queryDirs.some((qd) => dirsOverlap(sDir, qd))) relevance += 3;
+      }
+    }
+    if (relevance > 0) scored.push({ entry, relevance });
+  }
+  scored.sort((a, b) => b.relevance - a.relevance || new Date(b.entry.timestamp) - new Date(a.entry.timestamp));
+  const top = scored.slice(0, limit);
+  const cards = [];
+  for (const { entry } of top) {
+    try {
+      const raw = await fs.readFile(scorecardPath(cwd, entry.id), "utf8");
+      cards.push(JSON.parse(raw));
+    } catch {
+      // Best-effort: the index can reference a scorecard file that is missing
+      // or corrupt; skip it rather than fail the whole lookup.
+    }
+  }
+  return cards;
+}
+/**
+ * Load the scorecards for the first `limit` index entries, in index order.
+ * @param {object} args
+ * @param {string} args.cwd - Project root.
+ * @param {number} [args.limit=10] - Maximum number of index entries to load.
+ * @returns {Promise<object[]>} Parsed scorecards; entries whose file cannot be read or parsed are skipped.
+ */
+export async function loadRecentScorecards({ cwd, limit = 10 }) {
+  const leadingEntries = (await readIndex(cwd)).slice(0, limit);
+  const loaded = [];
+  for (const entry of leadingEntries) {
+    let parsed;
+    try {
+      const text = await fs.readFile(scorecardPath(cwd, entry.id), "utf8");
+      parsed = JSON.parse(text);
+    } catch {
+      continue;
+    }
+    loaded.push(parsed);
+  }
+  return loaded;
+}
+// --- Layer 2: Context injection ---
+/**
+ * Render past scorecards as a short text block for prompt injection.
+ * The block has a header, one line per approach (average composite, sentinel
+ * verdicts, user actions), the three most frequent quality findings, and a
+ * calibration warning when at least three cards have Brier scores averaging
+ * above 0.3. The joined output never exceeds MAX_CALIBRATION_CHARS.
+ * @param {object[]|null} scorecards - Scorecards to summarize.
+ * @returns {string} Multi-line block, or "" when there are no scorecards.
+ */
+export function buildCalibrationBlock(scorecards) {
+  if (!scorecards || scorecards.length === 0) return "";
+  const lines = ["Build history for this scope:"];
+  let used = lines[0].length;
+  // Every appended line also costs the newline that joins it to the block.
+  const tryAppend = (line) => {
+    const cost = line.length + 1;
+    if (used + cost > MAX_CALIBRATION_CHARS) return false;
+    lines.push(line);
+    used += cost;
+    return true;
+  };
+  // Group by approach. A Map is used because the keys are free-form text and
+  // names such as "__proto__" misbehave as plain-object keys.
+  const byApproach = new Map();
+  for (const sc of scorecards) {
+    const key = sc.approach || "unknown";
+    if (!byApproach.has(key)) byApproach.set(key, []);
+    byApproach.get(key).push(sc);
+  }
+  for (const [approach, cards] of byApproach) {
+    // Average only the cards that have a composite; counting a missing
+    // composite as 0 would drag the average down.
+    const composites = cards
+      .map((c) => c.scores?.composite)
+      .filter((value) => Number.isFinite(value));
+    const avgLabel = composites.length > 0
+      ? (composites.reduce((sum, value) => sum + value, 0) / composites.length).toFixed(2)
+      : "n/a";
+    const verdicts = cards.map((c) => c.outcomes?.sentinel?.verdict).filter(Boolean);
+    const actions = cards.map((c) => c.outcomes?.userAction).filter(Boolean);
+    const line = `- ${approach}: avg score ${avgLabel} (${verdicts.join(", ")}) → user: ${actions.join(", ")}`;
+    if (!tryAppend(line)) break;
+  }
+  // Aggregate blind spots: the most frequently repeated quality findings.
+  const findingCounts = new Map();
+  for (const sc of scorecards) {
+    for (const finding of (sc.outcomes?.quality?.findings || [])) {
+      findingCounts.set(finding, (findingCounts.get(finding) || 0) + 1);
+    }
+  }
+  const blindSpots = [...findingCounts.entries()]
+    .sort((a, b) => b[1] - a[1])
+    .slice(0, 3)
+    .map(([finding, count]) => `${finding} (${count}x)`);
+  if (blindSpots.length > 0) {
+    tryAppend(`Quality blind spots: ${blindSpots.join(", ")}`);
+  }
+  // Brier calibration note (Layer 3)
+  const brierMeans = scorecards
+    .map((sc) => Object.values(sc.brierScores || {}))
+    .filter((vals) => vals.length > 0)
+    .map((vals) => vals.reduce((a, b) => a + b, 0) / vals.length);
+  if (brierMeans.length >= 3) {
+    const avgBrier = brierMeans.reduce((a, b) => a + b, 0) / brierMeans.length;
+    if (avgBrier > 0.3) {
+      tryAppend(`Calibration warning: avg Brier ${avgBrier.toFixed(2)} — lower your confidence estimates.`);
+    }
+  }
+  return lines.join("\n");
+}
+// --- Summary for display ---
+/**
+ * One-line summary of a scorecard's scores as whole percentages, e.g.
+ * `verify:100%  sentinel:50%  composite:75%`. Scores that are absent or not
+ * finite numbers are left out.
+ * @param {{scores?: object}|null} scorecard - Scorecard to summarize.
+ * @returns {string} Parts joined by two spaces; "" when nothing can be shown.
+ */
+export function formatScorecardSummary(scorecard) {
+  const s = scorecard?.scores ?? {};
+  const fields = [
+    ["verify", s.verificationScore],
+    ["sentinel", s.sentinelScore],
+    ["quality", s.qualityScore],
+    ["user", s.userScore],
+    ["composite", s.composite]
+  ];
+  return fields
+    // Number.isFinite also rejects undefined, which a bare `!== null` check
+    // would let through and print as "NaN%".
+    .filter(([, value]) => Number.isFinite(value))
+    .map(([label, value]) => `${label}:${(value * 100).toFixed(0)}%`)
+    .join("  ");
+}

package/src/workflow.js CHANGED Viewed

@@ -16,6 +16,7 @@ import {
 } from "./frontend.js";
 import { runPlannerPass, formatPlanForExecutor, formatPlanForDisplay } from "./planner.js";
 import { runVerificationPass, formatVerifierResults, hasFailures } from "./verifier.js";
+import { computeScorecard, saveScorecard, findRelevantScorecards, buildCalibrationBlock } from "./scorecard.js";
 export async function runBuildWorkflow({
   agent,
@@ -27,6 +28,18 @@ export async function runBuildWorkflow({
   if (!task) throw new Error("no active task");
   if (!promptText) throw new Error("build requires a prompt");
+  // Layer 2: Inject calibration from scored memory before planning
+  let calibrationBlock = "";
+  try {
+    const contractPaths = task.activeContract?.paths || [];
+    if (contractPaths.length > 0) {
+      const relevantCards = await findRelevantScorecards({ cwd: context.cwd, filePatterns: contractPaths, limit: 5 });
+      if (relevantCards.length > 0) {
+        calibrationBlock = buildCalibrationBlock(relevantCards);
+      }
+    }
+  } catch {}
   // Planner/Executor split: if plannerModel is configured, run planner first
   const plannerModel = context.runtime?.plannerModel;
   let planBlock = "";
@@ -68,6 +81,7 @@ export async function runBuildWorkflow({
     Object.assign(executionCtx, frontendCtx);
   }
   if (planBlock) executionCtx.plan = planBlock;
+  if (calibrationBlock) executionCtx.calibration = calibrationBlock;
   agent.setExecutionContext(executionCtx);
   // Pre-seed contract if task has one
@@ -311,6 +325,20 @@ export async function runBuildWorkflow({
     context.runtime.lastImpact = impact || null;
   }
+  // Layer 1: Compute and save scorecard (passive scoring)
+  let scorecard = null;
+  if (finalReceipt?.mutated) {
+    try {
+      scorecard = computeScorecard({
+        task,
+        receipt: finalReceipt,
+        qualityFindings: null, // quality findings come from the caller if available
+        userAction: null       // populated later when user acts
+      });
+      await saveScorecard({ cwd: context.cwd, scorecard });
+    } catch {}
+  }
   return {
     response,
     receipt: finalReceipt,
@@ -320,7 +348,8 @@ export async function runBuildWorkflow({
     screenshotReview,
     impactSummary: impact ? summarizeImpactMap(impact) : null,
     verifierResults,
-    verifierSummary: verifierResults ? formatVerifierResults(verifierResults) : null
+    verifierSummary: verifierResults ? formatVerifierResults(verifierResults) : null,
+    scorecard
   };
 }