npm - @tritard/waterbrother - Versions diffs - 0.8.27 → 0.8.29 - Mend

@tritard/waterbrother 0.8.27 → 0.8.29

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (4)

package/package.json (changed)

@@ -1,6 +1,6 @@
 {
   "name": "@tritard/waterbrother",
-  "version": "0.8.27",
+  "version": "0.8.29",
   "description": "Waterbrother: Grok-powered coding CLI with local tools, sessions, operator modes, and approval controls",
   "type": "module",
   "bin": {

package/src/cli.js (changed)

@@ -606,6 +606,50 @@ function buildSyntheticAssistantOutput(receipt) {
   return null;
 }
+function hasFrontendCodeEcho(text) {
+  const body = String(text || "");
+  return /```(?:html|css|js|javascript|jsx|tsx)?[\s\S]{120,}```/i.test(body) || /<!DOCTYPE html>/i.test(body);
+}
+function hasWriteMutationTool(receipt) {
+  const tools = Array.isArray(receipt?.tools) ? receipt.tools : [];
+  return tools.some((tool) => ["write_file", "replace_in_file", "apply_patch"].includes(tool?.name));
+}
+function deriveContractWriteTarget(contract) {
+  const paths = Array.isArray(contract?.paths) ? contract.paths : [];
+  for (const item of paths) {
+    const raw = String(item || "").trim();
+    if (!raw) continue;
+    const normalized = raw.replace(/\/\*\*?$/g, "").replace(/\*+$/g, "");
+    if (normalized) return normalized;
+  }
+  return null;
+}
+function shouldRecoverFrontendCodeEcho({ frontendExecutionContext, receipt, assistantText }) {
+  if (!frontendExecutionContext?.frontend) return false;
+  if (!receipt || receipt.mutated) return false;
+  if (!hasFrontendCodeEcho(assistantText)) return false;
+  if (hasWriteMutationTool(receipt)) return false;
+  const tools = Array.isArray(receipt.tools) ? receipt.tools : [];
+  return tools.some((tool) => tool?.name === "declare_contract") || tools.some((tool) => tool?.name === "make_directory");
+}
+function buildFrontendWriteRecoveryPrompt({ originalPrompt, contract }) {
+  const target = deriveContractWriteTarget(contract);
+  const lines = [
+    "You already planned the work but printed code into chat instead of writing files.",
+    `Original task: ${String(originalPrompt || "").trim()}`,
+    target ? `Write the generated frontend into the declared scope now: ${target}` : "Write the generated frontend into the declared contract scope now.",
+    "Do not print long code blocks in chat.",
+    "Use write_file (or replace_in_file/apply_patch if needed) to create the actual site files.",
+    "If this is a new site in a folder, default to writing index.html there unless multiple files are clearly justified.",
+    "After writing, reply briefly with only the files created or updated."
+  ];
+  return lines.join("\n\n");
+}
 function color256(fg, text) {
   return `\x1b[38;5;${fg}m${text}\x1b[0m`;
 }
@@ -3576,6 +3620,40 @@ async function runTextTurnInteractive({
     precomputedReceipt = await agent.toolRuntime.completeTurn({ signal: abortController?.signal });
     renderedAssistantText = buildSyntheticAssistantOutput(precomputedReceipt) || renderedAssistantText;
   }
+  if (!precomputedReceipt && frontendExecutionContext) {
+    const candidateReceipt = await agent.toolRuntime.completeTurn({ signal: abortController?.signal });
+    if (shouldRecoverFrontendCodeEcho({ frontendExecutionContext, receipt: candidateReceipt, assistantText: response.content || "" })) {
+      const recoverySpinner = createProgressSpinner("writing files...");
+      printLiveTrace("frontend recovery: assistant echoed code, retrying with write_file", context.runtime.traceMode);
+      agent.toolRuntime.setReadOnlyRoots(readOnlyRoots);
+      agent.toolRuntime.setWriteRoots(writeRoots);
+      if (frontendExecutionContext) {
+        const merged = { ...(previousExecutionContext || {}), ...frontendExecutionContext };
+        if (previousExecutionContext?.reminders && frontendExecutionContext.reminders) {
+          merged.reminders = `${previousExecutionContext.reminders}\n${frontendExecutionContext.reminders}`;
+        }
+        agent.setExecutionContext(merged);
+      }
+      try {
+        response = await agent.runTurn(buildFrontendWriteRecoveryPrompt({ originalPrompt: effectivePromptText, contract: candidateReceipt?.contract }), {
+          signal: abortController?.signal,
+          onStateChange(state) {
+            printLiveTrace(`state=${state}`, context.runtime.traceMode, { verboseOnly: true });
+          }
+        });
+        renderedAssistantText = response.content || "";
+      } finally {
+        recoverySpinner.stop();
+        agent.toolRuntime.setReadOnlyRoots([]);
+        agent.toolRuntime.setWriteRoots([]);
+        if (frontendExecutionContext) {
+          agent.setExecutionContext(previousExecutionContext);
+        }
+      }
+    } else {
+      precomputedReceipt = candidateReceipt;
+    }
+  }
   printAssistantOutput(renderedAssistantText);
   await setSessionRunState(currentSession, agent, "done");
   printTurnSummary(turnSummary, response, { modelId: agent.getModel(), costTracker: context.costTracker, traceMode: context.runtime.traceMode });

package/src/frontend.js (changed)

@@ -69,14 +69,16 @@ const BENCHMARK_SITE_TYPE_RULES = {
   blog: [
     "Benchmark blog mode: use neutral structural placeholders or concrete subject matter instead of publication worldbuilding or reflective-editorial atmosphere prose.",
     "Benchmark blog mode: do not wrap the page in a generic publication shell like Journal, Featured Essay, Latest Dispatches, Print Edition, Submit Work, or similar magazine-site framing.",
-    "Benchmark blog mode: avoid the default editorial scaffold of hero, story list, archive rail, topics grid, and publication footer unless the user explicitly asked for a magazine-style site."
+    "Benchmark blog mode: avoid the default editorial scaffold of hero, story list, archive rail, topics grid, and publication footer unless the user explicitly asked for a magazine-style site.",
+    "Benchmark blog mode: do not use Tailwind CDN starter theming, picsum/placehold imagery, or fake keyboard/search chrome."
   ],
   store: [
     "Benchmark ecommerce mode: prioritize conversion architecture over editorial styling. The page should sell, not just look clean.",
     "Benchmark ecommerce mode: include proof, objection handling, and trust signals that belong on a real single-product PDP.",
     "Benchmark ecommerce mode: do not leave the main merchandising surface as a literal placeholder or demo box.",
     "Benchmark ecommerce mode: do not use Tailwind CDN starter theming or placeholder product images such as picsum/placehold on the live merchandising surface.",
-    "Benchmark ecommerce mode: fake command palettes, fake app shortcuts, and unrelated theme chrome are disallowed."
+    "Benchmark ecommerce mode: fake command palettes, fake app shortcuts, and unrelated theme chrome are disallowed.",
+    "Benchmark ecommerce mode: fake review counts, fake bestseller labels, and fake as-featured-in proof are disallowed."
   ]
 };
@@ -440,6 +442,10 @@ export function detectFrontendSlop({ promptText = "", assistantText = "", receip
     flags.push("benchmark store used placeholder product imagery");
     score += 4;
   }
+  if (siteType === "store" && benchmarkMode && /\b(?:best seller|best seller|\d[\d,]*\s+reviews|as featured in|featured in|trusted by|studio engineers approved)\b/i.test(haystack)) {
+    flags.push("benchmark store used fake proof or badge chrome");
+    score += 4;
+  }
   if (siteType === "store" && benchmarkMode && !/\b(?:review|reviews|testimonial|rated|stars?|customers?)\b/i.test(haystack)) {
     flags.push("benchmark store lacks social proof or review architecture");
     score += 2;
@@ -460,6 +466,14 @@ export function detectFrontendSlop({ promptText = "", assistantText = "", receip
     flags.push("benchmark blog used generic publication-shell framing");
     score += 3;
   }
+  if (siteType === "blog" && benchmarkMode && /cdn\.tailwindcss\.com/i.test(haystack)) {
+    flags.push("benchmark blog relied on Tailwind CDN starter theming");
+    score += 4;
+  }
+  if (siteType === "blog" && benchmarkMode && /\b(?:picsum\.photos|placehold\.co|placeholder\.com)\b/i.test(haystack)) {
+    flags.push("benchmark blog used placeholder imagery");
+    score += 4;
+  }
   if (/\b(?:command palette would open here|metaKey && e\.key === ['"]k['"]|keyboard accessibility)\b/i.test(haystack)) {
     flags.push("fake keyboard or command-palette gimmick");
     score += 3;
@@ -478,7 +492,7 @@ export function detectFrontendSlop({ promptText = "", assistantText = "", receip
   return {
     score,
     flags,
-    hardBlock: flags.some((flag) => /fictional publication identity|generic publication-shell framing|fake keyboard|reflective-editorial atmosphere|primary merchandising surface as a placeholder|Tailwind CDN starter theming|placeholder product imagery/.test(flag)),
+    hardBlock: flags.some((flag) => /fictional publication identity|generic publication-shell framing|fake keyboard|reflective-editorial atmosphere|primary merchandising surface as a placeholder|Tailwind CDN starter theming|placeholder product imagery|placeholder imagery|fake proof or badge chrome/.test(flag)),
     severe: score >= 5,
     summary: flags.length > 0 ? `frontend slop flags: ${flags.join(", ")}` : "no deterministic frontend slop flags"
   };
@@ -520,8 +534,10 @@ export function buildFrontendRevisionPrompt({
     "Cut reflective-editorial filler copy and replace it with either concrete language or neutral structural placeholders.",
     "For benchmark blog tasks, default to neutral structural placeholder content instead of invented publication framing, issue metadata, or named contributors.",
     "For benchmark blog tasks, do not use generic publication-shell labels like Journal, Featured Essay, Latest Dispatches, Print Edition, Submit Work, or publication-footer framing.",
+    "For benchmark blog tasks, do not use Tailwind CDN starter theming, picsum-style placeholder imagery, or fake search/shortcut chrome.",
     "For benchmark store tasks, do not leave the product image area as a labeled placeholder. Use product-shaped merchandising composition, proof, and objection-handling blocks instead.",
     "For benchmark store tasks, do not use Tailwind CDN starter theming, picsum-style placeholder imagery, or fake command-palette behavior.",
+    "For benchmark store tasks, do not invent review counts, bestseller labels, or as-featured-in proof unless the user explicitly requested fictional marketing chrome.",
     "Reduce section count if needed and push one stronger asymmetrical composition instead of a sequence of balanced blocks.",
     "Simplify the page if needed. Stronger direction with fewer elements is preferred over busier generic output.",
     "Rewrite the weakest sections rather than making superficial tweaks."

package/src/tools.js (changed)

@@ -1037,12 +1037,23 @@ export function createToolRuntime({
     return { decision: "ask", reason: "No matching allow rule" };
   }
+  function getContractWriteRoots() {
+    const rawPaths = Array.isArray(currentTurn.contract?.paths) ? currentTurn.contract.paths : [];
+    return normalizePathList(
+      rawPaths
+        .map((item) => String(item || "").trim())
+        .filter(Boolean)
+        .map((item) => item.replace(/\/\*\*?$/g, "").replace(/\*+$/g, ""))
+        .filter(Boolean)
+    );
+  }
   function contractAllows(toolName, args = {}) {
     if (!currentTurn.contract) return { ok: !toolRequiresContract(toolName, args), reason: "No active contract" };
     if ((toolName === "write_file" || toolName === "replace_in_file" || toolName === "make_directory" || toolName === "delete_path") && args.path) {
       try {
         resolveSandboxPath(cwd, args.path, allowOutsideCwd, {
-          allowedWriteRoots: getCurrentWriteRoots()
+          allowedWriteRoots: [...getCurrentWriteRoots(), ...getContractWriteRoots()]
         });
       } catch {
         const touchedPaths = getTouchedPathsForTool(toolName, args);