npm - @tritard/waterbrother - Versions diffs - 0.9.0 → 0.9.1 - Mend

@tritard/waterbrother 0.9.0 → 0.9.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5) hide show

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@tritard/waterbrother",
-  "version": "0.9.0",
+  "version": "0.9.1",
   "description": "Waterbrother: Grok-powered coding CLI with local tools, sessions, operator modes, and approval controls",
   "type": "module",
   "bin": {

package/src/cli.js CHANGED Viewed

@@ -14,7 +14,19 @@ import { expandHomePath } from "./path-utils.js";
 import { AUTONOMY_MODES, buildOperatorIdentity, EXPERIENCE_MODES, modeDefaults, normalizeAutonomyMode, normalizeExperienceMode } from "./modes.js";
 import { computeImpactMap } from "./impact.js";
 import { reviewTurn } from "./reviewer.js";
-import { buildFrontendExecutionContext, reviewFrontendTurn, shouldRunFrontendReview } from "./frontend.js";
+import {
+  buildFrontendExecutionContext,
+  getFrontendAcceptanceFailure,
+  buildFrontendRebuildPrompt,
+  buildFrontendRevisionPrompt,
+  detectFrontendSlop,
+  inspectFrontendArtifacts,
+  mergeFrontendSlop,
+  reviewFrontendTurn,
+  shouldAutoReviseFrontend,
+  shouldForceFrontendRebuild,
+  shouldRunFrontendReview
+} from "./frontend.js";
 import { loadTask, saveTask, listTasks, setActiveTask, getActiveTask, closeTask } from "./task-store.js";
 import { runDecisionPass, runInventPass, formatDecisionForDisplay, formatDecisionCompact, formatDecisionDetail } from "./decider.js";
 import { runBuildWorkflow, startFeatureTask, runChallengeWorkflow } from "./workflow.js";
@@ -608,6 +620,63 @@ function buildSyntheticAssistantOutput(receipt) {
   return null;
 }
// Returns the changed-file paths recorded on a receipt as a de-duplicated list
// of at most `limit` entries (default 6).
// - Each entry is coerced with String(), trimmed, and dropped if empty.
// - A missing receipt or a non-array `changedFiles` yields an empty array.
// NOTE(review): `limit` is passed straight to slice(0, limit); a negative value
// would drop entries from the end instead of capping the list.
+function formatReceiptFileList(receipt, limit = 6) {
+  const files = Array.isArray(receipt?.changedFiles)
+    ? receipt.changedFiles.map((filePath) => String(filePath || "").trim()).filter(Boolean)
+    : [];
+  return [...new Set(files)].slice(0, limit);
+}
// Builds the text shown to the user after a frontend turn in benchmark mode.
// Returns `assistantText` unchanged when `frontend.benchmarkMode` is falsy or
// when none of the downgrade conditions below hold. Otherwise returns a
// replacement status report (file list, current status, review summaries and a
// closing "needs another pass" line).
// Review data is read from `artifacts` first and falls back to the same-named
// fields on `receipt`.
+function buildBenchmarkFrontendStatusOutput({
+  assistantText = "",
+  receipt = null,
+  artifacts = null,
+  frontend = null
+} = {}) {
+  if (!frontend?.benchmarkMode) return assistantText;
+  const designReview = artifacts?.designReview || receipt?.designReview || null;
+  const screenshotReview = artifacts?.screenshotReview || receipt?.screenshotReview || null;
+  const designSlop = artifacts?.designSlop || receipt?.designSlop || null;
+  const acceptanceFailure = getFrontendAcceptanceFailure({
+    frontend,
+    slop: designSlop,
+    designReview,
+    screenshotReview
+  });
+  const designVerdict = String(designReview?.verdict || "").trim().toLowerCase();
+  const renderVerdict = String(screenshotReview?.verdict || "").trim().toLowerCase();
// Downgrade when there is an acceptance failure, either verdict is "weak", or
// the design verdict is "caution" without a "strong" render verdict.
+  const shouldDowngrade =
+    Boolean(acceptanceFailure) ||
+    designVerdict === "weak" ||
+    renderVerdict === "weak" ||
+    (designVerdict === "caution" && renderVerdict !== "strong");
+  if (!shouldDowngrade) return assistantText;
+  const lines = [];
+  const files = formatReceiptFileList(receipt);
+  if (files.length > 0) {
+    lines.push("Updated benchmark frontend files:");
+    for (const filePath of files) lines.push(`- ${filePath}`);
+  }
// An acceptance-failure reason takes precedence over the verdict summary line.
+  if (acceptanceFailure?.reason) {
+    lines.push(`Current status: ${acceptanceFailure.reason}.`);
+  } else {
+    const statusParts = [];
+    if (designVerdict) statusParts.push(`design ${designVerdict}`);
+    if (renderVerdict) statusParts.push(`render ${renderVerdict}`);
+    if (artifacts?.designRevision?.triggered) {
+      statusParts.push(`auto-revised ${artifacts.designRevision.passes}x`);
+    }
+    if (statusParts.length > 0) {
+      lines.push(`Current status: ${statusParts.join(", ")}.`);
+    }
+  }
+  if (designReview?.summary) lines.push(designReview.summary);
// The screenshot summary is only included when the render verdict is "weak".
+  if (renderVerdict === "weak" && screenshotReview?.summary) lines.push(screenshotReview.summary);
+  lines.push("Result needs another pass before it should be treated as finished.");
// NOTE(review): every entry, including each "- file" bullet, is joined with a
// blank line, so the file list renders with empty lines between bullets.
// Confirm that this spacing is intended.
+  return lines.filter(Boolean).join("\n\n");
+}
 function hasFrontendCodeEcho(text) {
   const body = String(text || "");
   return /```(?:html|css|js|javascript|jsx|tsx)?[\s\S]{120,}```/i.test(body) || /<!DOCTYPE html>/i.test(body);
@@ -638,6 +707,36 @@ function shouldRecoverFrontendCodeEcho({ frontendExecutionContext, receipt, assi
   return tools.some((tool) => tool?.name === "declare_contract") || tools.some((tool) => tool?.name === "make_directory");
 }
// Decides whether a frontend turn should be retried because a shell command
// was blocked. Returns true only when all of the following hold:
// - `frontendExecutionContext.frontend` is truthy,
// - a receipt exists and `receipt.mutated` is falsy,
// - some recorded tool is `run_shell` with status "blocked" and either its
//   result/error text mentions "declare_contract" or "mutating shell command
//   not allowed", or its command contains mkdir/touch/cp/mv/rm as a whole word.
// `parseToolArgsPreview` is defined outside this view; it is presumably
// returning the parsed tool arguments (with a `command` field) — verify.
+function shouldRecoverFrontendBlockedShell({ frontendExecutionContext, receipt }) {
+  if (!frontendExecutionContext?.frontend) return false;
+  if (!receipt || receipt.mutated) return false;
+  const tools = Array.isArray(receipt?.tools) ? receipt.tools : [];
+  return tools.some((tool) => {
+    if (tool?.name !== "run_shell" || tool?.status !== "blocked") return false;
+    const message = String(tool?.result_preview || tool?.error || "").toLowerCase();
+    const argsPreview = parseToolArgsPreview(tool);
+    const command = String(argsPreview?.command || "").trim().toLowerCase();
+    return /declare_contract/.test(message) || /mutating shell command not allowed/.test(message) || /\bmkdir\b|\btouch\b|\bcp\b|\bmv\b|\brm\b/.test(command);
+  });
+}
// Returns true when `filePath` (trimmed, compared in lower case) ends with one
// of: .html .css .scss .sass .less .js .jsx .tsx .vue .svelte.
// Empty or missing input returns false.
// NOTE(review): `.htm`, `.ts` and `.mjs` are not matched. Confirm whether that
// is deliberate, since callers treat a non-match as "no site file written".
+function isFrontendArtifactPath(filePath = "") {
+  const value = String(filePath || "").trim().toLowerCase();
+  if (!value) return false;
+  return /\.(html|css|scss|sass|less|js|jsx|tsx|vue|svelte)$/.test(value);
+}
// Decides whether a frontend turn changed something but never wrote a frontend
// file. Returns true only when all of the following hold:
// - `frontendExecutionContext.frontend` is truthy and `receipt.mutated` is truthy,
// - no entry of `receipt.changedFiles` passes isFrontendArtifactPath,
// - `changedFiles` is non-empty,
// - the tool list contains write_file, replace_in_file, apply_patch or
//   make_directory.
+function shouldRecoverFrontendMissingFiles({ frontendExecutionContext, receipt }) {
+  if (!frontendExecutionContext?.frontend) return false;
+  if (!receipt?.mutated) return false;
+  const changedFiles = Array.isArray(receipt?.changedFiles) ? receipt.changedFiles : [];
// Any frontend artifact among the changed files means nothing is missing.
+  if (changedFiles.some((filePath) => isFrontendArtifactPath(filePath))) return false;
+  const tools = Array.isArray(receipt?.tools) ? receipt.tools : [];
+  const touchedScopeOnly = changedFiles.length > 0;
+  const attemptedWrite = tools.some((tool) => ["write_file", "replace_in_file", "apply_patch"].includes(tool?.name));
+  return touchedScopeOnly && (attemptedWrite || tools.some((tool) => tool?.name === "make_directory"));
+}
 function buildFrontendWriteRecoveryPrompt({ originalPrompt, contract }) {
   const target = deriveContractWriteTarget(contract);
   const lines = [
@@ -652,6 +751,32 @@ function buildFrontendWriteRecoveryPrompt({ originalPrompt, contract }) {
   return lines.join("\n\n");
 }
// Builds the retry prompt sent after a mutating shell command was blocked.
// The trimmed original prompt is embedded in the second paragraph, and the
// fixed instruction paragraphs are joined with blank lines.
// Returns the prompt as a single string.
+function buildFrontendBlockedShellRecoveryPrompt({ originalPrompt }) {
+  return [
+    "You tried to use a mutating shell command before declaring contract scope.",
+    `Original task: ${String(originalPrompt || "").trim()}`,
+    "Do not use run_shell for this frontend task.",
+    "First call declare_contract with the target Desktop folder/file scope.",
+    "Then use make_directory and write_file to create the site files.",
+    "If this is a new site in a folder, write index.html there unless multiple files are clearly justified.",
+    "Reply briefly with only the files created or updated after the tool calls succeed."
+  ].join("\n\n");
+}
// Builds the retry prompt sent when a frontend turn touched paths but wrote no
// site file. Paragraphs are joined with blank lines.
// - `changedFiles`: listed in a "Current touched paths" paragraph when non-empty;
//   otherwise that paragraph is an empty string and is removed by filter(Boolean).
// - `contract`: passed to deriveContractWriteTarget (defined outside this view);
//   a truthy result is named as the write target, otherwise a generic
//   instruction is used.
// Returns the prompt as a single string.
+function buildFrontendMissingFilesRecoveryPrompt({ originalPrompt, contract, changedFiles = [] }) {
+  const target = deriveContractWriteTarget(contract);
+  const lines = [
+    "You created the frontend scope but did not write the actual site file.",
+    `Original task: ${String(originalPrompt || "").trim()}`,
+    changedFiles.length > 0 ? `Current touched paths: ${changedFiles.join(", ")}` : "",
+    target ? `Write the actual site into the declared scope now: ${target}` : "Write the actual site into the declared contract scope now.",
+    "Do not stop after creating the folder.",
+    "Use write_file to create the real frontend file now. If this is a new site in a folder, default to index.html unless multiple files are clearly justified.",
+    "Reply briefly with only the files created or updated."
+  ].filter(Boolean);
+  return lines.join("\n\n");
+}
 function color256(fg, text) {
// Wraps `text` in the escape sequence ESC[38;5;<fg>m ... ESC[0m, i.e. sets a
// 256-colour foreground with index `fg` and then resets attributes.
   return `\x1b[38;5;${fg}m${text}\x1b[0m`;
 }
@@ -1039,6 +1164,264 @@ async function enrichTurnArtifacts({ agent, context, promptText, assistantText,
   return receipt;
 }
// Computes the review artifacts for a completed turn and returns them without
// writing anything back to the receipt in this function.
// Returns { impact, review, designReview, designSlop, screenshotReview,
// screenshotPath }. Each of impact/review/designReview starts from the value
// already on `receipt` and is only recomputed when its condition below holds.
// `signal` is forwarded to the two reviewer calls.
+async function analyzeTurnArtifacts({
+  agent,
+  context,
+  promptText,
+  assistantText,
+  receipt,
+  frontend = null,
+  signal
+}) {
+  let impact = receipt.impact || null;
// The impact map is recomputed only for mutating turns, and only when
// runtime.impact.enabled is not explicitly false.
// NOTE(review): unlike the reviewer calls below, this call is not wrapped in
// try/catch, so a rejection from computeImpactMap propagates to the caller.
+  if (receipt.mutated && context.runtime.impact?.enabled !== false) {
+    impact = await computeImpactMap({
+      cwd: context.cwd,
+      changedFiles: receipt.changedFiles || [],
+      maxRelated: context.runtime.impact?.maxRelated,
+      maxTests: context.runtime.impact?.maxTests
+    });
+  }
+  let review = receipt.review || null;
+  if (receipt.mutated && context.runtime.reviewer?.enabled !== false) {
+    try {
+      review = await reviewTurn({
+        apiKey: context.runtime.apiKey,
+        baseUrl: context.runtime.baseUrl,
+        model: context.runtime.reviewer?.model || agent.getModel(),
+        promptText,
+        assistantText,
+        receipt: { ...receipt, diff: receipt.diff || "" },
+        impact,
+        maxDiffChars: context.runtime.reviewer?.maxDiffChars,
+        signal
+      });
+    } catch (error) {
// A reviewer failure is reported as a "caution" review instead of being rethrown.
+      review = {
+        verdict: "caution",
+        summary: `review failed: ${error instanceof Error ? error.message : String(error)}`,
+        concerns: ["Sentinel reviewer could not complete."],
+        followups: []
+      };
+    }
+  }
+  let designReview = receipt.designReview || null;
+  if (shouldRunFrontendReview({ promptText, receipt, profile: agent.getProfile() })) {
+    try {
+      designReview = await reviewFrontendTurn({
+        apiKey: context.runtime.apiKey,
+        baseUrl: context.runtime.baseUrl,
+        model: context.runtime.reviewer?.model || agent.getModel(),
+        promptText,
+        assistantText,
+        receipt: { ...receipt, diff: receipt.diff || "" },
+        signal
+      });
+    } catch (error) {
// Same policy as above: a design-review failure becomes a "caution" result.
+      designReview = {
+        verdict: "caution",
+        summary: `design review failed: ${error instanceof Error ? error.message : String(error)}`,
+        strengths: [],
+        issues: ["Frontend design reviewer could not complete."],
+        nextPass: []
+      };
+    }
+  }
// No screenshot analysis happens here: both values are fixed to null.
+  const screenshotReview = null;
+  const screenshotPath = null;
// Both slop checks run only when a design review exists (fresh or carried over
// from the receipt); otherwise mergeFrontendSlop receives two nulls.
+  const deterministicSlop = designReview
+    ? detectFrontendSlop({ promptText, assistantText, receipt, designReview })
+    : null;
+  const artifactSlop = designReview
+    ? await inspectFrontendArtifacts({ cwd: context.cwd, promptText, receipt, frontend })
+    : null;
+  const designSlop = mergeFrontendSlop(deterministicSlop, artifactSlop);
+  return {
+    impact,
+    review,
+    designReview,
+    designSlop,
+    screenshotReview,
+    screenshotPath
+  };
+}
// Writes previously computed artifacts onto the stored receipt and records the
// result on the runtime.
// Returns null when `receipt` is falsy; otherwise returns the final receipt.
// Side effects: sets context.runtime.lastReceipt and context.runtime.lastImpact.
// NOTE(review): `signal` is accepted but not used anywhere in this body.
+async function finalizeReceiptArtifacts({
+  agent,
+  context,
+  receipt,
+  artifacts,
+  signal
+}) {
+  if (!receipt) return null;
// Only truthy artifact fields are copied, so null/absent values never
// overwrite what is already stored on the receipt.
+  const updates = {};
+  if (artifacts?.impact) updates.impact = artifacts.impact;
+  if (artifacts?.review) updates.review = artifacts.review;
+  if (artifacts?.designReview) updates.designReview = artifacts.designReview;
+  if (artifacts?.designSlop) updates.designSlop = artifacts.designSlop;
+  if (artifacts?.screenshotReview) updates.screenshotReview = artifacts.screenshotReview;
+  if (artifacts?.screenshotPath) updates.screenshotPath = artifacts.screenshotPath;
+  if (artifacts?.designRevision) updates.designRevision = artifacts.designRevision;
// updateReceipt is skipped when there is nothing to write; a falsy result from
// updateReceipt falls back to the receipt that was passed in.
+  const finalReceipt = Object.keys(updates).length > 0
+    ? (await agent.toolRuntime.updateReceipt(receipt.id, updates) || receipt)
+    : receipt;
+  context.runtime.lastReceipt = finalReceipt;
+  context.runtime.lastImpact = artifacts?.impact || finalReceipt.impact || null;
+  return finalReceipt;
+}
// Analyzes a frontend turn and, while shouldAutoReviseFrontend says so, runs
// additional agent turns (revision or full rebuild) and re-analyzes the result.
// Returns { response, receipt, artifacts } for the last turn that ran.
// When `frontendExecutionContext` or `receipt` is missing, the inputs are
// returned untouched with `artifacts: null`.
// Throws Error(acceptanceFailure.reason) when getFrontendAcceptanceFailure
// reports a failure after the loop.
+async function maybeReviseInteractiveFrontend({
+  agent,
+  context,
+  promptText,
+  response,
+  receipt,
+  frontendExecutionContext = null,
+  previousExecutionContext = null,
+  signal
+}) {
+  if (!frontendExecutionContext || !receipt) {
+    return { response, receipt, artifacts: null };
+  }
// Frontend context fields override the previous context; reminders from both
// are concatenated rather than overridden.
+  const baseExecutionContext = { ...(previousExecutionContext || {}), ...frontendExecutionContext };
+  if (previousExecutionContext?.reminders && frontendExecutionContext.reminders) {
+    baseExecutionContext.reminders = `${previousExecutionContext.reminders}\n${frontendExecutionContext.reminders}`;
+  }
+  let activeResponse = response;
+  let activeReceipt = receipt;
+  let artifacts = await analyzeTurnArtifacts({
+    agent,
+    context,
+    promptText,
+    assistantText: activeResponse.content || "",
+    receipt: activeReceipt,
+    frontend: frontendExecutionContext.frontend || null,
+    signal
+  });
+  let revisionCount = 0;
+  const revisionHistory = [];
+  while (shouldAutoReviseFrontend({
+    designReview: artifacts.designReview,
+    slop: artifacts.designSlop,
+    revisionCount,
+    frontend: frontendExecutionContext.frontend || null
+  })) {
+    const passNumber = revisionCount + 1;
+    const forceRebuild = shouldForceFrontendRebuild({
+      frontend: frontendExecutionContext.frontend || null,
+      slop: artifacts.designSlop,
+      revisionCount
+    });
// The history entry records the state that triggered this pass, before the
// pass itself runs.
+    revisionHistory.push({
+      passNumber,
+      verdict: artifacts.designReview?.verdict || null,
+      summary: String(artifacts.designReview?.summary || "").trim(),
+      slopFlags: Array.isArray(artifacts.designSlop?.flags) ? [...artifacts.designSlop.flags] : [],
+      mode: forceRebuild ? "rebuild" : "revise"
+    });
+    const revisionPrompt = forceRebuild
+      ? buildFrontendRebuildPrompt({
+        originalPrompt: promptText,
+        frontend: frontendExecutionContext.frontend || null,
+        designReview: artifacts.designReview,
+        slop: artifacts.designSlop,
+        screenshotReview: artifacts.screenshotReview
+      })
+      : buildFrontendRevisionPrompt({
+        originalPrompt: promptText,
+        designReview: artifacts.designReview,
+        slop: artifacts.designSlop,
+        screenshotReview: artifacts.screenshotReview
+      });
// NOTE(review): the spinner is created here, but the try/finally that stops it
// starts further down; a throw from the calls in between would skip stop().
+    const revisionSpinner = createProgressSpinner(
+      forceRebuild ? `rebuilding frontend (${passNumber})...` : `revising frontend (${passNumber})...`
+    );
+    printLiveTrace(
+      forceRebuild ? `frontend rebuild pass ${passNumber}` : `frontend revision pass ${passNumber}`,
+      context.runtime.traceMode
+    );
// The contract from the current receipt is re-applied before the pass runs.
+    if (activeReceipt.contract) {
+      agent.toolRuntime.setCurrentContract(activeReceipt.contract);
+    }
+    agent.setExecutionContext({
+      ...baseExecutionContext,
+      phase: forceRebuild ? `design-rebuild-${passNumber}` : `design-revision-${passNumber}`,
+      reminders: [
+        baseExecutionContext.reminders || "",
+        forceRebuild
+          ? `Automatic rebuild pass ${passNumber}: discard the previous frontend direction and rebuild within the same contract using the benchmark starter skeleton.`
+          : passNumber === 1
+            ? "Automatic second pass: fix the flagged frontend design issues without widening scope."
+            : `Automatic follow-up pass ${passNumber}: remove any remaining benchmark hard-fail patterns.`
+      ].filter(Boolean).join("\n")
+    });
+    try {
+      activeResponse = await agent.runTurn(revisionPrompt, {
+        signal,
+        onStateChange(state) {
+          printLiveTrace(`state=${state}`, context.runtime.traceMode, { verboseOnly: true });
+        },
+        onToolStart(toolCall) {
+          const toolName = toolCall?.function?.name || "tool";
+          printLiveTrace(`using ${toolName}`, context.runtime.traceMode);
+        },
+        onToolEnd(toolCall, result) {
+          const toolName = toolCall?.function?.name || "tool";
+          const status = parseToolResultShape(result);
+          const label =
+            status === "ok" ? `${toolName} ok` : status === "blocked" ? `${toolName} blocked` : `${toolName} ${status}`;
+          printLiveTrace(label, context.runtime.traceMode);
+        }
+      });
+      activeReceipt = await agent.toolRuntime.completeTurn({ signal });
// NOTE(review): breaking here leaves `artifacts` from the previous analysis and
// does not increment revisionCount, although a history entry was already
// pushed for this pass; the returned `receipt` is then falsy.
+      if (!activeReceipt) break;
+      artifacts = await analyzeTurnArtifacts({
+        agent,
+        context,
+        promptText,
+        assistantText: activeResponse.content || "",
+        receipt: activeReceipt,
+        frontend: frontendExecutionContext.frontend || null,
+        signal
+      });
+      revisionCount += 1;
+    } finally {
// Runs after every pass, including the break and error paths: the spinner is
// stopped and the execution context is reset to the caller's previous one.
+      revisionSpinner.stop();
+      agent.setExecutionContext(previousExecutionContext);
+    }
+  }
// designRevision is attached only when at least one pass completed.
+  if (revisionCount > 0) {
+    artifacts.designRevision = {
+      triggered: true,
+      passes: revisionCount,
+      history: revisionHistory
+    };
+  }
+  const acceptanceFailure = getFrontendAcceptanceFailure({
+    frontend: frontendExecutionContext.frontend || null,
+    slop: artifacts.designSlop,
+    designReview: artifacts.designReview,
+    screenshotReview: artifacts.screenshotReview
+  });
+  if (acceptanceFailure) {
+    throw new Error(acceptanceFailure.reason);
+  }
+  return { response: activeResponse, receipt: activeReceipt, artifacts };
+}
 function describeOperator(runtime, agent) {
   const identity = buildOperatorIdentity({
     mode: agent.getExperienceMode(),
@@ -3624,7 +4007,35 @@ async function runTextTurnInteractive({
   }
   if (!precomputedReceipt && frontendExecutionContext) {
     const candidateReceipt = await agent.toolRuntime.completeTurn({ signal: abortController?.signal });
-    if (shouldRecoverFrontendCodeEcho({ frontendExecutionContext, receipt: candidateReceipt, assistantText: response.content || "" })) {
+    if (shouldRecoverFrontendBlockedShell({ frontendExecutionContext, receipt: candidateReceipt })) {
+      const recoverySpinner = createProgressSpinner("retrying frontend tools...");
+      printLiveTrace("frontend recovery: blocked mutating shell, retrying with declare_contract and file tools", context.runtime.traceMode);
+      agent.toolRuntime.setReadOnlyRoots(readOnlyRoots);
+      agent.toolRuntime.setWriteRoots(writeRoots);
+      if (frontendExecutionContext) {
+        const merged = { ...(previousExecutionContext || {}), ...frontendExecutionContext };
+        if (previousExecutionContext?.reminders && frontendExecutionContext.reminders) {
+          merged.reminders = `${previousExecutionContext.reminders}\n${frontendExecutionContext.reminders}`;
+        }
+        agent.setExecutionContext(merged);
+      }
+      try {
+        response = await agent.runTurn(buildFrontendBlockedShellRecoveryPrompt({ originalPrompt: effectivePromptText }), {
+          signal: abortController?.signal,
+          onStateChange(state) {
+            printLiveTrace(`state=${state}`, context.runtime.traceMode, { verboseOnly: true });
+          }
+        });
+        renderedAssistantText = response.content || "";
+      } finally {
+        recoverySpinner.stop();
+        agent.toolRuntime.setReadOnlyRoots([]);
+        agent.toolRuntime.setWriteRoots([]);
+        if (frontendExecutionContext) {
+          agent.setExecutionContext(previousExecutionContext);
+        }
+      }
+    } else if (shouldRecoverFrontendCodeEcho({ frontendExecutionContext, receipt: candidateReceipt, assistantText: response.content || "" })) {
       const recoverySpinner = createProgressSpinner("writing files...");
       printLiveTrace("frontend recovery: assistant echoed code, retrying with write_file", context.runtime.traceMode);
       agent.toolRuntime.setReadOnlyRoots(readOnlyRoots);
@@ -3656,18 +4067,89 @@ async function runTextTurnInteractive({
       precomputedReceipt = candidateReceipt;
     }
   }
+  let finalizedArtifacts = null;
+  let finalizedReceipt = precomputedReceipt || await agent.toolRuntime.completeTurn({ signal: abortController?.signal });
+  if (frontendExecutionContext && finalizedReceipt && shouldRecoverFrontendMissingFiles({ frontendExecutionContext, receipt: finalizedReceipt })) {
+    const recoverySpinner = createProgressSpinner("writing site files...");
+    printLiveTrace("frontend recovery: scope created but no site files written, retrying with write_file", context.runtime.traceMode);
+    agent.toolRuntime.setReadOnlyRoots(readOnlyRoots);
+    agent.toolRuntime.setWriteRoots(writeRoots);
+    if (frontendExecutionContext) {
+      const merged = { ...(previousExecutionContext || {}), ...frontendExecutionContext };
+      if (previousExecutionContext?.reminders && frontendExecutionContext.reminders) {
+        merged.reminders = `${previousExecutionContext.reminders}\n${frontendExecutionContext.reminders}`;
+      }
+      agent.setExecutionContext(merged);
+    }
+    try {
+      response = await agent.runTurn(buildFrontendMissingFilesRecoveryPrompt({
+        originalPrompt: effectivePromptText,
+        contract: finalizedReceipt?.contract,
+        changedFiles: Array.isArray(finalizedReceipt?.changedFiles) ? finalizedReceipt.changedFiles : []
+      }), {
+        signal: abortController?.signal,
+        onStateChange(state) {
+          printLiveTrace(`state=${state}`, context.runtime.traceMode, { verboseOnly: true });
+        }
+      });
+      renderedAssistantText = response.content || renderedAssistantText;
+      finalizedReceipt = await agent.toolRuntime.completeTurn({ signal: abortController?.signal });
+    } finally {
+      recoverySpinner.stop();
+      agent.toolRuntime.setReadOnlyRoots([]);
+      agent.toolRuntime.setWriteRoots([]);
+      if (frontendExecutionContext) {
+        agent.setExecutionContext(previousExecutionContext);
+      }
+    }
+  }
+  if (frontendExecutionContext && finalizedReceipt) {
+    const revisedFrontendTurn = await maybeReviseInteractiveFrontend({
+      agent,
+      context,
+      promptText: effectivePromptText,
+      response,
+      receipt: finalizedReceipt,
+      frontendExecutionContext,
+      previousExecutionContext,
+      signal: abortController?.signal
+    });
+    response = revisedFrontendTurn.response || response;
+    renderedAssistantText = response.content || renderedAssistantText;
+    finalizedReceipt = revisedFrontendTurn.receipt || finalizedReceipt;
+    finalizedArtifacts = revisedFrontendTurn.artifacts || null;
+  }
+  renderedAssistantText = buildBenchmarkFrontendStatusOutput({
+    assistantText: renderedAssistantText,
+    receipt: finalizedReceipt,
+    artifacts: finalizedArtifacts,
+    frontend: frontendExecutionContext?.frontend || null
+  });
   printAssistantOutput(renderedAssistantText);
   await setSessionRunState(currentSession, agent, "done");
   printTurnSummary(turnSummary, response, { modelId: agent.getModel(), costTracker: context.costTracker, traceMode: context.runtime.traceMode });
   printTraceTimeline(turnSummary, context.runtime.traceMode);
-  const receipt = await enrichTurnArtifacts({
-    agent,
-    context,
-    promptText: effectivePromptText,
-    assistantText: response.content || '',
-    signal: abortController?.signal
-  });
-  const finalReceipt = receipt || precomputedReceipt;
+  let finalReceipt = null;
+  if (finalizedReceipt && finalizedArtifacts) {
+    finalReceipt = await finalizeReceiptArtifacts({
+      agent,
+      context,
+      receipt: finalizedReceipt,
+      artifacts: finalizedArtifacts,
+      signal: abortController?.signal
+    });
+  } else {
+    const receipt = await enrichTurnArtifacts({
+      agent,
+      context,
+      promptText: effectivePromptText,
+      assistantText: response.content || "",
+      signal: abortController?.signal
+    });
+    finalReceipt = receipt || finalizedReceipt;
+  }
   if (finalReceipt) {
     currentSession.lastReceiptId = finalReceipt.id;
     if (shouldPrintReceiptSummary(finalReceipt, context.runtime.receiptMode)) {