npm - bonecode - Versions diffs - 1.4.1 → 1.4.3 - Mend

bonecode 1.4.1 → 1.4.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16) hide show

package/dist/src/engine/session/build_mode.d.ts +6 -0
package/dist/src/engine/session/build_mode.js +141 -13
package/dist/src/engine/session/build_mode.js.map +1 -1
package/dist/src/engine/session/leaked_tool_call.d.ts +49 -0
package/dist/src/engine/session/leaked_tool_call.js +174 -0
package/dist/src/engine/session/leaked_tool_call.js.map +1 -0
package/dist/src/engine/session/prompt.js +167 -0
package/dist/src/engine/session/prompt.js.map +1 -1
package/package.json +1 -1
package/scripts/debug_extract.js +40 -0
package/scripts/test_build_mode.js +132 -0
package/scripts/test_identical_response.js +129 -0
package/scripts/test_leaked_tool_call.js +269 -0
package/src/engine/session/build_mode.ts +157 -13
package/src/engine/session/leaked_tool_call.ts +166 -0
package/src/engine/session/prompt.ts +203 -0

package/scripts/test_identical_response.js ADDED Viewed

@@ -0,0 +1,129 @@
+#!/usr/bin/env node
+/**
+ * Unit tests for the identical-response loop detector in prompt.ts.
+ *
+ * The detector exists because small/local models sometimes emit the same
+ * assistant message turn after turn when they're confused about whether to
+ * call a tool. Without bailout, the loop keeps re-issuing the same prompt
+ * and getting the same prose back forever.
+ *
+ * We can't easily run runAgentLoop (needs DB + provider), but we can verify
+ * the helper functions exist and the loop reads/updates the recentResponses
+ * tracking array.
+ */
+"use strict";
+const fs = require("fs");
+const path = require("path");
+const crypto = require("crypto");
+const G = "\x1b[32m"; const R = "\x1b[31m"; const C = "\x1b[36m";
+const B = "\x1b[1m"; const D = "\x1b[2m"; const N = "\x1b[0m";
+let passed = 0;
+let failed = 0;
+const failures = [];
+function ok(name, info = "") { passed++; console.log(`  ${G}✓${N} ${name}${info ? `  ${D}${info}${N}` : ""}`); }
+function fail(name, msg) { failed++; failures.push(`${name}: ${msg}`); console.log(`  ${R}✗${N} ${name}  ${R}${msg}${N}`); }
+function header(s) { console.log(`\n${C}${B}${s}${N}`); }
+const ROOT = path.resolve(__dirname, "..");
+const promptSrc = fs.readFileSync(path.join(ROOT, "src", "engine", "session", "prompt.ts"), "utf-8");
+// ─── [1] Helpers exist ────────────────────────────────────────────────────────
+header("[1] Identical-response detector — helper functions exist");
+(() => {
+  if (/async function assistantTextFingerprint/.test(promptSrc)) ok("assistantTextFingerprint function defined");
+  else fail("assistantTextFingerprint", "missing from prompt.ts");
+})();
+(() => {
+  // The fingerprint should normalize whitespace and use SHA1
+  const fnMatch = promptSrc.match(/async function assistantTextFingerprint[\s\S]*?\n\}/);
+  if (!fnMatch) { fail("assistantTextFingerprint body", "function not extracted"); return; }
+  const body = fnMatch[0];
+  if (/createHash\(['"]sha1['"]\)/.test(body)) ok("uses SHA1 hashing");
+  else fail("hashing", "fingerprint should use SHA1");
+  if (/toLowerCase\(\)|replace\(\/\\\s\+\/g/.test(body)) ok("normalizes whitespace + case");
+  else fail("normalization", "fingerprint should normalize before hashing");
+  if (/\.length\s*<\s*\d+/.test(body)) ok("rejects short strings (low entropy)");
+  else fail("min length", "fingerprint should skip short strings");
+})();
+// ─── [2] Loop has recentResponses tracking ───────────────────────────────────
+header("[2] Agent loop — recentResponses tracking");
+(() => {
+  if (/const recentResponses\s*:\s*string\[\]\s*=\s*\[\]/.test(promptSrc)) ok("recentResponses array declared");
+  else fail("recentResponses", "missing in prompt.ts");
+})();
+(() => {
+  // Should push fingerprint and shift after threshold
+  if (/recentResponses\.push\(fingerprint\)/.test(promptSrc)) ok("pushes fingerprint each turn");
+  else fail("push", "fingerprint not pushed");
+  if (/recentResponses\.shift\(\)/.test(promptSrc)) ok("shifts old fingerprints");
+  else fail("shift", "no rolling window");
+})();
+(() => {
+  // Should break loop on duplicate
+  if (/recentResponses\.includes\(fingerprint\)/.test(promptSrc)) ok("checks for duplicate fingerprint");
+  else fail("duplicate check", "missing");
+})();
+(() => {
+  if (/identical_response_detected/.test(promptSrc)) ok("logs identical_response_detected");
+  else fail("log event", "no diagnostic log");
+  if (/Model produced an identical response/.test(promptSrc)) ok("user-facing warning emitted");
+  else fail("warning", "no session.warning broadcast");
+})();
+// ─── [3] Fingerprint behavior — manual reproduction ──────────────────────────
+header("[3] Fingerprint algorithm — reproduces same hash for normalized input");
+(() => {
+  // Re-implement the fingerprint logic in pure JS and verify it produces
+  // matching hashes for whitespace-equivalent inputs.
+  function fp(text) {
+    if (!text || text.length < 80) return null;
+    const normalized = text.toLowerCase().replace(/\s+/g, " ").trim();
+    const sample = normalized.slice(0, 1000);
+    return crypto.createHash("sha1").update(sample).digest("hex");
+  }
+  const a = "I'll start by creating a structure for the medieval market in BoneScript and then we will iterate.";
+  const b = "I'll  start  by  creating  a  structure  for  the   medieval  market in BoneScript and then we will iterate.";
+  const c = "i'll start by creating a structure for the medieval market in bonescript and then we will iterate.";
+  if (fp(a) === fp(b)) ok("whitespace differences ignored");
+  else fail("whitespace", `${fp(a)} ≠ ${fp(b)}`);
+  if (fp(a) === fp(c)) ok("case differences ignored");
+  else fail("case", `${fp(a)} ≠ ${fp(c)}`);
+  const d = "totally different text that is long enough to fingerprint without colliding";
+  if (fp(a) !== fp(d)) ok("different text → different fingerprint");
+  else fail("collision", "different inputs have same fingerprint");
+  const short = "too short";
+  if (fp(short) === null) ok("short input returns null");
+  else fail("short", "should not fingerprint short text");
+})();
+// ─── Summary ─────────────────────────────────────────────────────────────────
+console.log();
+if (failed === 0) {
+  console.log(`${G}${B}✓ All ${passed} tests passed${N}`);
+  process.exit(0);
+} else {
+  console.log(`${R}${B}✗ ${failed} failed, ${passed} passed${N}`);
+  for (const f of failures) console.log(`  ${R}- ${f}${N}`);
+  process.exit(1);
+}

package/scripts/test_leaked_tool_call.js ADDED Viewed

@@ -0,0 +1,269 @@
+#!/usr/bin/env node
+/**
+ * Tests for the leaked tool-call parser. Loads the compiled module directly
+ * so tests run against the same code that ships.
+ *
+ * Patterns tested are taken from real model outputs:
+ *   - gemma:  <|tool_call>call:edit{file_path:<|"|>foo.bone<|"|>}<tool_call|>
+ *   - qwen:   <tool_call>{"name":"write","arguments":{"path":"x"}}</tool_call>
+ *   - llama3: <|python_tag|>write({"path":"x"})<|/python_tag|>
+ *   - openai-style fenced: ```tool_code\nname(arg=val)\n```
+ *
+ * Also re-tests isBuildPrompt against the prompts that previously slipped
+ * through (e.g. "using BoneScript as the backend, write a python ...").
+ */
+"use strict";
+const fs = require("fs");
+const path = require("path");
+const G = "\x1b[32m"; const R = "\x1b[31m"; const C = "\x1b[36m";
+const B = "\x1b[1m"; const D = "\x1b[2m"; const N = "\x1b[0m";
+let passed = 0;
+let failed = 0;
+const failures = [];
+function ok(name, info = "") {
+  passed++;
+  console.log(`  ${G}✓${N} ${name}${info ? `  ${D}${info}${N}` : ""}`);
+}
+function fail(name, msg) {
+  failed++;
+  failures.push(`${name}: ${msg}`);
+  console.log(`  ${R}✗${N} ${name}  ${R}${msg}${N}`);
+}
+function header(s) { console.log(`\n${C}${B}${s}${N}`); }
+const ROOT = path.resolve(__dirname, "..");
+const modulePath = path.join(ROOT, "dist", "src", "engine", "session", "leaked_tool_call.js");
+if (!fs.existsSync(modulePath)) {
+  console.error(`${R}Compiled module not found at ${modulePath}.${N}`);
+  console.error(`Run \`npm run build\` first.`);
+  process.exit(1);
+}
+const {
+  extractLeakedToolCall,
+  parseLeakedBody,
+  parseKwargs,
+  parseLooseObject,
+} = require(modulePath);
+// ─── Tests: gemma-style markers ───────────────────────────────────────────────
+header("[1] Gemma-style leaked calls (the user's exact bug)");
+(() => {
+  const text = `I'll create the file.\n<|tool_call>call:edit{file_path:<|"|>medieval_market.bone<|"|>}<tool_call|>\nDone.`;
+  const r = extractLeakedToolCall(text);
+  if (r && r.toolName === "edit" && r.toolInput.file_path === "medieval_market.bone") {
+    ok("gemma <|tool_call>call:name{...}<tool_call|>", `→ edit(file_path="${r.toolInput.file_path}")`);
+  } else {
+    fail("gemma exact bug", JSON.stringify(r));
+  }
+})();
+(() => {
+  const text = `<|tool_call|>{"name":"write","arguments":{"path":"foo.ts","content":"hello"}}<|/tool_call|>`;
+  const r = extractLeakedToolCall(text);
+  if (r && r.toolName === "write" && r.toolInput.path === "foo.ts" && r.toolInput.content === "hello") {
+    ok("gemma <|tool_call|>{json}<|/tool_call|>");
+  } else {
+    fail("gemma JSON form", JSON.stringify(r));
+  }
+})();
+// ─── Tests: qwen-style markers ────────────────────────────────────────────────
+header("[2] Qwen-style leaked calls");
+(() => {
+  const text = `<tool_call>{"name":"bash","arguments":{"command":"ls -la"}}</tool_call>`;
+  const r = extractLeakedToolCall(text);
+  if (r && r.toolName === "bash" && r.toolInput.command === "ls -la") {
+    ok("<tool_call>{json}</tool_call>");
+  } else {
+    fail("qwen", JSON.stringify(r));
+  }
+})();
+(() => {
+  const text = `<tool_call>{"tool":"read","args":{"path":"src/main.ts"}}</tool_call>`;
+  const r = extractLeakedToolCall(text);
+  if (r && r.toolName === "read" && r.toolInput.path === "src/main.ts") {
+    ok("<tool_call>{tool: ..., args: ...}</tool_call>");
+  } else {
+    fail("qwen alt keys", JSON.stringify(r));
+  }
+})();
+// ─── Tests: llama3-style python_tag ───────────────────────────────────────────
+header("[3] llama3-style <|python_tag|>");
+(() => {
+  const text = `<|python_tag|>write({"path":"x.txt","content":"y"})<|/python_tag|>`;
+  const r = extractLeakedToolCall(text);
+  if (r && r.toolName === "write" && r.toolInput.path === "x.txt" && r.toolInput.content === "y") {
+    ok("llama3 python_tag with JSON arg");
+  } else {
+    fail("llama3", JSON.stringify(r));
+  }
+})();
+// ─── Tests: function-call kwargs ──────────────────────────────────────────────
+header("[4] Function-call kwargs syntax");
+(() => {
+  const args = parseKwargs(`path="foo.ts", content="hello world"`);
+  if (args && args.path === "foo.ts" && args.content === "hello world") ok("string kwargs");
+  else fail("string kwargs", JSON.stringify(args));
+})();
+(() => {
+  const args = parseKwargs(`count=42, ratio=3.14, enabled=true, missing=null`);
+  if (args && args.count === 42 && args.ratio === 3.14 && args.enabled === true && args.missing === null) {
+    ok("typed kwargs (number, float, bool, null)");
+  } else {
+    fail("typed kwargs", JSON.stringify(args));
+  }
+})();
+(() => {
+  const args = parseKwargs(`file_path=<|"|>medieval_market.bone<|"|>`);
+  if (args && args.file_path === "medieval_market.bone") ok(`<|"|> escapes are stripped`);
+  else fail("escape markers", JSON.stringify(args));
+})();
+// ─── Tests: loose-object form ─────────────────────────────────────────────────
+header("[5] Loose-object form (pseudo-JSON)");
+(() => {
+  const o = parseLooseObject(`file_path:"foo.bone", count:3`);
+  if (o && o.file_path === "foo.bone" && o.count === 3) ok("colon-separated loose object");
+  else fail("loose object", JSON.stringify(o));
+})();
+// ─── Tests: fenced tool_code ──────────────────────────────────────────────────
+header("[6] Fenced tool_code blocks");
+(() => {
+  const text = "Some prose\n```tool_code\nwrite(path=\"x\", content=\"y\")\n```\nMore prose.";
+  const r = extractLeakedToolCall(text);
+  if (r && r.toolName === "write" && r.toolInput.path === "x" && r.toolInput.content === "y") {
+    ok("```tool_code\\nname(args)\\n```");
+  } else {
+    fail("fenced tool_code", JSON.stringify(r));
+  }
+})();
+// ─── Tests: false-positives ───────────────────────────────────────────────────
+header("[7] No false-positives on plain text");
+const cleanCases = [
+  "I'll create a file called foo.bone now.",
+  "Use the `write` tool to save the file.",
+  "Here's how you'd do it: write(path, content) — but that's pseudocode.",
+  "<not_a_tool_call>just text</not_a_tool_call>",
+  "",
+  "<tool_call></tool_call>",
+];
+for (const c of cleanCases) {
+  const r = extractLeakedToolCall(c);
+  if (r === null) ok(`clean: "${c.slice(0, 50)}..."`);
+  else fail(`false positive`, `"${c}" → ${JSON.stringify(r)}`);
+}
+// ─── Tests: stripping positions ───────────────────────────────────────────────
+header("[8] startIndex/endIndex enable text stripping");
+(() => {
+  const text = `Before <|tool_call|>{"name":"write","arguments":{}}<|/tool_call|> after`;
+  const r = extractLeakedToolCall(text);
+  if (!r) {
+    fail("strip positions", "no match");
+  } else {
+    const stripped = text.slice(0, r.startIndex) + text.slice(r.endIndex);
+    if (stripped === "Before  after") ok("text stripped cleanly", `"${stripped}"`);
+    else fail("strip", `got "${stripped}"`);
+  }
+})();
+// ─── Tests: build mode trigger detection ──────────────────────────────────────
+header("[9] isBuildPrompt covers the previously-missed prompts");
+const bmModulePath = path.join(ROOT, "dist", "src", "engine", "session", "build_mode.js");
+if (!fs.existsSync(bmModulePath)) {
+  fail("build_mode module", "compiled file missing");
+} else {
+  const { isBuildPrompt } = require(bmModulePath);
+  const newCases = [
+    // The exact prompt that previously failed in the user's session
+    "using BoneScript as the backend, write a python 2d mideveal copper silver gold platinum transaction market simulation",
+    "with bonescript, build a chat app",
+    "in BoneScript, design a multi-tenant CRM",
+    "BoneScript backend for a music streaming service",
+    "write me a REST API for a todo list",
+    "develop a graphql api for users",
+    "scaffold a web application with auth",
+  ];
+  for (const p of newCases) {
+    if (isBuildPrompt(p)) ok(`triggers: "${p.slice(0, 60)}..."`);
+    else fail(`missed`, p);
+  }
+  const negative = [
+    "what does this function do",
+    "explain the difference between let and const",
+    "fix the typo on line 5",
+  ];
+  for (const p of negative) {
+    if (!isBuildPrompt(p)) ok(`not triggered: "${p}"`);
+    else fail(`over-matched`, p);
+  }
+}
+// ─── Tests: parseLeakedBody handles edge cases ────────────────────────────────
+header("[10] parseLeakedBody edge cases");
+(() => {
+  const r = parseLeakedBody("");
+  if (r === null) ok("empty body returns null");
+  else fail("empty", JSON.stringify(r));
+})();
+(() => {
+  const r = parseLeakedBody("not a tool call at all");
+  if (r === null) ok("garbage body returns null");
+  else fail("garbage", JSON.stringify(r));
+})();
+(() => {
+  // Function call with JSON arg
+  const r = parseLeakedBody('write({"path": "a.txt", "content": "b"})');
+  if (r && r.toolName === "write" && r.toolInput.path === "a.txt" && r.toolInput.content === "b") {
+    ok("function with JSON arg");
+  } else {
+    fail("function JSON arg", JSON.stringify(r));
+  }
+})();
+console.log();
+if (failed === 0) {
+  console.log(`${G}${B}✓ All ${passed} tests passed${N}`);
+  process.exit(0);
+} else {
+  console.log(`${R}${B}✗ ${failed} failed, ${passed} passed${N}`);
+  for (const f of failures) console.log(`  ${R}- ${f}${N}`);
+  process.exit(1);
+}

package/src/engine/session/build_mode.ts CHANGED Viewed

@@ -69,6 +69,8 @@ export interface BuildState {
   error?: string;
   /** Set after probe: whether the model can emit OpenAI-format tool calls. */
   tool_capable?: boolean;
+  /** Files written so far in this build, with todo that produced each. */
+  written_files?: Record<string, { todo_id: string; size: number; written_at: number }>;
 }
 export interface VerificationResult {
@@ -402,13 +404,24 @@ async function stageExecute(state: BuildState, input: BuildModeInput): Promise<B
     if (state.tool_capable) {
       // ── Tool-calling path ────────────────────────────────────────────────
+      const knownFilesNote =
+        Object.keys(state.written_files || {}).length > 0
+          ? `\nFILES ALREADY WRITTEN (do NOT recreate or rewrite these — they're done):\n` +
+            Object.keys(state.written_files || {}).map((p) => `  - ${p}`).join("\n")
+          : "";
       const focusedPrompt = [
         `<build-task>`,
+        `You are doing ONE task in a larger build that's already in progress.`,
+        `IGNORE earlier conversation context — those questions are already answered.`,
+        `DO NOT ask clarifying questions. The plan is locked. Just do this task.`,
+        ``,
         `Title: ${next.title}`,
         `Description: ${next.description}`,
+        knownFilesNote,
         ``,
-        `This is one task in a larger build. Complete this task NOW by calling the appropriate tools.`,
-        `Do not describe what you would do — call the tools.`,
+        `Output ONE OR MORE concrete tool calls (write/edit/bash) for THIS task.`,
+        `Do NOT respond with prose. Do NOT explain. Call the tools NOW.`,
         `</build-task>`,
       ].join("\n");
@@ -503,6 +516,32 @@ async function countToolCallsSince(session_id: string, since_message_id: string)
   }
 }
+/**
+ * Like countToolCallsSince, but excludes calls synthesized from leaked tool-call
+ * markers. Used by the probe so we know whether the model can actually emit
+ * native tool calls — not just leak text we recovered. Models that only ever
+ * leak should be routed through the JSON-manifest fallback.
+ *
+ * Uses LIKE on the serialized tool_input JSON instead of jsonb operators so
+ * the same query works in both Postgres (real JSONB) and the SQLite fallback
+ * (TEXT column with stripped JSON operators).
+ *
+ * `_` is a single-character wildcard in LIKE, so the two underscores of the
+ * `__synthesized` marker are escaped with an explicit ESCAPE character.
+ * Unescaped, the pattern would also exclude native calls whose input merely
+ * contains the word "synthesized" after any two characters.
+ *
+ * @param session_id        Session whose tool_calls rows are counted.
+ * @param since_message_id  Only calls created at or after this message's
+ *                          created_at are counted.
+ * @returns The number of matching rows; 0 if the query throws.
+ */
+async function countNativeToolCallsSince(session_id: string, since_message_id: string): Promise<number> {
+  try {
+    const r = await pool.query(
+      `SELECT COUNT(*) AS n FROM tool_calls
+       WHERE session_id = $1
+         AND created_at >= (SELECT created_at FROM messages WHERE id = $2)
+         AND CAST(tool_input AS TEXT) NOT LIKE '%!_!_synthesized%' ESCAPE '!'`,
+      [session_id, since_message_id]
+    );
+    // COUNT(*) may come back as a string depending on the driver; normalize.
+    const raw = r.rows[0]?.n;
+    return typeof raw === "string" ? parseInt(raw, 10) || 0 : raw || 0;
+  } catch {
+    // Best-effort: a failed query is reported as "no native calls".
+    return 0;
+  }
+}
 // ─── Tool-capability probe & JSON-manifest fallback ───────────────────────────
 /**
@@ -553,7 +592,11 @@ async function probeToolCapability(input: BuildModeInput): Promise<boolean> {
       agent_name: "build",
     });
-    const calls = await countToolCallsSince(input.session_id, probeMsgId);
+    // Only count NATIVE tool calls. Calls synthesized from leaked tool-marker
+    // text don't count — those are unreliable in real builds (the model needs
+    // to format the leak perfectly every time, which it usually doesn't).
+    // If the model only ever leaks, we want JSON-manifest fallback instead.
+    const calls = await countNativeToolCallsSince(input.session_id, probeMsgId);
     return calls > 0;
   } catch {
     return false;
@@ -610,6 +653,22 @@ async function executeFallback(
     .map((t) => `- ${t.title}`)
     .join("\n");
+  // Build a manifest of files already written so the model doesn't blindly
+  // overwrite them. We include the first ~30 lines of each file so the model
+  // can see what's there and decide whether to extend or leave alone.
+  state.written_files = state.written_files || {};
+  const existingPaths = Object.keys(state.written_files);
+  const existingFilesSnippets: string[] = [];
+  for (const p of existingPaths) {
+    try {
+      const target = path.resolve(worktree, p);
+      const content = await fs.readFile(target, "utf-8").catch(() => "");
+      const snippet = content.split("\n").slice(0, 30).join("\n");
+      const truncated = content.split("\n").length > 30 ? "\n... (truncated)" : "";
+      existingFilesSnippets.push(`${p} (${state.written_files[p].size} bytes):\n${snippet}${truncated}`);
+    } catch {}
+  }
   const result = await askJson<{
     files?: Array<{ path: string; content: string }>;
     commands?: string[];
@@ -617,20 +676,36 @@ async function executeFallback(
     model_id: input.model_id,
     provider_id: input.provider_id,
     system: [
-      "You are completing one task in a project build. Produce a JSON manifest of the files to create or update and shell commands to run for THIS task only.",
+      "You are completing one task in an incremental project build. The project already has files from earlier tasks; THIS task adds the next layer.",
       "",
-      "RULES:",
-      "- Output a single JSON object: { \"files\": [...], \"commands\": [...] }",
+      "PRODUCE A JSON MANIFEST of files to create OR commands to run for THIS task only.",
+      "",
+      "CRITICAL RULES:",
+      `- Output a single JSON object: { "files": [...], "commands": [...] }`,
       "- Each file must have a relative `path` and full `content`. Do not abbreviate file content.",
       "- File paths must be relative to the project root (no leading slash, no '..').",
-      "- Commands run in the project root. Use them only for compilation, package install, or migrations.",
+      "",
+      "FILE OVERWRITE POLICY:",
+      "- If a file already exists (listed under <existing-files>), DO NOT include it in your manifest unless this task is specifically about modifying it.",
+      "- If you must update an existing file, you MUST include the COMPLETE new content (including everything from the existing version that you want to keep). Partial content will overwrite and destroy whatever is there.",
+      "- For NEW files, just provide the new content.",
+      "- Prefer adding NEW files over modifying existing ones whenever possible.",
+      "",
+      "OUTPUT FORMAT:",
       "- Do not include explanatory prose. The JSON IS the entire response.",
+      "- Do not wrap in markdown code fences.",
     ].join("\n"),
     user: [
       `<design>`,
       designContext,
       `</design>`,
       ``,
+      `<existing-files>`,
+      existingFilesSnippets.length
+        ? existingFilesSnippets.join("\n\n---\n\n")
+        : "(no files written yet)",
+      `</existing-files>`,
+      ``,
       `<completed-tasks>`,
       completedFiles || "(none yet)",
       `</completed-tasks>`,
@@ -639,6 +714,8 @@ async function executeFallback(
       `Title: ${todo.title}`,
       `Description: ${todo.description}`,
       `</current-task>`,
+      ``,
+      `Produce the JSON manifest for the current task. Do NOT re-emit existing files unless this task explicitly modifies them.`,
     ].join("\n"),
     schema_hint: `{ "files": [{ "path": string, "content": string }], "commands": string[] }`,
   });
@@ -663,10 +740,38 @@ async function executeFallback(
       errors.push(`refused path outside worktree: ${f.path}`);
       continue;
     }
+    // OVERWRITE GUARD: if the file was written by an earlier todo, refuse a
+    // rewrite that shrinks it to less than half its current size — unless the
+    // todo's title or description mentions the path, in which case the rewrite
+    // is taken as intentional. Same-size or larger rewrites are always allowed.
+    const existing = state.written_files[f.path];
+    if (existing && existing.todo_id !== todo.id) {
+      // A file that can no longer be read counts as empty, so the guard passes.
+      const oldContent = await fs.readFile(target, "utf-8").catch(() => "");
+      // Measure both sides in UTF-8 bytes. Comparing byte length against
+      // String.length (UTF-16 code units) skews the ratio for non-ASCII text
+      // and makes the "bytes" figures in the message below inaccurate.
+      const oldSize = Buffer.byteLength(oldContent, "utf-8");
+      const newSize = Buffer.byteLength(f.content, "utf-8");
+      const pathLower = f.path.toLowerCase();
+      const titleMentionsPath = todo.title.toLowerCase().includes(pathLower) ||
+        todo.description.toLowerCase().includes(pathLower);
+      const isStrictShrink = newSize < oldSize * 0.5; // shrunk by >50% = clobber
+      if (isStrictShrink && !titleMentionsPath) {
+        errors.push(
+          `refused overwrite: ${f.path} would shrink from ${oldSize} to ${newSize} bytes ` +
+          `(model likely truncating). Skipping.`
+        );
+        continue;
+      }
+    }
     try {
       await fs.mkdir(path.dirname(target), { recursive: true });
       await fs.writeFile(target, f.content, "utf-8");
       filesWritten++;
+      // Track in state
+      state.written_files[f.path] = {
+        todo_id: todo.id,
+        size: Buffer.byteLength(f.content, "utf-8"),
+        written_at: Date.now(),
+      };
       // Surface a tool.completed event so the TUI shows an Edit/Write line
       const callId = `fallback-${uuid()}`;
       const broadcastModule = await import("../../../bone/output/session/src/websocket");
@@ -674,14 +779,14 @@ async function executeFallback(
         type: "tool.requested",
         session_id: input.session_id,
         tool_call_id: callId,
-        tool_name: "write",
+        tool_name: existing ? "edit" : "write",
         tool_input: { path: f.path, content: f.content.slice(0, 200) },
       });
       broadcastModule.broadcastToChannel("part_stream", {
         type: "tool.completed",
         session_id: input.session_id,
         tool_call_id: callId,
-        tool_name: "write",
+        tool_name: existing ? "edit" : "write",
         tool_input: { path: f.path },
         duration_ms: 0,
       });
@@ -821,15 +926,43 @@ async function stageVerify(state: BuildState, input: BuildModeInput): Promise<Bu
 // ─── Driver ───────────────────────────────────────────────────────────────────
-export async function runBuildMode(input: BuildModeInput): Promise<BuildState> {
-  let state: BuildState = (await loadState(input.session_id)) ?? {
+/**
+ * Start a fresh BuildState for the given prompt. Centralized so we don't
+ * accidentally drift between the lazy-init path in `runBuildMode` and the
+ * restart path used after a previous build is already `done` or `failed`.
+ */
+function freshState(prompt: string): BuildState {
+  return {
     stage: "clarify",
-    original_prompt: input.prompt,
+    original_prompt: prompt,
     design: null,
     todos: [],
     iteration: 0,
     max_iterations: 30,
+    written_files: {},
   };
+}
+export async function runBuildMode(input: BuildModeInput): Promise<BuildState> {
+  let state: BuildState = (await loadState(input.session_id)) ?? freshState(input.prompt);
+  // If the previous build is already done/failed, and the user is asking for
+  // something new (different prompt), start a fresh build rather than no-op.
+  // Without this, follow-up "build me X" prompts after a completed build
+  // silently do nothing because the loop sees state.stage === "done" and
+  // exits immediately.
+  if (
+    (state.stage === "done" || state.stage === "failed") &&
+    input.prompt &&
+    input.prompt !== state.original_prompt
+  ) {
+    emit(input.session_id, "build.restart", {
+      previous_stage: state.stage,
+      previous_prompt: state.original_prompt,
+    });
+    state = freshState(input.prompt);
+    await saveState(input.session_id, state);
+  }
   // Resume from saved state if applicable. If the user is sending a new prompt
   // and we're already in clarify with pending questions, treat the prompt as
@@ -887,9 +1020,20 @@ export function isBuildPrompt(prompt: string): boolean {
     /\bmake\s+(?:a|an|the)\s+(?:full|complete|whole|new)\b/,
     /\bproject\s+(?:from\s+scratch|to)\b/,
     /\bsimulation\s+(?:with|using|of)\b/,
-    /\bbackend\s+(?:for|with|using)\b/,
+    /\bbackend\s+(?:for|with|using|service)\b/,
     /\bspec(?:ification)?\s+(?:for|of)\b/,
     /\bend[- ]to[- ]end\b/,
+    // Verb-led "write/build/code/develop" requests with a noun follow
+    /\b(?:write|code|develop|generate|scaffold)\s+(?:me\s+)?(?:a|an|the)\s+\w+/,
+    // BoneScript-specific phrases — if they say bonescript at all, treat as build
+    /\busing\s+bonescript\b/,
+    /\bwith\s+bonescript\b/,
+    /\bin\s+bonescript\b/,
+    /\bbonescript\s+(?:as|for|backend)\b/,
+    // Generic "<adjective> <noun-app>" patterns indicating a system request
+    /\b(?:rest|graphql)\s+api\b/,
+    /\bweb\s+app(?:lication)?\b/,
+    /\bgame\s+(?:simulation|engine|server)\b/,
   ];
   return triggers.some((re) => re.test(p));
 }