npm - bonecode - Versions diffs - 1.4.1 → 1.4.3 - Mend

bonecode 1.4.1 → 1.4.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16) hide show

package/dist/src/engine/session/build_mode.d.ts +6 -0
package/dist/src/engine/session/build_mode.js +141 -13
package/dist/src/engine/session/build_mode.js.map +1 -1
package/dist/src/engine/session/leaked_tool_call.d.ts +49 -0
package/dist/src/engine/session/leaked_tool_call.js +174 -0
package/dist/src/engine/session/leaked_tool_call.js.map +1 -0
package/dist/src/engine/session/prompt.js +167 -0
package/dist/src/engine/session/prompt.js.map +1 -1
package/package.json +1 -1
package/scripts/debug_extract.js +40 -0
package/scripts/test_build_mode.js +132 -0
package/scripts/test_identical_response.js +129 -0
package/scripts/test_leaked_tool_call.js +269 -0
package/src/engine/session/build_mode.ts +157 -13
package/src/engine/session/leaked_tool_call.ts +166 -0
package/src/engine/session/prompt.ts +203 -0

package/src/engine/session/leaked_tool_call.ts ADDED Viewed

@@ -0,0 +1,166 @@
+/**
+ * Pure, side-effect-free parser for leaked tool-call markers.
+ *
+ * Some local models (gemma, qwen, llama variants) emit their internal
+ * tool-call markers as raw text instead of producing structured tool_call
+ * events. The AI SDK's parser misses these, so the model's prose appears in
+ * the output but no tool ever runs.
+ *
+ * This module recovers the intended call by pattern-matching the leaked text.
+ * No DB, no network, no global state — pure functions only, fully testable.
+ *
+ * Patterns recognized (across multiple template formats):
+ *   <|tool_call|>{"name":"write","arguments":{...}}<|/tool_call|>
+ *   <|tool_call>name:write{...args...}<tool_call|>
+ *   <tool_call>{"name":"write","arguments":{...}}</tool_call>
+ *   <function_call>{"name":"write","arguments":{...}}</function_call>
+ *   ```tool_code\nwrite(path="x", content="y")\n```
+ *   <|python_tag|>write({"path": "x"})<|/python_tag|>
+ */
/**
 * A tool call recovered from raw model text, plus the span of text it was
 * recovered from so the caller can cut the markers out of the visible output.
 */
export interface LeakedToolCall {
  /** Tool name exactly as it appeared in the leaked text. */
  toolName: string;
  /** Arguments decoded from the leaked body, keyed by parameter name. */
  toolInput: { [key: string]: any };
  /** Offset in the scanned text where the matched marker block begins. */
  startIndex: number;
  /** Offset just past the end of the matched marker block (exclusive). */
  endIndex: number;
}
/**
 * Marker formats that wrap a leaked tool call, in priority order. Capture
 * group 1 is the body handed to `parseLeakedBody`. Closing delimiters are
 * deliberately lenient (optional `/` and `|`) because templates disagree on
 * how the closing marker is spelled.
 */
const LEAKED_BLOCK_PATTERNS: readonly RegExp[] = [
  // <|tool_call|>...<|/tool_call|> and the <|tool_call>...<tool_call|> variant
  /<\|tool_call\|?>([\s\S]*?)<\|?\/?tool_call\|?>/gi,
  // <tool_call>...</tool_call>
  /<tool_call>([\s\S]*?)<\/?tool_call>/gi,
  // <function_call>...</function_call>
  /<function_call>([\s\S]*?)<\/?function_call>/gi,
  // <|python_tag|>...<|/python_tag|>
  /<\|python_tag\|>([\s\S]*?)<\|?\/?python_tag\|?>/gi,
  // ```tool_code ... ``` fenced blocks (tried last)
  /```(?:tool_code|tool_call|function|python)\s*\n([\s\S]*?)\n```/gi,
];

/**
 * Find the first recoverable leaked tool call in `text`.
 *
 * Formats are tried in the order listed in `LEAKED_BLOCK_PATTERNS`; within a
 * format, every occurrence is tried in text order, so a malformed block does
 * not hide a later well-formed block of the same format.
 *
 * @param text Raw assistant text accumulated so far.
 * @returns The parsed call with the `[startIndex, endIndex)` span of the
 *   matched markers, or `null` when no complete, parseable block is present.
 */
export function extractLeakedToolCall(text: string): LeakedToolCall | null {
  if (!text) return null;
  for (const pattern of LEAKED_BLOCK_PATTERNS) {
    // matchAll works on a private copy of the regex, so the shared pattern's
    // lastIndex is never mutated between calls.
    for (const match of text.matchAll(pattern)) {
      if (match.index === undefined) continue;
      const parsed = parseLeakedBody(match[1] ?? "");
      if (parsed) {
        return { ...parsed, startIndex: match.index, endIndex: match.index + match[0].length };
      }
    }
  }
  return null;
}
/**
 * True for a non-null, non-array object — the only shape accepted as a tool
 * input or as a JSON call envelope.
 */
function isLeakedArgsRecord(value: unknown): value is Record<string, any> {
  return typeof value === "object" && value !== null && !Array.isArray(value);
}

/**
 * Parse the body of a leaked tool-call block. Tries multiple formats:
 *   - JSON: {"name": "write", "arguments": {...}} or {"tool":"write","args":{...}},
 *     including the nested {"function": {"name": ..., "arguments": ...}} envelope
 *   - Function-call style: write(path="x", content="y") or write({"path": "x"})
 *   - Pseudo-syntax: call:write{path:"x"}
 *
 * @param body Text found between the leaked markers.
 * @returns The tool name and an argument object, or `null` when no format
 *   matches. `toolInput` is always a plain object: JSON arguments that are (or
 *   decode to) a scalar or an array are replaced by `{}`.
 */
export function parseLeakedBody(body: string): { toolName: string; toolInput: Record<string, any> } | null {
  if (!body) return null;
  const trimmed = body.trim();

  // 1. JSON envelope. The top level is tried first; a nested `function`
  //    object is only consulted when the top level carries no string name.
  const json = safeParseJson(trimmed);
  if (isLeakedArgsRecord(json)) {
    for (const envelope of [json, json.function]) {
      if (!isLeakedArgsRecord(envelope)) continue;
      const name = envelope.name || envelope.tool || envelope.tool_name || envelope.function;
      if (typeof name !== "string" || name.length === 0) continue;
      const rawArgs = envelope.arguments || envelope.args || envelope.parameters || envelope.input;
      // Arguments may arrive as a JSON-encoded string rather than an object.
      const decoded = typeof rawArgs === "string" ? safeParseJson(rawArgs) : rawArgs;
      return { toolName: name, toolInput: isLeakedArgsRecord(decoded) ? decoded : {} };
    }
  }

  // 2. Function-call style: name(arg1=val1, arg2="val2")
  const fnMatch = trimmed.match(/^([a-zA-Z_][\w]*)\s*\(([\s\S]*)\)\s*$/);
  if (fnMatch) {
    const toolName = fnMatch[1];
    const argsStr = fnMatch[2];
    // A single JSON object argument wins over kwargs: write({"path": "x"})
    const innerJson = safeParseJson(argsStr);
    if (isLeakedArgsRecord(innerJson)) {
      return { toolName, toolInput: innerJson };
    }
    const toolInput = parseKwargs(argsStr);
    if (toolInput) return { toolName, toolInput };
  }

  // 3. Pseudo-syntax: call:name{key:"val", ...} or name:foo{...}
  const callMatch = trimmed.match(/(?:call:|name:|tool:|function:)([a-zA-Z_][\w]*)\s*\{([\s\S]*)\}\s*/i);
  if (callMatch) {
    const toolName = callMatch[1];
    const innerJson = "{" + callMatch[2] + "}";
    // Strict JSON first; fall back to the loose key:value reader.
    const toolInput = safeParseJson(innerJson) || parseLooseObject(callMatch[2]);
    if (toolInput) return { toolName, toolInput };
  }
  return null;
}
/**
 * `JSON.parse` that never throws.
 *
 * @param s Candidate JSON text.
 * @returns The decoded value, or `null` when `s` is not valid JSON. The JSON
 *   literal `null` also decodes to `null`, so the two cases are
 *   indistinguishable to the caller.
 */
export function safeParseJson(s: string): any | null {
  let decoded: any = null;
  try {
    decoded = JSON.parse(s);
  } catch {
    // Malformed input is expected here; it is reported as null.
  }
  return decoded;
}
/**
 * One `key=value` pair. Capture groups: 1 key, 2 double-quoted content,
 * 3 single-quoted content, 4 number, 5 keyword literal. A backslash inside
 * quotes escapes any following character, including a newline.
 */
const KWARG_PAIR_RE =
  /([a-zA-Z_]\w*)\s*=\s*(?:"((?:[^"\\]|\\[\s\S])*)"|'((?:[^'\\]|\\[\s\S])*)'|(-?\d+(?:\.\d+)?(?:[eE][+-]?\d+)?)|(true|false|null|True|False|None)\b)/g;

/** Keyword literals (JSON and Python spellings) and the values they denote. */
const KWARG_KEYWORDS = new Map<string, boolean | null>([
  ["true", true],
  ["True", true],
  ["false", false],
  ["False", false],
  ["null", null],
  ["None", null],
]);

/** Single-character escapes that denote a control character. */
const KWARG_CONTROL_ESCAPES = new Map<string, string>([
  ["n", "\n"],
  ["t", "\t"],
  ["r", "\r"],
  ["b", "\b"],
  ["f", "\f"],
  ["v", "\v"],
  ["0", "\0"],
]);

/**
 * Decode backslash escapes in the content of a quoted kwarg value: `\n`, `\t`
 * and friends become control characters, `\uXXXX` / `\xXX` become the code
 * unit they name, and any other escaped character stands for itself.
 */
function decodeKwargEscapes(inner: string): string {
  return inner.replace(/\\(u[0-9a-fA-F]{4}|x[0-9a-fA-F]{2}|[\s\S])/g, (_whole, esc: string) => {
    if (esc.length > 1) return String.fromCharCode(parseInt(esc.slice(1), 16));
    return KWARG_CONTROL_ESCAPES.get(esc) ?? esc;
  });
}

/**
 * Parse Python-style kwargs from a function-call body:
 *   path="x", content="y", count=42, force=True
 * Strips `<|"|>` style escape markers some templates inject.
 *
 * Values may be quoted strings (either quote style, with backslash escapes),
 * numbers (optionally with an exponent), or `true`/`false`/`null` in JSON or
 * Python spelling. Text that is not part of a recognizable pair is ignored.
 *
 * @param s The text between the parentheses of a function-call style body.
 * @returns `{}` for blank input, the parsed pairs when at least one pair is
 *   found, otherwise `null`.
 */
export function parseKwargs(s: string): Record<string, any> | null {
  if (!s.trim()) return {};
  const cleaned = s.replace(/<\|"\|>/g, '"').replace(/<\|'\|>/g, "'");
  const result: Record<string, any> = {};
  let matched = false;
  // matchAll iterates over a private copy of the regex, so the shared
  // pattern carries no lastIndex state between calls.
  for (const m of cleaned.matchAll(KWARG_PAIR_RE)) {
    matched = true;
    const key = m[1];
    const doubleQuoted = m[2];
    const singleQuoted = m[3];
    const numeric = m[4];
    const keyword = m[5];
    if (doubleQuoted !== undefined) result[key] = decodeKwargEscapes(doubleQuoted);
    else if (singleQuoted !== undefined) result[key] = decodeKwargEscapes(singleQuoted);
    else if (numeric !== undefined) result[key] = parseFloat(numeric);
    else result[key] = KWARG_KEYWORDS.get(keyword) ?? null;
  }
  return matched ? result : null;
}
/**
 * One `key:value` or `key=value` pair. Capture groups: 1 key, 2 double-quoted
 * content, 3 single-quoted content, 4 bare token (runs until whitespace, a
 * comma, or a closing brace).
 */
const LOOSE_PAIR_RE =
  /([a-zA-Z_]\w*)\s*[:=]\s*(?:"((?:[^"\\]|\\[\s\S])*)"|'((?:[^'\\]|\\[\s\S])*)'|([^\s,}]+))/g;

/** A bare token counts as a number only when the whole token is numeric. */
const LOOSE_NUMBER_RE = /^-?\d+(?:\.\d+)?(?:[eE][+-]?\d+)?$/;

/** Single-character escapes that denote a control character. */
const LOOSE_CONTROL_ESCAPES = new Map<string, string>([
  ["n", "\n"],
  ["t", "\t"],
  ["r", "\r"],
  ["b", "\b"],
  ["f", "\f"],
  ["v", "\v"],
  ["0", "\0"],
]);

/**
 * Decode backslash escapes in quoted content: `\n`, `\t` and friends become
 * control characters, `\uXXXX` / `\xXX` become the code unit they name, and
 * any other escaped character stands for itself.
 */
function decodeLooseEscapes(inner: string): string {
  return inner.replace(/\\(u[0-9a-fA-F]{4}|x[0-9a-fA-F]{2}|[\s\S])/g, (_whole, esc: string) => {
    if (esc.length > 1) return String.fromCharCode(parseInt(esc.slice(1), 16));
    return LOOSE_CONTROL_ESCAPES.get(esc) ?? esc;
  });
}

/** Interpret an unquoted token: keyword literal, number, or verbatim text. */
function interpretLooseToken(token: string): any {
  if (token === "true") return true;
  if (token === "false") return false;
  if (token === "null") return null;
  // Whole-token check: "3d_model.bone" or "12:30" must stay text, not 3 / 12.
  if (LOOSE_NUMBER_RE.test(token)) return parseFloat(token);
  return token;
}

/**
 * Parse a loose key:value object body (no surrounding braces, no enforced
 * JSON quoting). Used for pseudo-syntax fallbacks like:
 *   file_path:<|"|>medieval_market.bone<|"|>
 *
 * Values may be quoted strings (either quote style, with backslash escapes)
 * or bare tokens. A bare token becomes a boolean, null, or number only when
 * the entire token is that literal; anything else is kept as text.
 *
 * @param s Text found between the braces of a pseudo-syntax call.
 * @returns The parsed pairs when at least one pair is found, otherwise `null`.
 */
export function parseLooseObject(s: string): Record<string, any> | null {
  const cleaned = s.replace(/<\|"\|>/g, '"').replace(/<\|'\|>/g, "'");
  const result: Record<string, any> = {};
  let matched = false;
  // matchAll iterates over a private copy of the regex, so the shared
  // pattern carries no lastIndex state between calls.
  for (const m of cleaned.matchAll(LOOSE_PAIR_RE)) {
    matched = true;
    const key = m[1];
    const doubleQuoted = m[2];
    const singleQuoted = m[3];
    const bare = m[4];
    if (doubleQuoted !== undefined) result[key] = decodeLooseEscapes(doubleQuoted);
    else if (singleQuoted !== undefined) result[key] = decodeLooseEscapes(singleQuoted);
    else result[key] = interpretLooseToken(bare ?? "");
  }
  return matched ? result : null;
}

package/src/engine/session/prompt.ts CHANGED Viewed

@@ -42,6 +42,7 @@ import { buildCompactionSummary } from "./compaction_logic";
 import { getSystemPrompt } from "./system_prompt";
 import { loadInstructionFiles } from "./instruction_loader";
 import { buildToolRegistry } from "./tool_registry";
+import { extractLeakedToolCall } from "./leaked_tool_call";
 // ─── Types ────────────────────────────────────────────────────────────────────
@@ -98,6 +99,10 @@ export async function runAgentLoop(input: PromptInput): Promise<LoopResult> {
   let turn = 0;
   let lazyReminderSent = false;
   let lastFinishReason = "unknown";
+  // Track the last 2 assistant texts so we can detect when the model is
+  // stuck producing identical output. Some local models repeat the same
+  // response when they're confused about whether to call a tool or speak.
+  const recentResponses: string[] = [];
   try {
     // ── Main multi-turn loop ──────────────────────────────────────────────────
@@ -176,6 +181,27 @@ export async function runAgentLoop(input: PromptInput): Promise<LoopResult> {
       // 4. "tool-calls" with no actual tool calls = model confused — stop
       const terminalReasons = new Set(["stop", "length", "content-filter", "end-turn"]);
+      // Identical-response detector: small models sometimes emit the same
+      // response twice in a row when confused about tool calls. We capture
+      // a fingerprint and bail out before producing 3-4 copies.
+      const fingerprint = await assistantTextFingerprint(session_id, assistantMsgId);
+      if (fingerprint && recentResponses.includes(fingerprint)) {
+        logger.warn("identical_response_detected", {
+          event: "stuck_loop",
+          metadata: { session_id, turn, fingerprint: fingerprint.slice(0, 80) },
+        });
+        broadcastToChannel("session_events", {
+          type: "session.warning",
+          session_id,
+          message: "Model produced an identical response — exiting to avoid an infinite loop. Try rephrasing or switching models.",
+        });
+        break;
+      }
+      if (fingerprint) {
+        recentResponses.push(fingerprint);
+        if (recentResponses.length > 2) recentResponses.shift();
+      }
       // Detect "lazy assistant" — the model claims it's editing/creating files
       // in prose but never actually called a tool. Common with non-tool-tuned
       // local models. Once per session, push a synthetic reminder and re-run.
@@ -349,6 +375,36 @@ async function streamOnce(ctx: {
         }
         currentTextContent += text;
+        // Detect models leaking their internal tool-call markers as raw text
+        // (gemma, qwen, llama variants do this when the tokenizer template
+        // doesn't match the AI SDK's expected format). When we find a complete
+        // leaked call, synthesize a real tool execution.
+        const leak = extractLeakedToolCall(currentTextContent);
+        if (leak) {
+          // Strip the leaked markers from the displayed text part
+          currentTextContent = currentTextContent.slice(0, leak.startIndex) +
+            currentTextContent.slice(leak.endIndex);
+          await pool.query(
+            `UPDATE parts SET data = $2, updated_at = NOW() WHERE id = $1`,
+            [currentTextPartId, JSON.stringify({ text: currentTextContent })]
+          );
+          // Execute the synthesized tool call directly via the registry
+          await executeSynthesizedToolCall({
+            session_id,
+            agentId: ctx.agentId,
+            assistantMsgId,
+            toolName: leak.toolName,
+            toolInput: leak.toolInput,
+            tools,
+          });
+          // Mark the turn as having tool calls so the loop continues
+          hasToolCalls = true;
+          break;
+        }
         // Broadcast delta to WebSocket part_stream for live streaming
         broadcastToChannel("part_stream", {
           type: "part.delta",
@@ -563,6 +619,29 @@ async function runCompaction(
 // ─── Message History Builder ──────────────────────────────────────────────────
// Compute a stable fingerprint of one assistant message's text so the agent
// loop can detect when the model is producing identical responses turn after
// turn. The text parts of the message are concatenated in order_index order,
// lowercased, and whitespace-collapsed before hashing, so minor whitespace or
// casing differences do not defeat the comparison.
//
// Returns the SHA-1 hex digest of the first 1000 normalized characters, or
// null when the normalized text is shorter than 80 characters (too short to
// fingerprint reliably) or the parts query fails.
async function assistantTextFingerprint(session_id: string, messageId: string): Promise<string | null> {
  try {
    const r = await pool.query(
      `SELECT data FROM parts WHERE message_id = $1 AND part_type = 'text' ORDER BY order_index ASC`,
      [messageId]
    );
    const joined = r.rows.map((row: any) => row.data?.text || "").join(" ");
    const normalized = joined.toLowerCase().replace(/\s+/g, " ").trim();
    // Apply the length floor to the text that is actually hashed, so padding
    // whitespace cannot push a trivially short reply over the threshold.
    if (normalized.length < 80) return null;
    // Only the leading 1000 characters are hashed: two responses that share
    // that prefix are treated as identical even if they diverge later.
    const sample = normalized.slice(0, 1000);
    const crypto = require("crypto");
    return crypto.createHash("sha1").update(sample).digest("hex");
  } catch (err: unknown) {
    // Best-effort: a failure only disables duplicate detection for this turn,
    // but it is logged so a persistently failing query is visible.
    logger.warn("assistant_fingerprint_failed", {
      event: "stuck_loop",
      metadata: {
        session_id,
        message_id: messageId,
        error: err instanceof Error ? err.message : String(err),
      },
    });
    return null;
  }
}
 // Detect a "lazy" response — assistant text says it will edit/create files
 // but no tool was actually invoked. Common with non-tool-tuned local models.
 async function wasLazyResponse(session_id: string, messageId: string): Promise<boolean> {
@@ -837,3 +916,127 @@ function supportsTools(model_id: string): boolean {
   // Default: try with tools, fall back gracefully on error
   return true;
 }
+// ─── Synthesized tool-call execution ──────────────────────────────────────────
/**
 * Alternate tool names mapped to the names used for lookup in the tool
 * registry (write_file → write, edit_file → edit, etc.). Kept in a Map so a
 * model-supplied name such as "constructor" cannot resolve to an inherited
 * object property.
 */
const SYNTHESIZED_TOOL_ALIASES: ReadonlyMap<string, string> = new Map([
  ["write_file", "write"],
  ["edit_file", "edit"],
  ["read_file", "read"],
  ["run_command", "bash"],
  ["shell", "bash"],
  ["search_files", "grep"],
]);

/** Best-effort text for a thrown value, falling back when it has no message. */
function synthesizedErrorText(thrown: unknown, fallback: string): string {
  if (typeof thrown === "object" && thrown !== null && "message" in thrown) {
    const message = (thrown as { message: unknown }).message;
    if (typeof message === "string" && message.length > 0) return message;
  }
  return fallback;
}

/**
 * Execute a synthesized tool call when we detect a leak. Mirrors the work the
 * AI SDK would normally do: insert a tool_invocation part, broadcast events,
 * run the registered tool's execute() function.
 *
 * Steps, in order: resolve the tool (aliases first), record a `tool_calls`
 * row, broadcast `tool.requested`, insert the `tool_invocation` part, publish
 * `ToolCallRequested`, run the tool, write the outcome back to both rows,
 * broadcast `tool.completed` / `tool.failed`, and publish `ToolCallCompleted`.
 *
 * A tool that is unknown or has no `execute` is logged and skipped. A tool
 * that throws is recorded as "failed" rather than rethrown. Failures of the
 * bookkeeping writes are logged and do not stop the call; only the insert of
 * the `tool_invocation` part propagates its error to the caller.
 *
 * @param input.toolName Name recovered from the leaked text (may be an alias).
 * @param input.toolInput Arguments recovered from the leaked text.
 * @param input.tools Registry of callable tools keyed by name.
 */
async function executeSynthesizedToolCall(input: {
  session_id: string;
  agentId: string;
  assistantMsgId: string;
  toolName: string;
  toolInput: Record<string, any>;
  tools: Record<string, any>;
}): Promise<void> {
  const { session_id, agentId, assistantMsgId, toolName, toolInput, tools } = input;

  // toolName comes from model output, so only own registry entries count.
  const resolvedName = SYNTHESIZED_TOOL_ALIASES.get(toolName) ?? toolName;
  const tool = Object.prototype.hasOwnProperty.call(tools, resolvedName) ? tools[resolvedName] : undefined;
  if (!tool || !tool.execute) {
    logger.warn("synthesized_tool_unknown", { event: "leak", metadata: { toolName, resolvedName } });
    return;
  }

  const callId = uuid();
  // Persist the tool call record. We tag it as "synthesized" so the
  // tool-capability probe in build_mode can tell the model didn't really
  // emit a native tool call — it just leaked tool-marker text that we
  // recovered. Models that always need recovery should be treated as
  // tool-incapable so we route them through the JSON-manifest fallback
  // (which is more reliable than counting on every prompt to leak cleanly).
  try {
    await pool.query(
      `INSERT INTO tool_calls (id, session_id, agent_id, tool_name, tool_input, state) VALUES ($1, $2, $3, $4, $5, 'running')`,
      [callId, session_id, agentId, resolvedName, JSON.stringify({ ...toolInput, __synthesized: true })]
    );
  } catch (err: unknown) {
    // The call still runs without its audit row; make the gap visible.
    logger.warn("synthesized_tool_persist_failed", {
      event: "leak",
      metadata: {
        session_id,
        tool_call_id: callId,
        stage: "insert_tool_call",
        error: synthesizedErrorText(err, "unknown error"),
      },
    });
  }

  // Broadcast tool.requested so the TUI shows "← Edit foo.bone"
  broadcastToChannel("part_stream", {
    type: "tool.requested",
    session_id,
    tool_call_id: callId,
    tool_name: resolvedName,
    tool_input: toolInput,
  });

  // Persist as a tool_invocation part on the assistant message
  const partId = uuid();
  await pool.query(
    `INSERT INTO parts (id, message_id, session_id, part_type, data, order_index) VALUES ($1, $2, $3, 'tool_invocation', $4, 0)`,
    [partId, assistantMsgId, session_id, JSON.stringify({ tool_call_id: callId, tool_name: resolvedName, args: toolInput, state: "running" })]
  );

  // Run the actual tool — emit ToolCallRequested so the same machinery as a
  // real tool call kicks in. Publish failures are deliberately ignored.
  await eventBus.publish("ToolCallRequested", {
    tool_call_id: callId,
    session_id,
    agent_id: agentId,
    tool_name: resolvedName,
    tool_input: toolInput,
    requested_at: new Date().toISOString(),
  }, "AgentLoop").catch(() => {});

  const startMs = Date.now();
  let success = true;
  let output = "";
  try {
    const result = await tool.execute(toolInput, { toolCallId: callId });
    // Tools may return a bare string or an object carrying an `output` field.
    output = typeof result === "string" ? result : (result?.output || "");
  } catch (e: unknown) {
    success = false;
    output = synthesizedErrorText(e, "tool execution failed");
  }

  // Update the part and the tool_calls row with the result
  const durationMs = Date.now() - startMs;
  try {
    await pool.query(
      `UPDATE parts SET data = $2, updated_at = NOW() WHERE id = $1`,
      [partId, JSON.stringify({
        tool_call_id: callId,
        tool_name: resolvedName,
        args: toolInput,
        state: success ? "done" : "failed",
        output,
      })]
    );
    await pool.query(
      `UPDATE tool_calls SET state = $2, tool_output = $3, duration_ms = $4, updated_at = NOW() WHERE id = $1`,
      [callId, success ? "done" : "failed", JSON.stringify({ output }), durationMs]
    );
  } catch (err: unknown) {
    // The tool already ran; a failed write-back leaves the rows in "running".
    logger.warn("synthesized_tool_persist_failed", {
      event: "leak",
      metadata: {
        session_id,
        tool_call_id: callId,
        stage: "update_result",
        error: synthesizedErrorText(err, "unknown error"),
      },
    });
  }

  // Broadcast completion
  broadcastToChannel("part_stream", {
    type: success ? "tool.completed" : "tool.failed",
    session_id,
    tool_call_id: callId,
    tool_name: resolvedName,
    tool_input: toolInput,
    duration_ms: durationMs,
    ...(success ? {} : { error: output }),
  });
  await eventBus.publish("ToolCallCompleted", {
    tool_call_id: callId,
    session_id,
    tool_name: resolvedName,
    duration_ms: durationMs,
    completed_at: new Date().toISOString(),
  }, "AgentLoop").catch(() => {});
}