bonecode 1.4.1 → 1.4.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,166 @@
1
+ /**
2
+ * Pure, side-effect-free parser for leaked tool-call markers.
3
+ *
4
+ * Some local models (gemma, qwen, llama variants) emit their internal
5
+ * tool-call markers as raw text instead of producing structured tool_call
6
+ * events. The AI SDK's parser misses these, so the model's prose appears in
7
+ * the output but no tool ever runs.
8
+ *
9
+ * This module recovers the intended call by pattern-matching the leaked text.
10
+ * No DB, no network, no global state — pure functions only, fully testable.
11
+ *
12
+ * Patterns recognized (across multiple template formats):
13
+ * <|tool_call|>{"name":"write","arguments":{...}}<|/tool_call|>
14
+ * <|tool_call>name:write{...args...}<tool_call|>
15
+ * <tool_call>{"name":"write","arguments":{...}}</tool_call>
16
+ * <function_call>{"name":"write","arguments":{...}}</function_call>
17
+ * ```tool_code\nwrite(path="x", content="y")\n```
18
+ * <|python_tag|>write({"path": "x"})<|/python_tag|>
19
+ */
20
+
21
/**
 * A tool call recovered from raw model text, together with the span of text
 * that carried it.
 */
export interface LeakedToolCall {
  /** Tool name as written in the leaked text. */
  toolName: string;
  /** Arguments recovered for the call. */
  toolInput: Record<string, any>;
  /** Index in the scanned text where the leaked block starts (inclusive). */
  startIndex: number;
  /** Index in the scanned text just past the end of the leaked block (exclusive). */
  endIndex: number;
}

/**
 * Find the first recoverable leaked tool call in `text`.
 *
 * Formats are tried in priority order: the tag-delimited forms
 * (`<|tool_call|>`, `<tool_call>`, `<function_call>`, `<|python_tag|>`) and
 * then fenced code blocks (```tool_code / tool_call / function / python).
 * Within one format every occurrence is examined, so a malformed block does
 * not hide a later well-formed block of the same format.
 *
 * @param text Accumulated assistant text to scan.
 * @returns The parsed call with its [startIndex, endIndex) span, or null when
 *          no block parses into a tool call.
 */
export function extractLeakedToolCall(text: string): LeakedToolCall | null {
  // The literals are evaluated on every call, so the stateful `g` regexes
  // (lastIndex) are never shared between invocations.
  const patterns = [
    /<\|tool_call\|?>([\s\S]*?)<\|?\/?tool_call\|?>/gi,
    /<tool_call>([\s\S]*?)<\/?tool_call>/gi,
    /<function_call>([\s\S]*?)<\/?function_call>/gi,
    /<\|python_tag\|>([\s\S]*?)<\|?\/?python_tag\|?>/gi,
    /```(?:tool_code|tool_call|function|python)\s*\n([\s\S]*?)\n```/gi,
  ];

  for (const re of patterns) {
    let m: RegExpExecArray | null;
    while ((m = re.exec(text)) !== null) {
      const parsed = parseLeakedBody(m[1] ?? "");
      if (parsed) {
        return { ...parsed, startIndex: m.index, endIndex: m.index + m[0].length };
      }
      // This occurrence did not parse. Resume one character after its start
      // (not after its end) so a valid block that begins inside the rejected
      // span — e.g. after a stray opening tag — is still found. Every pattern
      // consumes at least its opening tag, so this always advances.
      re.lastIndex = m.index + 1;
    }
  }

  return null;
}
57
+
58
/** True for non-null, non-array objects — the only shape usable as tool arguments. */
function isPlainRecord(value: unknown): value is Record<string, any> {
  return typeof value === "object" && value !== null && !Array.isArray(value);
}

/**
 * Parse the body of a leaked tool-call block into a tool name and arguments.
 *
 * Formats are tried in order; the first one that yields a result wins:
 *   1. JSON envelope:  {"name": "write", "arguments": {...}}
 *        name keys:     name | tool | tool_name | function
 *        argument keys: arguments | args | parameters | input
 *        (arguments may themselves be a JSON-encoded string)
 *   2. Function call:  write(path="x", content="y")  or  write({"path": "x"})
 *   3. Pseudo-syntax:  call:write{path:"x"}  (prefixes: call | name | tool | function)
 *
 * @param body Text found between the leaked markers.
 * @returns The recovered call, or null when no format matches. `toolInput`
 *          is always a plain object; arguments that decode to anything else
 *          (array, number, string, null) are replaced by `{}`.
 */
export function parseLeakedBody(body: string): { toolName: string; toolInput: Record<string, any> } | null {
  if (!body) return null;
  const trimmed = body.trim();

  // 1. JSON envelope
  try {
    const json = JSON.parse(trimmed);
    if (isPlainRecord(json)) {
      // Take the first candidate key that actually holds a non-empty string,
      // so a non-string `name` does not block a usable `tool` / `function`.
      const name = [json.name, json.tool, json.tool_name, json.function].find(
        (candidate): candidate is string => typeof candidate === "string" && candidate.length > 0
      );
      const args = json.arguments || json.args || json.parameters || json.input || {};
      if (name !== undefined) {
        const parsedArgs = typeof args === "string" ? safeParseJson(args) : args;
        return { toolName: name, toolInput: isPlainRecord(parsedArgs) ? parsedArgs : {} };
      }
    }
  } catch {
    // Not JSON — fall through to the textual formats below.
  }

  // 2. Function-call style: name(arg1=val1, arg2="val2")
  const fnMatch = trimmed.match(/^([a-zA-Z_][\w]*)\s*\(([\s\S]*)\)\s*$/);
  if (fnMatch) {
    const toolName = fnMatch[1];
    const argsStr = fnMatch[2];
    // A single JSON object argument takes precedence: write({"path": "x"})
    const innerJson = safeParseJson(argsStr);
    if (isPlainRecord(innerJson)) {
      return { toolName, toolInput: innerJson };
    }
    const toolInput = parseKwargs(argsStr);
    if (toolInput) return { toolName, toolInput };
  }

  // 3. Pseudo-syntax: call:name{key:"val", ...} or name:foo{...}
  const callMatch = trimmed.match(/(?:call:|name:|tool:|function:)([a-zA-Z_][\w]*)\s*\{([\s\S]*)\}\s*/i);
  if (callMatch) {
    const toolName = callMatch[1];
    // Strict JSON first (re-wrapping the captured interior in braces), then
    // the lenient key:value reader for unquoted keys / template quote markers.
    const innerJson = "{" + callMatch[2] + "}";
    const toolInput = safeParseJson(innerJson) || parseLooseObject(callMatch[2]);
    if (toolInput) return { toolName, toolInput };
  }

  return null;
}
106
+
107
/**
 * JSON.parse that never throws.
 *
 * @param s Candidate JSON text.
 * @returns The decoded value, or null when `s` is not valid JSON. Note that
 *          the JSON literal `null` also decodes to null.
 */
export function safeParseJson(s: string): any | null {
  let decoded: any = null;
  try {
    decoded = JSON.parse(s);
  } catch {
    // Invalid JSON: keep the null default.
  }
  return decoded;
}
114
+
115
/** Single-letter backslash escapes that stand for a control character. */
const KWARG_NAMED_ESCAPES: Record<string, string> = {
  n: "\n",
  t: "\t",
  r: "\r",
  b: "\b",
  f: "\f",
  v: "\v",
};

/**
 * Decode backslash escapes in the interior of a quoted kwarg value.
 * `\n`, `\t`, `\r`, `\b`, `\f`, `\v` become the control character,
 * `\uXXXX` / `\xXX` become the code unit, and any other `\c` becomes `c`
 * (which covers `\"`, `\'` and `\\`).
 */
function decodeKwargEscapes(inner: string): string {
  return inner.replace(/\\(u[0-9a-fA-F]{4}|x[0-9a-fA-F]{2}|.)/g, (_whole, seq: string) => {
    if (seq.length > 1) return String.fromCharCode(parseInt(seq.slice(1), 16));
    return KWARG_NAMED_ESCAPES[seq] ?? seq;
  });
}

/**
 * Parse Python-style kwargs from a function-call body:
 *   path="x", content="y", count=42
 *
 * `<|"|>` / `<|'|>` quote markers that some templates inject are converted to
 * plain quotes first. Values may be single- or double-quoted strings,
 * integers/decimals, or the bare words true / false / null.
 *
 * @param s Text between the call's parentheses.
 * @returns `{}` for blank input, the key → value map when at least one
 *          `key=value` pair is found, otherwise null.
 */
export function parseKwargs(s: string): Record<string, any> | null {
  if (!s.trim()) return {};
  const cleaned = s.replace(/<\|"\|>/g, '"').replace(/<\|'\|>/g, "'");
  const result: Record<string, any> = {};
  const re = /([a-zA-Z_][\w]*)\s*=\s*("([^"\\]|\\.)*"|'([^'\\]|\\.)*'|-?\d+(?:\.\d+)?|true|false|null)/g;
  let m: RegExpExecArray | null;
  let matched = false;
  while ((m = re.exec(cleaned)) !== null) {
    matched = true;
    const key = m[1];
    const raw = m[2];
    let value: any = raw;
    if (raw === "true") value = true;
    else if (raw === "false") value = false;
    else if (raw === "null") value = null;
    else if (/^-?\d/.test(raw)) value = parseFloat(raw);
    // Quoted string: drop the surrounding quotes and decode escapes properly
    // (a plain "strip the backslash" would turn \n into the letter n).
    else value = decodeKwargEscapes(raw.slice(1, -1));
    result[key] = value;
  }
  return matched ? result : null;
}
141
+
142
/** Single-letter backslash escapes that stand for a control character. */
const LOOSE_NAMED_ESCAPES: Record<string, string> = {
  n: "\n",
  t: "\t",
  r: "\r",
  b: "\b",
  f: "\f",
  v: "\v",
};

/**
 * Decode backslash escapes in the interior of a quoted value.
 * `\n`, `\t`, `\r`, `\b`, `\f`, `\v` become the control character,
 * `\uXXXX` / `\xXX` become the code unit, and any other `\c` becomes `c`.
 */
function decodeLooseEscapes(inner: string): string {
  return inner.replace(/\\(u[0-9a-fA-F]{4}|x[0-9a-fA-F]{2}|.)/g, (_whole, seq: string) => {
    if (seq.length > 1) return String.fromCharCode(parseInt(seq.slice(1), 16));
    return LOOSE_NAMED_ESCAPES[seq] ?? seq;
  });
}

/**
 * Parse a loose key:value object body (no surrounding braces, no enforced
 * JSON quoting). Used for pseudo-syntax fallbacks like:
 *   file_path:<|"|>medieval_market.bone<|"|>
 *
 * Keys are identifiers followed by `:` or `=`. A value is either a quoted
 * string or a bare token running up to the next whitespace, `,` or `}`.
 * A bare token is typed only when the WHOLE token is `true`, `false`, `null`
 * or a decimal number; anything else (paths, dates, version strings, …) is
 * kept verbatim as a string.
 *
 * @param s Text between the braces of the pseudo-syntax call.
 * @returns The key → value map when at least one pair is found, otherwise null.
 */
export function parseLooseObject(s: string): Record<string, any> | null {
  const cleaned = s.replace(/<\|"\|>/g, '"').replace(/<\|'\|>/g, "'");
  const result: Record<string, any> = {};
  // Quoted alternatives come first so commas/braces inside quotes are kept;
  // the bare-token alternative captures everything else in one piece so a
  // literal-looking prefix ("null" in "nullable.txt") cannot truncate it.
  const re = /([a-zA-Z_][\w]*)\s*[:=]\s*("(?:[^"\\]|\\.)*"|'(?:[^'\\]|\\.)*'|[^\s,}]+)/g;
  const wholeNumber = /^-?\d+(?:\.\d+)?$/;
  let m: RegExpExecArray | null;
  let matched = false;
  while ((m = re.exec(cleaned)) !== null) {
    matched = true;
    const key = m[1];
    const raw = m[2];
    const first = raw.charAt(0);
    const isQuoted =
      raw.length >= 2 && (first === '"' || first === "'") && raw.charAt(raw.length - 1) === first;
    let value: any = raw;
    if (isQuoted) value = decodeLooseEscapes(raw.slice(1, -1));
    else if (raw === "true") value = true;
    else if (raw === "false") value = false;
    else if (raw === "null") value = null;
    else if (wholeNumber.test(raw)) value = parseFloat(raw);
    // Otherwise keep the bare token as-is. This includes tokens with an
    // unterminated opening quote, which are left untouched rather than
    // having their last character sliced off.
    result[key] = value;
  }
  return matched ? result : null;
}
@@ -42,6 +42,7 @@ import { buildCompactionSummary } from "./compaction_logic";
42
42
  import { getSystemPrompt } from "./system_prompt";
43
43
  import { loadInstructionFiles } from "./instruction_loader";
44
44
  import { buildToolRegistry } from "./tool_registry";
45
+ import { extractLeakedToolCall } from "./leaked_tool_call";
45
46
 
46
47
  // ─── Types ────────────────────────────────────────────────────────────────────
47
48
 
@@ -98,6 +99,10 @@ export async function runAgentLoop(input: PromptInput): Promise<LoopResult> {
98
99
  let turn = 0;
99
100
  let lazyReminderSent = false;
100
101
  let lastFinishReason = "unknown";
102
+ // Track the last 2 assistant texts so we can detect when the model is
103
+ // stuck producing identical output. Some local models repeat the same
104
+ // response when they're confused about whether to call a tool or speak.
105
+ const recentResponses: string[] = [];
101
106
 
102
107
  try {
103
108
  // ── Main multi-turn loop ──────────────────────────────────────────────────
@@ -176,6 +181,27 @@ export async function runAgentLoop(input: PromptInput): Promise<LoopResult> {
176
181
  // 4. "tool-calls" with no actual tool calls = model confused — stop
177
182
  const terminalReasons = new Set(["stop", "length", "content-filter", "end-turn"]);
178
183
 
184
+ // Identical-response detector: small models sometimes emit the same
185
+ // response twice in a row when confused about tool calls. We capture
186
+ // a fingerprint and bail out before producing 3-4 copies.
187
+ const fingerprint = await assistantTextFingerprint(session_id, assistantMsgId);
188
+ if (fingerprint && recentResponses.includes(fingerprint)) {
189
+ logger.warn("identical_response_detected", {
190
+ event: "stuck_loop",
191
+ metadata: { session_id, turn, fingerprint: fingerprint.slice(0, 80) },
192
+ });
193
+ broadcastToChannel("session_events", {
194
+ type: "session.warning",
195
+ session_id,
196
+ message: "Model produced an identical response — exiting to avoid an infinite loop. Try rephrasing or switching models.",
197
+ });
198
+ break;
199
+ }
200
+ if (fingerprint) {
201
+ recentResponses.push(fingerprint);
202
+ if (recentResponses.length > 2) recentResponses.shift();
203
+ }
204
+
179
205
  // Detect "lazy assistant" — the model claims it's editing/creating files
180
206
  // in prose but never actually called a tool. Common with non-tool-tuned
181
207
  // local models. Once per session, push a synthetic reminder and re-run.
@@ -349,6 +375,36 @@ async function streamOnce(ctx: {
349
375
  }
350
376
 
351
377
  currentTextContent += text;
378
+
379
+ // Detect models leaking their internal tool-call markers as raw text
380
+ // (gemma, qwen, llama variants do this when the tokenizer template
381
+ // doesn't match the AI SDK's expected format). When we find a complete
382
+ // leaked call, synthesize a real tool execution.
383
+ const leak = extractLeakedToolCall(currentTextContent);
384
+ if (leak) {
385
+ // Strip the leaked markers from the displayed text part
386
+ currentTextContent = currentTextContent.slice(0, leak.startIndex) +
387
+ currentTextContent.slice(leak.endIndex);
388
+ await pool.query(
389
+ `UPDATE parts SET data = $2, updated_at = NOW() WHERE id = $1`,
390
+ [currentTextPartId, JSON.stringify({ text: currentTextContent })]
391
+ );
392
+
393
+ // Execute the synthesized tool call directly via the registry
394
+ await executeSynthesizedToolCall({
395
+ session_id,
396
+ agentId: ctx.agentId,
397
+ assistantMsgId,
398
+ toolName: leak.toolName,
399
+ toolInput: leak.toolInput,
400
+ tools,
401
+ });
402
+
403
+ // Mark the turn as having tool calls so the loop continues
404
+ hasToolCalls = true;
405
+ break;
406
+ }
407
+
352
408
  // Broadcast delta to WebSocket part_stream for live streaming
353
409
  broadcastToChannel("part_stream", {
354
410
  type: "part.delta",
@@ -563,6 +619,29 @@ async function runCompaction(
563
619
 
564
620
  // ─── Message History Builder ──────────────────────────────────────────────────
565
621
 
622
// Fingerprint the text of one assistant message so the agent loop can notice
// a model repeating itself turn after turn.
//
// The message's text parts are read in order_index order and joined with
// spaces. Messages with fewer than 80 characters of text are not
// fingerprinted (returns null). Otherwise the text is lowercased, whitespace
// runs are collapsed to single spaces, and the SHA-1 hex digest of the first
// 1000 normalized characters is returned — so two messages that share their
// first 1000 normalized characters get the same fingerprint.
//
// Any failure (query error, unexpected row shape) yields null rather than
// throwing. `session_id` is accepted but not used by the query.
async function assistantTextFingerprint(session_id: string, messageId: string): Promise<string | null> {
  try {
    const { rows } = await pool.query(
      `SELECT data FROM parts WHERE message_id = $1 AND part_type = 'text' ORDER BY order_index ASC`,
      [messageId]
    );

    const pieces: string[] = [];
    for (const row of rows as any[]) {
      pieces.push(row.data?.text || "");
    }
    const joined = pieces.join(" ").trim();

    // Too short to fingerprint reliably (also covers the empty case).
    if (joined.length < 80) return null;

    const collapsed = joined.toLowerCase().replace(/\s+/g, " ").trim();
    const head = collapsed.slice(0, 1000);

    const { createHash } = require("crypto");
    const hasher = createHash("sha1");
    hasher.update(head);
    return hasher.digest("hex");
  } catch {
    return null;
  }
}
644
+
566
645
  // Detect a "lazy" response — assistant text says it will edit/create files
567
646
  // but no tool was actually invoked. Common with non-tool-tuned local models.
568
647
  async function wasLazyResponse(session_id: string, messageId: string): Promise<boolean> {
@@ -837,3 +916,127 @@ function supportsTools(model_id: string): boolean {
837
916
  // Default: try with tools, fall back gracefully on error
838
917
  return true;
839
918
  }
919
+
920
+ // ─── Synthesized tool-call execution ──────────────────────────────────────────
921
+
922
/**
 * Execute a tool call that was recovered from leaked marker text.
 *
 * Steps, in order:
 *   1. Resolve common aliases (write_file → write, …) and look the tool up in
 *      `tools`; an unknown tool, or one without an `execute` function, is
 *      logged and skipped.
 *   2. Record the call in `tool_calls` (best effort) with `__synthesized: true`
 *      merged into the stored input.
 *   3. Broadcast `tool.requested`, insert a `tool_invocation` part on the
 *      assistant message, and publish `ToolCallRequested`.
 *   4. Run `tool.execute(toolInput, { toolCallId })`; a thrown error marks the
 *      call failed and its message becomes the output.
 *   5. Persist the outcome (best effort), broadcast `tool.completed` or
 *      `tool.failed`, and publish `ToolCallCompleted`.
 *
 * Failures of the best-effort persistence steps are logged, not thrown. A
 * failure inserting the `tool_invocation` part is NOT caught: it rejects the
 * returned promise before the tool runs.
 *
 * @param input.session_id     Session the call belongs to.
 * @param input.agentId        Agent recorded on the tool call.
 * @param input.assistantMsgId Assistant message that receives the tool_invocation part.
 * @param input.toolName       Tool name as recovered from the model text.
 * @param input.toolInput      Arguments recovered from the model text.
 * @param input.tools          Registry mapping tool name → tool with `execute()`.
 */
async function executeSynthesizedToolCall(input: {
  session_id: string;
  agentId: string;
  assistantMsgId: string;
  toolName: string;
  toolInput: Record<string, any>;
  tools: Record<string, any>;
}): Promise<void> {
  const { session_id, agentId, assistantMsgId, toolName, toolInput, tools } = input;

  const describeError = (err: unknown): string => (err instanceof Error ? err.message : String(err));

  // Map common aliases (write_file → write, edit_file → edit, etc.)
  const aliases: Record<string, string> = {
    write_file: "write",
    edit_file: "edit",
    read_file: "read",
    run_command: "bash",
    shell: "bash",
    search_files: "grep",
  };
  // toolName comes from model output. Consult only the alias table's own
  // keys so names such as "constructor" or "toString" cannot resolve to
  // inherited Object.prototype members.
  const alias = Object.prototype.hasOwnProperty.call(aliases, toolName) ? aliases[toolName] : undefined;
  const resolvedName = alias ?? toolName;
  const tool = tools[resolvedName];
  if (!tool || typeof tool.execute !== "function") {
    logger.warn("synthesized_tool_unknown", { event: "leak", metadata: { toolName, resolvedName } });
    return;
  }

  const callId = uuid();
  // Persist the tool call record. We tag it as "synthesized" so the
  // tool-capability probe in build_mode can tell the model didn't really
  // emit a native tool call — it just leaked tool-marker text that we
  // recovered. Models that always need recovery should be treated as
  // tool-incapable so we route them through the JSON-manifest fallback
  // (which is more reliable than counting on every prompt to leak cleanly).
  try {
    await pool.query(
      `INSERT INTO tool_calls (id, session_id, agent_id, tool_name, tool_input, state) VALUES ($1, $2, $3, $4, $5, 'running')`,
      [callId, session_id, agentId, resolvedName, JSON.stringify({ ...toolInput, __synthesized: true })]
    );
  } catch (err: unknown) {
    // Best effort: the tool still runs, but leave a trace of the failure.
    logger.warn("synthesized_tool_persist_failed", {
      event: "leak",
      metadata: { stage: "insert_tool_call", tool_call_id: callId, resolvedName, error: describeError(err) },
    });
  }

  // Broadcast tool.requested so the TUI shows "← Edit foo.bone"
  broadcastToChannel("part_stream", {
    type: "tool.requested",
    session_id,
    tool_call_id: callId,
    tool_name: resolvedName,
    tool_input: toolInput,
  });

  // Persist as a tool_invocation part on the assistant message.
  // NOTE(review): order_index is hard-coded to 0 — confirm this cannot
  // collide with the message's existing parts.
  const partId = uuid();
  await pool.query(
    `INSERT INTO parts (id, message_id, session_id, part_type, data, order_index) VALUES ($1, $2, $3, 'tool_invocation', $4, 0)`,
    [partId, assistantMsgId, session_id, JSON.stringify({ tool_call_id: callId, tool_name: resolvedName, args: toolInput, state: "running" })]
  );

  // Run the actual tool — emit ToolCallRequested so the same machinery as a
  // real tool call kicks in. Publish failures are deliberately ignored.
  await eventBus.publish("ToolCallRequested", {
    tool_call_id: callId,
    session_id,
    agent_id: agentId,
    tool_name: resolvedName,
    tool_input: toolInput,
    requested_at: new Date().toISOString(),
  }, "AgentLoop").catch(() => {});

  const startMs = Date.now();
  let success = true;
  let output = "";
  try {
    const result = await tool.execute(toolInput, { toolCallId: callId });
    // String results are used verbatim; otherwise the result's `output` field.
    output = typeof result === "string" ? result : (result?.output || "");
  } catch (e: any) {
    success = false;
    output = e?.message || "tool execution failed";
  }

  // Update the part and the tool_calls row with the result
  const durationMs = Date.now() - startMs;
  try {
    await pool.query(
      `UPDATE parts SET data = $2, updated_at = NOW() WHERE id = $1`,
      [partId, JSON.stringify({
        tool_call_id: callId,
        tool_name: resolvedName,
        args: toolInput,
        state: success ? "done" : "failed",
        output,
      })]
    );
    await pool.query(
      `UPDATE tool_calls SET state = $2, tool_output = $3, duration_ms = $4, updated_at = NOW() WHERE id = $1`,
      [callId, success ? "done" : "failed", JSON.stringify({ output }), durationMs]
    );
  } catch (err: unknown) {
    // Best effort: completion is still broadcast below, but record that the
    // stored state may be stuck at "running".
    logger.warn("synthesized_tool_persist_failed", {
      event: "leak",
      metadata: { stage: "update_result", tool_call_id: callId, resolvedName, error: describeError(err) },
    });
  }

  // Broadcast completion
  broadcastToChannel("part_stream", {
    type: success ? "tool.completed" : "tool.failed",
    session_id,
    tool_call_id: callId,
    tool_name: resolvedName,
    tool_input: toolInput,
    duration_ms: durationMs,
    ...(success ? {} : { error: output }),
  });

  await eventBus.publish("ToolCallCompleted", {
    tool_call_id: callId,
    session_id,
    tool_name: resolvedName,
    duration_ms: durationMs,
    completed_at: new Date().toISOString(),
  }, "AgentLoop").catch(() => {});
}