bonecode 1.4.1 → 1.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/src/engine/session/build_mode.js +12 -1
- package/dist/src/engine/session/build_mode.js.map +1 -1
- package/dist/src/engine/session/leaked_tool_call.d.ts +49 -0
- package/dist/src/engine/session/leaked_tool_call.js +174 -0
- package/dist/src/engine/session/leaked_tool_call.js.map +1 -0
- package/dist/src/engine/session/prompt.js +116 -0
- package/dist/src/engine/session/prompt.js.map +1 -1
- package/package.json +1 -1
- package/scripts/debug_extract.js +40 -0
- package/scripts/test_leaked_tool_call.js +269 -0
- package/src/engine/session/build_mode.ts +12 -1
- package/src/engine/session/leaked_tool_call.ts +166 -0
- package/src/engine/session/prompt.ts +150 -0
package/package.json
CHANGED
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
const fs = require("fs");
|
|
3
|
+
const path = require("path");
|
|
4
|
+
|
|
5
|
+
const src = fs.readFileSync(path.resolve(__dirname, "..", "src", "engine", "session", "prompt.ts"), "utf-8");
|
|
6
|
+
|
|
7
|
+
function extractFunc(name) {
|
|
8
|
+
const re = new RegExp(`(?:export\\s+)?function ${name}(?:<[^>]+>)?\\s*\\(`);
|
|
9
|
+
const m = src.match(re);
|
|
10
|
+
if (!m) return null;
|
|
11
|
+
const start = m.index + (m[0].startsWith("export") ? "export ".length : 0);
|
|
12
|
+
let depth = 0, inStr = false, strChar = "", inTpl = false, inRegex = false;
|
|
13
|
+
let inLine = false, inBlock = false, escape = false;
|
|
14
|
+
let i = src.indexOf("{", start);
|
|
15
|
+
let prev = " ";
|
|
16
|
+
for (; i < src.length; i++) {
|
|
17
|
+
const ch = src[i], next = src[i + 1];
|
|
18
|
+
if (escape) { escape = false; prev = ch; continue; }
|
|
19
|
+
if (ch === "\\") { escape = true; prev = ch; continue; }
|
|
20
|
+
if (inLine) { if (ch === "\n") inLine = false; prev = ch; continue; }
|
|
21
|
+
if (inBlock) { if (ch === "*" && next === "/") { inBlock = false; i++; } prev = ch; continue; }
|
|
22
|
+
if (inStr) { if (ch === strChar) inStr = false; prev = ch; continue; }
|
|
23
|
+
if (inTpl) { if (ch === "`") inTpl = false; prev = ch; continue; }
|
|
24
|
+
if (inRegex) { if (ch === "/" && prev !== "\\") inRegex = false; prev = ch; continue; }
|
|
25
|
+
if (ch === "/" && next === "/") { inLine = true; i++; prev = ch; continue; }
|
|
26
|
+
if (ch === "/" && next === "*") { inBlock = true; i++; prev = ch; continue; }
|
|
27
|
+
if (ch === "/" && /[=({,;!&|?:\s]/.test(prev)) { inRegex = true; prev = ch; continue; }
|
|
28
|
+
if (ch === '"' || ch === "'") { inStr = true; strChar = ch; prev = ch; continue; }
|
|
29
|
+
if (ch === "`") { inTpl = true; prev = ch; continue; }
|
|
30
|
+
if (ch === "{") depth++;
|
|
31
|
+
else if (ch === "}") { depth--; if (depth === 0) return src.slice(start, i + 1); }
|
|
32
|
+
prev = ch;
|
|
33
|
+
}
|
|
34
|
+
return null;
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
console.log("--- parseKwargs ---");
|
|
38
|
+
console.log(extractFunc("parseKwargs"));
|
|
39
|
+
console.log("\n\n--- parseLooseObject ---");
|
|
40
|
+
console.log(extractFunc("parseLooseObject"));
|
|
@@ -0,0 +1,269 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* Tests for the leaked tool-call parser. Loads the compiled module directly
|
|
4
|
+
* so tests run against the same code that ships.
|
|
5
|
+
*
|
|
6
|
+
* Patterns tested are taken from real model outputs:
|
|
7
|
+
* - gemma: <|tool_call>call:edit{file_path:<|"|>foo.bone<|"|>}<tool_call|>
|
|
8
|
+
* - qwen: <tool_call>{"name":"write","arguments":{"path":"x"}}</tool_call>
|
|
9
|
+
* - llama3: <|python_tag|>write({"path":"x"})<|/python_tag|>
|
|
10
|
+
* - openai-style fenced: ```tool_code\nname(arg=val)\n```
|
|
11
|
+
*
|
|
12
|
+
* Also re-tests isBuildPrompt against the prompts that previously slipped
|
|
13
|
+
* through (e.g. "using BoneScript as the backend, write a python ...").
|
|
14
|
+
*/
|
|
15
|
+
|
|
16
|
+
"use strict";
|
|
17
|
+
const fs = require("fs");
|
|
18
|
+
const path = require("path");
|
|
19
|
+
|
|
20
|
+
const G = "\x1b[32m"; const R = "\x1b[31m"; const C = "\x1b[36m";
|
|
21
|
+
const B = "\x1b[1m"; const D = "\x1b[2m"; const N = "\x1b[0m";
|
|
22
|
+
|
|
23
|
+
let passed = 0;
|
|
24
|
+
let failed = 0;
|
|
25
|
+
const failures = [];
|
|
26
|
+
|
|
27
|
+
/** Count one passing check and print a tick line, with optional dimmed detail text. */
function ok(name, info = "") {
  const detail = info ? ` ${D}${info}${N}` : "";
  passed += 1;
  console.log(` ${G}✓${N} ${name}${detail}`);
}
|
|
31
|
+
/** Count one failing check, remember it for the final summary, and print a cross line. */
function fail(name, msg) {
  failures.push(`${name}: ${msg}`);
  failed += 1;
  console.log(` ${R}✗${N} ${name} ${R}${msg}${N}`);
}
|
|
36
|
+
/** Print a bold section heading preceded by a blank line. */
function header(s) {
  const line = `\n${C}${B}${s}${N}`;
  console.log(line);
}
|
|
37
|
+
|
|
38
|
+
const ROOT = path.resolve(__dirname, "..");
|
|
39
|
+
const modulePath = path.join(ROOT, "dist", "src", "engine", "session", "leaked_tool_call.js");
|
|
40
|
+
|
|
41
|
+
if (!fs.existsSync(modulePath)) {
|
|
42
|
+
console.error(`${R}Compiled module not found at ${modulePath}.${N}`);
|
|
43
|
+
console.error(`Run \`npm run build\` first.`);
|
|
44
|
+
process.exit(1);
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
const {
|
|
48
|
+
extractLeakedToolCall,
|
|
49
|
+
parseLeakedBody,
|
|
50
|
+
parseKwargs,
|
|
51
|
+
parseLooseObject,
|
|
52
|
+
} = require(modulePath);
|
|
53
|
+
|
|
54
|
+
// ─── Tests: gemma-style markers ───────────────────────────────────────────────
|
|
55
|
+
|
|
56
|
+
header("[1] Gemma-style leaked calls (the user's exact bug)");
|
|
57
|
+
|
|
58
|
+
(() => {
|
|
59
|
+
const text = `I'll create the file.\n<|tool_call>call:edit{file_path:<|"|>medieval_market.bone<|"|>}<tool_call|>\nDone.`;
|
|
60
|
+
const r = extractLeakedToolCall(text);
|
|
61
|
+
if (r && r.toolName === "edit" && r.toolInput.file_path === "medieval_market.bone") {
|
|
62
|
+
ok("gemma <|tool_call>call:name{...}<tool_call|>", `→ edit(file_path="${r.toolInput.file_path}")`);
|
|
63
|
+
} else {
|
|
64
|
+
fail("gemma exact bug", JSON.stringify(r));
|
|
65
|
+
}
|
|
66
|
+
})();
|
|
67
|
+
|
|
68
|
+
(() => {
|
|
69
|
+
const text = `<|tool_call|>{"name":"write","arguments":{"path":"foo.ts","content":"hello"}}<|/tool_call|>`;
|
|
70
|
+
const r = extractLeakedToolCall(text);
|
|
71
|
+
if (r && r.toolName === "write" && r.toolInput.path === "foo.ts" && r.toolInput.content === "hello") {
|
|
72
|
+
ok("gemma <|tool_call|>{json}<|/tool_call|>");
|
|
73
|
+
} else {
|
|
74
|
+
fail("gemma JSON form", JSON.stringify(r));
|
|
75
|
+
}
|
|
76
|
+
})();
|
|
77
|
+
|
|
78
|
+
// ─── Tests: qwen-style markers ────────────────────────────────────────────────
|
|
79
|
+
|
|
80
|
+
header("[2] Qwen-style leaked calls");
|
|
81
|
+
|
|
82
|
+
(() => {
|
|
83
|
+
const text = `<tool_call>{"name":"bash","arguments":{"command":"ls -la"}}</tool_call>`;
|
|
84
|
+
const r = extractLeakedToolCall(text);
|
|
85
|
+
if (r && r.toolName === "bash" && r.toolInput.command === "ls -la") {
|
|
86
|
+
ok("<tool_call>{json}</tool_call>");
|
|
87
|
+
} else {
|
|
88
|
+
fail("qwen", JSON.stringify(r));
|
|
89
|
+
}
|
|
90
|
+
})();
|
|
91
|
+
|
|
92
|
+
(() => {
|
|
93
|
+
const text = `<tool_call>{"tool":"read","args":{"path":"src/main.ts"}}</tool_call>`;
|
|
94
|
+
const r = extractLeakedToolCall(text);
|
|
95
|
+
if (r && r.toolName === "read" && r.toolInput.path === "src/main.ts") {
|
|
96
|
+
ok("<tool_call>{tool: ..., args: ...}</tool_call>");
|
|
97
|
+
} else {
|
|
98
|
+
fail("qwen alt keys", JSON.stringify(r));
|
|
99
|
+
}
|
|
100
|
+
})();
|
|
101
|
+
|
|
102
|
+
// ─── Tests: llama3-style python_tag ───────────────────────────────────────────
|
|
103
|
+
|
|
104
|
+
header("[3] llama3-style <|python_tag|>");
|
|
105
|
+
|
|
106
|
+
(() => {
|
|
107
|
+
const text = `<|python_tag|>write({"path":"x.txt","content":"y"})<|/python_tag|>`;
|
|
108
|
+
const r = extractLeakedToolCall(text);
|
|
109
|
+
if (r && r.toolName === "write" && r.toolInput.path === "x.txt" && r.toolInput.content === "y") {
|
|
110
|
+
ok("llama3 python_tag with JSON arg");
|
|
111
|
+
} else {
|
|
112
|
+
fail("llama3", JSON.stringify(r));
|
|
113
|
+
}
|
|
114
|
+
})();
|
|
115
|
+
|
|
116
|
+
// ─── Tests: function-call kwargs ──────────────────────────────────────────────
|
|
117
|
+
|
|
118
|
+
header("[4] Function-call kwargs syntax");
|
|
119
|
+
|
|
120
|
+
(() => {
|
|
121
|
+
const args = parseKwargs(`path="foo.ts", content="hello world"`);
|
|
122
|
+
if (args && args.path === "foo.ts" && args.content === "hello world") ok("string kwargs");
|
|
123
|
+
else fail("string kwargs", JSON.stringify(args));
|
|
124
|
+
})();
|
|
125
|
+
|
|
126
|
+
(() => {
|
|
127
|
+
const args = parseKwargs(`count=42, ratio=3.14, enabled=true, missing=null`);
|
|
128
|
+
if (args && args.count === 42 && args.ratio === 3.14 && args.enabled === true && args.missing === null) {
|
|
129
|
+
ok("typed kwargs (number, float, bool, null)");
|
|
130
|
+
} else {
|
|
131
|
+
fail("typed kwargs", JSON.stringify(args));
|
|
132
|
+
}
|
|
133
|
+
})();
|
|
134
|
+
|
|
135
|
+
(() => {
|
|
136
|
+
const args = parseKwargs(`file_path=<|"|>medieval_market.bone<|"|>`);
|
|
137
|
+
if (args && args.file_path === "medieval_market.bone") ok(`<|"|> escapes are stripped`);
|
|
138
|
+
else fail("escape markers", JSON.stringify(args));
|
|
139
|
+
})();
|
|
140
|
+
|
|
141
|
+
// ─── Tests: loose-object form ─────────────────────────────────────────────────
|
|
142
|
+
|
|
143
|
+
header("[5] Loose-object form (pseudo-JSON)");
|
|
144
|
+
|
|
145
|
+
(() => {
|
|
146
|
+
const o = parseLooseObject(`file_path:"foo.bone", count:3`);
|
|
147
|
+
if (o && o.file_path === "foo.bone" && o.count === 3) ok("colon-separated loose object");
|
|
148
|
+
else fail("loose object", JSON.stringify(o));
|
|
149
|
+
})();
|
|
150
|
+
|
|
151
|
+
// ─── Tests: fenced tool_code ──────────────────────────────────────────────────
|
|
152
|
+
|
|
153
|
+
header("[6] Fenced tool_code blocks");
|
|
154
|
+
|
|
155
|
+
(() => {
|
|
156
|
+
const text = "Some prose\n```tool_code\nwrite(path=\"x\", content=\"y\")\n```\nMore prose.";
|
|
157
|
+
const r = extractLeakedToolCall(text);
|
|
158
|
+
if (r && r.toolName === "write" && r.toolInput.path === "x" && r.toolInput.content === "y") {
|
|
159
|
+
ok("```tool_code\\nname(args)\\n```");
|
|
160
|
+
} else {
|
|
161
|
+
fail("fenced tool_code", JSON.stringify(r));
|
|
162
|
+
}
|
|
163
|
+
})();
|
|
164
|
+
|
|
165
|
+
// ─── Tests: false-positives ───────────────────────────────────────────────────
|
|
166
|
+
|
|
167
|
+
header("[7] No false-positives on plain text");
|
|
168
|
+
|
|
169
|
+
const cleanCases = [
|
|
170
|
+
"I'll create a file called foo.bone now.",
|
|
171
|
+
"Use the `write` tool to save the file.",
|
|
172
|
+
"Here's how you'd do it: write(path, content) — but that's pseudocode.",
|
|
173
|
+
"<not_a_tool_call>just text</not_a_tool_call>",
|
|
174
|
+
"",
|
|
175
|
+
"<tool_call></tool_call>",
|
|
176
|
+
];
|
|
177
|
+
for (const c of cleanCases) {
|
|
178
|
+
const r = extractLeakedToolCall(c);
|
|
179
|
+
if (r === null) ok(`clean: "${c.slice(0, 50)}..."`);
|
|
180
|
+
else fail(`false positive`, `"${c}" → ${JSON.stringify(r)}`);
|
|
181
|
+
}
|
|
182
|
+
|
|
183
|
+
// ─── Tests: stripping positions ───────────────────────────────────────────────
|
|
184
|
+
|
|
185
|
+
header("[8] startIndex/endIndex enable text stripping");
|
|
186
|
+
|
|
187
|
+
(() => {
|
|
188
|
+
const text = `Before <|tool_call|>{"name":"write","arguments":{}}<|/tool_call|> after`;
|
|
189
|
+
const r = extractLeakedToolCall(text);
|
|
190
|
+
if (!r) {
|
|
191
|
+
fail("strip positions", "no match");
|
|
192
|
+
} else {
|
|
193
|
+
const stripped = text.slice(0, r.startIndex) + text.slice(r.endIndex);
|
|
194
|
+
if (stripped === "Before after") ok("text stripped cleanly", `"${stripped}"`);
|
|
195
|
+
else fail("strip", `got "${stripped}"`);
|
|
196
|
+
}
|
|
197
|
+
})();
|
|
198
|
+
|
|
199
|
+
// ─── Tests: build mode trigger detection ──────────────────────────────────────
|
|
200
|
+
|
|
201
|
+
header("[9] isBuildPrompt covers the previously-missed prompts");
|
|
202
|
+
|
|
203
|
+
const bmModulePath = path.join(ROOT, "dist", "src", "engine", "session", "build_mode.js");
|
|
204
|
+
if (!fs.existsSync(bmModulePath)) {
|
|
205
|
+
fail("build_mode module", "compiled file missing");
|
|
206
|
+
} else {
|
|
207
|
+
const { isBuildPrompt } = require(bmModulePath);
|
|
208
|
+
|
|
209
|
+
const newCases = [
|
|
210
|
+
// The exact prompt that previously failed in the user's session
|
|
211
|
+
"using BoneScript as the backend, write a python 2d mideveal copper silver gold platinum transaction market simulation",
|
|
212
|
+
"with bonescript, build a chat app",
|
|
213
|
+
"in BoneScript, design a multi-tenant CRM",
|
|
214
|
+
"BoneScript backend for a music streaming service",
|
|
215
|
+
"write me a REST API for a todo list",
|
|
216
|
+
"develop a graphql api for users",
|
|
217
|
+
"scaffold a web application with auth",
|
|
218
|
+
];
|
|
219
|
+
for (const p of newCases) {
|
|
220
|
+
if (isBuildPrompt(p)) ok(`triggers: "${p.slice(0, 60)}..."`);
|
|
221
|
+
else fail(`missed`, p);
|
|
222
|
+
}
|
|
223
|
+
|
|
224
|
+
const negative = [
|
|
225
|
+
"what does this function do",
|
|
226
|
+
"explain the difference between let and const",
|
|
227
|
+
"fix the typo on line 5",
|
|
228
|
+
];
|
|
229
|
+
for (const p of negative) {
|
|
230
|
+
if (!isBuildPrompt(p)) ok(`not triggered: "${p}"`);
|
|
231
|
+
else fail(`over-matched`, p);
|
|
232
|
+
}
|
|
233
|
+
}
|
|
234
|
+
|
|
235
|
+
// ─── Tests: parseLeakedBody handles edge cases ────────────────────────────────
|
|
236
|
+
|
|
237
|
+
header("[10] parseLeakedBody edge cases");
|
|
238
|
+
|
|
239
|
+
(() => {
|
|
240
|
+
const r = parseLeakedBody("");
|
|
241
|
+
if (r === null) ok("empty body returns null");
|
|
242
|
+
else fail("empty", JSON.stringify(r));
|
|
243
|
+
})();
|
|
244
|
+
|
|
245
|
+
(() => {
|
|
246
|
+
const r = parseLeakedBody("not a tool call at all");
|
|
247
|
+
if (r === null) ok("garbage body returns null");
|
|
248
|
+
else fail("garbage", JSON.stringify(r));
|
|
249
|
+
})();
|
|
250
|
+
|
|
251
|
+
(() => {
|
|
252
|
+
// Function call with JSON arg
|
|
253
|
+
const r = parseLeakedBody('write({"path": "a.txt", "content": "b"})');
|
|
254
|
+
if (r && r.toolName === "write" && r.toolInput.path === "a.txt" && r.toolInput.content === "b") {
|
|
255
|
+
ok("function with JSON arg");
|
|
256
|
+
} else {
|
|
257
|
+
fail("function JSON arg", JSON.stringify(r));
|
|
258
|
+
}
|
|
259
|
+
})();
|
|
260
|
+
|
|
261
|
+
console.log();
|
|
262
|
+
if (failed === 0) {
|
|
263
|
+
console.log(`${G}${B}✓ All ${passed} tests passed${N}`);
|
|
264
|
+
process.exit(0);
|
|
265
|
+
} else {
|
|
266
|
+
console.log(`${R}${B}✗ ${failed} failed, ${passed} passed${N}`);
|
|
267
|
+
for (const f of failures) console.log(` ${R}- ${f}${N}`);
|
|
268
|
+
process.exit(1);
|
|
269
|
+
}
|
|
@@ -887,9 +887,20 @@ export function isBuildPrompt(prompt: string): boolean {
|
|
|
887
887
|
/\bmake\s+(?:a|an|the)\s+(?:full|complete|whole|new)\b/,
|
|
888
888
|
/\bproject\s+(?:from\s+scratch|to)\b/,
|
|
889
889
|
/\bsimulation\s+(?:with|using|of)\b/,
|
|
890
|
-
/\bbackend\s+(?:for|with|using)\b/,
|
|
890
|
+
/\bbackend\s+(?:for|with|using|service)\b/,
|
|
891
891
|
/\bspec(?:ification)?\s+(?:for|of)\b/,
|
|
892
892
|
/\bend[- ]to[- ]end\b/,
|
|
893
|
+
// Verb-led requests: "write/code/develop/generate/scaffold [me] a/an/the <noun>"
|
|
894
|
+
/\b(?:write|code|develop|generate|scaffold)\s+(?:me\s+)?(?:a|an|the)\s+\w+/,
|
|
895
|
+
// BoneScript-specific phrases — "using/with/in bonescript" or "bonescript as/for/backend" are treated as build requests
|
|
896
|
+
/\busing\s+bonescript\b/,
|
|
897
|
+
/\bwith\s+bonescript\b/,
|
|
898
|
+
/\bin\s+bonescript\b/,
|
|
899
|
+
/\bbonescript\s+(?:as|for|backend)\b/,
|
|
900
|
+
// Generic "<adjective> <noun-app>" patterns indicating a system request
|
|
901
|
+
/\b(?:rest|graphql)\s+api\b/,
|
|
902
|
+
/\bweb\s+app(?:lication)?\b/,
|
|
903
|
+
/\bgame\s+(?:simulation|engine|server)\b/,
|
|
893
904
|
];
|
|
894
905
|
return triggers.some((re) => re.test(p));
|
|
895
906
|
}
|
|
@@ -0,0 +1,166 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Pure, side-effect-free parser for leaked tool-call markers.
|
|
3
|
+
*
|
|
4
|
+
* Some local models (gemma, qwen, llama variants) emit their internal
|
|
5
|
+
* tool-call markers as raw text instead of producing structured tool_call
|
|
6
|
+
* events. The AI SDK's parser misses these, so the model's prose appears in
|
|
7
|
+
* the output but no tool ever runs.
|
|
8
|
+
*
|
|
9
|
+
* This module recovers the intended call by pattern-matching the leaked text.
|
|
10
|
+
* No DB, no network, no global state — pure functions only, fully testable.
|
|
11
|
+
*
|
|
12
|
+
* Patterns recognized (across multiple template formats):
|
|
13
|
+
* <|tool_call|>{"name":"write","arguments":{...}}<|/tool_call|>
|
|
14
|
+
* <|tool_call>name:write{...args...}<tool_call|>
|
|
15
|
+
* <tool_call>{"name":"write","arguments":{...}}</tool_call>
|
|
16
|
+
* <function_call>{"name":"write","arguments":{...}}</function_call>
|
|
17
|
+
* ```tool_code\nwrite(path="x", content="y")\n```
|
|
18
|
+
* <|python_tag|>write({"path": "x"})<|/python_tag|>
|
|
19
|
+
*/
|
|
20
|
+
|
|
21
|
+
/** A tool call recovered from leaked marker text, plus where the marker block sat in the scanned string. */
export interface LeakedToolCall {
  /** Tool name as written in the leaked text (not alias-resolved). */
  toolName: string;
  /** Arguments recovered for the call. */
  toolInput: Record<string, any>;
  /** Offset in the scanned text where the matched marker block begins. */
  startIndex: number;
  /** Offset just past the end of the matched marker block (exclusive, usable with `slice`). */
  endIndex: number;
}
|
|
27
|
+
|
|
28
|
+
/**
 * Find the first parseable leaked tool call in `text`.
 *
 * Marker formats are tried in priority order (pipe-style `<|tool_call|>`,
 * `<tool_call>`, `<function_call>`, `<|python_tag|>`, then fenced
 * ```tool_code blocks). Within one format every candidate block is tried, not
 * only the first: a block whose body cannot be parsed (an empty
 * `<tool_call></tool_call>`, or a prose mention of the opening tag) no longer
 * hides a valid call that appears later in the text.
 *
 * @param text Accumulated model output to scan.
 * @returns The recovered call with the [startIndex, endIndex) span of its
 *          marker block, or null when no block parses as a tool call.
 */
export function extractLeakedToolCall(text: string): LeakedToolCall | null {
  // Regex literals are evaluated on each call, so the `g` flag's lastIndex
  // state never leaks between invocations.
  const candidatePatterns = [
    /<\|tool_call\|?>([\s\S]*?)<\|?\/?tool_call\|?>/gi,
    /<tool_call>([\s\S]*?)<\/?tool_call>/gi,
    /<function_call>([\s\S]*?)<\/?function_call>/gi,
    /<\|python_tag\|>([\s\S]*?)<\|?\/?python_tag\|?>/gi,
    /```(?:tool_code|tool_call|function|python)\s*\n([\s\S]*?)\n```/gi,
  ];

  for (const re of candidatePatterns) {
    let m: RegExpExecArray | null;
    while ((m = re.exec(text)) !== null) {
      const parsed = parseLeakedBody(m[1]);
      if (parsed) {
        return { ...parsed, startIndex: m.index, endIndex: m.index + m[0].length };
      }
      // The closing tag's slash is optional, so a failed match may have used a
      // second opening tag as its terminator. Resume one character after this
      // opener instead of after the whole match so that tag can open a block.
      re.lastIndex = m.index + 1;
    }
  }

  return null;
}
|
|
57
|
+
|
|
58
|
+
/**
 * Parse the body of a leaked tool-call block. Formats are tried in order:
 *   1. JSON: {"name": "write", "arguments": {...}} — also accepts the keys
 *      tool / tool_name / function for the name and args / parameters / input
 *      for the arguments; string-valued arguments are parsed as nested JSON.
 *   2. Function-call style: write(path="x", content="y") or write({"path": "x"})
 *   3. Pseudo-syntax: call:write{path:"x"} (also name: / tool: / function:)
 *
 * @param body Text between the opening and closing markers.
 * @returns The tool name and an argument object, or null when no format
 *          matches. `toolInput` is always a plain object: JSON arguments that
 *          are an array, a primitive, or an unparseable string become `{}`.
 */
export function parseLeakedBody(body: string): { toolName: string; toolInput: Record<string, any> } | null {
  if (!body) return null;
  const trimmed = body.trim();

  const isRecord = (v: unknown): v is Record<string, any> =>
    v !== null && typeof v === "object" && !Array.isArray(v);

  // Try JSON first
  const json = safeParseJson(trimmed);
  if (json && typeof json === "object") {
    const name = json.name || json.tool || json.tool_name || json.function;
    const args = json.arguments || json.args || json.parameters || json.input || {};
    if (typeof name === "string" && name.length > 0) {
      const parsedArgs = typeof args === "string" ? safeParseJson(args) : args;
      // Tools expect keyed arguments; never hand them an array or a primitive.
      return { toolName: name, toolInput: isRecord(parsedArgs) ? parsedArgs : {} };
    }
  }

  // Try function-call style: name(arg1=val1, arg2="val2")
  const fnMatch = trimmed.match(/^([a-zA-Z_][\w]*)\s*\(([\s\S]*)\)\s*$/);
  if (fnMatch) {
    const toolName = fnMatch[1];
    const argsStr = fnMatch[2];
    // Try JSON-shaped arg first: write({"path": "x"})
    const innerJson = safeParseJson(argsStr);
    if (isRecord(innerJson)) {
      return { toolName, toolInput: innerJson };
    }
    const toolInput = parseKwargs(argsStr);
    if (toolInput) return { toolName, toolInput };
  }

  // Try pseudo-syntax: call:name{key:"val", ...} or name:foo{...}
  const callMatch = trimmed.match(/(?:call:|name:|tool:|function:)([a-zA-Z_][\w]*)\s*\{([\s\S]*)\}\s*/i);
  if (callMatch) {
    const toolName = callMatch[1];
    const innerJson = "{" + callMatch[2] + "}";
    // Strict JSON wins; otherwise fall back to the tolerant key:value scanner.
    const toolInput = safeParseJson(innerJson) || parseLooseObject(callMatch[2]);
    if (toolInput) return { toolName, toolInput };
  }

  return null;
}
|
|
106
|
+
|
|
107
|
+
/** Parse `s` as JSON, yielding null instead of throwing when the text is not valid JSON. */
export function safeParseJson(s: string): any | null {
  let parsed: any = null;
  try {
    parsed = JSON.parse(s);
  } catch {
    // Not valid JSON: keep the null default.
  }
  return parsed;
}
|
|
114
|
+
|
|
115
|
+
/**
 * Parse Python-style kwargs from a function-call body:
 *   path="x", content="y", count=42
 *
 * `<|"|>` / `<|'|>` escape markers that some templates inject are first
 * rewritten to plain quotes. Values may be single- or double-quoted strings,
 * integers or decimals, `true`, `false` or `null`. Inside quoted strings the
 * escapes `\n`, `\t` and `\r` decode to the control characters they name; any
 * other escaped character stands for itself (`\"` → `"`, `\\` → `\`).
 *
 * @param s Text between the parentheses of the call.
 * @returns `{}` for blank input, the parsed pairs when at least one
 *          `key=value` is found, otherwise null.
 */
export function parseKwargs(s: string): Record<string, any> | null {
  if (!s.trim()) return {};
  const cleaned = s.replace(/<\|"\|>/g, '"').replace(/<\|'\|>/g, "'");

  const controlEscapes: Record<string, string> = { n: "\n", t: "\t", r: "\r" };
  // Strip the surrounding quotes and decode backslash escapes.
  const decodeQuoted = (quoted: string): string =>
    quoted.slice(1, -1).replace(/\\([\s\S])/g, (_whole, ch: string) => controlEscapes[ch] ?? ch);

  const result: Record<string, any> = {};
  // `\\[\s\S]` (rather than `\\.`) lets a backslash escape a literal newline too.
  const re = /([a-zA-Z_][\w]*)\s*=\s*("(?:[^"\\]|\\[\s\S])*"|'(?:[^'\\]|\\[\s\S])*'|-?\d+(?:\.\d+)?|true|false|null)/g;
  let m: RegExpExecArray | null;
  let matched = false;
  while ((m = re.exec(cleaned)) !== null) {
    matched = true;
    const key = m[1];
    const raw = m[2];
    let value: any;
    if (raw === "true") value = true;
    else if (raw === "false") value = false;
    else if (raw === "null") value = null;
    else if (/^-?\d/.test(raw)) value = parseFloat(raw);
    else value = decodeQuoted(raw);
    result[key] = value;
  }
  return matched ? result : null;
}
|
|
141
|
+
|
|
142
|
+
/**
 * Parse a loose key:value object body (no surrounding braces, no enforced
 * JSON quoting). Used for pseudo-syntax fallbacks like:
 *   file_path:<|"|>medieval_market.bone<|"|>
 *
 * Keys and values may be separated by `:` or `=`. A value is either a quoted
 * string or a bare token running up to the next whitespace, comma or `}`.
 * A bare token is typed only when the WHOLE token is `true`, `false`, `null`
 * or a number; anything else is kept as a string, so `path:nullable.ts`
 * yields "nullable.ts" and `version:1.2.3` yields "1.2.3". Quoted strings
 * decode `\n`, `\t`, `\r`; any other escaped character stands for itself.
 *
 * @param s Text between the braces of the pseudo-object.
 * @returns The parsed pairs, or null when no key/value pair is found.
 */
export function parseLooseObject(s: string): Record<string, any> | null {
  const cleaned = s.replace(/<\|"\|>/g, '"').replace(/<\|'\|>/g, "'");

  const controlEscapes: Record<string, string> = { n: "\n", t: "\t", r: "\r" };
  const decodeQuoted = (quoted: string): string =>
    quoted.slice(1, -1).replace(/\\([\s\S])/g, (_whole, ch: string) => controlEscapes[ch] ?? ch);

  // Whole-token typing for unquoted values; non-matching tokens stay strings.
  const typeBareToken = (token: string): unknown => {
    if (token === "true") return true;
    if (token === "false") return false;
    if (token === "null") return null;
    if (/^-?\d+(?:\.\d+)?$/.test(token)) return parseFloat(token);
    return token;
  };

  const result: Record<string, any> = {};
  // Group 2 = properly terminated quoted string, group 3 = bare token. Keeping
  // them apart means an unterminated `"abc` is never sliced as if it were quoted.
  const re = /([a-zA-Z_][\w]*)\s*[:=]\s*(?:("(?:[^"\\]|\\[\s\S])*"|'(?:[^'\\]|\\[\s\S])*')|([^\s,}]+))/g;
  let m: RegExpExecArray | null;
  let matched = false;
  while ((m = re.exec(cleaned)) !== null) {
    matched = true;
    const key = m[1];
    result[key] = m[2] !== undefined ? decodeQuoted(m[2]) : typeBareToken(m[3]);
  }
  return matched ? result : null;
}
|
|
@@ -42,6 +42,7 @@ import { buildCompactionSummary } from "./compaction_logic";
|
|
|
42
42
|
import { getSystemPrompt } from "./system_prompt";
|
|
43
43
|
import { loadInstructionFiles } from "./instruction_loader";
|
|
44
44
|
import { buildToolRegistry } from "./tool_registry";
|
|
45
|
+
import { extractLeakedToolCall } from "./leaked_tool_call";
|
|
45
46
|
|
|
46
47
|
// ─── Types ────────────────────────────────────────────────────────────────────
|
|
47
48
|
|
|
@@ -349,6 +350,36 @@ async function streamOnce(ctx: {
|
|
|
349
350
|
}
|
|
350
351
|
|
|
351
352
|
currentTextContent += text;
|
|
353
|
+
|
|
354
|
+
// Detect models leaking their internal tool-call markers as raw text
|
|
355
|
+
// (gemma, qwen, llama variants do this when the tokenizer template
|
|
356
|
+
// doesn't match the AI SDK's expected format). When we find a complete
|
|
357
|
+
// leaked call, synthesize a real tool execution.
|
|
358
|
+
const leak = extractLeakedToolCall(currentTextContent);
|
|
359
|
+
if (leak) {
|
|
360
|
+
// Strip the leaked markers from the displayed text part
|
|
361
|
+
currentTextContent = currentTextContent.slice(0, leak.startIndex) +
|
|
362
|
+
currentTextContent.slice(leak.endIndex);
|
|
363
|
+
await pool.query(
|
|
364
|
+
`UPDATE parts SET data = $2, updated_at = NOW() WHERE id = $1`,
|
|
365
|
+
[currentTextPartId, JSON.stringify({ text: currentTextContent })]
|
|
366
|
+
);
|
|
367
|
+
|
|
368
|
+
// Execute the synthesized tool call directly via the registry
|
|
369
|
+
await executeSynthesizedToolCall({
|
|
370
|
+
session_id,
|
|
371
|
+
agentId: ctx.agentId,
|
|
372
|
+
assistantMsgId,
|
|
373
|
+
toolName: leak.toolName,
|
|
374
|
+
toolInput: leak.toolInput,
|
|
375
|
+
tools,
|
|
376
|
+
});
|
|
377
|
+
|
|
378
|
+
// Mark the turn as having tool calls so the loop continues
|
|
379
|
+
hasToolCalls = true;
|
|
380
|
+
break;
|
|
381
|
+
}
|
|
382
|
+
|
|
352
383
|
// Broadcast delta to WebSocket part_stream for live streaming
|
|
353
384
|
broadcastToChannel("part_stream", {
|
|
354
385
|
type: "part.delta",
|
|
@@ -837,3 +868,122 @@ function supportsTools(model_id: string): boolean {
|
|
|
837
868
|
// Default: try with tools, fall back gracefully on error
|
|
838
869
|
return true;
|
|
839
870
|
}
|
|
871
|
+
|
|
872
|
+
// ─── Synthesized tool-call execution ──────────────────────────────────────────
|
|
873
|
+
|
|
874
|
+
/**
 * Execute a tool call recovered from leaked marker text. Mirrors the work the
 * AI SDK would normally do: record the call, insert a tool_invocation part,
 * broadcast progress events, and run the registered tool's execute().
 *
 * Steps, in order:
 *   1. Resolve common aliases (write_file → write, …) and look the tool up in
 *      `tools`. Unknown tools are logged and skipped without touching the DB.
 *   2. Insert a `tool_calls` row (best-effort) and broadcast `tool.requested`.
 *   3. Insert a `tool_invocation` part in state "running" (a failure here
 *      propagates to the caller, as before).
 *   4. Publish `ToolCallRequested`, run `tool.execute`, capturing its output
 *      or error message.
 *   5. Update the part and `tool_calls` row (best-effort), broadcast
 *      `tool.completed` / `tool.failed`, and publish `ToolCallCompleted`.
 *
 * Tool failures never throw from here; they are reported through the
 * "failed" state and the `tool.failed` broadcast.
 *
 * @param input.toolName  Name as emitted by the model; it is untrusted, so
 *                        only own properties of the alias table and of
 *                        `tools` are consulted.
 * @param input.toolInput Arguments passed to `tool.execute` unchanged.
 * @param input.tools     Registry mapping tool name → object with `execute`.
 */
async function executeSynthesizedToolCall(input: {
  session_id: string;
  agentId: string;
  assistantMsgId: string;
  toolName: string;
  toolInput: Record<string, any>;
  tools: Record<string, any>;
}): Promise<void> {
  const { session_id, agentId, assistantMsgId, toolName, toolInput, tools } = input;

  const hasOwn = (obj: object, key: string): boolean =>
    Object.prototype.hasOwnProperty.call(obj, key);
  // Non-empty `.message` of a thrown value, if it has one.
  const errorMessage = (e: unknown): string | undefined => {
    const msg = (e as { message?: unknown } | null | undefined)?.message;
    return typeof msg === "string" && msg.length > 0 ? msg : undefined;
  };

  // Map common aliases (write_file → write, edit_file → edit, etc.)
  const aliases: Record<string, string> = {
    write_file: "write",
    edit_file: "edit",
    read_file: "read",
    run_command: "bash",
    shell: "bash",
    search_files: "grep",
  };
  // Own-property lookups only: a model-supplied name such as "constructor" or
  // "toString" must not resolve to a member inherited from Object.prototype.
  const resolvedName = hasOwn(aliases, toolName) ? aliases[toolName] : toolName;
  const tool = hasOwn(tools, resolvedName) ? tools[resolvedName] : undefined;
  if (!tool || typeof tool.execute !== "function") {
    logger.warn("synthesized_tool_unknown", { event: "leak", metadata: { toolName, resolvedName } });
    return;
  }

  const callId = uuid();
  // Persist the tool call record (best-effort: a failure is logged, not fatal)
  try {
    await pool.query(
      `INSERT INTO tool_calls (id, session_id, agent_id, tool_name, tool_input, state) VALUES ($1, $2, $3, $4, $5, 'running')`,
      [callId, session_id, agentId, resolvedName, JSON.stringify(toolInput)]
    );
  } catch (e: unknown) {
    logger.warn("synthesized_tool_persist_failed", {
      event: "leak",
      metadata: { tool_call_id: callId, stage: "insert_tool_call", error: errorMessage(e) ?? String(e) },
    });
  }

  // Broadcast tool.requested so the TUI shows "← Edit foo.bone"
  broadcastToChannel("part_stream", {
    type: "tool.requested",
    session_id,
    tool_call_id: callId,
    tool_name: resolvedName,
    tool_input: toolInput,
  });

  // Persist as a tool_invocation part on the assistant message.
  // NOTE(review): order_index is hard-coded to 0 — confirm this cannot collide
  // with the ordering of other parts on the same message.
  const partId = uuid();
  await pool.query(
    `INSERT INTO parts (id, message_id, session_id, part_type, data, order_index) VALUES ($1, $2, $3, 'tool_invocation', $4, 0)`,
    [partId, assistantMsgId, session_id, JSON.stringify({ tool_call_id: callId, tool_name: resolvedName, args: toolInput, state: "running" })]
  );

  // Announce the call on the event bus, then run the tool directly below.
  // NOTE(review): if any ToolCallRequested subscriber also executes tools,
  // this call would run twice — verify against the subscribers.
  await eventBus.publish("ToolCallRequested", {
    tool_call_id: callId,
    session_id,
    agent_id: agentId,
    tool_name: resolvedName,
    tool_input: toolInput,
    requested_at: new Date().toISOString(),
  }, "AgentLoop").catch(() => {});

  const startMs = Date.now();
  let success = true;
  let output = "";
  try {
    const result = await tool.execute(toolInput, { toolCallId: callId });
    output = typeof result === "string" ? result : (result?.output || "");
  } catch (e: unknown) {
    success = false;
    output = errorMessage(e) ?? "tool execution failed";
  }

  // Update the part and the tool_calls row with the result (best-effort)
  const durationMs = Date.now() - startMs;
  try {
    await pool.query(
      `UPDATE parts SET data = $2, updated_at = NOW() WHERE id = $1`,
      [partId, JSON.stringify({
        tool_call_id: callId,
        tool_name: resolvedName,
        args: toolInput,
        state: success ? "done" : "failed",
        output,
      })]
    );
    await pool.query(
      `UPDATE tool_calls SET state = $2, tool_output = $3, duration_ms = $4, updated_at = NOW() WHERE id = $1`,
      [callId, success ? "done" : "failed", JSON.stringify({ output }), durationMs]
    );
  } catch (e: unknown) {
    logger.warn("synthesized_tool_persist_failed", {
      event: "leak",
      metadata: { tool_call_id: callId, stage: "update_result", error: errorMessage(e) ?? String(e) },
    });
  }

  // Broadcast completion
  broadcastToChannel("part_stream", {
    type: success ? "tool.completed" : "tool.failed",
    session_id,
    tool_call_id: callId,
    tool_name: resolvedName,
    tool_input: toolInput,
    duration_ms: durationMs,
    ...(success ? {} : { error: output }),
  });

  await eventBus.publish("ToolCallCompleted", {
    tool_call_id: callId,
    session_id,
    tool_name: resolvedName,
    duration_ms: durationMs,
    completed_at: new Date().toISOString(),
  }, "AgentLoop").catch(() => {});
}
|