bonecode 1.3.0 → 1.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +42 -0
- package/compat/opencode_adapter.ts +69 -8
- package/dist/compat/opencode_adapter.js +63 -7
- package/dist/compat/opencode_adapter.js.map +1 -1
- package/dist/src/db_adapter.js +30 -0
- package/dist/src/db_adapter.js.map +1 -1
- package/dist/src/engine/session/build_mode.d.ts +83 -0
- package/dist/src/engine/session/build_mode.js +800 -0
- package/dist/src/engine/session/build_mode.js.map +1 -0
- package/dist/src/engine/session/build_mode_helpers.d.ts +6 -0
- package/dist/src/engine/session/build_mode_helpers.js +61 -0
- package/dist/src/engine/session/build_mode_helpers.js.map +1 -0
- package/dist/src/engine/session/leaked_tool_call.d.ts +49 -0
- package/dist/src/engine/session/leaked_tool_call.js +174 -0
- package/dist/src/engine/session/leaked_tool_call.js.map +1 -0
- package/dist/src/engine/session/prompt/bonescript.txt +11 -0
- package/dist/src/engine/session/prompt.js +173 -2
- package/dist/src/engine/session/prompt.js.map +1 -1
- package/dist/src/tui.js +146 -9
- package/dist/src/tui.js.map +1 -1
- package/package.json +1 -1
- package/scripts/debug_extract.js +40 -0
- package/scripts/test_build_fallback.js +221 -0
- package/scripts/test_build_mode.js +301 -0
- package/scripts/test_leaked_tool_call.js +269 -0
- package/src/db_adapter.ts +29 -0
- package/src/engine/session/build_mode.ts +906 -0
- package/src/engine/session/build_mode_helpers.ts +72 -0
- package/src/engine/session/leaked_tool_call.ts +166 -0
- package/src/engine/session/prompt/bonescript.txt +11 -0
- package/src/engine/session/prompt.ts +219 -2
- package/src/tui.ts +147 -9
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Provider/model factory used by build_mode.ts for structured-output prompts.
|
|
3
|
+
* Mirrors the factory in prompt.ts so the build orchestrator can issue
|
|
4
|
+
* non-streaming model calls without depending on the streaming agent loop.
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
import { createOpenAI } from "@ai-sdk/openai";
|
|
8
|
+
import { createAnthropic } from "@ai-sdk/anthropic";
|
|
9
|
+
import { createGoogleGenerativeAI } from "@ai-sdk/google";
|
|
10
|
+
|
|
11
|
+
/**
 * Build an AI SDK language-model handle for a provider/model pair.
 *
 * Resolution rules, in order:
 *  - `provider_id` is compared case-insensitively.
 *  - The pseudo-provider "local" is redirected to `DEFAULT_PROVIDER`
 *    (or "openai_compatible" when unset) and `DEFAULT_MODEL` (or `model_id`).
 *  - The API key is read from `<PROVIDER>_API_KEY`, then `OPENAI_API_KEY`,
 *    then the placeholder "not-needed".
 *  - The base URL is read from `<PROVIDER>_BASE_URL`, then `OPENAI_BASE_URL`.
 *
 * Note that the OpenAI fallbacks apply to every provider, so an Anthropic
 * client is built with the OpenAI key / base URL when no Anthropic-specific
 * variables are set.
 *
 * @param provider_id Provider name such as "openai", "anthropic" or "local".
 * @param model_id    Model identifier handed to the provider factory.
 * @returns The model object produced by the selected provider factory.
 */
export function getLanguageModel(provider_id: string, model_id: string): any {
  const requested = provider_id.toLowerCase();
  const isLocal = requested === "local";

  let provider = requested;
  let model = model_id;
  if (isLocal) {
    provider = (process.env.DEFAULT_PROVIDER || "openai_compatible").toLowerCase();
    model = process.env.DEFAULT_MODEL || model_id;
  }

  const envPrefix = provider.toUpperCase();
  const apiKey =
    process.env[`${envPrefix}_API_KEY`] || process.env.OPENAI_API_KEY || "not-needed";
  const baseUrl = process.env[`${envPrefix}_BASE_URL`] || process.env.OPENAI_BASE_URL;

  if (provider === "anthropic") {
    const anthropic = createAnthropic({ apiKey, baseURL: baseUrl });
    return anthropic(model);
  }

  if (provider === "google") {
    // Only the key is forwarded here; the resolved base URL is not used.
    const google = createGoogleGenerativeAI({ apiKey });
    return google(model);
  }

  if (provider === "groq") {
    // Loaded at call time, only when this provider is actually selected.
    const { createGroq } = require("@ai-sdk/groq");
    const groq = createGroq({ apiKey: process.env.GROQ_API_KEY || apiKey });
    return groq(model);
  }

  if (provider === "cerebras") {
    const { createCerebras } = require("@ai-sdk/cerebras");
    const cerebras = createCerebras({ apiKey: process.env.CEREBRAS_API_KEY || apiKey });
    return cerebras(model);
  }

  if (provider === "deepseek") {
    // DeepSeek is reached through the OpenAI client with a fixed endpoint.
    const deepseek = createOpenAI({
      apiKey: process.env.DEEPSEEK_API_KEY || apiKey,
      baseURL: "https://api.deepseek.com/v1",
    });
    return deepseek(model);
  }

  if (provider === "openrouter") {
    const { createOpenRouter } = require("@openrouter/ai-sdk-provider");
    const openrouter = createOpenRouter({
      apiKey: process.env.OPENROUTER_API_KEY || apiKey,
    });
    return openrouter(model);
  }

  if (provider === "ollama") {
    // Ollama is reached through the OpenAI client with the fixed key "ollama".
    const ollama = createOpenAI({
      apiKey: "ollama",
      baseURL: process.env.OLLAMA_BASE_URL || "http://localhost:11434/v1",
    });
    return ollama(model);
  }

  // "openai", "openai_compatible" and every unrecognised provider name.
  const openai = createOpenAI({ apiKey, baseURL: baseUrl });
  return openai(model);
}
|
|
@@ -0,0 +1,166 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Pure, side-effect-free parser for leaked tool-call markers.
|
|
3
|
+
*
|
|
4
|
+
* Some local models (gemma, qwen, llama variants) emit their internal
|
|
5
|
+
* tool-call markers as raw text instead of producing structured tool_call
|
|
6
|
+
* events. The AI SDK's parser misses these, so the model's prose appears in
|
|
7
|
+
* the output but no tool ever runs.
|
|
8
|
+
*
|
|
9
|
+
* This module recovers the intended call by pattern-matching the leaked text.
|
|
10
|
+
* No DB, no network, no global state — pure functions only, fully testable.
|
|
11
|
+
*
|
|
12
|
+
* Patterns recognized (across multiple template formats):
|
|
13
|
+
* <|tool_call|>{"name":"write","arguments":{...}}<|/tool_call|>
|
|
14
|
+
* <|tool_call>name:write{...args...}<tool_call|>
|
|
15
|
+
* <tool_call>{"name":"write","arguments":{...}}</tool_call>
|
|
16
|
+
* <function_call>{"name":"write","arguments":{...}}</function_call>
|
|
17
|
+
* ```tool_code\nwrite(path="x", content="y")\n```
|
|
18
|
+
* <|python_tag|>write({"path": "x"})<|/python_tag|>
|
|
19
|
+
*/
|
|
20
|
+
|
|
21
|
+
/** A tool call recovered from raw model text, plus where it was found. */
export interface LeakedToolCall {
  /** Tool name exactly as written in the leaked marker. */
  toolName: string;
  /** Arguments decoded from the leaked marker body. */
  toolInput: Record<string, any>;
  /** Offset in the scanned text where the leaked marker begins. */
  startIndex: number;
  /** Offset in the scanned text just past the end of the leaked marker. */
  endIndex: number;
}
|
|
27
|
+
|
|
28
|
+
/**
 * Find the first recoverable leaked tool call in `text`.
 *
 * Marker families are tried in priority order (pipe-delimited tool_call,
 * XML-style tool_call, function_call, python_tag, then fenced code blocks).
 * Within a family every occurrence is examined, so a marker that is merely
 * mentioned in prose (and therefore has an unparseable body) no longer hides
 * a real call that follows it.
 *
 * @param text Accumulated assistant text to scan.
 * @returns The decoded call with the [startIndex, endIndex) span of the whole
 *          marker, or `null` when no marker body can be parsed.
 */
export function extractLeakedToolCall(text: string): LeakedToolCall | null {
  if (!text) return null;

  // Fresh regex objects per call: the `g` flag makes them stateful via lastIndex.
  const markerPatterns: RegExp[] = [
    /<\|tool_call\|?>([\s\S]*?)<\|?\/?tool_call\|?>/gi,
    /<tool_call>([\s\S]*?)<\/?tool_call>/gi,
    /<function_call>([\s\S]*?)<\/?function_call>/gi,
    /<\|python_tag\|>([\s\S]*?)<\|?\/?python_tag\|?>/gi,
    /```(?:tool_code|tool_call|function|python)\s*\n([\s\S]*?)\n```/gi,
  ];

  for (const pattern of markerPatterns) {
    let hit: RegExpExecArray | null;
    while ((hit = pattern.exec(text)) !== null) {
      const parsed = parseLeakedBody(hit[1]);
      if (parsed) {
        return {
          ...parsed,
          startIndex: hit.index,
          endIndex: hit.index + hit[0].length,
        };
      }
      // Resume one character past the failed opener rather than past the whole
      // match: the closing pattern also accepts an opening tag, so the real
      // call may start inside the span that was just rejected.
      pattern.lastIndex = hit.index + 1;
    }
  }

  return null;
}
|
|
57
|
+
|
|
58
|
+
/**
 * Parse the body of a leaked tool-call block. Formats are tried in order:
 *  - JSON: {"name": "write", "arguments": {...}}, {"tool":"write","args":{...}},
 *    or the nested form {"function": {"name": "write", "arguments": "..."}}
 *  - Function-call style: write(path="x", content="y") or write({"path": "x"})
 *  - Pseudo-syntax: call:write{path:"x"}
 *
 * On the JSON path `toolInput` is always a plain object: arguments that are
 * missing, unparseable, or decode to a non-object (array, number, string)
 * become `{}`.
 *
 * @param body Text found between the leaked markers.
 * @returns The tool name and its arguments, or `null` if no format matches.
 */
export function parseLeakedBody(body: string): { toolName: string; toolInput: Record<string, any> } | null {
  if (!body) return null;
  const trimmed = body.trim();

  const isPlainObject = (v: unknown): v is Record<string, any> =>
    typeof v === "object" && v !== null && !Array.isArray(v);
  const pickName = (o: Record<string, any>): unknown =>
    o.name || o.tool || o.tool_name || o.function;

  // 1. JSON envelope.
  const json = safeParseJson(trimmed);
  if (isPlainObject(json)) {
    let envelope = json;
    let name = pickName(envelope);
    // Nested envelope: the name and arguments live under "function".
    if (typeof name !== "string" && isPlainObject(json.function)) {
      envelope = json.function;
      name = pickName(envelope);
    }
    if (typeof name === "string" && name.length > 0) {
      const rawArgs = envelope.arguments || envelope.args || envelope.parameters || envelope.input || {};
      // Arguments are sometimes a JSON-encoded string rather than an object.
      const decoded = typeof rawArgs === "string" ? safeParseJson(rawArgs) : rawArgs;
      return { toolName: name, toolInput: isPlainObject(decoded) ? decoded : {} };
    }
  }

  // 2. Function-call style: name(arg1=val1, arg2="val2")
  const fnMatch = trimmed.match(/^([a-zA-Z_][\w]*)\s*\(([\s\S]*)\)\s*$/);
  if (fnMatch) {
    const toolName = fnMatch[1];
    const argsStr = fnMatch[2];
    // A single JSON object argument wins over kwargs: write({"path": "x"})
    const innerJson = safeParseJson(argsStr);
    if (isPlainObject(innerJson)) {
      return { toolName, toolInput: innerJson };
    }
    const toolInput = parseKwargs(argsStr);
    if (toolInput) return { toolName, toolInput };
  }

  // 3. Pseudo-syntax: call:name{key:"val", ...} or name:foo{...}
  const callMatch = trimmed.match(/(?:call:|name:|tool:|function:)([a-zA-Z_][\w]*)\s*\{([\s\S]*)\}\s*/i);
  if (callMatch) {
    const toolName = callMatch[1];
    const innerJson = "{" + callMatch[2] + "}";
    // Strict JSON first; otherwise fall back to the loose key:value parser.
    const toolInput = safeParseJson(innerJson) || parseLooseObject(callMatch[2]);
    if (toolInput) return { toolName, toolInput };
  }

  return null;
}
|
|
106
|
+
|
|
107
|
+
/**
 * JSON.parse that never throws.
 *
 * @param s Candidate JSON text.
 * @returns The decoded value, or `null` when `s` is not valid JSON. A literal
 *          JSON `null` also yields `null`, so the two cases are not
 *          distinguishable by the caller.
 */
export function safeParseJson(s: string): any | null {
  let decoded: any = null;
  try {
    decoded = JSON.parse(s);
  } catch {
    decoded = null;
  }
  return decoded;
}
|
|
114
|
+
|
|
115
|
+
/**
 * Parse Python-style keyword arguments from a function-call body, e.g.
 *   path="x", content="y", count=42
 *
 * `<|"|>` / `<|'|>` escape markers that some templates inject are converted
 * back to plain quotes first. Values may be double- or single-quoted strings,
 * integers/decimals, or the literals true / false / null.
 *
 * Inside quoted strings the escapes `\n`, `\t` and `\r` decode to the
 * corresponding control characters; any other escaped character (`\"`, `\\`,
 * ...) decodes to the character itself.
 *
 * @param s Text between the parentheses of the call.
 * @returns `{}` for blank input, the decoded arguments when at least one
 *          `key=value` pair is found, otherwise `null`.
 */
export function parseKwargs(s: string): Record<string, any> | null {
  if (!s.trim()) return {};
  const cleaned = s.replace(/<\|"\|>/g, '"').replace(/<\|'\|>/g, "'");

  const controlEscapes: Record<string, string> = { n: "\n", t: "\t", r: "\r" };
  // Drop the surrounding quotes, then decode backslash escapes.
  const unquote = (quoted: string): string =>
    quoted
      .slice(1, -1)
      .replace(/\\(.)/g, (_whole: string, ch: string) => controlEscapes[ch] ?? ch);

  const result: Record<string, any> = {};
  const re = /([a-zA-Z_][\w]*)\s*=\s*("([^"\\]|\\.)*"|'([^'\\]|\\.)*'|-?\d+(?:\.\d+)?|true|false|null)/g;
  let m: RegExpExecArray | null;
  let matched = false;
  while ((m = re.exec(cleaned)) !== null) {
    matched = true;
    const key = m[1];
    const raw = m[2];
    let value: any;
    if (raw === "true") value = true;
    else if (raw === "false") value = false;
    else if (raw === "null") value = null;
    else if (/^-?\d/.test(raw)) value = parseFloat(raw);
    else value = unquote(raw);
    result[key] = value;
  }
  return matched ? result : null;
}
|
|
141
|
+
|
|
142
|
+
/**
 * Parse a loose key:value object body (no surrounding braces, no enforced
 * JSON quoting). Used for pseudo-syntax fallbacks like:
 *   file_path:<|"|>medieval_market.bone<|"|>
 *
 * Keys and values may be separated by `:` or `=`. A value is either a quoted
 * string or a bare token running up to the next whitespace, comma or `}`.
 * A bare token is typed only when the WHOLE token is a number or one of
 * true / false / null; anything else (e.g. `3d_model.obj`, `nullable`) is kept
 * verbatim as a string rather than being cut down to its numeric or keyword
 * prefix.
 *
 * Inside quoted strings the escapes `\n`, `\t` and `\r` decode to the
 * corresponding control characters; any other escaped character decodes to
 * itself.
 *
 * @param s Text found between the braces of the pseudo-call.
 * @returns The decoded pairs, or `null` when no pair is found.
 */
export function parseLooseObject(s: string): Record<string, any> | null {
  const cleaned = s.replace(/<\|"\|>/g, '"').replace(/<\|'\|>/g, "'");

  const controlEscapes: Record<string, string> = { n: "\n", t: "\t", r: "\r" };
  const unquote = (quoted: string): string =>
    quoted
      .slice(1, -1)
      .replace(/\\(.)/g, (_whole: string, ch: string) => controlEscapes[ch] ?? ch);

  const result: Record<string, any> = {};
  // Value is a quoted string or a whole bare token; typing happens below.
  const re = /([a-zA-Z_][\w]*)\s*[:=]\s*("(?:[^"\\]|\\.)*"|'(?:[^'\\]|\\.)*'|[^\s,}]+)/g;
  let m: RegExpExecArray | null;
  let matched = false;
  while ((m = re.exec(cleaned)) !== null) {
    matched = true;
    const key = m[1];
    const raw = m[2];
    const first = raw[0];
    const isQuoted =
      raw.length >= 2 && (first === '"' || first === "'") && raw[raw.length - 1] === first;

    let value: any = raw;
    if (isQuoted) value = unquote(raw);
    else if (raw === "true") value = true;
    else if (raw === "false") value = false;
    else if (raw === "null") value = null;
    else if (/^-?\d+(?:\.\d+)?$/.test(raw)) value = parseFloat(raw);
    result[key] = value;
  }
  return matched ? result : null;
}
|
|
@@ -31,6 +31,17 @@ Use plain Python/TypeScript/etc. only for:
|
|
|
31
31
|
|
|
32
32
|
If the request is ambiguous (e.g. "a 2D market simulation"), ask the user: "Is this a self-contained simulation script (plain code) or a backend service with persistence (BoneScript)?"
|
|
33
33
|
|
|
34
|
+
## Build mode
|
|
35
|
+
|
|
36
|
+
When the user starts a session with a project-scoped prompt ("build me X", "create a full Y"), BoneCode runs you in **build mode**. Build mode is a state machine: clarify → plan → execute → verify → done. You will receive structured prompts at each stage. Specifically:
|
|
37
|
+
|
|
38
|
+
- **Clarify stage**: you'll be asked to either propose a design document (JSON) or ask 1-3 questions. Be concrete. Don't ramble.
|
|
39
|
+
- **Plan stage**: you'll be asked for a JSON todo list. Each todo must be a single concrete file action.
|
|
40
|
+
- **Execute stage**: you'll receive one todo at a time. **YOU MUST CALL TOOLS** — `write`, `edit`, `bash`. Prose-only responses are detected and rejected. The system will inject a reminder if you describe edits without calling tools.
|
|
41
|
+
- **Verify stage**: for each requirement, you'll be asked yes/no whether it's satisfied. Be honest. If a requirement is not yet met, say so — the orchestrator will create fix-up tasks.
|
|
42
|
+
|
|
43
|
+
The user can resume a build session at any time. Build state is persisted.
|
|
44
|
+
|
|
34
45
|
## BoneScript syntax — authoritative reference
|
|
35
46
|
|
|
36
47
|
### `system` block
|
|
@@ -42,6 +42,7 @@ import { buildCompactionSummary } from "./compaction_logic";
|
|
|
42
42
|
import { getSystemPrompt } from "./system_prompt";
|
|
43
43
|
import { loadInstructionFiles } from "./instruction_loader";
|
|
44
44
|
import { buildToolRegistry } from "./tool_registry";
|
|
45
|
+
import { extractLeakedToolCall } from "./leaked_tool_call";
|
|
45
46
|
|
|
46
47
|
// ─── Types ────────────────────────────────────────────────────────────────────
|
|
47
48
|
|
|
@@ -96,6 +97,7 @@ export async function runAgentLoop(input: PromptInput): Promise<LoopResult> {
|
|
|
96
97
|
|
|
97
98
|
const stats = { tokens_in: 0, tokens_out: 0, cost: 0, compacted: false };
|
|
98
99
|
let turn = 0;
|
|
100
|
+
let lazyReminderSent = false;
|
|
99
101
|
let lastFinishReason = "unknown";
|
|
100
102
|
|
|
101
103
|
try {
|
|
@@ -174,6 +176,39 @@ export async function runAgentLoop(input: PromptInput): Promise<LoopResult> {
|
|
|
174
176
|
// 3. "content-filter" = blocked — stop
|
|
175
177
|
// 4. "tool-calls" with no actual tool calls = model confused — stop
|
|
176
178
|
const terminalReasons = new Set(["stop", "length", "content-filter", "end-turn"]);
|
|
179
|
+
|
|
180
|
+
// Detect "lazy assistant" — the model claims it's editing/creating files
|
|
181
|
+
// in prose but never actually called a tool. Common with non-tool-tuned
|
|
182
|
+
// local models. At most once per runAgentLoop call, push a synthetic reminder and re-run.
|
|
183
|
+
const lazyAssistant = !result.has_tool_calls &&
|
|
184
|
+
Object.keys(tools).length > 0 &&
|
|
185
|
+
!lazyReminderSent &&
|
|
186
|
+
await wasLazyResponse(session_id, assistantMsgId);
|
|
187
|
+
|
|
188
|
+
if (lazyAssistant) {
|
|
189
|
+
lazyReminderSent = true;
|
|
190
|
+
broadcastToChannel("session_events", {
|
|
191
|
+
type: "session.warning",
|
|
192
|
+
session_id,
|
|
193
|
+
message: "Model claimed it would edit files but didn't call any tools. Reminding it to actually use the tools.",
|
|
194
|
+
});
|
|
195
|
+
// Insert a synthetic user reminder so the next turn sees it
|
|
196
|
+
const reminderMsgId = uuid();
|
|
197
|
+
await pool.query(
|
|
198
|
+
`INSERT INTO messages (id, session_id, role) VALUES ($1, $2, 'user')`,
|
|
199
|
+
[reminderMsgId, session_id]
|
|
200
|
+
);
|
|
201
|
+
const reminderPartId = uuid();
|
|
202
|
+
await pool.query(
|
|
203
|
+
`INSERT INTO parts (id, message_id, session_id, part_type, data, order_index) VALUES ($1, $2, $3, 'text', $4, 0)`,
|
|
204
|
+
[reminderPartId, reminderMsgId, session_id, JSON.stringify({
|
|
205
|
+
text: "<system-reminder>You described file changes but did not actually invoke any tools. The user cannot see prose descriptions of edits — only real tool calls produce file changes. Call the `write` or `edit` tool now to perform the actions you described. Do not respond with prose; emit a tool call.</system-reminder>",
|
|
206
|
+
synthetic: true,
|
|
207
|
+
})]
|
|
208
|
+
);
|
|
209
|
+
continue; // re-run the loop with the reminder appended
|
|
210
|
+
}
|
|
211
|
+
|
|
177
212
|
if (terminalReasons.has(result.finish_reason) && !result.has_tool_calls) {
|
|
178
213
|
break;
|
|
179
214
|
}
|
|
@@ -222,9 +257,19 @@ async function streamWithRetry(ctx: {
|
|
|
222
257
|
try {
|
|
223
258
|
return await streamOnce(currentCtx);
|
|
224
259
|
} catch (e: any) {
|
|
225
|
-
// On Bad Request with tools, retry without tools
|
|
260
|
+
// On Bad Request with tools, retry without tools BUT log it visibly so
|
|
261
|
+
// the user knows their model can't do tool calls — otherwise they get
|
|
262
|
+
// pure-prose responses with no real edits.
|
|
226
263
|
if (e.message?.includes("Bad Request") && Object.keys(currentCtx.tools).length > 0 && attempt === 0) {
|
|
227
|
-
|
|
264
|
+
logger.error("model_tools_unsupported", {
|
|
265
|
+
event: "tools_stripped",
|
|
266
|
+
metadata: { model: ctx.model_id, provider: ctx.provider_id, error: e.message },
|
|
267
|
+
});
|
|
268
|
+
broadcastToChannel("session_events", {
|
|
269
|
+
type: "session.warning",
|
|
270
|
+
session_id: ctx.session_id,
|
|
271
|
+
message: `Model ${ctx.model_id} rejected tool definitions — running without tools (no file edits possible). Set MODEL_SUPPORTS_TOOLS=false to suppress this warning, or use a tool-capable model.`,
|
|
272
|
+
});
|
|
228
273
|
currentCtx = { ...currentCtx, tools: {} };
|
|
229
274
|
attempt++;
|
|
230
275
|
continue;
|
|
@@ -305,6 +350,36 @@ async function streamOnce(ctx: {
|
|
|
305
350
|
}
|
|
306
351
|
|
|
307
352
|
currentTextContent += text;
|
|
353
|
+
|
|
354
|
+
// Detect models leaking their internal tool-call markers as raw text
|
|
355
|
+
// (gemma, qwen, llama variants do this when the tokenizer template
|
|
356
|
+
// doesn't match the AI SDK's expected format). When we find a complete
|
|
357
|
+
// leaked call, synthesize a real tool execution.
|
|
358
|
+
const leak = extractLeakedToolCall(currentTextContent);
|
|
359
|
+
if (leak) {
|
|
360
|
+
// Strip the leaked markers from the displayed text part
|
|
361
|
+
currentTextContent = currentTextContent.slice(0, leak.startIndex) +
|
|
362
|
+
currentTextContent.slice(leak.endIndex);
|
|
363
|
+
await pool.query(
|
|
364
|
+
`UPDATE parts SET data = $2, updated_at = NOW() WHERE id = $1`,
|
|
365
|
+
[currentTextPartId, JSON.stringify({ text: currentTextContent })]
|
|
366
|
+
);
|
|
367
|
+
|
|
368
|
+
// Execute the synthesized tool call directly via the registry
|
|
369
|
+
await executeSynthesizedToolCall({
|
|
370
|
+
session_id,
|
|
371
|
+
agentId: ctx.agentId,
|
|
372
|
+
assistantMsgId,
|
|
373
|
+
toolName: leak.toolName,
|
|
374
|
+
toolInput: leak.toolInput,
|
|
375
|
+
tools,
|
|
376
|
+
});
|
|
377
|
+
|
|
378
|
+
// Mark the turn as having tool calls so the loop continues
|
|
379
|
+
hasToolCalls = true;
|
|
380
|
+
break;
|
|
381
|
+
}
|
|
382
|
+
|
|
308
383
|
// Broadcast delta to WebSocket part_stream for live streaming
|
|
309
384
|
broadcastToChannel("part_stream", {
|
|
310
385
|
type: "part.delta",
|
|
@@ -519,6 +594,29 @@ async function runCompaction(
|
|
|
519
594
|
|
|
520
595
|
// ─── Message History Builder ──────────────────────────────────────────────────
|
|
521
596
|
|
|
597
|
+
// Heuristic for a "lazy" assistant turn: the message's text parts announce a
// file edit/creation ("I'll create...", "updated foo.ts", ...) even though the
// caller saw no tool call. Reads the text parts of `messageId` from the
// database, joins and lower-cases them, ignores anything shorter than 30
// characters, and reports whether any edit-intent phrase matches.
// `session_id` is accepted but not used by the query.
// NOTE(review): assumes the driver returns `data` as a parsed object, not a
// JSON string — confirm against the DB adapter.
async function wasLazyResponse(session_id: string, messageId: string): Promise<boolean> {
  const { rows } = await pool.query(
    `SELECT data FROM parts WHERE message_id = $1 AND part_type = 'text' ORDER BY order_index ASC`,
    [messageId]
  );

  const prose = rows
    .map((part: any) => part.data?.text || "")
    .join(" ")
    .toLowerCase();

  // Too short to carry a meaningful claim (also covers the empty case).
  if (prose.length < 30) return false;

  // Phrases that commit the model to a file edit it did not actually perform.
  const editIntentPatterns = [
    /\bi['']ll\s+(create|write|update|edit|modify|add|implement|generate)\b/,
    /\bi['']m\s+(creating|writing|updating|editing|modifying|adding|implementing|generating)\b/,
    /\b(creating|writing|updating|editing|generating)\s+(?:the\s+)?(?:file|files|spec)\b/,
    /\bi\s+(?:will|am\s+going\s+to)\s+(create|write|update|edit|implement|generate)\b/,
    /\blet\s+me\s+(create|write|update|edit|implement)\b/,
    /\bhere['']s\s+(?:the\s+)?(?:updated|new)\s+(?:file|version|content)\b/,
    /\.(bone|ts|tsx|js|jsx|py|md|json|yaml|yml|sql|sh|html|css)\b.*\b(updated|created|written|modified|added)\b/,
    /\b(updated|created|written|modified|added)\b.*\.(bone|ts|tsx|js|jsx|py|md|json|yaml|yml|sql|sh|html|css)\b/,
  ];

  for (const pattern of editIntentPatterns) {
    if (pattern.test(prose)) return true;
  }
  return false;
}
|
|
619
|
+
|
|
522
620
|
async function loadMessageHistory(session_id: string): Promise<any[]> {
|
|
523
621
|
const result = await pool.query(
|
|
524
622
|
`SELECT m.id, m.role, m.model_id, m.provider_id, m.tokens_input, m.tokens_output,
|
|
@@ -770,3 +868,122 @@ function supportsTools(model_id: string): boolean {
|
|
|
770
868
|
// Default: try with tools, fall back gracefully on error
|
|
771
869
|
return true;
|
|
772
870
|
}
|
|
871
|
+
|
|
872
|
+
// ─── Synthesized tool-call execution ──────────────────────────────────────────
|
|
873
|
+
|
|
874
|
+
/**
 * Execute a tool call that was recovered from leaked marker text.
 *
 * Steps, in order:
 *  1. Map common aliases (write_file → write, ...) and look the tool up in
 *     `tools`; unknown tools are logged and ignored.
 *  2. Best-effort insert of a `tool_calls` row in state 'running'.
 *  3. Broadcast `tool.requested` on the part_stream channel.
 *  4. Insert a `tool_invocation` part on the assistant message.
 *  5. Publish `ToolCallRequested`, then run the tool's `execute()`.
 *  6. Best-effort update of the part and the `tool_calls` row with the result.
 *  7. Broadcast `tool.completed` / `tool.failed` and publish `ToolCallCompleted`.
 *
 * A tool that throws does not reject this function: the failure is recorded
 * as state "failed" with the error message as output. Persistence failures in
 * steps 2 and 6 are logged rather than silently dropped; the insert in step 4
 * is not guarded and will reject if it fails.
 *
 * @param input.toolName  Name as emitted by the model (may be an alias).
 * @param input.toolInput Arguments handed to the tool's `execute()`.
 * @param input.tools     Registry mapping tool names to tool objects.
 */
async function executeSynthesizedToolCall(input: {
  session_id: string;
  agentId: string;
  assistantMsgId: string;
  toolName: string;
  toolInput: Record<string, any>;
  tools: Record<string, any>;
}): Promise<void> {
  const { session_id, agentId, assistantMsgId, toolName, toolInput, tools } = input;

  // A Map (not a plain object) so a model-supplied name such as "constructor"
  // or "__proto__" can never resolve to an inherited Object.prototype member.
  const aliases = new Map<string, string>([
    ["write_file", "write"],
    ["edit_file", "edit"],
    ["read_file", "read"],
    ["run_command", "bash"],
    ["shell", "bash"],
    ["search_files", "grep"],
  ]);
  const resolvedName = aliases.get(toolName) ?? toolName;
  const tool = tools[resolvedName];
  if (!tool || typeof tool.execute !== "function") {
    logger.warn("synthesized_tool_unknown", { event: "leak", metadata: { toolName, resolvedName } });
    return;
  }

  const describeError = (e: unknown): string => (e instanceof Error ? e.message : String(e));

  const callId = uuid();
  // Persist the tool call record (best-effort: a failure must not block the call).
  try {
    await pool.query(
      `INSERT INTO tool_calls (id, session_id, agent_id, tool_name, tool_input, state) VALUES ($1, $2, $3, $4, $5, 'running')`,
      [callId, session_id, agentId, resolvedName, JSON.stringify(toolInput)]
    );
  } catch (e: unknown) {
    logger.warn("synthesized_tool_persist_failed", {
      event: "leak",
      metadata: { stage: "insert_tool_call", tool_call_id: callId, error: describeError(e) },
    });
  }

  // Broadcast tool.requested so the TUI shows "← Edit foo.bone"
  broadcastToChannel("part_stream", {
    type: "tool.requested",
    session_id,
    tool_call_id: callId,
    tool_name: resolvedName,
    tool_input: toolInput,
  });

  // Persist as a tool_invocation part on the assistant message
  const partId = uuid();
  await pool.query(
    `INSERT INTO parts (id, message_id, session_id, part_type, data, order_index) VALUES ($1, $2, $3, 'tool_invocation', $4, 0)`,
    [partId, assistantMsgId, session_id, JSON.stringify({ tool_call_id: callId, tool_name: resolvedName, args: toolInput, state: "running" })]
  );

  // Emit ToolCallRequested so the same machinery as a real tool call kicks in.
  // Publishing is best-effort; a bus failure must not prevent execution.
  await eventBus.publish("ToolCallRequested", {
    tool_call_id: callId,
    session_id,
    agent_id: agentId,
    tool_name: resolvedName,
    tool_input: toolInput,
    requested_at: new Date().toISOString(),
  }, "AgentLoop").catch(() => {});

  const startMs = Date.now();
  let success = true;
  let output = "";
  try {
    const result = await tool.execute(toolInput, { toolCallId: callId });
    output = typeof result === "string" ? result : (result?.output || "");
  } catch (e: any) {
    // A throwing tool is reported as a failed call, not propagated.
    success = false;
    output = e?.message || "tool execution failed";
  }

  // Record the outcome on both the part and the tool_calls row.
  const durationMs = Date.now() - startMs;
  const finalState = success ? "done" : "failed";
  try {
    await pool.query(
      `UPDATE parts SET data = $2, updated_at = NOW() WHERE id = $1`,
      [partId, JSON.stringify({
        tool_call_id: callId,
        tool_name: resolvedName,
        args: toolInput,
        state: finalState,
        output,
      })]
    );
    await pool.query(
      `UPDATE tool_calls SET state = $2, tool_output = $3, duration_ms = $4, updated_at = NOW() WHERE id = $1`,
      [callId, finalState, JSON.stringify({ output }), durationMs]
    );
  } catch (e: unknown) {
    logger.warn("synthesized_tool_persist_failed", {
      event: "leak",
      metadata: { stage: "update_result", tool_call_id: callId, error: describeError(e) },
    });
  }

  // Broadcast completion
  broadcastToChannel("part_stream", {
    type: success ? "tool.completed" : "tool.failed",
    session_id,
    tool_call_id: callId,
    tool_name: resolvedName,
    tool_input: toolInput,
    duration_ms: durationMs,
    ...(success ? {} : { error: output }),
  });

  await eventBus.publish("ToolCallCompleted", {
    tool_call_id: callId,
    session_id,
    tool_name: resolvedName,
    duration_ms: durationMs,
    completed_at: new Date().toISOString(),
  }, "AgentLoop").catch(() => {});
}
|