bonecode 1.3.0 → 1.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32) hide show
  1. package/README.md +42 -0
  2. package/compat/opencode_adapter.ts +69 -8
  3. package/dist/compat/opencode_adapter.js +63 -7
  4. package/dist/compat/opencode_adapter.js.map +1 -1
  5. package/dist/src/db_adapter.js +30 -0
  6. package/dist/src/db_adapter.js.map +1 -1
  7. package/dist/src/engine/session/build_mode.d.ts +83 -0
  8. package/dist/src/engine/session/build_mode.js +800 -0
  9. package/dist/src/engine/session/build_mode.js.map +1 -0
  10. package/dist/src/engine/session/build_mode_helpers.d.ts +6 -0
  11. package/dist/src/engine/session/build_mode_helpers.js +61 -0
  12. package/dist/src/engine/session/build_mode_helpers.js.map +1 -0
  13. package/dist/src/engine/session/leaked_tool_call.d.ts +49 -0
  14. package/dist/src/engine/session/leaked_tool_call.js +174 -0
  15. package/dist/src/engine/session/leaked_tool_call.js.map +1 -0
  16. package/dist/src/engine/session/prompt/bonescript.txt +11 -0
  17. package/dist/src/engine/session/prompt.js +173 -2
  18. package/dist/src/engine/session/prompt.js.map +1 -1
  19. package/dist/src/tui.js +146 -9
  20. package/dist/src/tui.js.map +1 -1
  21. package/package.json +1 -1
  22. package/scripts/debug_extract.js +40 -0
  23. package/scripts/test_build_fallback.js +221 -0
  24. package/scripts/test_build_mode.js +301 -0
  25. package/scripts/test_leaked_tool_call.js +269 -0
  26. package/src/db_adapter.ts +29 -0
  27. package/src/engine/session/build_mode.ts +906 -0
  28. package/src/engine/session/build_mode_helpers.ts +72 -0
  29. package/src/engine/session/leaked_tool_call.ts +166 -0
  30. package/src/engine/session/prompt/bonescript.txt +11 -0
  31. package/src/engine/session/prompt.ts +219 -2
  32. package/src/tui.ts +147 -9
@@ -0,0 +1,72 @@
1
+ /**
2
+ * Provider/model factory used by build_mode.ts for structured-output prompts.
3
+ * Mirrors the factory in prompt.ts so the build orchestrator can issue
4
+ * non-streaming model calls without depending on the streaming agent loop.
5
+ */
6
+
7
+ import { createOpenAI } from "@ai-sdk/openai";
8
+ import { createAnthropic } from "@ai-sdk/anthropic";
9
+ import { createGoogleGenerativeAI } from "@ai-sdk/google";
10
+
11
/**
 * Build an AI SDK language-model handle for a provider/model pair.
 *
 * Resolution rules:
 * - `provider_id` is matched case-insensitively.
 * - The pseudo-provider `"local"` is redirected to `DEFAULT_PROVIDER`
 *   (default `"openai_compatible"`) and `DEFAULT_MODEL` (default `model_id`).
 * - The API key comes from `<PROVIDER>_API_KEY`, then `OPENAI_API_KEY`, then
 *   the placeholder `"not-needed"` (presumably for keyless local endpoints).
 * - Any provider without a dedicated case is treated as OpenAI-compatible.
 *
 * @param provider_id Provider identifier, e.g. "openai", "anthropic", "local".
 * @param model_id Model identifier handed to the selected provider factory.
 * @returns The model object produced by the selected provider factory.
 */
export function getLanguageModel(provider_id: string, model_id: string): any {
  const pid = provider_id.toLowerCase();
  const isLocal = pid === "local";

  const resolvedProvider = isLocal
    ? (process.env.DEFAULT_PROVIDER || "openai_compatible").toLowerCase()
    : pid;
  const resolvedModel = isLocal
    ? (process.env.DEFAULT_MODEL || model_id)
    : model_id;

  const envPrefix = resolvedProvider.toUpperCase();
  const apiKey = (
    process.env[`${envPrefix}_API_KEY`] ||
    process.env.OPENAI_API_KEY ||
    "not-needed"
  );
  // Base URL configured for this exact provider, if any.
  const providerBaseUrl = process.env[`${envPrefix}_BASE_URL`];
  // OpenAI-compatible endpoints may additionally fall back to OPENAI_BASE_URL.
  const openAiStyleBaseUrl = providerBaseUrl || process.env.OPENAI_BASE_URL;

  switch (resolvedProvider) {
    case "anthropic":
      // Only ANTHROPIC_BASE_URL is honoured here. Falling back to
      // OPENAI_BASE_URL would send Anthropic-protocol requests to an
      // OpenAI-style endpoint (e.g. a local server) that cannot serve them.
      return createAnthropic({ apiKey, baseURL: providerBaseUrl })(resolvedModel);

    case "google":
      return createGoogleGenerativeAI({ apiKey })(resolvedModel);

    case "groq": {
      // Loaded via require inside the case, so the package is only touched
      // when this provider is actually selected.
      const { createGroq } = require("@ai-sdk/groq");
      return createGroq({ apiKey: process.env.GROQ_API_KEY || apiKey })(resolvedModel);
    }

    case "cerebras": {
      const { createCerebras } = require("@ai-sdk/cerebras");
      return createCerebras({ apiKey: process.env.CEREBRAS_API_KEY || apiKey })(resolvedModel);
    }

    case "deepseek":
      return createOpenAI({
        apiKey: process.env.DEEPSEEK_API_KEY || apiKey,
        baseURL: "https://api.deepseek.com/v1",
      })(resolvedModel);

    case "openrouter": {
      const { createOpenRouter } = require("@openrouter/ai-sdk-provider");
      return createOpenRouter({
        apiKey: process.env.OPENROUTER_API_KEY || apiKey,
      })(resolvedModel);
    }

    case "ollama":
      return createOpenAI({
        apiKey: "ollama",
        baseURL: process.env.OLLAMA_BASE_URL || "http://localhost:11434/v1",
      })(resolvedModel);

    case "openai":
    case "openai_compatible":
    default:
      return createOpenAI({ apiKey, baseURL: openAiStyleBaseUrl })(resolvedModel);
  }
}
@@ -0,0 +1,166 @@
1
+ /**
2
+ * Pure, side-effect-free parser for leaked tool-call markers.
3
+ *
4
+ * Some local models (gemma, qwen, llama variants) emit their internal
5
+ * tool-call markers as raw text instead of producing structured tool_call
6
+ * events. The AI SDK's parser misses these, so the model's prose appears in
7
+ * the output but no tool ever runs.
8
+ *
9
+ * This module recovers the intended call by pattern-matching the leaked text.
10
+ * No DB, no network, no global state — pure functions only, fully testable.
11
+ *
12
+ * Patterns recognized (across multiple template formats):
13
+ * <|tool_call|>{"name":"write","arguments":{...}}<|/tool_call|>
14
+ * <|tool_call>name:write{...args...}<tool_call|>
15
+ * <tool_call>{"name":"write","arguments":{...}}</tool_call>
16
+ * <function_call>{"name":"write","arguments":{...}}</function_call>
17
+ * ```tool_code\nwrite(path="x", content="y")\n```
18
+ * <|python_tag|>write({"path": "x"})<|/python_tag|>
19
+ */
20
+
21
/**
 * A tool call recovered from raw model text, together with the span of the
 * text that contained the leaked marker.
 */
export interface LeakedToolCall {
  /** Tool name exactly as the model wrote it (no alias resolution applied). */
  toolName: string;
  /** Arguments parsed out of the leaked marker body. */
  toolInput: Record<string, any>;
  /** Offset in the scanned text where the leaked marker begins. */
  startIndex: number;
  /** Offset just past the end of the leaked marker (exclusive). */
  endIndex: number;
}
27
+
28
/**
 * Marker formats recognised as leaked tool calls, in priority order. Capture
 * group 1 is always the body handed to `parseLeakedBody`.
 *
 * The patterns are global so every occurrence can be scanned with `matchAll`;
 * `matchAll` works on an internal copy, so the shared instances keep no state
 * between calls.
 */
const LEAKED_CALL_PATTERNS: readonly RegExp[] = [
  // <|tool_call|>...<|/tool_call|> and the <|tool_call>...<tool_call|> variant
  /<\|tool_call\|?>([\s\S]*?)<\|?\/?tool_call\|?>/gi,
  // <tool_call>...</tool_call>
  /<tool_call>([\s\S]*?)<\/?tool_call>/gi,
  // <function_call>...</function_call>
  /<function_call>([\s\S]*?)<\/?function_call>/gi,
  // <|python_tag|>...<|/python_tag|>
  /<\|python_tag\|>([\s\S]*?)<\|?\/?python_tag\|?>/gi,
  // ```tool_code ... ``` fenced blocks
  /```(?:tool_code|tool_call|function|python)\s*\n([\s\S]*?)\n```/gi,
];

/**
 * Find the first leaked tool-call marker in `text` whose body can be parsed.
 *
 * Patterns are tried in priority order and, within a pattern, occurrences are
 * tried left to right. A block whose body does not parse is skipped rather
 * than ending the search, so a malformed earlier block no longer hides a valid
 * later block of the same format.
 *
 * @param text Raw assistant text to scan.
 * @returns The recovered call plus the `[startIndex, endIndex)` span of the
 *          marker in `text`, or `null` when nothing parseable is found.
 */
export function extractLeakedToolCall(text: string): LeakedToolCall | null {
  for (const pattern of LEAKED_CALL_PATTERNS) {
    for (const match of text.matchAll(pattern)) {
      if (match.index === undefined) continue;
      const parsed = parseLeakedBody(match[1] ?? "");
      if (parsed) {
        return {
          ...parsed,
          startIndex: match.index,
          endIndex: match.index + match[0].length,
        };
      }
    }
  }
  return null;
}
57
+
58
/**
 * Parse the body of a leaked tool-call block. Formats are tried in order:
 *   1. JSON: {"name": "write", "arguments": {...}}, {"tool":"write","args":{...}},
 *      or the nested shape {"function": {"name": "write", "arguments": "..."}}
 *   2. Function-call style: write(path="x", content="y") or write({"path": "x"})
 *   3. Pseudo-syntax: call:write{path:"x"}
 *
 * @param body Text found between the leaked markers.
 * @returns The tool name and its arguments, or `null` when no format matches.
 *          For the JSON form `toolInput` is always a plain object: arguments
 *          that decode to an array or primitive are replaced by `{}`.
 */
export function parseLeakedBody(body: string): { toolName: string; toolInput: Record<string, any> } | null {
  if (!body) return null;
  const trimmed = body.trim();

  const isPlainObject = (value: unknown): value is Record<string, any> =>
    typeof value === "object" && value !== null && !Array.isArray(value);

  // 1. JSON object carrying a name and (optionally) arguments.
  const json = safeParseJson(trimmed);
  if (isPlainObject(json)) {
    // Nested form: name and arguments live under `function`.
    const nested = isPlainObject(json.function) ? json.function : null;
    const name = json.name || json.tool || json.tool_name || (nested ? nested.name : json.function);
    const args =
      json.arguments || json.args || json.parameters || json.input ||
      (nested ? (nested.arguments || nested.parameters) : undefined) ||
      {};
    if (typeof name === "string" && name.length > 0) {
      // Arguments are sometimes a JSON-encoded string rather than an object.
      const decoded = typeof args === "string" ? safeParseJson(args) : args;
      // Never hand callers an array/primitive where an object is promised.
      return { toolName: name, toolInput: isPlainObject(decoded) ? decoded : {} };
    }
  }

  // 2. Function-call style: name(arg1=val1, arg2="val2")
  const fnMatch = trimmed.match(/^([a-zA-Z_][\w]*)\s*\(([\s\S]*)\)\s*$/);
  if (fnMatch) {
    const toolName = fnMatch[1];
    const argsStr = fnMatch[2];
    // A single JSON object argument wins: write({"path": "x"})
    const innerJson = safeParseJson(argsStr);
    if (isPlainObject(innerJson)) {
      return { toolName, toolInput: innerJson };
    }
    const toolInput = parseKwargs(argsStr);
    if (toolInput) return { toolName, toolInput };
  }

  // 3. Pseudo-syntax: call:name{key:"val", ...} or name:foo{...}
  const callMatch = trimmed.match(/(?:call:|name:|tool:|function:)([a-zA-Z_][\w]*)\s*\{([\s\S]*)\}\s*/i);
  if (callMatch) {
    const toolName = callMatch[1];
    const innerJson = "{" + callMatch[2] + "}";
    // Strict JSON first, then the lenient key:value reader.
    const toolInput = safeParseJson(innerJson) || parseLooseObject(callMatch[2]);
    if (toolInput) return { toolName, toolInput };
  }

  return null;
}
106
+
107
/**
 * JSON.parse that never throws.
 *
 * @param s Candidate JSON text.
 * @returns The decoded value, or `null` when `s` is not valid JSON. A valid
 *          JSON `null` literal also yields `null`.
 */
export function safeParseJson(s: string): any | null {
  let decoded: any = null;
  try {
    decoded = JSON.parse(s);
  } catch {
    decoded = null;
  }
  return decoded;
}
114
+
115
/**
 * Parse Python-style kwargs from a function-call body:
 *   path="x", content="y", count=42
 * Template escape markers of the form `<|"|>` and `<|'|>` are first converted
 * back into plain quotes.
 *
 * Quoted values have their backslash escapes decoded: `\n`, `\r`, `\t`, `\b`,
 * `\f` and `\uXXXX` become the corresponding characters, and any other escaped
 * character stands for itself. Multi-line `content="..."` arguments therefore
 * keep their line breaks.
 *
 * @param s Text between the parentheses of a function-call style tool call.
 * @returns `{}` for a blank argument list, the parsed key/value map when at
 *          least one `key=value` pair is found, otherwise `null`.
 */
export function parseKwargs(s: string): Record<string, any> | null {
  if (!s.trim()) return {};
  const cleaned = s.replace(/<\|"\|>/g, '"').replace(/<\|'\|>/g, "'");

  // Strip the surrounding quotes and decode backslash escapes.
  const decodeQuoted = (quoted: string): string =>
    quoted.slice(1, -1).replace(/\\(u[0-9a-fA-F]{4}|.)/g, (_whole: string, esc: string) => {
      if (esc.length === 5) return String.fromCharCode(parseInt(esc.slice(1), 16));
      switch (esc) {
        case "n": return "\n";
        case "r": return "\r";
        case "t": return "\t";
        case "b": return "\b";
        case "f": return "\f";
        default: return esc; // \" \' \\ and unknown escapes: keep the character
      }
    });

  const result: Record<string, any> = {};
  const re = /([a-zA-Z_][\w]*)\s*=\s*("([^"\\]|\\.)*"|'([^'\\]|\\.)*'|-?\d+(?:\.\d+)?|true|false|null)/g;
  let m: RegExpExecArray | null;
  let matched = false;
  while ((m = re.exec(cleaned)) !== null) {
    matched = true;
    const key = m[1];
    const raw = m[2];
    let value: any;
    if (raw === "true") value = true;
    else if (raw === "false") value = false;
    else if (raw === "null") value = null;
    else if (/^-?\d/.test(raw)) value = parseFloat(raw);
    else value = decodeQuoted(raw);
    result[key] = value;
  }
  return matched ? result : null;
}
141
+
142
/**
 * Parse a loose key:value object body (no surrounding braces, no enforced
 * JSON quoting). Used for pseudo-syntax fallbacks like:
 *   file_path:<|"|>medieval_market.bone<|"|>
 *
 * Keys may be separated from values by `:` or `=`. Values may be quoted
 * strings, numbers, `true`/`false`/`null`, or bare tokens that run up to the
 * next whitespace, comma or `}`.
 *
 * - Quoted values have backslash escapes decoded (`\n`, `\r`, `\t`, `\b`,
 *   `\f`, `\uXXXX`; any other escaped character stands for itself).
 * - A bare token is only treated as a number or literal when the WHOLE token
 *   is one. `2024_report.txt`, `1.2.3` and `truest` stay strings instead of
 *   being cut down to `2024`, `1.2` and `true`.
 *
 * @param s Text found between the braces of a pseudo-syntax tool call.
 * @returns The parsed key/value map, or `null` when no pair is found.
 */
export function parseLooseObject(s: string): Record<string, any> | null {
  const cleaned = s.replace(/<\|"\|>/g, '"').replace(/<\|'\|>/g, "'");

  // Strip the surrounding quotes and decode backslash escapes.
  const decodeQuoted = (quoted: string): string =>
    quoted.slice(1, -1).replace(/\\(u[0-9a-fA-F]{4}|.)/g, (_whole: string, esc: string) => {
      if (esc.length === 5) return String.fromCharCode(parseInt(esc.slice(1), 16));
      switch (esc) {
        case "n": return "\n";
        case "r": return "\r";
        case "t": return "\t";
        case "b": return "\b";
        case "f": return "\f";
        default: return esc; // \" \' \\ and unknown escapes: keep the character
      }
    });

  const result: Record<string, any> = {};
  // The lookahead after the number/literal alternatives requires the token to
  // end there; otherwise the bare-token alternative captures the full value.
  const re = /([a-zA-Z_][\w]*)\s*[:=]\s*("([^"\\]|\\.)*"|'([^'\\]|\\.)*'|(?:-?\d+(?:\.\d+)?|true|false|null)(?![^\s,}])|[^\s,}]+)/g;
  let m: RegExpExecArray | null;
  let matched = false;
  while ((m = re.exec(cleaned)) !== null) {
    matched = true;
    const key = m[1];
    const raw = m[2];
    let value: any = raw;
    if (raw === "true") value = true;
    else if (raw === "false") value = false;
    else if (raw === "null") value = null;
    else if (/^-?\d+(?:\.\d+)?$/.test(raw)) value = parseFloat(raw);
    else if (raw.startsWith('"') || raw.startsWith("'")) value = decodeQuoted(raw);
    result[key] = value;
  }
  return matched ? result : null;
}
@@ -31,6 +31,17 @@ Use plain Python/TypeScript/etc. only for:
31
31
 
32
32
  If the request is ambiguous (e.g. "a 2D market simulation"), ask the user: "Is this a self-contained simulation script (plain code) or a backend service with persistence (BoneScript)?"
33
33
 
34
+ ## Build mode
35
+
36
+ When the user starts a session with a project-scoped prompt ("build me X", "create a full Y"), BoneCode runs you in **build mode**. Build mode is a state machine: clarify → plan → execute → verify → done. You will receive structured prompts at each stage. Specifically:
37
+
38
+ - **Clarify stage**: you'll be asked to either propose a design document (JSON) or ask 1-3 questions. Be concrete. Don't ramble.
39
+ - **Plan stage**: you'll be asked for a JSON todo list. Each todo must be a single concrete file action.
40
+ - **Execute stage**: you'll receive one todo at a time. **YOU MUST CALL TOOLS** — `write`, `edit`, `bash`. Prose-only responses are detected and rejected. The system will inject a reminder if you describe edits without calling tools.
41
+ - **Verify stage**: for each requirement, you'll be asked yes/no whether it's satisfied. Be honest. If a requirement is not yet met, say so — the orchestrator will create fix-up tasks.
42
+
43
+ The user can resume a build session at any time. Build state is persisted.
44
+
34
45
  ## BoneScript syntax — authoritative reference
35
46
 
36
47
  ### `system` block
@@ -42,6 +42,7 @@ import { buildCompactionSummary } from "./compaction_logic";
42
42
  import { getSystemPrompt } from "./system_prompt";
43
43
  import { loadInstructionFiles } from "./instruction_loader";
44
44
  import { buildToolRegistry } from "./tool_registry";
45
+ import { extractLeakedToolCall } from "./leaked_tool_call";
45
46
 
46
47
  // ─── Types ────────────────────────────────────────────────────────────────────
47
48
 
@@ -96,6 +97,7 @@ export async function runAgentLoop(input: PromptInput): Promise<LoopResult> {
96
97
 
97
98
  const stats = { tokens_in: 0, tokens_out: 0, cost: 0, compacted: false };
98
99
  let turn = 0;
100
+ let lazyReminderSent = false;
99
101
  let lastFinishReason = "unknown";
100
102
 
101
103
  try {
@@ -174,6 +176,39 @@ export async function runAgentLoop(input: PromptInput): Promise<LoopResult> {
174
176
  // 3. "content-filter" = blocked — stop
175
177
  // 4. "tool-calls" with no actual tool calls = model confused — stop
176
178
  const terminalReasons = new Set(["stop", "length", "content-filter", "end-turn"]);
179
+
180
+ // Detect "lazy assistant" — the model claims it's editing/creating files
181
+ // in prose but never actually called a tool. Common with non-tool-tuned
182
+ // local models. Once per session, push a synthetic reminder and re-run.
183
+ const lazyAssistant = !result.has_tool_calls &&
184
+ Object.keys(tools).length > 0 &&
185
+ !lazyReminderSent &&
186
+ await wasLazyResponse(session_id, assistantMsgId);
187
+
188
+ if (lazyAssistant) {
189
+ lazyReminderSent = true;
190
+ broadcastToChannel("session_events", {
191
+ type: "session.warning",
192
+ session_id,
193
+ message: "Model claimed it would edit files but didn't call any tools. Reminding it to actually use the tools.",
194
+ });
195
+ // Insert a synthetic user reminder so the next turn sees it
196
+ const reminderMsgId = uuid();
197
+ await pool.query(
198
+ `INSERT INTO messages (id, session_id, role) VALUES ($1, $2, 'user')`,
199
+ [reminderMsgId, session_id]
200
+ );
201
+ const reminderPartId = uuid();
202
+ await pool.query(
203
+ `INSERT INTO parts (id, message_id, session_id, part_type, data, order_index) VALUES ($1, $2, $3, 'text', $4, 0)`,
204
+ [reminderPartId, reminderMsgId, session_id, JSON.stringify({
205
+ text: "<system-reminder>You described file changes but did not actually invoke any tools. The user cannot see prose descriptions of edits — only real tool calls produce file changes. Call the `write` or `edit` tool now to perform the actions you described. Do not respond with prose; emit a tool call.</system-reminder>",
206
+ synthetic: true,
207
+ })]
208
+ );
209
+ continue; // re-run the loop with the reminder appended
210
+ }
211
+
177
212
  if (terminalReasons.has(result.finish_reason) && !result.has_tool_calls) {
178
213
  break;
179
214
  }
@@ -222,9 +257,19 @@ async function streamWithRetry(ctx: {
222
257
  try {
223
258
  return await streamOnce(currentCtx);
224
259
  } catch (e: any) {
225
- // On Bad Request with tools, retry without tools
260
+ // On Bad Request with tools, retry without tools BUT log it visibly so
261
+ // the user knows their model can't do tool calls — otherwise they get
262
+ // pure-prose responses with no real edits.
226
263
  if (e.message?.includes("Bad Request") && Object.keys(currentCtx.tools).length > 0 && attempt === 0) {
227
- // Local model doesn't support function calling — silently retry without tools
264
+ logger.error("model_tools_unsupported", {
265
+ event: "tools_stripped",
266
+ metadata: { model: ctx.model_id, provider: ctx.provider_id, error: e.message },
267
+ });
268
+ broadcastToChannel("session_events", {
269
+ type: "session.warning",
270
+ session_id: ctx.session_id,
271
+ message: `Model ${ctx.model_id} rejected tool definitions — running without tools (no file edits possible). Set MODEL_SUPPORTS_TOOLS=false to suppress this warning, or use a tool-capable model.`,
272
+ });
228
273
  currentCtx = { ...currentCtx, tools: {} };
229
274
  attempt++;
230
275
  continue;
@@ -305,6 +350,36 @@ async function streamOnce(ctx: {
305
350
  }
306
351
 
307
352
  currentTextContent += text;
353
+
354
+ // Detect models leaking their internal tool-call markers as raw text
355
+ // (gemma, qwen, llama variants do this when the tokenizer template
356
+ // doesn't match the AI SDK's expected format). When we find a complete
357
+ // leaked call, synthesize a real tool execution.
358
+ const leak = extractLeakedToolCall(currentTextContent);
359
+ if (leak) {
360
+ // Strip the leaked markers from the displayed text part
361
+ currentTextContent = currentTextContent.slice(0, leak.startIndex) +
362
+ currentTextContent.slice(leak.endIndex);
363
+ await pool.query(
364
+ `UPDATE parts SET data = $2, updated_at = NOW() WHERE id = $1`,
365
+ [currentTextPartId, JSON.stringify({ text: currentTextContent })]
366
+ );
367
+
368
+ // Execute the synthesized tool call directly via the registry
369
+ await executeSynthesizedToolCall({
370
+ session_id,
371
+ agentId: ctx.agentId,
372
+ assistantMsgId,
373
+ toolName: leak.toolName,
374
+ toolInput: leak.toolInput,
375
+ tools,
376
+ });
377
+
378
+ // Mark the turn as having tool calls so the loop continues
379
+ hasToolCalls = true;
380
+ break;
381
+ }
382
+
308
383
  // Broadcast delta to WebSocket part_stream for live streaming
309
384
  broadcastToChannel("part_stream", {
310
385
  type: "part.delta",
@@ -519,6 +594,29 @@ async function runCompaction(
519
594
 
520
595
  // ─── Message History Builder ──────────────────────────────────────────────────
521
596
 
597
/**
 * Phrases that imply the assistant is committing to a file edit. They are
 * matched against lower-cased text, so no `i` flag is needed. Both the ASCII
 * apostrophe and the typographic one (U+2019) are accepted, because model
 * output frequently contains "I’ll" / "here’s" with a curly quote.
 */
const EDIT_INTENT_PATTERNS: readonly RegExp[] = [
  /\bi['\u2019]ll\s+(create|write|update|edit|modify|add|implement|generate)\b/,
  /\bi['\u2019]m\s+(creating|writing|updating|editing|modifying|adding|implementing|generating)\b/,
  /\b(creating|writing|updating|editing|generating)\s+(?:the\s+)?(?:file|files|spec)\b/,
  /\bi\s+(?:will|am\s+going\s+to)\s+(create|write|update|edit|implement|generate)\b/,
  /\blet\s+me\s+(create|write|update|edit|implement)\b/,
  /\bhere['\u2019]s\s+(?:the\s+)?(?:updated|new)\s+(?:file|version|content)\b/,
  /\.(bone|ts|tsx|js|jsx|py|md|json|yaml|yml|sql|sh|html|css)\b.*\b(updated|created|written|modified|added)\b/,
  /\b(updated|created|written|modified|added)\b.*\.(bone|ts|tsx|js|jsx|py|md|json|yaml|yml|sql|sh|html|css)\b/,
];

/**
 * Detect a "lazy" response: assistant text that says it will edit or create
 * files. The caller pairs this with its own "no tool call happened" check.
 *
 * Loads the text parts of the given message in `order_index` order, joins them
 * with spaces, lower-cases the result and tests it against the edit-intent
 * phrases. Text shorter than 30 characters is never considered lazy.
 *
 * @param session_id Accepted for call-site symmetry; not used by the query.
 * @param messageId Id of the assistant message whose text parts are inspected.
 * @returns `true` when the message text contains an edit-intent phrase.
 */
async function wasLazyResponse(session_id: string, messageId: string): Promise<boolean> {
  const r = await pool.query(
    `SELECT data FROM parts WHERE message_id = $1 AND part_type = 'text' ORDER BY order_index ASC`,
    [messageId]
  );
  // NOTE(review): assumes the driver returns `data` already decoded into an
  // object with a `text` field; confirm against the DB adapter.
  const text = r.rows.map((row: any) => row.data?.text || "").join(" ").toLowerCase();
  if (!text || text.length < 30) return false;
  return EDIT_INTENT_PATTERNS.some(re => re.test(text));
}
619
+
522
620
  async function loadMessageHistory(session_id: string): Promise<any[]> {
523
621
  const result = await pool.query(
524
622
  `SELECT m.id, m.role, m.model_id, m.provider_id, m.tokens_input, m.tokens_output,
@@ -770,3 +868,122 @@ function supportsTools(model_id: string): boolean {
770
868
  // Default: try with tools, fall back gracefully on error
771
869
  return true;
772
870
  }
871
+
872
+ // ─── Synthesized tool-call execution ──────────────────────────────────────────
873
+
874
/**
 * Execute a synthesized tool call recovered from leaked marker text.
 *
 * Steps, in order:
 *   1. Resolve common tool-name aliases and look up the tool in `tools`.
 *   2. Insert a `tool_calls` row in state 'running' (best effort).
 *   3. Broadcast `tool.requested` on the `part_stream` channel.
 *   4. Insert a `tool_invocation` part on the assistant message.
 *   5. Publish `ToolCallRequested`, then run the tool's `execute()`.
 *   6. Store the outcome on the part and the `tool_calls` row (best effort).
 *   7. Broadcast `tool.completed` / `tool.failed` and publish
 *      `ToolCallCompleted`.
 *
 * A failing tool does not throw: the error message becomes the stored output
 * and the call is marked "failed". Best-effort persistence failures are logged
 * as warnings instead of being dropped silently.
 *
 * @param input.session_id Session the call belongs to.
 * @param input.agentId Agent recorded on the tool-call row and request event.
 * @param input.assistantMsgId Message that receives the tool_invocation part.
 * @param input.toolName Tool name as written by the model (aliases allowed).
 * @param input.toolInput Arguments passed to the tool's `execute()`.
 * @param input.tools Registry of available tools keyed by name.
 */
async function executeSynthesizedToolCall(input: {
  session_id: string;
  agentId: string;
  assistantMsgId: string;
  toolName: string;
  toolInput: Record<string, any>;
  tools: Record<string, any>;
}): Promise<void> {
  const { session_id, agentId, assistantMsgId, toolName, toolInput, tools } = input;

  // Map common aliases (write_file → write, edit_file → edit, etc.)
  const aliases: Record<string, string> = {
    write_file: "write",
    edit_file: "edit",
    read_file: "read",
    run_command: "bash",
    shell: "bash",
    search_files: "grep",
  };
  // toolName is model-generated text. Own-property checks keep names such as
  // "constructor" or "toString" from resolving to inherited Object members.
  const hasOwn = (obj: object, key: string): boolean =>
    Object.prototype.hasOwnProperty.call(obj, key);
  const resolvedName = hasOwn(aliases, toolName) ? aliases[toolName] : toolName;
  const tool = hasOwn(tools, resolvedName) ? tools[resolvedName] : undefined;
  if (!tool || !tool.execute) {
    logger.warn("synthesized_tool_unknown", { event: "leak", metadata: { toolName, resolvedName } });
    return;
  }

  const callId = uuid();
  // Persist the tool call record. Best effort: a bookkeeping failure must not
  // prevent the tool from running, but it should be visible in the logs.
  try {
    await pool.query(
      `INSERT INTO tool_calls (id, session_id, agent_id, tool_name, tool_input, state) VALUES ($1, $2, $3, $4, $5, 'running')`,
      [callId, session_id, agentId, resolvedName, JSON.stringify(toolInput)]
    );
  } catch (e: any) {
    logger.warn("synthesized_tool_persist_failed", {
      event: "leak",
      metadata: { stage: "insert_tool_call", tool_call_id: callId, resolvedName, error: e?.message },
    });
  }

  // Broadcast tool.requested so the TUI shows "← Edit foo.bone"
  broadcastToChannel("part_stream", {
    type: "tool.requested",
    session_id,
    tool_call_id: callId,
    tool_name: resolvedName,
    tool_input: toolInput,
  });

  // Persist as a tool_invocation part on the assistant message
  const partId = uuid();
  await pool.query(
    `INSERT INTO parts (id, message_id, session_id, part_type, data, order_index) VALUES ($1, $2, $3, 'tool_invocation', $4, 0)`,
    [partId, assistantMsgId, session_id, JSON.stringify({ tool_call_id: callId, tool_name: resolvedName, args: toolInput, state: "running" })]
  );

  // Emit ToolCallRequested so the same machinery as a real tool call kicks
  // in. A publish failure is deliberately ignored.
  await eventBus.publish("ToolCallRequested", {
    tool_call_id: callId,
    session_id,
    agent_id: agentId,
    tool_name: resolvedName,
    tool_input: toolInput,
    requested_at: new Date().toISOString(),
  }, "AgentLoop").catch(() => {});

  // Run the actual tool; failures are captured, not rethrown.
  const startMs = Date.now();
  let success = true;
  let output = "";
  try {
    const result = await tool.execute(toolInput, { toolCallId: callId });
    output = typeof result === "string" ? result : (result?.output || "");
  } catch (e: any) {
    success = false;
    output = e?.message || "tool execution failed";
  }

  // Update the part and the tool_calls row with the result (best effort).
  const durationMs = Date.now() - startMs;
  const finalState = success ? "done" : "failed";
  try {
    await pool.query(
      `UPDATE parts SET data = $2, updated_at = NOW() WHERE id = $1`,
      [partId, JSON.stringify({
        tool_call_id: callId,
        tool_name: resolvedName,
        args: toolInput,
        state: finalState,
        output,
      })]
    );
    await pool.query(
      `UPDATE tool_calls SET state = $2, tool_output = $3, duration_ms = $4, updated_at = NOW() WHERE id = $1`,
      [callId, finalState, JSON.stringify({ output }), durationMs]
    );
  } catch (e: any) {
    logger.warn("synthesized_tool_persist_failed", {
      event: "leak",
      metadata: { stage: "update_result", tool_call_id: callId, resolvedName, error: e?.message },
    });
  }

  // Broadcast completion
  broadcastToChannel("part_stream", {
    type: success ? "tool.completed" : "tool.failed",
    session_id,
    tool_call_id: callId,
    tool_name: resolvedName,
    tool_input: toolInput,
    duration_ms: durationMs,
    ...(success ? {} : { error: output }),
  });

  await eventBus.publish("ToolCallCompleted", {
    tool_call_id: callId,
    session_id,
    tool_name: resolvedName,
    duration_ms: durationMs,
    completed_at: new Date().toISOString(),
  }, "AgentLoop").catch(() => {});
}