bonecode 1.3.0 → 1.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,72 @@
1
+ /**
2
+ * Provider/model factory used by build_mode.ts for structured-output prompts.
3
+ * Mirrors the factory in prompt.ts so the build orchestrator can issue
4
+ * non-streaming model calls without depending on the streaming agent loop.
5
+ */
6
+
7
+ import { createOpenAI } from "@ai-sdk/openai";
8
+ import { createAnthropic } from "@ai-sdk/anthropic";
9
+ import { createGoogleGenerativeAI } from "@ai-sdk/google";
10
+
11
/**
 * Builds an AI SDK language-model handle for a provider/model pair.
 *
 * Resolution rules:
 * - Provider ids are matched case-insensitively.
 * - `"local"` is an alias: the provider is taken from `DEFAULT_PROVIDER`
 *   (falling back to `"openai_compatible"`) and the model from
 *   `DEFAULT_MODEL` (falling back to `model_id`).
 * - Credentials and endpoints are read from `<PROVIDER>_API_KEY` and
 *   `<PROVIDER>_BASE_URL`.
 * - `OPENAI_API_KEY` / `OPENAI_BASE_URL` are used as fallbacks ONLY for the
 *   OpenAI-protocol branch (`openai`, `openai_compatible`, and unknown
 *   provider ids). They are never applied to Anthropic, Google, Groq,
 *   Cerebras, DeepSeek or OpenRouter: doing so would send the OpenAI key to
 *   a third party and, when `OPENAI_BASE_URL` points at a local server,
 *   route Anthropic-protocol requests to an OpenAI-protocol endpoint.
 *
 * @param provider_id Provider identifier (e.g. "openai", "anthropic", "local").
 * @param model_id    Model identifier understood by the resolved provider.
 * @returns The model object produced by the matching provider factory. Typed
 *          as `any` because each provider package returns its own model type.
 */
export function getLanguageModel(provider_id: string, model_id: string): any {
  const pid = provider_id.toLowerCase();
  const isLocalAlias = pid === "local";

  const resolvedProvider = isLocalAlias
    ? (process.env.DEFAULT_PROVIDER || "openai_compatible").toLowerCase()
    : pid;
  const resolvedModel = isLocalAlias
    ? (process.env.DEFAULT_MODEL || model_id)
    : model_id;

  // Provider-scoped settings, e.g. ANTHROPIC_API_KEY / ANTHROPIC_BASE_URL.
  const envPrefix = resolvedProvider.toUpperCase();
  const providerKey = process.env[`${envPrefix}_API_KEY`];
  const providerBaseUrl = process.env[`${envPrefix}_BASE_URL`];

  // Placeholder for key-less endpoints. An explicit value is always passed so
  // that factories built on createOpenAI do not silently pick up
  // OPENAI_API_KEY from the environment for a non-OpenAI provider.
  const NO_KEY = "not-needed";
  const dedicatedKey = providerKey || NO_KEY;

  switch (resolvedProvider) {
    case "anthropic":
      return createAnthropic({ apiKey: dedicatedKey, baseURL: providerBaseUrl })(resolvedModel);

    case "google":
      // GOOGLE_GENERATIVE_AI_API_KEY is the variable the Google provider
      // package reads by default; accept it alongside GOOGLE_API_KEY.
      return createGoogleGenerativeAI({
        apiKey: providerKey || process.env.GOOGLE_GENERATIVE_AI_API_KEY || NO_KEY,
      })(resolvedModel);

    case "groq": {
      // Loaded on demand so the package is only required when selected.
      const { createGroq } = require("@ai-sdk/groq");
      return createGroq({ apiKey: dedicatedKey })(resolvedModel);
    }

    case "cerebras": {
      const { createCerebras } = require("@ai-sdk/cerebras");
      return createCerebras({ apiKey: dedicatedKey })(resolvedModel);
    }

    case "deepseek":
      // DeepSeek is reached through the OpenAI client with a fixed endpoint.
      return createOpenAI({
        apiKey: dedicatedKey,
        baseURL: "https://api.deepseek.com/v1",
      })(resolvedModel);

    case "openrouter": {
      const { createOpenRouter } = require("@openrouter/ai-sdk-provider");
      return createOpenRouter({ apiKey: dedicatedKey })(resolvedModel);
    }

    case "ollama":
      // Ollama is addressed through its OpenAI-compatible endpoint with a
      // fixed dummy key.
      return createOpenAI({
        apiKey: "ollama",
        baseURL: process.env.OLLAMA_BASE_URL || "http://localhost:11434/v1",
      })(resolvedModel);

    case "openai":
    case "openai_compatible":
    default:
      // OpenAI-protocol providers (including unknown ids) may fall back to
      // the generic OPENAI_* settings.
      return createOpenAI({
        apiKey: providerKey || process.env.OPENAI_API_KEY || NO_KEY,
        baseURL: providerBaseUrl || process.env.OPENAI_BASE_URL,
      })(resolvedModel);
  }
}
@@ -31,6 +31,17 @@ Use plain Python/TypeScript/etc. only for:
31
31
 
32
32
  If the request is ambiguous (e.g. "a 2D market simulation"), ask the user: "Is this a self-contained simulation script (plain code) or a backend service with persistence (BoneScript)?"
33
33
 
34
+ ## Build mode
35
+
36
+ When the user starts a session with a project-scoped prompt ("build me X", "create a full Y"), BoneCode runs you in **build mode**. Build mode is a state machine: clarify → plan → execute → verify → done. You will receive structured prompts at each stage. Specifically:
37
+
38
+ - **Clarify stage**: you'll be asked to either propose a design document (JSON) or ask 1-3 questions. Be concrete. Don't ramble.
39
+ - **Plan stage**: you'll be asked for a JSON todo list. Each todo must be a single concrete file action.
40
+ - **Execute stage**: you'll receive one todo at a time. **YOU MUST CALL TOOLS** — `write`, `edit`, `bash`. Prose-only responses are detected and rejected. The system will inject a reminder if you describe edits without calling tools.
41
+ - **Verify stage**: for each requirement, you'll be asked yes/no whether it's satisfied. Be honest. If a requirement is not yet met, say so — the orchestrator will create fix-up tasks.
42
+
43
+ The user can resume a build session at any time. Build state is persisted.
44
+
34
45
  ## BoneScript syntax — authoritative reference
35
46
 
36
47
  ### `system` block
@@ -96,6 +96,7 @@ export async function runAgentLoop(input: PromptInput): Promise<LoopResult> {
96
96
 
97
97
  const stats = { tokens_in: 0, tokens_out: 0, cost: 0, compacted: false };
98
98
  let turn = 0;
99
+ let lazyReminderSent = false;
99
100
  let lastFinishReason = "unknown";
100
101
 
101
102
  try {
@@ -174,6 +175,39 @@ export async function runAgentLoop(input: PromptInput): Promise<LoopResult> {
174
175
  // 3. "content-filter" = blocked — stop
175
176
  // 4. "tool-calls" with no actual tool calls = model confused — stop
176
177
  const terminalReasons = new Set(["stop", "length", "content-filter", "end-turn"]);
178
+
179
+ // Detect "lazy assistant" — the model claims it's editing/creating files
180
+ // in prose but never actually called a tool. Common with non-tool-tuned
181
+ // local models. Once per session, push a synthetic reminder and re-run.
182
+ const lazyAssistant = !result.has_tool_calls &&
183
+ Object.keys(tools).length > 0 &&
184
+ !lazyReminderSent &&
185
+ await wasLazyResponse(session_id, assistantMsgId);
186
+
187
+ if (lazyAssistant) {
188
+ lazyReminderSent = true;
189
+ broadcastToChannel("session_events", {
190
+ type: "session.warning",
191
+ session_id,
192
+ message: "Model claimed it would edit files but didn't call any tools. Reminding it to actually use the tools.",
193
+ });
194
+ // Insert a synthetic user reminder so the next turn sees it
195
+ const reminderMsgId = uuid();
196
+ await pool.query(
197
+ `INSERT INTO messages (id, session_id, role) VALUES ($1, $2, 'user')`,
198
+ [reminderMsgId, session_id]
199
+ );
200
+ const reminderPartId = uuid();
201
+ await pool.query(
202
+ `INSERT INTO parts (id, message_id, session_id, part_type, data, order_index) VALUES ($1, $2, $3, 'text', $4, 0)`,
203
+ [reminderPartId, reminderMsgId, session_id, JSON.stringify({
204
+ text: "<system-reminder>You described file changes but did not actually invoke any tools. The user cannot see prose descriptions of edits — only real tool calls produce file changes. Call the `write` or `edit` tool now to perform the actions you described. Do not respond with prose; emit a tool call.</system-reminder>",
205
+ synthetic: true,
206
+ })]
207
+ );
208
+ continue; // re-run the loop with the reminder appended
209
+ }
210
+
177
211
  if (terminalReasons.has(result.finish_reason) && !result.has_tool_calls) {
178
212
  break;
179
213
  }
@@ -222,9 +256,19 @@ async function streamWithRetry(ctx: {
222
256
  try {
223
257
  return await streamOnce(currentCtx);
224
258
  } catch (e: any) {
225
- // On Bad Request with tools, retry without tools
259
+ // On Bad Request with tools, retry without tools BUT log it visibly so
260
+ // the user knows their model can't do tool calls — otherwise they get
261
+ // pure-prose responses with no real edits.
226
262
  if (e.message?.includes("Bad Request") && Object.keys(currentCtx.tools).length > 0 && attempt === 0) {
227
- // Local model doesn't support function calling — silently retry without tools
263
+ logger.error("model_tools_unsupported", {
264
+ event: "tools_stripped",
265
+ metadata: { model: ctx.model_id, provider: ctx.provider_id, error: e.message },
266
+ });
267
+ broadcastToChannel("session_events", {
268
+ type: "session.warning",
269
+ session_id: ctx.session_id,
270
+ message: `Model ${ctx.model_id} rejected tool definitions — running without tools (no file edits possible). Set MODEL_SUPPORTS_TOOLS=false to suppress this warning, or use a tool-capable model.`,
271
+ });
228
272
  currentCtx = { ...currentCtx, tools: {} };
229
273
  attempt++;
230
274
  continue;
@@ -519,6 +563,29 @@ async function runCompaction(
519
563
 
520
564
  // ─── Message History Builder ──────────────────────────────────────────────────
521
565
 
566
/**
 * Phrases that imply the assistant is committing to a file edit. They are
 * matched against lower-cased text, so the patterns are lower-case only.
 * Contractions accept both the straight apostrophe (') and the typographic
 * one (U+2019), since model output frequently contains the latter.
 */
const LAZY_EDIT_INTENT_PATTERNS: readonly RegExp[] = [
  /\bi['\u2019]ll\s+(create|write|update|edit|modify|add|implement|generate)\b/,
  /\bi['\u2019]m\s+(creating|writing|updating|editing|modifying|adding|implementing|generating)\b/,
  /\b(creating|writing|updating|editing|generating)\s+(?:the\s+)?(?:file|files|spec)\b/,
  /\bi\s+(?:will|am\s+going\s+to)\s+(create|write|update|edit|implement|generate)\b/,
  /\blet\s+me\s+(create|write|update|edit|implement)\b/,
  /\bhere['\u2019]s\s+(?:the\s+)?(?:updated|new)\s+(?:file|version|content)\b/,
  // A file extension and a past-tense edit verb on the same line, either order.
  /\.(bone|ts|tsx|js|jsx|py|md|json|yaml|yml|sql|sh|html|css)\b.*\b(updated|created|written|modified|added)\b/,
  /\b(updated|created|written|modified|added)\b.*\.(bone|ts|tsx|js|jsx|py|md|json|yaml|yml|sql|sh|html|css)\b/,
];

/**
 * Detects a "lazy" response: the assistant's text says it will edit or create
 * files. The caller is responsible for checking that no tool was invoked.
 *
 * Loads the message's `text` parts in `order_index` order, joins their `text`
 * fields with spaces, lower-cases the result and tests it against
 * LAZY_EDIT_INTENT_PATTERNS.
 *
 * @param session_id Session the message belongs to (currently not used by the query).
 * @param messageId  Id of the assistant message whose text parts are inspected.
 * @returns `true` when the combined text is at least 30 characters long and
 *          matches any edit-intent pattern; `false` otherwise.
 */
async function wasLazyResponse(session_id: string, messageId: string): Promise<boolean> {
  const r = await pool.query(
    `SELECT data FROM parts WHERE message_id = $1 AND part_type = 'text' ORDER BY order_index ASC`,
    [messageId]
  );
  const text = r.rows.map((row: any) => row.data?.text || "").join(" ").toLowerCase();
  // Very short replies (including empty ones) are never treated as lazy.
  if (text.length < 30) return false;
  return LAZY_EDIT_INTENT_PATTERNS.some(re => re.test(text));
}
588
+
522
589
  async function loadMessageHistory(session_id: string): Promise<any[]> {
523
590
  const result = await pool.query(
524
591
  `SELECT m.id, m.role, m.model_id, m.provider_id, m.tokens_input, m.tokens_output,
package/src/tui.ts CHANGED
@@ -84,6 +84,7 @@ interface Command {
84
84
 
85
85
  const COMMANDS: Command[] = [
86
86
  { name: "/new", description: "Start a new session" },
87
+ { name: "/build", description: "Start autonomous build mode", args: "<project description>" },
87
88
  { name: "/session", description: "Show current session ID" },
88
89
  { name: "/sessions", description: "List recent sessions" },
89
90
  { name: "/model", description: "Switch model", args: "<provider/model>" },
@@ -702,6 +703,88 @@ async function streamPrompt(opts: {
702
703
  nl(` ${BLUE}⊕ Context compacted${R}`);
703
704
  continue;
704
705
  }
706
+
707
+ // Build mode events — autonomous orchestration progress
708
+ if (ev.type === "session.warning") {
709
+ flushTextLine();
710
+ nl(` ${YELLOW}⚠ ${ev.message || ""}${R}`);
711
+ continue;
712
+ }
713
+ if (ev.type === "build.stage") {
714
+ flushTextLine();
715
+ const stage = (ev.stage || "?").toUpperCase();
716
+ nl(` ${CYAN}${BOLD}▶ Build stage: ${stage}${R}`);
717
+ continue;
718
+ }
719
+ if (ev.type === "build.questions") {
720
+ flushTextLine();
721
+ nl(` ${CYAN}${BOLD}? Clarifying questions:${R}`);
722
+ for (const q of ev.questions || []) {
723
+ nl(` ${WHITE}- ${q}${R}`);
724
+ }
725
+ nl(` ${GRAY}Reply with your answers to continue.${R}`);
726
+ continue;
727
+ }
728
+ if (ev.type === "build.design") {
729
+ flushTextLine();
730
+ const d = ev.design || {};
731
+ nl(` ${CYAN}${BOLD}✓ Design locked in${R}`);
732
+ if (d.goal) nl(` ${GRAY}goal: ${d.goal}${R}`);
733
+ if (Array.isArray(d.requirements)) nl(` ${GRAY}${d.requirements.length} requirement(s), ${d.artifacts?.length || 0} artifact(s)${R}`);
734
+ continue;
735
+ }
736
+ if (ev.type === "build.plan") {
737
+ flushTextLine();
738
+ const todos = ev.todos || [];
739
+ nl(` ${CYAN}${BOLD}✓ Plan: ${todos.length} task(s)${R}`);
740
+ for (let i = 0; i < Math.min(todos.length, 10); i++) {
741
+ const t = todos[i];
742
+ nl(` ${GRAY}${(i + 1).toString().padStart(2, " ")}. ${t.title}${R}`);
743
+ }
744
+ if (todos.length > 10) nl(` ${GRAY}... ${todos.length - 10} more${R}`);
745
+ continue;
746
+ }
747
+ if (ev.type === "build.todo.start") {
748
+ flushTextLine();
749
+ const t = ev.todo || {};
750
+ nl(` ${WHITE}● ${t.title || ""}${R}`);
751
+ continue;
752
+ }
753
+ if (ev.type === "build.todo.done") {
754
+ flushTextLine();
755
+ const t = ev.todo || {};
756
+ nl(` ${GREEN}✓ ${t.title || ""}${R}${t.evidence ? ` ${GRAY}(${t.evidence})${R}` : ""}`);
757
+ continue;
758
+ }
759
+ if (ev.type === "build.todo.retry") {
760
+ flushTextLine();
761
+ const t = ev.todo || {};
762
+ nl(` ${YELLOW}⟳ Retry ${ev.attempt || ""}: ${t.title || ""}${R}`);
763
+ continue;
764
+ }
765
+ if (ev.type === "build.todo.failed") {
766
+ flushTextLine();
767
+ const t = ev.todo || {};
768
+ nl(` ${RED}✗ ${t.title || ""}${R} ${GRAY}${ev.reason || ""}${R}`);
769
+ continue;
770
+ }
771
+ if (ev.type === "build.verify.item") {
772
+ flushTextLine();
773
+ const icon = ev.satisfied ? `${GREEN}✓${R}` : `${RED}✗${R}`;
774
+ nl(` ${icon} ${WHITE}${ev.requirement || ""}${R}`);
775
+ if (ev.evidence) nl(` ${GRAY}${ev.evidence}${R}`);
776
+ continue;
777
+ }
778
+ if (ev.type === "build.replan") {
779
+ flushTextLine();
780
+ nl(` ${YELLOW}↻ Re-planning: ${ev.added || 0} new task(s) to address gaps${R}`);
781
+ continue;
782
+ }
783
+ if (ev.type === "build.done") {
784
+ flushTextLine();
785
+ nl(` ${GREEN}${BOLD}✓✓ Build complete — all requirements satisfied${R}`);
786
+ continue;
787
+ }
705
788
  } catch {
706
789
  // Ignore malformed events
707
790
  }
@@ -856,36 +939,62 @@ export async function runTUI(opts: {
856
939
  const promptStr = () => `${CYAN}${BOLD}>${R} `;
857
940
 
858
941
  // ─── Ctrl+C handling ──────────────────────────────────────────────────────
859
- // When streaming: abort the request AND notify server
860
- // When idle: clear menu/input or hint to use /exit
942
+ // Windows quirk: rl.on("SIGINT") only fires when readline is actively
943
+ // reading. We pause readline during streaming, which makes Ctrl+C dead.
944
+ // Use process.on("SIGINT") as the always-on handler so streaming aborts work.
945
+
946
+ let interruptCount = 0;
947
+
861
948
  const onSigint = async () => {
862
949
  if (streaming && abort) {
863
950
  abort.abort();
864
- // Also tell the server to cancel the agent loop
951
+ // Tell the server to cancel the agent loop too
865
952
  try {
866
953
  await fetch(`http://localhost:${port}/v2/session/${sessionId}/cancel`, {
867
954
  method: "POST",
868
955
  headers: { "Authorization": `Bearer ${token}` },
869
956
  });
870
957
  } catch { /* server may not have the endpoint, abort is enough */ }
958
+ out(`\n${YELLOW}interrupted${R}\n`);
871
959
  // Don't reprompt here — the streamPrompt finally block will handle UI
872
960
  return;
873
961
  }
874
962
 
875
- // Idle: clear menu if visible, else hint
963
+ interruptCount++;
964
+
965
+ // Idle: clear menu/input first, then on second Ctrl+C, exit
876
966
  if (menu.visible) {
877
967
  clearMenu(menu.rowsRendered);
878
968
  menu.visible = false;
879
969
  menu.rowsRendered = 0;
880
970
  menu.selected = 0;
971
+ interruptCount = 0;
972
+ out(`\n${promptStr()}`);
973
+ return;
881
974
  }
882
- out(`\n${GRAY}(Ctrl+D or /exit to quit)${R}\n`);
883
- rl.setPrompt(promptStr());
884
- rl.prompt();
975
+
976
+ // If user typed something, clear it
977
+ const lineLen = ((rl as any).line || "").length;
978
+ if (lineLen > 0) {
979
+ (rl as any).line = "";
980
+ (rl as any).cursor = 0;
981
+ out(`\r${ESC}[2K${promptStr()}`);
982
+ interruptCount = 0;
983
+ return;
984
+ }
985
+
986
+ // Empty prompt + Ctrl+C: first time hint, second time exit
987
+ if (interruptCount >= 2) {
988
+ out(`\n${GRAY}Goodbye.${R}\n`);
989
+ process.exit(0);
990
+ }
991
+ out(`\n${GRAY}(Press Ctrl+C again or Ctrl+D to exit)${R}\n${promptStr()}`);
885
992
  };
886
993
 
887
- // Detach readline's default SIGINT (which closes the line buffer)
888
- // and route it to our handler.
994
+ // Register on BOTH process and readline so it works during streaming AND
995
+ // during input. Process-level catches signals while rl is paused; readline
996
+ // catches them while the user is typing.
997
+ process.on("SIGINT", onSigint);
889
998
  rl.on("SIGINT", onSigint);
890
999
 
891
1000
  rl.on("close", () => {
@@ -1043,6 +1152,35 @@ export async function runTUI(opts: {
1043
1152
  } catch (e: any) { nl(`${RED}✗ ${e.message}${R}`); }
1044
1153
  break;
1045
1154
 
1155
+ case "build": {
1156
+ // /build <description> — start autonomous build mode for this prompt
1157
+ const description = args.join(" ").trim();
1158
+ if (!description) {
1159
+ nl(`${YELLOW}Usage:${R} /build <project description>`);
1160
+ nl(`${GRAY}Example: /build a 2D market simulation with 1000 shops over 100 years${R}`);
1161
+ break;
1162
+ }
1163
+ // Send the prompt prefixed with /build so the server enters build mode
1164
+ rl.pause();
1165
+ renderUserMessage(`/build ${description}`);
1166
+ out(` ${GRAY}entering build mode...${R}`);
1167
+ streaming = true;
1168
+ abort = new AbortController();
1169
+ const result = await streamPrompt({
1170
+ port, token, sessionId: sessionId!, model, provider,
1171
+ message: `/build ${description}`,
1172
+ worktree, abortSignal: abort.signal,
1173
+ });
1174
+ streaming = false;
1175
+ abort = null;
1176
+ if (!result.text && !result.error) clearLine();
1177
+ renderTurnEnd(model, result.elapsedMs, result.interrupted);
1178
+ if (result.error && !result.interrupted) nl(` ${RED}✗ ${result.error}${R}`);
1179
+ nl();
1180
+ rl.resume();
1181
+ break;
1182
+ }
1183
+
1046
1184
  case "session":
1047
1185
  nl(`${GRAY}${sessionId}${R}`);
1048
1186
  break;