bonecode 1.3.0 → 1.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,895 @@
1
+ /**
2
+ * Autonomous Build Mode — small-model-friendly project orchestrator
3
+ *
4
+ * Why: small/local models (8-20B) struggle with open-ended "build me X" prompts.
5
+ * They produce prose, hallucinate edits, and forget what they were doing across
6
+ * turns. This module replaces the single-turn agent loop with a deterministic
7
+ * state machine that drives the model through narrow, focused stages.
8
+ *
9
+ * State flow:
10
+ *
11
+ * ┌──────────┐ ┌──────────┐ ┌──────────┐ ┌──────────┐
12
+ * │ CLARIFY │ ──▶ │ PLAN │ ──▶ │ EXECUTE │ ──▶ │ VERIFY │
13
+ * │ (Q&A) │ │ (todos) │ │ (loop) │ │ (yes/no) │
14
+ * └──────────┘ └──────────┘ └─────┬────┘ └─────┬────┘
15
+ * │ │
16
+ * ▼ ▼
17
+ * ┌────────┐ ┌────────┐
18
+ * │ DONE │ ◀──── │ all ok │
19
+ * └────────┘ └────────┘
20
+ * │
21
+ * │ failures
22
+ * ▼
23
+ * re-plan fix-up todos, then back to EXECUTE
24
+ *
25
+ * Each stage uses a tightly scoped prompt with a structured-output requirement
26
+ * (JSON we parse deterministically). The agent's natural "describe what I'd do"
27
+ * tendency is replaced by short, mechanical answers.
28
+ *
29
+ * State is persisted to the sessions table (in build_state JSON column) so
30
+ * the loop can resume across restarts and is visible to the user via the UI.
31
+ */
32
+
33
+ import { generateText } from "ai";
34
+ import { v4 as uuid } from "uuid";
35
+ import { pool } from "../../../bone/output/session/src/db";
36
+ import { broadcastToChannel } from "../../../bone/output/session/src/websocket";
37
+ import { logger } from "../../../bone/output/session/src/logger";
38
+ import { runAgentLoop } from "./prompt";
39
+
40
+ // ─── State ────────────────────────────────────────────────────────────────────
41
+
42
/** Stage of the build state machine. The driver loop stops on "done" and "failed". */
export type BuildStage =
  | "clarify"
  | "plan"
  | "execute"
  | "verify"
  | "done"
  | "failed";
43
+
44
/** Design document the plan and verify stages work from. */
export interface DesignDoc {
  /** One-line statement of what is being built. */
  goal: string;
  /** Requirements; the verify stage audits each one individually. */
  requirements: Array<string>;
  /** Constraints passed to the model as context when planning and executing. */
  constraints: Array<string>;
  /** Expected file paths. */
  artifacts: Array<string>;
}
50
+
51
/** A single unit of work in the build plan. */
export interface BuildTodo {
  /** Unique id assigned when the todo is created. */
  id: string;
  title: string;
  description: string;
  /** Lifecycle state of this todo. */
  status:
    | "pending"
    | "in_progress"
    | "completed"
    | "failed";
  /** Number of failed attempts so far. */
  failure_count: number;
  /** File paths or notes recorded after completion. */
  evidence?: string;
}
59
+
60
/** Full state of one build, persisted as JSON between stages. */
export interface BuildState {
  /** Stage the driver will run next (or the terminal stage reached). */
  stage: BuildStage;
  /** The prompt that started the build. */
  original_prompt: string;
  /** Design document, or null until the clarify stage produces one. */
  design: DesignDoc | null;
  todos: Array<BuildTodo>;
  /** Execute-loop iterations consumed so far. */
  iteration: number;
  /** Upper bound on `iteration`. */
  max_iterations: number;
  /** Newline-joined questions waiting for the user's answer, if any. */
  pending_clarification?: string;
  verification_results?: Array<VerificationResult>;
  /** Human-readable reason, set when a stage fails. */
  error?: string;
  /** Set after probe: whether the model can emit OpenAI-format tool calls. */
  tool_capable?: boolean;
}
73
+
74
/** Verdict for one requirement, as produced by the verify stage. */
export interface VerificationResult {
  /** The requirement text that was audited. */
  requirement: string;
  /** Whether the requirement was judged satisfied. */
  satisfied: boolean;
  /** One-line justification for the verdict. */
  evidence: string;
}
79
+
80
/** Arguments for a build-mode run. */
export interface BuildModeInput {
  /** Session whose state is loaded, saved and broadcast on. */
  session_id: string;
  message_id: string;
  /** The user's prompt (or their answer to pending clarification questions). */
  prompt: string;
  /** Model identifier passed to the model provider. */
  model_id: string;
  /** Provider identifier passed alongside `model_id`. */
  provider_id: string;
}
87
+
88
+ // ─── Persistence ──────────────────────────────────────────────────────────────
89
+
90
/**
 * Load the persisted build state for a session.
 *
 * Reads the dedicated `build_state` column first. When that yields nothing, or
 * the query fails (for example because the column does not exist on an older
 * schema), it falls back to `permission_ruleset.build` — the location
 * `saveState` writes to when the dedicated column is unavailable. Without the
 * fallback, state saved on an older schema could never be resumed.
 *
 * @param session_id - id of the session row to read.
 * @returns the stored state, or null when none exists or it cannot be read.
 */
async function loadState(session_id: string): Promise<BuildState | null> {
  const lookups = [
    `SELECT build_state FROM sessions WHERE id = $1`,
    `SELECT permission_ruleset->'build' AS build_state FROM sessions WHERE id = $1`,
  ];

  for (const sql of lookups) {
    try {
      const r = await pool.query(sql, [session_id]);
      const raw = r.rows[0]?.build_state;
      if (!raw) continue;
      return typeof raw === "string" ? JSON.parse(raw) : raw;
    } catch {
      // Missing column or unparsable JSON — try the next location.
    }
  }
  return null;
}
100
+
101
/**
 * Persist the build state for a session (best effort — never throws).
 *
 * Writes to the dedicated `build_state` column first. If that update fails
 * (e.g. the column doesn't exist yet on an older schema), the state is stored
 * under the `build` key of `permission_ruleset` instead. If both writes fail
 * the error is logged rather than silently dropped, so lost state is visible.
 *
 * @param session_id - id of the session row to update.
 * @param state - state to serialize as JSON.
 */
async function saveState(session_id: string, state: BuildState): Promise<void> {
  const payload = JSON.stringify(state);

  try {
    await pool.query(
      `UPDATE sessions SET build_state = $2::jsonb, updated_at = NOW() WHERE id = $1`,
      [session_id, payload]
    );
    return;
  } catch {
    // Fall through to the legacy location below.
  }

  try {
    await pool.query(
      `UPDATE sessions
       SET permission_ruleset = jsonb_set(COALESCE(permission_ruleset, '{}'::jsonb), '{build}', $2::jsonb),
           updated_at = NOW()
       WHERE id = $1`,
      [session_id, payload]
    );
  } catch (e: unknown) {
    logger.error("build_mode_save_failed", {
      event: "saveState",
      metadata: { session_id, error: e instanceof Error ? e.message : String(e) },
    });
  }
}
121
+
122
/**
 * Broadcast a build event on the "session_events" channel.
 * The message carries `type` and `session_id` followed by the fields of `data`
 * (fields in `data` with the same names take precedence).
 */
function emit(session_id: string, event: string, data: Record<string, unknown>) {
  const message = { type: event, session_id, ...data };
  broadcastToChannel("session_events", message);
}
125
+
126
+ // ─── Structured-output helpers ────────────────────────────────────────────────
127
+
128
/**
 * Ask the model a focused question with a JSON-only response requirement.
 *
 * The caller's system prompt is extended with strict output rules, the model
 * is called at low temperature, and the reply is handed to `parseJsonLoose`
 * to strip any prose/markdown around the JSON.
 *
 * Every failure — loading the helper module, resolving the model, the
 * generation call itself, or unparsable output — yields null so the caller can
 * decide whether to retry. Errors are logged, never thrown.
 *
 * @param input.schema_hint - human-readable description of the expected shape.
 * @returns the parsed JSON value, or null on any failure.
 */
async function askJson<T>(input: {
  model_id: string;
  provider_id: string;
  system: string;
  user: string;
  schema_hint: string; // human-readable description of the expected shape
}): Promise<T | null> {
  const fullSystem = [
    input.system,
    "",
    "OUTPUT REQUIREMENTS:",
    `- Reply with a single JSON object only.`,
    `- Expected shape: ${input.schema_hint}`,
    `- Do NOT include any prose before or after the JSON.`,
    `- Do NOT wrap in markdown code fences.`,
    `- Do NOT explain. Output only the JSON object.`,
  ].join("\n");

  try {
    // Resolved inside the try block so an unknown provider/model is reported
    // as a null result instead of rejecting the whole stage.
    const { getLanguageModel } = await import("./build_mode_helpers");
    const model = getLanguageModel(input.provider_id, input.model_id);

    const { text } = await generateText({
      model,
      system: fullSystem,
      prompt: input.user,
      temperature: 0.1,
      maxTokens: 2048,
    });
    return parseJsonLoose<T>(text);
  } catch (e: unknown) {
    const message = e instanceof Error ? e.message : String(e);
    logger.error("build_mode_json_failed", { event: "askJson", metadata: { error: message } });
    return null;
  }
}
168
+
169
/**
 * Parse JSON from a model response that might be wrapped in markdown fences,
 * have extra prose around it, or contain partial output.
 *
 * Steps:
 *  1. Drop `<think>...</think>` blocks (some reasoning models include them).
 *  2. Drop a leading/trailing code fence.
 *  3. Look for `{...}` spans (or `[...]` spans when the text has no `{`).
 *     Each balanced span is tried in document order, so a stray bracketed
 *     fragment in the prose (e.g. "{placeholder}") no longer hides the real
 *     JSON that follows it.
 *
 * @returns the first span that parses, or null if no recoverable JSON is found.
 */
function parseJsonLoose<T>(raw: string): T | null {
  if (!raw) return null;
  let s = raw.replace(/<think>[\s\S]*?<\/think>/gi, "").trim();
  s = s.replace(/^```(?:json)?\s*/i, "").replace(/```\s*$/, "").trim();

  // Objects are preferred; arrays are only considered when there is no "{".
  const wantsObject = s.includes("{");
  const open = wantsObject ? "{" : "[";
  const close = wantsObject ? "}" : "]";

  let from = s.indexOf(open);
  while (from !== -1) {
    const span = extractBalanced(s, from, open, close);
    const parsed = tryParse<T>(span);
    if (parsed !== null) return parsed;

    // An unbalanced span runs to the end of the text: nothing left to try.
    const end = from + span.length;
    if (end >= s.length) break;
    from = s.indexOf(open, end);
  }
  return null;
}

/**
 * Return the substring starting at `start` (which must point at `open`) up to
 * and including the matching `close`. Brackets inside double-quoted strings
 * are ignored, and backslash escapes are honored only inside strings.
 * If the brackets never balance, the rest of the string is returned.
 */
function extractBalanced(s: string, start: number, open: string, close: string): string {
  let depth = 0;
  let inStr = false;
  let escaped = false;

  for (let i = start; i < s.length; i++) {
    const ch = s[i];
    if (inStr) {
      if (escaped) escaped = false;
      else if (ch === "\\") escaped = true;
      else if (ch === '"') inStr = false;
      continue;
    }
    if (ch === '"') {
      inStr = true;
    } else if (ch === open) {
      depth++;
    } else if (ch === close) {
      depth--;
      if (depth === 0) return s.slice(start, i + 1);
    }
  }
  return s.slice(start); // unbalanced — return what we have
}

/** `JSON.parse` that returns null instead of throwing. */
function tryParse<T>(s: string): T | null {
  try {
    return JSON.parse(s) as T;
  } catch {
    return null;
  }
}
213
+
214
+ // ─── Stage 1: Clarify ─────────────────────────────────────────────────────────
215
+
216
/**
 * Stage 1: decide whether the prompt is concrete enough to start.
 *
 * Asks the model either for a proposed design (stage moves to "plan") or for
 * up to three clarification questions (stored in `pending_clarification`; the
 * stage stays "clarify" until the user answers).
 *
 * The model's reply is validated before use:
 *  - design fields are coerced to the `DesignDoc` shape, so later stages can
 *    rely on the arrays being present;
 *  - a reply with neither a usable design nor any question fails the build
 *    instead of throwing on `questions.join` or leaving the state machine
 *    stuck in "clarify" with nothing to ask.
 */
async function stageClarify(state: BuildState, input: BuildModeInput): Promise<BuildState> {
  emit(input.session_id, "build.stage", { stage: "clarify" });

  // Ask the model: do you have enough info? If not, what 1-3 questions?
  const result = await askJson<{
    sufficient: boolean;
    questions: string[];
    proposed_design?: DesignDoc;
  }>({
    model_id: input.model_id,
    provider_id: input.provider_id,
    system: [
      "You are a senior engineer scoping a project.",
      "The user has given you a prompt. Decide if you have enough to start.",
      "If yes, propose a concrete design document.",
      "If no, ask 1-3 specific questions.",
      "",
      "RULES:",
      "- Never ask more than 3 questions in one round.",
      "- Questions must be answerable in one sentence each.",
      "- If the prompt is concrete enough (mentions specific tech, scope, constraints), set sufficient=true.",
    ].join("\n"),
    user: `User prompt:\n${input.prompt}`,
    schema_hint: `{ "sufficient": boolean, "questions": string[], "proposed_design": { "goal": string, "requirements": string[], "constraints": string[], "artifacts": string[] } }`,
  });

  if (!result) {
    state.error = "Could not parse model response during clarification.";
    state.stage = "failed";
    return state;
  }

  // Model output is untrusted: keep only the string entries of list fields.
  const stringsOf = (value: unknown): string[] =>
    Array.isArray(value) ? value.filter((v): v is string => typeof v === "string") : [];

  const proposed: unknown = result.proposed_design;
  if (result.sufficient && proposed && typeof proposed === "object") {
    const fields = proposed as Record<string, unknown>;
    const design: DesignDoc = {
      goal: typeof fields.goal === "string" && fields.goal.trim() ? fields.goal : state.original_prompt,
      requirements: stringsOf(fields.requirements),
      constraints: stringsOf(fields.constraints),
      artifacts: stringsOf(fields.artifacts),
    };
    state.design = design;
    state.stage = "plan";
    emit(input.session_id, "build.design", { design });
    return state;
  }

  // Ask the user the questions (never more than three per round).
  const questions = stringsOf(result.questions)
    .filter((q) => q.trim() !== "")
    .slice(0, 3);

  if (questions.length === 0) {
    state.error = "Model produced neither a usable design nor clarification questions.";
    state.stage = "failed";
    return state;
  }

  state.pending_clarification = questions.join("\n");
  emit(input.session_id, "build.questions", { questions });
  return state;
}
260
+
261
/**
 * Called when the user has answered the clarification questions.
 * Combines the original prompt + questions + answers into a finalized design.
 *
 * The model's reply is validated: it must contain at least a goal or one
 * requirement, and list fields are coerced to string arrays. An unusable
 * reply fails the build here rather than crashing the plan stage later.
 *
 * @returns the updated (and saved) state, or null when the session has no
 *          stored state or is not currently in the "clarify" stage.
 */
export async function continueAfterClarification(
  session_id: string,
  user_answer: string,
  model_id: string,
  provider_id: string
): Promise<BuildState | null> {
  const state = await loadState(session_id);
  if (!state || state.stage !== "clarify") return null;

  const reply = await askJson<{
    goal?: unknown;
    requirements?: unknown;
    constraints?: unknown;
    artifacts?: unknown;
  }>({
    model_id,
    provider_id,
    system: [
      "You are scoping a project. Below is the original user prompt, the questions you asked, and the user's answers.",
      "Produce a concrete design document.",
      "",
      "REQUIREMENTS list must be specific and verifiable (each item must be answerable yes/no).",
      "ARTIFACTS list must be concrete file paths the project should produce.",
    ].join("\n"),
    user: [
      `Original prompt:\n${state.original_prompt}`,
      ``,
      `Questions asked:\n${state.pending_clarification ?? ""}`,
      ``,
      `User's answers:\n${user_answer}`,
    ].join("\n"),
    schema_hint: `{ "goal": string, "requirements": string[], "constraints": string[], "artifacts": string[] }`,
  });

  // Model output is untrusted: keep only the string entries of list fields.
  const onlyStrings = (value: unknown): string[] =>
    Array.isArray(value) ? value.filter((v): v is string => typeof v === "string") : [];

  const goal = reply && typeof reply.goal === "string" ? reply.goal.trim() : "";
  const requirements = reply ? onlyStrings(reply.requirements) : [];

  if (!reply || (goal === "" && requirements.length === 0)) {
    state.error = "Could not finalize design from clarification answers.";
    state.stage = "failed";
  } else {
    const design: DesignDoc = {
      goal: goal || state.original_prompt,
      requirements,
      constraints: onlyStrings(reply.constraints),
      artifacts: onlyStrings(reply.artifacts),
    };
    state.design = design;
    state.pending_clarification = undefined;
    state.stage = "plan";
    emit(session_id, "build.design", { design });
  }

  await saveState(session_id, state);
  return state;
}
307
+
308
+ // ─── Stage 2: Plan ────────────────────────────────────────────────────────────
309
+
310
/**
 * Stage 2: turn the design document into an ordered todo list.
 *
 * On success `state.todos` is replaced with fresh "pending" todos and the
 * stage moves to "execute". The build fails when there is no design or when
 * the model returns no usable todo.
 *
 * Robustness: entries without a non-empty string title are dropped, a
 * non-string description becomes "", and missing design lists (possible in
 * state restored from storage) are rendered as empty instead of throwing.
 */
async function stagePlan(state: BuildState, input: BuildModeInput): Promise<BuildState> {
  if (!state.design) {
    state.stage = "failed";
    state.error = "No design to plan from.";
    return state;
  }

  emit(input.session_id, "build.stage", { stage: "plan" });

  // Render a list as "- item" lines; tolerates a missing list.
  const bullets = (items: unknown): string =>
    (Array.isArray(items) ? items : []).map((item) => `- ${item}`).join("\n");

  const result = await askJson<{ todos: Array<{ title: string; description: string }> }>({
    model_id: input.model_id,
    provider_id: input.provider_id,
    system: [
      "You are turning a design document into an ordered todo list.",
      "",
      "RULES:",
      "- Each todo must be a concrete, single-file action when possible.",
      "- Use tool-friendly verbs: 'Write', 'Edit', 'Run', 'Create'.",
      "- Order todos by dependency: schema/config first, then implementation, then tests.",
      "- Aim for 5-15 todos. Don't split trivial work.",
      "- Each title fits on one line, max 80 chars.",
      "- Each description gives the file path and what to do, in 1-2 sentences.",
    ].join("\n"),
    user: [
      `Design:`,
      `Goal: ${state.design.goal}`,
      `Requirements:\n${bullets(state.design.requirements)}`,
      `Constraints:\n${bullets(state.design.constraints)}`,
      `Expected artifacts:\n${bullets(state.design.artifacts)}`,
    ].join("\n"),
    schema_hint: `{ "todos": [{ "title": string, "description": string }] }`,
  });

  // Model output is untrusted: keep only entries with a real title.
  const proposed = result && Array.isArray(result.todos) ? result.todos : [];
  const usable = proposed.filter(
    (t) => t !== null && typeof t === "object" && typeof t.title === "string" && t.title.trim() !== ""
  );

  if (usable.length === 0) {
    state.error = "Could not produce a todo list.";
    state.stage = "failed";
    return state;
  }

  state.todos = usable.map(
    (t): BuildTodo => ({
      id: uuid(),
      title: t.title,
      description: typeof t.description === "string" ? t.description : "",
      status: "pending",
      failure_count: 0,
    })
  );
  state.stage = "execute";
  emit(input.session_id, "build.plan", { todos: state.todos });
  return state;
}
360
+
361
+ // ─── Stage 3: Execute ─────────────────────────────────────────────────────────
362
+
363
/** Attempts allowed per todo before it is marked "failed". */
const MAX_TODO_RETRIES = 3;

/**
 * Stage 3: work through pending todos one at a time.
 *
 * Each loop iteration picks the first pending todo and attempts it either via
 * the agent loop (tool-calling models) or via the JSON-manifest fallback.
 * The stage ends by moving to "verify" (no pending todos left) or "failed"
 * (iteration limit hit with work remaining, or too many consecutive todos
 * exhausted their retries).
 *
 * Fixes over the previous version:
 *  - the synthetic task part is attached to the task message (`taskMsgId`);
 *    it was previously inserted with the session id as its message id;
 *  - todos left "in_progress" by an interrupted run are reset to "pending"
 *    so a resumed build does not silently skip them;
 *  - an exception thrown while attempting a todo counts as a failed attempt
 *    (and is logged) instead of escaping with the todo stuck "in_progress".
 */
async function stageExecute(state: BuildState, input: BuildModeInput): Promise<BuildState> {
  emit(input.session_id, "build.stage", { stage: "execute" });

  // A todo can only be "in_progress" here if a previous run was interrupted.
  for (const todo of state.todos) {
    if (todo.status === "in_progress") todo.status = "pending";
  }

  // Detect tool-calling capability with a probe. Small/local models often can't
  // emit tool calls in the OpenAI format. When that's the case, switch to
  // a content-fallback path where we ask the model for a JSON manifest of
  // files to write and apply them ourselves via fs.
  if (state.tool_capable === undefined) {
    state.tool_capable = await probeToolCapability(input);
    if (!state.tool_capable) {
      emit(input.session_id, "session.warning", {
        message: `Model ${input.model_id} cannot emit tool calls — using JSON-manifest fallback mode (file content provided directly by the model, applied by the orchestrator).`,
      });
    }
  }

  // Track consecutive fully-failed todos so we can bail out fast instead
  // of grinding through every iteration against a model that can't do the job.
  let consecutiveZeroToolCallTodos = 0;
  const ABORT_AFTER_CONSECUTIVE_ZERO = 2;

  while (state.iteration < state.max_iterations) {
    state.iteration++;

    const next = state.todos.find((t) => t.status === "pending");
    if (!next) {
      // All todos done — move to verification
      state.stage = "verify";
      return state;
    }

    next.status = "in_progress";
    emit(input.session_id, "build.todo.start", { todo: next });

    let succeeded = false;
    let toolCallsCount = 0;
    let errorMsg = "";

    try {
      if (state.tool_capable) {
        // ── Tool-calling path ──────────────────────────────────────────────
        const focusedPrompt = [
          `<build-task>`,
          `Title: ${next.title}`,
          `Description: ${next.description}`,
          ``,
          `This is one task in a larger build. Complete this task NOW by calling the appropriate tools.`,
          `Do not describe what you would do — call the tools.`,
          `</build-task>`,
        ].join("\n");

        const taskMsgId = uuid();
        await pool.query(
          `INSERT INTO messages (id, session_id, role) VALUES ($1, $2, 'user')`,
          [taskMsgId, input.session_id]
        );
        const taskPartId = uuid();
        await pool.query(
          `INSERT INTO parts (id, message_id, session_id, part_type, data, order_index) VALUES ($1, $2, $3, 'text', $4, 0)`,
          // $2 is the message the part belongs to — the task message just inserted.
          [taskPartId, taskMsgId, input.session_id, JSON.stringify({ text: focusedPrompt, synthetic: true })]
        );

        const result = await runAgentLoop({
          session_id: input.session_id,
          message_id: taskMsgId,
          content: focusedPrompt,
          model_id: input.model_id,
          provider_id: input.provider_id,
          agent_name: "build",
        });

        // The attempt only counts when the model actually called a tool.
        toolCallsCount = await countToolCallsSince(input.session_id, taskMsgId);
        succeeded = result.ok && toolCallsCount > 0;
        errorMsg = result.error || "no tool calls";
      } else {
        // ── JSON-manifest fallback ─────────────────────────────────────────
        const fallback = await executeFallback(state, next, input);
        succeeded = fallback.ok;
        toolCallsCount = fallback.filesWritten;
        errorMsg = fallback.error || "no files produced";
      }
    } catch (e: unknown) {
      // Treat an unexpected error as a failed attempt so the retry/abort
      // bookkeeping below still runs and the todo is not left "in_progress".
      succeeded = false;
      errorMsg = e instanceof Error ? e.message : String(e);
      logger.error("build_mode_todo_failed", {
        event: "stageExecute",
        metadata: { todo_id: next.id, error: errorMsg },
      });
    }

    if (succeeded) {
      next.status = "completed";
      next.evidence = state.tool_capable
        ? `${toolCallsCount} tool call(s) made`
        : `${toolCallsCount} file(s) written via fallback`;
      consecutiveZeroToolCallTodos = 0;
      emit(input.session_id, "build.todo.done", { todo: next });
    } else {
      next.failure_count++;
      if (next.failure_count >= MAX_TODO_RETRIES) {
        next.status = "failed";
        consecutiveZeroToolCallTodos++;
        emit(input.session_id, "build.todo.failed", { todo: next, reason: errorMsg });
      } else {
        next.status = "pending";
        emit(input.session_id, "build.todo.retry", { todo: next, attempt: next.failure_count });
      }
    }

    await saveState(input.session_id, state);

    // Early bailout — if N consecutive todos fail completely, this model can't
    // do the job. Stop so the user can switch models instead of waiting through
    // the remaining iterations.
    if (consecutiveZeroToolCallTodos >= ABORT_AFTER_CONSECUTIVE_ZERO) {
      state.stage = "failed";
      state.error = `Model failed ${consecutiveZeroToolCallTodos} consecutive todos with no progress. ${
        state.tool_capable
          ? "Try MODEL_SUPPORTS_TOOLS=false to enable JSON-manifest fallback."
          : "The model cannot produce structured output for this task. Try a larger or more instruction-tuned model."
      }`;
      emit(input.session_id, "session.warning", { message: state.error });
      return state;
    }
  }

  // Hit max iterations — bail out
  if (state.todos.some((t) => t.status === "pending" || t.status === "in_progress")) {
    state.stage = "failed";
    state.error = `Hit iteration limit (${state.max_iterations}) with todos still pending.`;
  } else {
    state.stage = "verify";
  }
  return state;
}
491
+
492
/**
 * Count the session's `tool_calls` rows created at or after the creation time
 * of the given message. Any query error is reported as 0.
 */
async function countToolCallsSince(session_id: string, since_message_id: string): Promise<number> {
  const sql = `SELECT COUNT(*)::int AS n FROM tool_calls
     WHERE session_id = $1
       AND created_at >= (SELECT created_at FROM messages WHERE id = $2)`;

  let count = 0;
  try {
    const { rows } = await pool.query(sql, [session_id, since_message_id]);
    count = rows[0]?.n || 0;
  } catch {
    count = 0;
  }
  return count;
}
505
+
506
+ // ─── Tool-capability probe & JSON-manifest fallback ───────────────────────────
507
+
508
/**
 * Decide whether the model can produce a structured tool call.
 *
 * Order of checks:
 *  1. `MODEL_SUPPORTS_TOOLS` env override ("true" / "false") wins outright.
 *  2. Model ids containing a known-good family name are assumed capable.
 *  3. Otherwise a live smoke test runs: a synthetic user message asks the
 *     model to call the `write` tool once, and the model is considered
 *     capable if any tool call was recorded afterwards.
 *
 * Any error during the live probe is reported as "not capable".
 */
async function probeToolCapability(input: BuildModeInput): Promise<boolean> {
  const override = process.env.MODEL_SUPPORTS_TOOLS;
  if (override === "true") return true;
  if (override === "false") return false;

  // Heuristic: known-good model families
  const knownGoodFamilies = ["gpt-4", "gpt-5", "claude", "gemini-1.5", "gemini-2", "gemini-3"];
  const modelId = input.model_id.toLowerCase();
  if (knownGoodFamilies.some((family) => modelId.includes(family))) {
    return true;
  }

  // For everything else, probe live: one agent call asking for a single
  // trivial tool invocation, then check whether the DB recorded it.
  const probePrompt =
    "PROBE: Write a single line of text 'probe' to a file at .bonecode-probe using the write tool. Do not respond with prose. Call the write tool exactly once.";
  const partData = JSON.stringify({ text: probePrompt, synthetic: true });

  try {
    const probeMsgId = uuid();
    await pool.query(
      `INSERT INTO messages (id, session_id, role) VALUES ($1, $2, 'user')`,
      [probeMsgId, input.session_id]
    );

    const probePartId = uuid();
    await pool.query(
      `INSERT INTO parts (id, message_id, session_id, part_type, data, order_index) VALUES ($1, $2, $3, 'text', $4, 0)`,
      [probePartId, probeMsgId, input.session_id, partData]
    );

    await runAgentLoop({
      session_id: input.session_id,
      message_id: probeMsgId,
      content: probePrompt,
      model_id: input.model_id,
      provider_id: input.provider_id,
      agent_name: "build",
    });

    const recorded = await countToolCallsSince(input.session_id, probeMsgId);
    return recorded > 0;
  } catch {
    return false;
  }
}
562
+
563
/** Outcome of applying one JSON manifest in fallback mode. */
interface FallbackResult {
  /** Whether the attempt counts as a success for the todo. */
  ok: boolean;
  /** Number of files written to disk. */
  filesWritten: number;
  /** Joined error messages, when anything went wrong. */
  error?: string;
}
568
+
569
/**
 * Broadcast a "tool.requested" / "tool.completed" pair on the "part_stream"
 * channel so fallback work shows up in the UI like a regular tool call.
 */
function announceFallbackTool(
  session_id: string,
  tool_name: string,
  requestedInput: Record<string, unknown>,
  completedInput: Record<string, unknown>
): void {
  const tool_call_id = `fallback-${uuid()}`;
  broadcastToChannel("part_stream", {
    type: "tool.requested",
    session_id,
    tool_call_id,
    tool_name,
    tool_input: requestedInput,
  });
  broadcastToChannel("part_stream", {
    type: "tool.completed",
    session_id,
    tool_call_id,
    tool_name,
    tool_input: completedInput,
    duration_ms: 0,
  });
}

/**
 * JSON-manifest fallback for models that can't emit tool calls.
 *
 * Asks the model for a manifest:
 *   { "files": [{ "path": "...", "content": "..." }], "commands": ["..."] }
 *
 * Then applies it directly via fs/exec inside the session's directory (or the
 * project worktree, or the process cwd as a last resort). The model never has
 * to format an OpenAI-style tool call — it just produces a structured JSON
 * document.
 *
 * Differences from the previous version:
 *  - `ok` reflects work that actually succeeded (a file written or a command
 *    that exited cleanly); merely listing commands that all failed no longer
 *    counts as success;
 *  - commands run through the asynchronous `exec`, so a slow command does not
 *    block the event loop for up to a minute;
 *  - the containment check uses `path.relative` (a plain string-prefix test
 *    would also accept a sibling directory sharing the prefix), and only real
 *    ".." path segments are rejected, not any name containing "..".
 *
 * SECURITY NOTE(review): commands come straight from model output and are run
 * through a shell without sandboxing. Confirm this is acceptable for every
 * deployment that enables the fallback.
 *
 * @returns ok flag, number of files written, and joined error messages (if any).
 */
async function executeFallback(
  state: BuildState,
  todo: BuildTodo,
  input: BuildModeInput
): Promise<FallbackResult> {
  const fs = require("fs/promises");
  const path = require("path");
  const { exec } = require("child_process");
  const { promisify } = require("util");
  const runCommand = promisify(exec);

  // Resolve worktree from the session
  const sessionRow = await pool.query(
    `SELECT s.directory, p.worktree FROM sessions s
     LEFT JOIN projects p ON p.id = s.project_id
     WHERE s.id = $1`,
    [input.session_id]
  );
  const worktree =
    sessionRow.rows[0]?.directory || sessionRow.rows[0]?.worktree || process.cwd();
  const root: string = path.resolve(worktree);

  // Build a focused prompt with the design context so the model knows what
  // it's contributing to.
  const designContext = state.design
    ? [
        `Design goal: ${state.design.goal}`,
        `Constraints: ${state.design.constraints.join("; ") || "(none)"}`,
        `Expected artifacts: ${state.design.artifacts.join(", ") || "(unspecified)"}`,
      ].join("\n")
    : "";

  const completedFiles = state.todos
    .filter((t) => t.status === "completed" && t.evidence)
    .map((t) => `- ${t.title}`)
    .join("\n");

  const result = await askJson<{
    files?: Array<{ path: string; content: string }>;
    commands?: string[];
  }>({
    model_id: input.model_id,
    provider_id: input.provider_id,
    system: [
      "You are completing one task in a project build. Produce a JSON manifest of the files to create or update and shell commands to run for THIS task only.",
      "",
      "RULES:",
      "- Output a single JSON object: { \"files\": [...], \"commands\": [...] }",
      "- Each file must have a relative `path` and full `content`. Do not abbreviate file content.",
      "- File paths must be relative to the project root (no leading slash, no '..').",
      "- Commands run in the project root. Use them only for compilation, package install, or migrations.",
      "- Do not include explanatory prose. The JSON IS the entire response.",
    ].join("\n"),
    user: [
      `<design>`,
      designContext,
      `</design>`,
      ``,
      `<completed-tasks>`,
      completedFiles || "(none yet)",
      `</completed-tasks>`,
      ``,
      `<current-task>`,
      `Title: ${todo.title}`,
      `Description: ${todo.description}`,
      `</current-task>`,
    ].join("\n"),
    schema_hint: `{ "files": [{ "path": string, "content": string }], "commands": string[] }`,
  });

  if (!result || (!Array.isArray(result.files) && !Array.isArray(result.commands))) {
    return { ok: false, filesWritten: 0, error: "Model did not produce a valid manifest" };
  }

  let filesWritten = 0;
  let commandsRun = 0;
  const errors: string[] = [];

  // Write files
  for (const f of Array.isArray(result.files) ? result.files : []) {
    if (!f || typeof f.path !== "string" || typeof f.content !== "string") continue;

    // Sanity: no traversal segments, no absolute paths
    const hasTraversal = f.path.split(/[\\/]+/).includes("..");
    if (hasTraversal || path.isAbsolute(f.path)) {
      errors.push(`refused unsafe path: ${f.path}`);
      continue;
    }

    // Defense in depth: the resolved target must still live under the root.
    const target: string = path.resolve(root, f.path);
    const relative: string = path.relative(root, target);
    if (relative === ".." || relative.startsWith(`..${path.sep}`) || path.isAbsolute(relative)) {
      errors.push(`refused path outside worktree: ${f.path}`);
      continue;
    }

    try {
      await fs.mkdir(path.dirname(target), { recursive: true });
      await fs.writeFile(target, f.content, "utf-8");
      filesWritten++;
      // Surface tool events so the TUI shows an Edit/Write line; only the
      // first 200 characters of the content are broadcast.
      announceFallbackTool(
        input.session_id,
        "write",
        { path: f.path, content: f.content.slice(0, 200) },
        { path: f.path }
      );
    } catch (e: unknown) {
      errors.push(`${f.path}: ${e instanceof Error ? e.message : String(e)}`);
    }
  }

  // Run commands
  for (const cmd of Array.isArray(result.commands) ? result.commands : []) {
    if (typeof cmd !== "string" || !cmd.trim()) continue;
    try {
      await runCommand(cmd, { cwd: worktree, timeout: 60_000 });
      commandsRun++;
      announceFallbackTool(input.session_id, "bash", { command: cmd }, { command: cmd });
    } catch (e: unknown) {
      errors.push(`command failed: ${cmd} → ${e instanceof Error ? e.message : String(e)}`);
    }
  }

  return {
    ok: filesWritten > 0 || commandsRun > 0,
    filesWritten,
    error: errors.length ? errors.join("; ") : undefined,
  };
}
726
+
727
+ // ─── Stage 4: Verify ──────────────────────────────────────────────────────────
728
+
729
/**
 * Stage 4: audit every design requirement with a yes/no question.
 *
 * If all requirements are satisfied the build is "done". Otherwise, unless the
 * iteration budget is exhausted, the model is asked for fix-up todos which are
 * appended to the plan and the stage returns to "execute".
 *
 * Robustness: the verdict is taken from the model's JSON, which is untrusted.
 * Only an explicit affirmative (boolean true, or the strings "true"/"yes")
 * counts as satisfied — previously any truthy value, including the string
 * "false", passed. Fix-up todos without a usable title are dropped.
 */
async function stageVerify(state: BuildState, input: BuildModeInput): Promise<BuildState> {
  if (!state.design) {
    state.stage = "failed";
    state.error = "No design to verify against.";
    return state;
  }

  emit(input.session_id, "build.stage", { stage: "verify" });

  const isAffirmative = (value: unknown): boolean =>
    value === true || (typeof value === "string" && /^(?:true|yes)$/i.test(value.trim()));

  const completedWork =
    state.todos
      .filter((t) => t.status === "completed")
      .map((t) => `- ${t.title}`)
      .join("\n") || "(none)";

  const results: VerificationResult[] = [];

  for (const requirement of state.design.requirements) {
    // Ask the model: is this requirement satisfied? Yes/no plus evidence.
    const r = await askJson<{ satisfied: unknown; evidence: unknown }>({
      model_id: input.model_id,
      provider_id: input.provider_id,
      system: [
        "You are auditing whether a single requirement has been satisfied by the project so far.",
        "",
        "RULES:",
        "- Answer with a yes/no verdict and one-line evidence.",
        "- Evidence should reference concrete files or behavior.",
        "- If you cannot tell, set satisfied=false and explain in evidence what's missing.",
        "- Do not assume anything not visible in the project.",
      ].join("\n"),
      user: [
        `Requirement: ${requirement}`,
        ``,
        `Original goal: ${state.design.goal}`,
        ``,
        `Expected artifacts:\n${state.design.artifacts.map((a) => `- ${a}`).join("\n")}`,
        ``,
        `Completed work:\n${completedWork}`,
      ].join("\n"),
      schema_hint: `{ "satisfied": boolean, "evidence": string }`,
    });

    const result: VerificationResult = r
      ? {
          requirement,
          satisfied: isAffirmative(r.satisfied),
          evidence: typeof r.evidence === "string" ? r.evidence : "(no evidence given)",
        }
      : { requirement, satisfied: false, evidence: "Could not verify (no response)." };
    results.push(result);
    emit(input.session_id, "build.verify.item", {
      requirement,
      satisfied: result.satisfied,
      evidence: result.evidence,
    });
  }

  state.verification_results = results;

  const failures = results.filter((r) => !r.satisfied);
  if (failures.length === 0) {
    state.stage = "done";
    emit(input.session_id, "build.done", { verifications: results });
    return state;
  }

  if (state.iteration >= state.max_iterations) {
    state.stage = "failed";
    state.error = `${failures.length} requirement(s) unsatisfied after ${state.max_iterations} iterations.`;
    return state;
  }

  // Re-plan only the unsatisfied requirements: generate new todos for them.
  const newPlan = await askJson<{ todos: Array<{ title: string; description: string }> }>({
    model_id: input.model_id,
    provider_id: input.provider_id,
    system: "Some requirements are not yet satisfied. Produce a short todo list to fix them.",
    user: [
      `Unsatisfied requirements:`,
      ...failures.map((f) => `- ${f.requirement} (missing: ${f.evidence})`),
    ].join("\n"),
    schema_hint: `{ "todos": [{ "title": string, "description": string }] }`,
  });

  const proposed = newPlan && Array.isArray(newPlan.todos) ? newPlan.todos : [];
  const fixups = proposed.filter(
    (t) => t !== null && typeof t === "object" && typeof t.title === "string" && t.title.trim() !== ""
  );

  if (fixups.length > 0) {
    for (const t of fixups) {
      state.todos.push({
        id: uuid(),
        title: t.title,
        description: typeof t.description === "string" ? t.description : "",
        status: "pending",
        failure_count: 0,
      });
    }
    state.stage = "execute";
    emit(input.session_id, "build.replan", { added: fixups.length });
  } else {
    state.stage = "failed";
    state.error = `Cannot generate fix-up tasks for ${failures.length} unsatisfied requirement(s).`;
  }
  return state;
}
821
+
822
+ // ─── Driver ───────────────────────────────────────────────────────────────────
823
+
824
/**
 * Drive a build through its stages until it finishes, fails, or needs the user.
 *
 * Resumes the session's saved state when there is an unfinished build.
 * A saved state that is already terminal ("done" or "failed") is NOT resumed:
 * the call starts a fresh build from `input.prompt`. Previously a finished
 * build was reloaded and returned unchanged, so a session could never run a
 * second build or retry a failed one.
 *
 * If the saved build is waiting on clarification questions and the prompt
 * differs from the original one, the prompt is treated as the user's answer.
 *
 * State is saved after every stage. The loop also stops when a stage makes no
 * progress or after a fixed number of stage transitions.
 *
 * @returns the state reached when the loop stopped.
 */
export async function runBuildMode(input: BuildModeInput): Promise<BuildState> {
  const saved = await loadState(input.session_id);
  const resumable = saved !== null && saved.stage !== "done" && saved.stage !== "failed";

  let state: BuildState = resumable
    ? saved
    : {
        stage: "clarify",
        original_prompt: input.prompt,
        design: null,
        todos: [],
        iteration: 0,
        max_iterations: 30,
      };

  // If we're already in clarify with pending questions and the user sends a
  // new prompt, treat the prompt as the answer and continue.
  if (state.stage === "clarify" && state.pending_clarification && input.prompt !== state.original_prompt) {
    const next = await continueAfterClarification(input.session_id, input.prompt, input.model_id, input.provider_id);
    if (next) state = next;
  }

  // Run stages until we hit a terminal state, a user prompt, or the safety cap.
  // Each stage advances state.stage. We save after every stage.
  const MAX_STAGE_RUNS = 50;
  for (let runs = 0; runs < MAX_STAGE_RUNS; runs++) {
    if (state.stage === "done" || state.stage === "failed") break;
    const before = state.stage;

    switch (state.stage) {
      case "clarify":
        state = await stageClarify(state, input);
        break;
      case "plan":
        state = await stagePlan(state, input);
        break;
      case "execute":
        state = await stageExecute(state, input);
        break;
      case "verify":
        state = await stageVerify(state, input);
        break;
    }
    await saveState(input.session_id, state);

    // If we asked the user questions, exit and wait for their answer.
    if (before === "clarify" && state.pending_clarification) break;
    if (state.stage === before) break; // safety: no progress
  }

  return state;
}
870
+
871
+ // ─── Trigger detection ────────────────────────────────────────────────────────
872
+
873
/**
 * Heuristic gate for build mode.
 *
 * Returns true when the prompt reads like a project-scoped request
 * ("build me", "create a full ...", "design and implement", "end-to-end", ...)
 * rather than an ad-hoc question. Matching is case-insensitive, and prompts
 * shorter than 20 characters (after trimming) never qualify.
 */
export function isBuildPrompt(prompt: string): boolean {
  const normalized = prompt.toLowerCase().trim();
  if (normalized.length < 20) {
    return false;
  }

  const patterns: RegExp[] = [
    /\bbuild\s+(me|a|an|the)\b/,
    /\bcreate\s+(a|an|the|me)\s+(?:full|complete|whole|new)\b/,
    /\bcreate\s+(?:a|an|the)\b.*\bfrom\s+scratch\b/,
    /\bdesign\s+and\s+(?:implement|build|create)\b/,
    /\bimplement\s+(?:a|an|the)\s+(?:full|complete|whole)\b/,
    /\bmake\s+(?:a|an|the)\s+(?:full|complete|whole|new)\b/,
    /\bproject\s+(?:from\s+scratch|to)\b/,
    /\bsimulation\s+(?:with|using|of)\b/,
    /\bbackend\s+(?:for|with|using)\b/,
    /\bspec(?:ification)?\s+(?:for|of)\b/,
    /\bend[- ]to[- ]end\b/,
  ];

  for (const pattern of patterns) {
    if (pattern.test(normalized)) {
      return true;
    }
  }
  return false;
}