npm - botholomew - Versions diffs - 0.9.11 → 0.10.0 - Mend

botholomew 0.9.11 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (20)

package/README.md +8 -5
package/package.json +15 -4
package/src/chat/agent.ts +1 -1
package/src/commands/context.ts +16 -6
package/src/commands/db.ts +22 -11
package/src/commands/prepare.ts +3 -7
package/src/config/loader.ts +0 -3
package/src/config/schemas.ts +2 -4
package/src/constants.ts +2 -2
package/src/context/embedder-impl.ts +29 -31
package/src/context/ingest.ts +1 -10
package/src/context/refresh.ts +2 -3
package/src/db/doctor.ts +37 -9
package/src/db/reembed.ts +113 -0
package/src/db/schema.ts +7 -0
package/src/db/sql/18-reset_embeddings_for_local.sql +39 -0
package/src/tui/components/StatusBar.tsx +23 -15
package/src/worker/llm.ts +19 -0
package/src/worker/prompt.ts +1 -1
package/src/worker/tick.ts +3 -0

package/README.md CHANGED Viewed

@@ -88,9 +88,9 @@ bun run dev -- --help
 # 1. Initialize a project in the current directory
 botholomew init
-# 2. Add your API keys to .botholomew/config.json, or export env vars
+# 2. Add your Anthropic key to .botholomew/config.json, or export it
 export ANTHROPIC_API_KEY=sk-ant-...
-export OPENAI_API_KEY=sk-...     # used for embeddings
+# Embeddings run locally — no API key required.
 # 3. Queue some work
 botholomew task add "Summarize every markdown file in ~/notes"
@@ -144,7 +144,7 @@ Everything the agent can touch is here. No surprises.
 | `botholomew chat` | Interactive Ink/React TUI |
 | `botholomew task list\|add\|view\|update\|reset\|delete` | Manage the task queue |
 | `botholomew schedule list\|add\|view\|enable\|disable\|trigger\|delete` | Recurring work |
-| `botholomew context add\|list\|search\|chunks\|refresh\|delete` | Ingest & browse knowledge (files, folders, URLs); also exposes the agent's `read`/`write`/`tree`/`edit`/… tools as subcommands |
+| `botholomew context add\|list\|search\|chunks\|refresh\|reembed\|delete` | Ingest & browse knowledge (files, folders, URLs); `reembed` rebuilds every vector after upgrading the embedding model; also exposes the agent's `read`/`write`/`tree`/`edit`/… tools as subcommands |
 | `botholomew capabilities` | Rescan built-in + MCPX tools and rewrite `.botholomew/capabilities.md` |
 | `botholomew mcpx servers\|list\|add\|remove\|info\|search\|exec\|ping\|auth\|deauth\|import-global\|…` | Configure external MCP servers (passthrough to `mcpx`) |
 | `botholomew skill list\|show\|create\|validate` | Manage slash-command skills |
@@ -193,6 +193,8 @@ See [docs/architecture.md](docs/architecture.md) for a deeper tour.
 ## Deep dives
+> The full docs site is published at **[www.botholomew.com](https://www.botholomew.com)**.
 Topics worth understanding in detail:
 - **[Architecture](docs/architecture.md)** — workers, chat, and how
@@ -234,8 +236,9 @@ Topics worth understanding in detail:
   built-in FTS extension for BM25 keyword search
 - **[Anthropic SDK](https://docs.anthropic.com/en/api/client-sdks)** for
   Claude — the reasoning model
-- **OpenAI embeddings API** (`text-embedding-3-small`, 1536-dim) for
-  semantic search
+- **[`@huggingface/transformers`](https://huggingface.co/docs/transformers.js)**
+  for local embeddings (default `Xenova/bge-small-en-v1.5`, 384-dim) —
+  no API key, weights cached on first run
 - **[MCPX](https://github.com/evantahler/mcpx)** for external tools
 - **[Ink 6](https://github.com/vadimdemedes/ink)** + **React 19** for the
   terminal UI

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "botholomew",
-  "version": "0.9.11",
+  "version": "0.10.0",
   "description": "An autonomous AI agent for knowledge work — works your task queue while you sleep.",
   "type": "module",
   "bin": {
@@ -20,12 +20,16 @@
     "dev:demo": "bun run src/cli.ts chat -p 'learn everything you can about me from the connected MCP services and then save what you'\\''ve learned about me to context'",
     "test": "bun test",
     "lint": "tsc --noEmit && biome check .",
-    "capture": "bun run scripts/capture.ts"
+    "capture": "bun run scripts/capture.ts",
+    "docs:dev": "vitepress dev docs",
+    "docs:build": "vitepress build docs",
+    "docs:preview": "vitepress preview docs"
   },
   "dependencies": {
     "@anthropic-ai/sdk": "^0.88.0",
     "@duckdb/node-api": "^1.5.2-r.1",
     "@evantahler/mcpx": "0.18.6",
+    "@huggingface/transformers": "^4.2.0",
     "ansis": "^4.2.0",
     "commander": "^14.0.0",
     "gray-matter": "^4.0.3",
@@ -43,6 +47,13 @@
     "@types/bun": "latest",
     "@types/react": "^19.1.0",
     "@types/uuid": "^11.0.0",
-    "typescript": "^6.0.2"
-  }
+    "typescript": "^6.0.2",
+    "vitepress": "^1.5.0",
+    "vitepress-plugin-llms": "^1.12.1",
+    "vue": "^3.5.0"
+  },
+  "trustedDependencies": [
+    "onnxruntime-node",
+    "protobufjs"
+  ]
 }

package/src/chat/agent.ts CHANGED Viewed

@@ -86,7 +86,7 @@ export async function buildChatSystemPrompt(
   const dbPath = options?.dbPath;
   const config = options?.config;
-  if (dbPath && config?.openai_api_key && keywordSource) {
+  if (dbPath && config && keywordSource) {
     try {
       const queryVec = await embedSingle(keywordSource, config);
       const results = await withDb(dbPath, (conn) =>

package/src/commands/context.ts CHANGED Viewed

@@ -6,6 +6,7 @@ import { isText } from "istextorbinary";
 import { createSpinner } from "nanospinner";
 import { loadConfig } from "../config/loader.ts";
 import type { BotholomewConfig } from "../config/schemas.ts";
+import { getDbPath } from "../constants.ts";
 import { generateDescription } from "../context/describer.ts";
 import {
   type DriveTarget,
@@ -36,6 +37,7 @@ import {
   upsertContextItem,
 } from "../db/context.ts";
 import { getEmbeddingsForItem, hybridSearch } from "../db/embeddings.ts";
+import { reembedMissingVectors } from "../db/reembed.ts";
 import { createMcpxClient } from "../mcpx/client.ts";
 import { logger } from "../utils/logger.ts";
 import {
@@ -425,10 +427,7 @@ export function registerContextCommand(program: Command) {
         skipped.push(...dedupSkipped);
-        if (itemIds.length === 0 || !config.openai_api_key) {
-          if (!config.openai_api_key) {
-            logger.dim("Skipping embeddings (no OpenAI API key configured).");
-          }
+        if (itemIds.length === 0) {
           const msg = buildSummary({
             added: itemIds.length,
             refreshed: refreshedCount,
@@ -693,12 +692,23 @@ export function registerContextCommand(program: Command) {
           logger.success(
             `Refreshed ${result.updated} item(s), ${result.chunks} chunk(s) re-indexed.`,
           );
-        } else if (result.embeddings_skipped) {
-          logger.dim("Skipping embeddings (no OpenAI API key configured).");
         }
       }),
     );
+  ctx
+    .command("reembed")
+    .description(
+      "Recompute every embedding using the configured local model. Run this after upgrading or after changing embedding_model.",
+    )
+    .action(() =>
+      withDb(program, async (_conn, dir) => {
+        const config = await loadConfig(dir);
+        const dbPath = getDbPath(dir);
+        await reembedMissingVectors(dbPath, config, { mode: "all" });
+      }),
+    );
   registerContextToolSubcommands(ctx);
 }

package/src/commands/db.ts CHANGED Viewed

@@ -3,11 +3,12 @@ import type { Command } from "commander";
 import { getDbPath } from "../constants.ts";
 import { withDb as coreWithDb } from "../db/connection.ts";
 import {
+  isPidAlive,
   type ProbeResult,
   probeAllTables,
   repairDatabase,
 } from "../db/doctor.ts";
-import { listWorkers } from "../db/workers.ts";
+import { listWorkers, type Worker } from "../db/workers.ts";
 import { logger } from "../utils/logger.ts";
 function statusBadge(status: ProbeResult["status"]): string {
@@ -78,28 +79,38 @@ async function doctor(program: Command, repair: boolean): Promise<void> {
     process.exit(1);
   }
-  // Repair requires exclusive access — refuse if any worker is registered
-  // as running, otherwise the EXPORT would race with the worker's writes.
+  // Repair requires exclusive access — refuse if any worker is actually
+  // running, otherwise the EXPORT would race with the worker's writes.
+  // Stale `status='running'` rows whose PID is dead (the exact case that
+  // tends to coexist with workers-table corruption) are reported but do
+  // not block repair: trying to flip them to `stopped` would just trip
+  // the same corruption we're about to fix.
   const running = await coreWithDb(dbPath, async (conn) => {
     try {
       return await listWorkers(conn, { status: "running" });
     } catch {
-      // If listWorkers itself trips the corruption we're about to fix,
-      // fall through and let repair proceed; the user is on their own
-      // for confirming no live workers, which `worker reap` would also
-      // be unable to do anyway.
-      return [];
+      return [] as Worker[];
     }
   });
-  if (running.length > 0) {
+  const live = running.filter((w) => isPidAlive(w.pid));
+  const stale = running.filter((w) => !isPidAlive(w.pid));
+  if (live.length > 0) {
     logger.error(
-      `${running.length} worker(s) registered as running. Stop them first: botholomew worker stop <id>`,
+      `${live.length} worker(s) actually running. Stop them first: botholomew worker stop <id>`,
     );
-    for (const w of running) {
+    for (const w of live) {
       logger.dim(`  ${w.id} (pid ${w.pid}, mode=${w.mode})`);
     }
     process.exit(1);
   }
+  if (stale.length > 0) {
+    logger.warn(
+      `${stale.length} worker row(s) marked 'running' but PID is dead — proceeding (rows will be carried through repair, then reapable):`,
+    );
+    for (const w of stale) {
+      logger.dim(`  ${w.id} (pid ${w.pid}, mode=${w.mode})`);
+    }
+  }
   logger.phase("repair", "EXPORT DATABASE → swap files → IMPORT DATABASE");
   const result = await repairDatabase(dbPath);

package/src/commands/prepare.ts CHANGED Viewed

@@ -12,14 +12,10 @@ export function registerPrepareCommand(program: Command) {
       withDb(program, async (_conn, dir) => {
         logger.info("Preparing Botholomew...");
         const config = await loadConfig(dir);
-        if (!config.openai_api_key) {
-          logger.error(
-            "OpenAI API key not set. Set openai_api_key in config or OPENAI_API_KEY env var.",
-          );
-          process.exit(1);
-        }
         await embedSingle("test", config);
-        logger.success("OpenAI embeddings API is reachable and configured.");
+        logger.success(
+          `Embedding model ${config.embedding_model} is loaded and ready.`,
+        );
       }),
     );
 }

package/src/config/loader.ts CHANGED Viewed

@@ -19,9 +19,6 @@ export async function loadConfig(
   if (process.env.ANTHROPIC_API_KEY) {
     config.anthropic_api_key = process.env.ANTHROPIC_API_KEY;
   }
-  if (process.env.OPENAI_API_KEY) {
-    config.openai_api_key = process.env.OPENAI_API_KEY;
-  }
   setLogLevel(config.log_level);

package/src/config/schemas.ts CHANGED Viewed

@@ -1,6 +1,5 @@
 export interface BotholomewConfig {
   anthropic_api_key?: string;
-  openai_api_key?: string;
   model?: string;
   chunker_model?: string;
   embedding_model?: string;
@@ -20,11 +19,10 @@ export interface BotholomewConfig {
 export const DEFAULT_CONFIG: Required<BotholomewConfig> = {
   anthropic_api_key: "",
-  openai_api_key: "",
   model: "claude-opus-4-20250514",
   chunker_model: "claude-haiku-4-5-20251001",
-  embedding_model: "text-embedding-3-small",
-  embedding_dimension: 1536,
+  embedding_model: "Xenova/bge-small-en-v1.5",
+  embedding_dimension: 384,
   tick_interval_seconds: 300,
   max_tick_duration_seconds: 120,
   system_prompt_override: "",

package/src/constants.ts CHANGED Viewed

@@ -18,8 +18,8 @@ export const CONFIG_FILENAME = "config.json";
 export const MCPX_DIR = "mcpx";
 export const SKILLS_DIR = "skills";
 export const MCPX_SERVERS_FILENAME = "servers.json";
-export const EMBEDDING_DIMENSION = 1536;
-export const EMBEDDING_MODEL = "text-embedding-3-small";
+export const EMBEDDING_DIMENSION = 384;
+export const EMBEDDING_MODEL = "Xenova/bge-small-en-v1.5";
 export function getBotholomewDir(projectDir: string): string {
   return join(projectDir, BOTHOLOMEW_DIR);

package/src/context/embedder-impl.ts CHANGED Viewed

@@ -1,18 +1,36 @@
+import {
+  type FeatureExtractionPipeline,
+  pipeline,
+} from "@huggingface/transformers";
 import type { BotholomewConfig } from "../config/schemas.ts";
+import { logger } from "../utils/logger.ts";
 type EmbedFn = (
   texts: string[],
   config: Required<BotholomewConfig>,
 ) => Promise<number[][]>;
-interface OpenAIEmbeddingResponse {
-  data: { embedding: number[]; index: number }[];
-  usage: { total_tokens: number };
+// Singleton pipeline keyed by model name. Loading the model is expensive
+// (downloads weights on first run, then ~hundreds of ms to instantiate the
+// ONNX runtime), so we hold one per model for the life of the process.
+const pipelinePromises = new Map<string, Promise<FeatureExtractionPipeline>>();
+async function getPipeline(model: string): Promise<FeatureExtractionPipeline> {
+  let p = pipelinePromises.get(model);
+  if (!p) {
+    logger.info(
+      `Loading embedding model ${model} (first run downloads weights)`,
+    );
+    p = pipeline("feature-extraction", model);
+    pipelinePromises.set(model, p);
+  }
+  return p;
 }
 /**
- * Embed multiple texts using the OpenAI embeddings API.
- * Returns an array of float vectors with the configured dimension.
+ * Embed multiple texts using a local @huggingface/transformers feature-extraction
+ * pipeline. Returns an array of L2-normalized float vectors with the model's
+ * native dimension (must match `config.embedding_dimension`).
  */
 export async function embed(
   texts: string[],
@@ -20,37 +38,17 @@ export async function embed(
 ): Promise<number[][]> {
   if (texts.length === 0) return [];
-  if (!config.openai_api_key) {
-    throw new Error(
-      "OpenAI API key is required for embeddings. Set openai_api_key in config or OPENAI_API_KEY env var.",
-    );
-  }
-  const response = await fetch("https://api.openai.com/v1/embeddings", {
-    method: "POST",
-    headers: {
-      Authorization: `Bearer ${config.openai_api_key}`,
-      "Content-Type": "application/json",
-    },
-    body: JSON.stringify({
-      input: texts,
-      model: config.embedding_model,
-      dimensions: config.embedding_dimension,
-    }),
-  });
+  const extractor = await getPipeline(config.embedding_model);
+  const output = await extractor(texts, { pooling: "mean", normalize: true });
+  const data = output.tolist() as number[][];
-  if (!response.ok) {
-    const body = await response.text();
+  if (data[0] && data[0].length !== config.embedding_dimension) {
     throw new Error(
-      `OpenAI embeddings API error (${response.status}): ${body}`,
+      `Embedding model ${config.embedding_model} returned ${data[0].length}-dim vectors, but embedding_dimension is set to ${config.embedding_dimension}. Update embedding_dimension in config and re-embed.`,
     );
   }
-  const result = (await response.json()) as OpenAIEmbeddingResponse;
-  // Sort by index to ensure order matches input
-  const sorted = result.data.sort((a, b) => a.index - b.index);
-  return sorted.map((d) => d.embedding);
+  return data;
 }
 /**

package/src/context/ingest.ts CHANGED Viewed

@@ -44,16 +44,7 @@ export async function prepareIngestion(
     return null;
   }
-  // Resolve the embed function before chunking — if we can't embed, skip early
-  const doEmbed =
-    embedFn ??
-    (config.openai_api_key
-      ? (texts: string[]) => defaultEmbed(texts, config)
-      : null);
-  if (!doEmbed) {
-    logger.debug("ingest: skipping embeddings (no OpenAI API key configured)");
-    return null;
-  }
+  const doEmbed = embedFn ?? ((texts: string[]) => defaultEmbed(texts, config));
   const chunks = await chunk(item.content, item.mime_type, config);
   if (chunks.length === 0) return null;

package/src/context/refresh.ts CHANGED Viewed

@@ -132,8 +132,7 @@ export async function refreshContextItems(
   const unchanged = results.filter((r) => r.status === "unchanged").length;
   const missing = results.filter((r) => r.status === "missing").length;
-  const hasEmbedder = !!embedFn || !!config.openai_api_key;
-  if (toReembed.length === 0 || !hasEmbedder) {
+  if (toReembed.length === 0) {
     return {
       checked: refreshable.length,
       updated,
@@ -141,7 +140,7 @@ export async function refreshContextItems(
       missing,
       reembedded: 0,
       chunks: 0,
-      embeddings_skipped: toReembed.length > 0 && !hasEmbedder,
+      embeddings_skipped: false,
       items: results,
     };
   }

package/src/db/doctor.ts CHANGED Viewed

@@ -85,12 +85,15 @@ export async function probeTable(
     }
   `;
+  // Discard the child's stderr. When the probe panics, Bun writes a multi-
+  // line crash banner there which would otherwise spill into our table
+  // output via the fallback message. The exit code alone tells us what we
+  // need to know.
   const proc = Bun.spawn(["bun", "-e", script], {
-    stdio: ["ignore", "pipe", "pipe"],
+    stdio: ["ignore", "pipe", "ignore"],
   });
-  const [stdout, stderr, exitCode] = await Promise.all([
+  const [stdout, exitCode] = await Promise.all([
     new Response(proc.stdout).text(),
-    new Response(proc.stderr).text(),
     proc.exited,
   ]);
@@ -103,20 +106,21 @@ export async function probeTable(
     return {
       table,
       status: "missing",
-      message: stdout.slice("MISSING:".length),
+      message: firstLine(stdout.slice("MISSING:".length)),
     };
   }
   if (stdout.startsWith("CORRUPT:")) {
     return {
       table,
       status: "corrupt",
-      message: stdout.slice("CORRUPT:".length),
+      message: firstLine(stdout.slice("CORRUPT:".length)),
     };
   }
-  const reason =
-    stderr.trim() ||
-    `child exited with code ${exitCode} and no verdict (likely native panic)`;
-  return { table, status: "corrupt", message: reason };
+  return {
+    table,
+    status: "corrupt",
+    message: `child exited with code ${exitCode} (likely native panic)`,
+  };
 }
 /**
@@ -212,3 +216,27 @@ async function pathExists(p: string): Promise<boolean> {
     return false;
   }
 }
+function firstLine(s: string): string {
+  const trimmed = s.trim();
+  const nl = trimmed.indexOf("\n");
+  return nl === -1 ? trimmed : trimmed.slice(0, nl);
+}
+/**
+ * Send signal 0 to test whether `pid` corresponds to a live process. Returns
+ * false on ESRCH (no such process) and on any other error (including EPERM,
+ * which we conservatively treat as "not ours, not relevant"). Used by the
+ * doctor's safety gate to distinguish workers actually running from rows
+ * that say `status = 'running'` because the worker crashed before flipping
+ * its row to `stopped` or `dead`.
+ */
+export function isPidAlive(pid: number): boolean {
+  if (!pid || pid < 1) return false;
+  try {
+    process.kill(pid, 0);
+    return true;
+  } catch {
+    return false;
+  }
+}

package/src/db/reembed.ts ADDED Viewed

@@ -0,0 +1,113 @@
+import type { BotholomewConfig } from "../config/schemas.ts";
+import { embed } from "../context/embedder.ts";
+import { logger } from "../utils/logger.ts";
+import { withDb } from "./connection.ts";
+import { rebuildSearchIndex } from "./embeddings.ts";
+interface PendingRow {
+  id: string;
+  chunk_content: string | null;
+  title: string;
+  description: string;
+  drive: string | null;
+  path: string | null;
+}
+const BATCH_SIZE = 32;
+function buildEmbeddingInput(row: PendingRow): string {
+  const parts: string[] = [];
+  if (row.title) parts.push(`Title: ${row.title}`);
+  if (row.description) parts.push(`Description: ${row.description}`);
+  if (row.drive && row.path) parts.push(`Source: ${row.drive}:${row.path}`);
+  if (row.chunk_content) parts.push(row.chunk_content);
+  return parts.join("\n");
+}
+interface ReembedOptions {
+  /**
+   * `"missing"` (default) — only re-embed rows where `embedding IS NULL`.
+   * `"all"` — re-embed every row, including ones that already have a vector.
+   *           Use this after changing `embedding_model` so old vectors don't
+   *           sit alongside new ones in a different space.
+   */
+  mode?: "missing" | "all";
+}
+/**
+ * Recompute embeddings for rows in the embeddings table.
+ *
+ * Default mode (`"missing"`) only touches NULL rows — the case after migration
+ * 18 leaves existing rows with no vector. The `context reembed` CLI command
+ * passes `mode: "all"` to force a full rebuild after the user changes
+ * `embedding_model`.
+ *
+ * Each batch is its own withDb so the file lock releases between embedding
+ * calls — long sweeps don't block other workers from acquiring the DB.
+ */
+export async function reembedMissingVectors(
+  dbPath: string,
+  config: Required<BotholomewConfig>,
+  options: ReembedOptions = {},
+): Promise<void> {
+  const mode = options.mode ?? "missing";
+  const filter = mode === "all" ? "" : "WHERE embedding IS NULL";
+  const total = await withDb(dbPath, async (conn) => {
+    const row = await conn.queryGet<{ count: number }>(
+      `SELECT count(*)::INTEGER AS count FROM embeddings ${filter}`,
+    );
+    return row?.count ?? 0;
+  });
+  if (total === 0) {
+    logger.info("No embeddings to recompute.");
+    return;
+  }
+  logger.info(
+    `re-embedding ${total} row${total === 1 ? "" : "s"} with model ${config.embedding_model}`,
+  );
+  let processed = 0;
+  while (processed < total) {
+    const batch = await withDb(dbPath, async (conn) => {
+      const offsetClause = mode === "all" ? `LIMIT ?1 OFFSET ?2` : `LIMIT ?1`;
+      const sql = `SELECT e.id, e.chunk_content, e.title, e.description, ci.drive, ci.path
+         FROM embeddings e
+         LEFT JOIN context_items ci ON ci.id = e.context_item_id
+         ${filter}
+         ORDER BY e.id
+         ${offsetClause}`;
+      return mode === "all"
+        ? conn.queryAll<PendingRow>(sql, BATCH_SIZE, processed)
+        : conn.queryAll<PendingRow>(sql, BATCH_SIZE);
+    });
+    if (batch.length === 0) break;
+    const inputs = batch.map(buildEmbeddingInput);
+    const vectors = await embed(inputs, config);
+    await withDb(dbPath, async (conn) => {
+      for (let i = 0; i < batch.length; i++) {
+        const row = batch[i];
+        const vec = vectors[i];
+        if (!row || !vec) continue;
+        await conn.queryRun(
+          `UPDATE embeddings
+           SET embedding = ?1::FLOAT[${config.embedding_dimension}]
+           WHERE id = ?2`,
+          vec,
+          row.id,
+        );
+      }
+    });
+    processed += batch.length;
+    logger.info(`  re-embedded ${processed}/${total}`);
+  }
+  await withDb(dbPath, (conn) => rebuildSearchIndex(conn));
+  logger.success(`re-embed complete (${processed} rows)`);
+}

package/src/db/schema.ts CHANGED Viewed

@@ -2,6 +2,7 @@ import { readdirSync, readFileSync } from "node:fs";
 import { join } from "node:path";
 import { logger } from "../utils/logger.ts";
 import type { DbConnection } from "./connection.ts";
+import { rebuildSearchIndex } from "./embeddings.ts";
 interface Migration {
   id: number;
@@ -83,4 +84,10 @@ export async function migrate(db: DbConnection): Promise<void> {
   if (appliedAny) {
     await db.exec("CHECKPOINT");
   }
+  // Ensure the FTS index exists. Migration 18 drops it (it can't recreate it
+  // in the same SQL run without DuckDB rejecting the dependency commit), and
+  // fresh DBs need it created at least once. `overwrite = 1` makes this
+  // idempotent for DBs that already have a healthy FTS index.
+  await rebuildSearchIndex(db);
 }

package/src/db/sql/18-reset_embeddings_for_local.sql ADDED Viewed

@@ -0,0 +1,39 @@
+-- Switch from OpenAI 1536-dim embeddings to local 384-dim embeddings.
+--
+-- DuckDB encodes array dimension in the column type, so we rebuild the
+-- embeddings table preserving every row's metadata (chunk_content, title,
+-- description, context_item_id, chunk_index, created_at). The vectors
+-- themselves are NULLed and repopulated by `botholomew context reembed`
+-- using the locally-loaded embedding model.
+--
+-- Idempotency: every destructive step uses IF EXISTS so a partial prior
+-- run can be re-attempted cleanly. The FTS index is dropped here but NOT
+-- recreated — `migrate()` calls rebuildSearchIndex once after all SQL
+-- migrations apply, which avoids a same-migration drop-then-create that
+-- DuckDB rejects with "Could not commit creation of dependency, subject
+-- 'stopwords' has been deleted".
+DROP SCHEMA IF EXISTS fts_main_embeddings CASCADE;
+DROP TABLE IF EXISTS embeddings_new;
+CREATE TABLE embeddings_new (
+  id TEXT PRIMARY KEY,
+  context_item_id TEXT NOT NULL,
+  chunk_index INTEGER NOT NULL,
+  chunk_content TEXT,
+  title TEXT NOT NULL,
+  description TEXT NOT NULL DEFAULT '',
+  embedding FLOAT[384],
+  created_at TEXT NOT NULL DEFAULT (current_timestamp::VARCHAR),
+  UNIQUE(context_item_id, chunk_index)
+);
+INSERT INTO embeddings_new (id, context_item_id, chunk_index, chunk_content, title, description, embedding, created_at)
+SELECT id, context_item_id, chunk_index, chunk_content, title, description, NULL, created_at
+FROM embeddings;
+DROP TABLE embeddings;
+ALTER TABLE embeddings_new RENAME TO embeddings;
+CHECKPOINT;

package/src/tui/components/StatusBar.tsx CHANGED Viewed

@@ -33,22 +33,30 @@ export function StatusBar({
   useEffect(() => {
     let mounted = true;
+    // Errors here (e.g. transient DuckDB lock conflicts while a freshly
+    // spawned worker is migrating) must not freeze the count — the next
+    // interval tick will retry. Swallow silently rather than logging
+    // because logger writes to stdout and would corrupt the Ink render.
     const refresh = async () => {
-      const [pending, inProgress, workers] = await withDb(
-        dbPath,
-        async (conn) => [
-          await listTasks(conn, { status: "pending" }),
-          await listTasks(conn, { status: "in_progress" }),
-          await listWorkers(conn, { status: "running" }),
-        ],
-      );
-      if (mounted) {
-        setStatus({
-          workerCount: workers.length,
-          pendingCount: pending.length,
-          inProgressCount: inProgress.length,
-        });
-        onWorkerStatusChange?.(workers.length > 0);
+      try {
+        const [pending, inProgress, workers] = await withDb(
+          dbPath,
+          async (conn) => [
+            await listTasks(conn, { status: "pending" }),
+            await listTasks(conn, { status: "in_progress" }),
+            await listWorkers(conn, { status: "running" }),
+          ],
+        );
+        if (mounted) {
+          setStatus({
+            workerCount: workers.length,
+            pendingCount: pending.length,
+            inProgressCount: inProgress.length,
+          });
+          onWorkerStatusChange?.(workers.length > 0);
+        }
+      } catch {
+        // Keep prior state; next tick will retry.
       }
     };

package/src/worker/llm.ts CHANGED Viewed

@@ -11,12 +11,17 @@ import { getTask, type Task } from "../db/tasks.ts";
 import { logInteraction } from "../db/threads.ts";
 import { registerAllTools } from "../tools/registry.ts";
 import { getTool, type ToolContext, toAnthropicTools } from "../tools/tool.ts";
+import { logger } from "../utils/logger.ts";
 import { fitToContextWindow, getMaxInputTokens } from "./context.ts";
 import { clearLargeResults, maybeStoreResult } from "./large-results.ts";
 import { createLlmClient } from "./llm-client.ts";
 registerAllTools();
+function truncate(s: string, max: number): string {
+  return s.length > max ? `${s.slice(0, max)}…` : s;
+}
 export interface WorkerStreamCallbacks {
   onToken: (text: string) => void;
   onToolStart: (name: string, input: string) => void;
@@ -153,6 +158,9 @@ export async function runAgentLoop(input: {
             tokenCount,
           }),
         );
+        if (!callbacks) {
+          logger.phase("assistant", block.text);
+        }
       }
     }
@@ -175,6 +183,12 @@ export async function runAgentLoop(input: {
     for (const toolUse of toolUseBlocks) {
       const toolInput = JSON.stringify(toolUse.input);
       callbacks?.onToolStart(toolUse.name, toolInput);
+      if (!callbacks) {
+        logger.phase(
+          "tool-call",
+          `${toolUse.name} ${truncate(toolInput, 200)}`,
+        );
+      }
       await withDb(dbPath, (conn) =>
         logInteraction(conn, threadId, {
           role: "assistant",
@@ -222,6 +236,11 @@ export async function runAgentLoop(input: {
           durationMs,
         }),
       );
+      if (!callbacks) {
+        const seconds = (durationMs / 1000).toFixed(1);
+        const status = result.isError ? "err" : "ok";
+        logger.phase("tool-result", `${toolUse.name} ${status} in ${seconds}s`);
+      }
       if (result.terminal && result.agentResult) {
         return result.agentResult;

package/src/worker/prompt.ts CHANGED Viewed

@@ -104,7 +104,7 @@ export async function buildSystemPrompt(
   prompt += await loadPersistentContext(projectDir, taskKeywords);
-  if (task && dbPath && _config?.openai_api_key) {
+  if (task && dbPath && _config) {
     try {
       const query = `${task.name} ${task.description}`;
       const queryVec = await embedSingle(query, _config);

package/src/worker/tick.ts CHANGED Viewed

@@ -133,6 +133,9 @@ async function runClaimedTask(opts: {
   const { projectDir, dbPath, config, mcpxClient, callbacks, task } = opts;
   logger.info(`Claimed task: ${task.name} (${task.id})`);
+  if (!callbacks && task.description) {
+    logger.dim(task.description);
+  }
   callbacks?.onTaskStart(task);
   const threadId = await withDb(dbPath, (conn) =>