npm - botholomew - Versions diffs - 0.9.10 → 0.9.12 - Mend

botholomew 0.9.10 → 0.9.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

package/README.md CHANGED Viewed

@@ -31,8 +31,9 @@ through MCP servers wired up via [MCPX](https://github.com/evantahler/mcpx).
   DuckDB. Copy it, share it, check it in (or `.gitignore` it).
 - **Your data, your disk.** Project state — tasks, threads, ingested
   context, embeddings — lives in `.botholomew/`, indexed in DuckDB with
-  HNSW for vector search. Model calls go direct to Anthropic and OpenAI;
-  any further reach is scoped to the MCP servers you add.
+  BM25 keyword search and `array_cosine_distance` vector search. Model
+  calls go direct to Anthropic and OpenAI; any further reach is scoped to
+  the MCP servers you add.
 - **Extensible.** External tools come from MCP servers via
   [MCPX](https://github.com/evantahler/mcpx) — run them locally (Gmail,
   Slack, GitHub) or connect through an MCP gateway like
@@ -115,12 +116,14 @@ my-project/
     soul.md               # always-loaded identity (not agent-editable)
     beliefs.md            # always-loaded, agent-editable priors
     goals.md              # always-loaded, agent-editable goals
+    capabilities.md       # always-loaded, agent-editable tool inventory
     config.json           # models, tick interval, API keys
     data.duckdb           # tasks, schedules, context, embeddings, logs
     mcpx/servers.json     # external MCP servers (Gmail, Slack, …)
-    skills/               # user-defined slash commands
+    skills/               # slash commands (built-ins + user-defined)
       summarize.md
       standup.md
+      capabilities.md
     logs/                 # per-worker log files (one file per spawned worker)
       <worker-id>.log
 ```
@@ -140,14 +143,14 @@ Everything the agent can touch is here. No surprises.
 | `botholomew worker list\|status\|stop\|kill\|reap` | Inspect and manage running workers |
 | `botholomew chat` | Interactive Ink/React TUI |
 | `botholomew task list\|add\|view\|update\|reset\|delete` | Manage the task queue |
-| `botholomew schedule list\|add\|enable\|trigger\|delete` | Recurring work |
-| `botholomew context add\|list\|view\|search\|refresh\|remove` | Ingest & browse knowledge (files, folders, URLs) |
+| `botholomew schedule list\|add\|view\|enable\|disable\|trigger\|delete` | Recurring work |
+| `botholomew context add\|list\|search\|chunks\|refresh\|delete` | Ingest & browse knowledge (files, folders, URLs); also exposes the agent's `read`/`write`/`tree`/`edit`/… tools as subcommands |
 | `botholomew capabilities` | Rescan built-in + MCPX tools and rewrite `.botholomew/capabilities.md` |
-| `botholomew mcpx servers\|add\|remove\|info\|search\|exec\|ping\|auth\|import-global` | Configure external MCP servers |
+| `botholomew mcpx servers\|list\|add\|remove\|info\|search\|exec\|ping\|auth\|deauth\|import-global\|…` | Configure external MCP servers (passthrough to `mcpx`) |
 | `botholomew skill list\|show\|create\|validate` | Manage slash-command skills |
-| `botholomew context ... \| search ...` | Direct access to the agent's virtual filesystem |
 | `botholomew thread list\|view` | Browse the agent's interaction history |
 | `botholomew nuke context\|tasks\|schedules\|threads\|all` | Bulk-erase sections of the database |
+| `botholomew db doctor [--repair]` | Probe each table for primary-key index corruption; rebuild via EXPORT/IMPORT |
 | `botholomew upgrade` | Self-update |
 All `list` subcommands support `-l, --limit <n>` and `-o, --offset <n>` for pagination.
@@ -175,7 +178,7 @@ All `list` subcommands support `-l, --limit <n>` and `-o, --offset <n>` for pagi
                │  ┌───────────┐ ┌──────────────┐    │
                │  │  tasks    │ │ context_items│    │
                │  │ schedules │ │  embeddings  │    │
-               │  │  workers  │ │   (HNSW)     │    │
+               │  │  workers  │ │  (FTS+vector)│    │
                │  │  threads  │ │              │    │
                │  └───────────┘ └──────────────┘    │
                └─────┬───────────────────────────────┘
@@ -203,8 +206,8 @@ Topics worth understanding in detail:
 - **[The virtual filesystem](docs/virtual-filesystem.md)** — why the agent's
   "files" are actually DuckDB rows, and how `context_read`/`context_write` work.
 - **[Context & hybrid search](docs/context-and-search.md)** — LLM-driven
-  chunking, OpenAI embeddings, and DuckDB's HNSW-accelerated keyword +
-  vector search.
+  chunking, OpenAI embeddings, and DuckDB BM25 + linear-scan vector
+  search merged with reciprocal rank fusion.
 - **[Tasks & schedules](docs/tasks-and-schedules.md)** — the claim loop, DAG
   validation, stale-task recovery, and natural-language recurring schedules.
 - **[The Tool class](docs/tools.md)** — one Zod definition, three consumers
@@ -226,9 +229,9 @@ Topics worth understanding in detail:
 ## Tech stack
 - **[Bun](https://bun.sh)** + TypeScript
-- **[DuckDB](https://duckdb.org)** via `@duckdb/node-api`, with the
-  **[VSS extension](https://duckdb.org/docs/stable/extensions/vss)** for
-  native vector search
+- **[DuckDB](https://duckdb.org)** via `@duckdb/node-api` —
+  `array_cosine_distance()` (core DuckDB) for vector search, plus the
+  built-in FTS extension for BM25 keyword search
 - **[Anthropic SDK](https://docs.anthropic.com/en/api/client-sdks)** for
   Claude — the reasoning model
 - **OpenAI embeddings API** (`text-embedding-3-small`, 1536-dim) for

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "botholomew",
-  "version": "0.9.10",
+  "version": "0.9.12",
   "description": "An autonomous AI agent for knowledge work — works your task queue while you sleep.",
   "type": "module",
   "bin": {

package/src/cli.ts CHANGED Viewed

@@ -6,6 +6,7 @@ import { registerCapabilitiesCommand } from "./commands/capabilities.ts";
 import { registerChatCommand } from "./commands/chat.ts";
 import { registerCheckUpdateCommand } from "./commands/check-update.ts";
 import { registerContextCommand } from "./commands/context.ts";
+import { registerDbCommand } from "./commands/db.ts";
 import { registerInitCommand } from "./commands/init.ts";
 import { registerMcpxCommand } from "./commands/mcpx.ts";
 import { registerNukeCommand } from "./commands/nuke.ts";
@@ -40,6 +41,7 @@ registerThreadCommand(program);
 registerScheduleCommand(program);
 registerChatCommand(program);
 registerContextCommand(program);
+registerDbCommand(program);
 registerCapabilitiesCommand(program);
 registerMcpxCommand(program);
 registerSkillCommand(program);

package/src/commands/db.ts ADDED Viewed

@@ -0,0 +1,123 @@
+import ansis from "ansis";
+import type { Command } from "commander";
+import { getDbPath } from "../constants.ts";
+import { withDb as coreWithDb } from "../db/connection.ts";
+import {
+  isPidAlive,
+  type ProbeResult,
+  probeAllTables,
+  repairDatabase,
+} from "../db/doctor.ts";
+import { listWorkers, type Worker } from "../db/workers.ts";
+import { logger } from "../utils/logger.ts";
+/**
+ * Render a probe status as a colored terminal label: green for `ok`, dim
+ * for `empty` and `missing`, bold red for `corrupt`.
+ */
+function statusBadge(status: ProbeResult["status"]): string {
+  const paint: Record<ProbeResult["status"], (label: string) => string> = {
+    ok: (label) => ansis.green(label),
+    empty: (label) => ansis.dim(label),
+    missing: (label) => ansis.dim(label),
+    corrupt: (label) => ansis.red.bold(label),
+  };
+  // The label text is the status name itself in every case.
+  return paint[status](status);
+}
+/**
+ * Print one indented row per probe result: the table name padded to a
+ * common width, its status badge, and (when present) a dimmed detail
+ * message capped at 200 characters.
+ */
+function printResults(results: ProbeResult[]) {
+  const longestName = Math.max(...results.map((row) => row.table.length));
+  results.forEach((row) => {
+    const label = row.table.padEnd(longestName + 2);
+    let detail = "";
+    if (row.message) {
+      detail = ansis.dim(`  ${row.message.slice(0, 200)}`);
+    }
+    console.log(`  ${label}${statusBadge(row.status)}${detail}`);
+  });
+}
+/**
+ * Attach the `db` command group, with its `doctor` subcommand, to the CLI.
+ *
+ * @param program Root commander program; also handed to `doctor` so it can
+ *   read the program-level options when the subcommand runs.
+ */
+export function registerDbCommand(program: Command) {
+  const dbGroup = program.command("db");
+  dbGroup.description("Inspect and repair the project database");
+  const doctorCommand = dbGroup.command("doctor");
+  doctorCommand.description(
+    "Probe every table for primary-key index corruption and optionally repair via EXPORT/IMPORT",
+  );
+  doctorCommand.option(
+    "-r, --repair",
+    "Rebuild the database file from an export when corruption is detected",
+  );
+  doctorCommand.action((opts) => {
+    // Only an explicit `--repair` flag enables the rebuild.
+    const shouldRepair = opts.repair === true;
+    return doctor(program, shouldRepair);
+  });
+}
+/**
+ * Run the `db doctor` flow: probe every table, print the verdicts, and —
+ * when `repair` is set and corruption was found — rebuild the database.
+ *
+ * Exits the process with code 1 when corruption is found without
+ * `--repair`, or when a worker whose PID is still alive would race the
+ * rebuild. Returns normally when nothing is corrupt or the repair finished.
+ *
+ * @param program Root command; its `dir` option locates the project.
+ * @param repair Whether to rebuild the database when corruption is detected.
+ */
+async function doctor(program: Command, repair: boolean): Promise<void> {
+  const dir = program.opts().dir as string;
+  const dbPath = getDbPath(dir);
+  logger.info(`Probing tables in ${dbPath}`);
+  const results = await probeAllTables(dbPath);
+  printResults(results);
+  const corrupt = results.filter((r) => r.status === "corrupt");
+  if (corrupt.length === 0) {
+    logger.success("No corruption detected.");
+    return;
+  }
+  logger.error(
+    `${corrupt.length} table(s) have corrupted indexes: ${corrupt
+      .map((r) => r.table)
+      .join(", ")}`,
+  );
+  if (!repair) {
+    console.log("");
+    console.log(
+      ansis.yellow(
+        "Re-run with --repair to rebuild the database file (creates a timestamped backup).",
+      ),
+    );
+    process.exit(1);
+  }
+  // Repair requires exclusive access — refuse if any worker is actually
+  // running, otherwise the EXPORT would race with the worker's writes.
+  // Stale `status='running'` rows whose PID is dead (the exact case that
+  // tends to coexist with workers-table corruption) are reported but do
+  // not block repair: trying to flip them to `stopped` would just trip
+  // the same corruption we're about to fix.
+  const running = await coreWithDb(dbPath, async (conn) => {
+    try {
+      return await listWorkers(conn, { status: "running" });
+    } catch {
+      // An unreadable workers table must not block the repair itself.
+      return [] as Worker[];
+    }
+  });
+  // Probe each PID exactly once so every row lands in exactly one bucket;
+  // two independent filter passes could disagree if a process exits (or a
+  // PID is reused) between them.
+  const live: Worker[] = [];
+  const stale: Worker[] = [];
+  for (const w of running) {
+    (isPidAlive(w.pid) ? live : stale).push(w);
+  }
+  if (live.length > 0) {
+    logger.error(
+      `${live.length} worker(s) actually running. Stop them first: botholomew worker stop <id>`,
+    );
+    for (const w of live) {
+      logger.dim(`  ${w.id} (pid ${w.pid}, mode=${w.mode})`);
+    }
+    process.exit(1);
+  }
+  if (stale.length > 0) {
+    logger.warn(
+      `${stale.length} worker row(s) marked 'running' but PID is dead — proceeding (rows will be carried through repair, then reapable):`,
+    );
+    for (const w of stale) {
+      logger.dim(`  ${w.id} (pid ${w.pid}, mode=${w.mode})`);
+    }
+  }
+  logger.phase("repair", "EXPORT DATABASE → swap files → IMPORT DATABASE");
+  const result = await repairDatabase(dbPath);
+  logger.success(
+    `Repaired in ${result.durationMs}ms. Backup: ${result.backupDbPath}`,
+  );
+  logger.dim(
+    "  Re-run `botholomew db doctor` to confirm. Delete the backup once you're sure.",
+  );
+}

package/src/db/doctor.ts ADDED Viewed

@@ -0,0 +1,242 @@
+import { mkdir, rename, rm, stat } from "node:fs/promises";
+import { dirname, join } from "node:path";
+import { withDb } from "./connection.ts";
+/**
+ * Tables we probe for primary-key index integrity. Every user table has a
+ * single-column PK that we exercise with a self-update (SET pk = pk WHERE
+ * pk = ...). DuckDB still walks the index for the SET, which surfaces
+ * "Failed to delete all rows from index" FATAL errors when the index is
+ * out of sync with the row data. `_migrations` is excluded — it is small,
+ * append-only, and rebuilding it would defeat its purpose.
+ *
+ * Each entry pairs a table `name` with the name of its primary-key
+ * column (`pk`). The list is probed in the order written here.
+ */
+export const PROBE_TABLES: ReadonlyArray<{ name: string; pk: string }> = [
+  { name: "workers", pk: "id" },
+  { name: "tasks", pk: "id" },
+  { name: "schedules", pk: "id" },
+  { name: "threads", pk: "id" },
+  { name: "interactions", pk: "id" },
+  { name: "context_items", pk: "id" },
+  { name: "embeddings", pk: "id" },
+  // The only probed table whose primary key is not named `id`.
+  { name: "daemon_state", pk: "key" },
+];
+/**
+ * Verdict of a single table probe:
+ * - `ok`: the table has rows and the self-update succeeded.
+ * - `empty`: the table has no rows, so there was nothing to exercise.
+ * - `missing`: the database could not be opened or the table does not exist.
+ * - `corrupt`: a probe statement failed, or the child exited without a
+ *   recognizable verdict.
+ */
+export type ProbeStatus = "ok" | "empty" | "missing" | "corrupt";
+/** Outcome of probing one table. */
+export interface ProbeResult {
+  /** Name of the table that was probed. */
+  table: string;
+  /** The verdict; see {@link ProbeStatus}. */
+  status: ProbeStatus;
+  /** Detail message when status is corrupt or missing. */
+  message?: string;
+}
+/**
+ * Probe a single table for index corruption by spawning a child Bun
+ * process. We use a child process because a corrupt PK index in DuckDB
+ * surfaces as a Bun panic (a C++ exception that unwinds past the NAPI
+ * boundary), which would kill the doctor itself. The child reports its
+ * verdict on stdout and exits.
+ *
+ * The child runs an inline script (`bun -e`) that imports
+ * `@duckdb/node-api` by its bare specifier, so module resolution is left
+ * to the spawned process; nothing here pins an absolute path to this
+ * package's install. NOTE(review): confirm the child resolves the same
+ * install in every layout this CLI is launched from.
+ *
+ * Verdict protocol (stdout prefix → status): `OK` → ok, `EMPTY` → empty,
+ * `MISSING:<msg>` → missing, `CORRUPT:<msg>` → corrupt. Anything else,
+ * including no output at all, is reported as corrupt with the exit code.
+ *
+ * The probe is not read-only: it issues a self-assigning
+ * `UPDATE ... SET pk = pk` against one row to exercise the index.
+ *
+ * @param dbPath Path of the DuckDB file the child opens.
+ * @param table Table name, interpolated unquoted into the probe SQL.
+ * @param pk Primary-key column name, interpolated unquoted as well.
+ * @returns The verdict for `table`; `message` carries the first line of
+ *   the child's detail text for missing/corrupt results.
+ */
+export async function probeTable(
+  dbPath: string,
+  table: string,
+  pk: string,
+): Promise<ProbeResult> {
+  const script = `
+    const { DuckDBInstance } = await import("@duckdb/node-api");
+    const dbPath = ${JSON.stringify(dbPath)};
+    const table = ${JSON.stringify(table)};
+    const pk = ${JSON.stringify(pk)};
+    let inst;
+    try {
+      inst = await DuckDBInstance.create(dbPath);
+    } catch (e) {
+      process.stdout.write("MISSING:" + (e?.message ?? String(e)));
+      process.exit(0);
+    }
+    const c = await inst.connect();
+    try {
+      const r = await c.runAndReadAll(\`SELECT \${pk} FROM \${table} LIMIT 1\`);
+      if (r.getRows().length === 0) {
+        process.stdout.write("EMPTY");
+        process.exit(0);
+      }
+    } catch (e) {
+      const msg = String(e?.message ?? e);
+      // Table doesn't exist yet (e.g., schema older than this doctor) — not
+      // a corruption signal, just skip it.
+      if (msg.includes("does not exist") || msg.includes("Catalog Error")) {
+        process.stdout.write("MISSING:" + msg);
+        process.exit(0);
+      }
+      process.stdout.write("CORRUPT:" + msg);
+      process.exit(2);
+    }
+    try {
+      await c.run(\`UPDATE \${table} SET \${pk} = \${pk} WHERE \${pk} = (SELECT \${pk} FROM \${table} LIMIT 1)\`);
+      process.stdout.write("OK");
+      process.exit(0);
+    } catch (e) {
+      process.stdout.write("CORRUPT:" + (e?.message ?? String(e)));
+      process.exit(2);
+    }
+  `;
+  // Discard the child's stderr. When the probe panics, Bun writes a multi-
+  // line crash banner there which would otherwise spill into our table
+  // output via the fallback message. The exit code alone tells us what we
+  // need to know.
+  const proc = Bun.spawn(["bun", "-e", script], {
+    stdio: ["ignore", "pipe", "ignore"],
+  });
+  // Drain stdout while waiting for exit so both complete together.
+  const [stdout, exitCode] = await Promise.all([
+    new Response(proc.stdout).text(),
+    proc.exited,
+  ]);
+  // Bun panic: process killed by SIGTRAP / non-zero exit with no stdout
+  // verdict. Treat any unrecognized exit as corruption — better to flag
+  // for repair than to silently miss a problem.
+  if (stdout.startsWith("OK")) return { table, status: "ok" };
+  if (stdout.startsWith("EMPTY")) return { table, status: "empty" };
+  if (stdout.startsWith("MISSING:")) {
+    return {
+      table,
+      status: "missing",
+      message: firstLine(stdout.slice("MISSING:".length)),
+    };
+  }
+  if (stdout.startsWith("CORRUPT:")) {
+    return {
+      table,
+      status: "corrupt",
+      message: firstLine(stdout.slice("CORRUPT:".length)),
+    };
+  }
+  // No recognizable verdict on stdout: report the exit code instead.
+  return {
+    table,
+    status: "corrupt",
+    message: `child exited with code ${exitCode} (likely native panic)`,
+  };
+}
+/**
+ * Probe every table listed in `PROBE_TABLES`, one after another, and
+ * collect the verdicts in list order. The probes are deliberately not run
+ * in parallel: that keeps us cooperating with DuckDB's per-process file
+ * lock and limits how much a single panic can take down.
+ *
+ * @param dbPath Path of the DuckDB file handed to each probe.
+ * @returns One result per entry of `PROBE_TABLES`.
+ */
+export async function probeAllTables(dbPath: string): Promise<ProbeResult[]> {
+  const verdicts: ProbeResult[] = [];
+  for (const entry of PROBE_TABLES) {
+    const verdict = await probeTable(dbPath, entry.name, entry.pk);
+    verdicts.push(verdict);
+  }
+  return verdicts;
+}
+/** Summary returned by `repairDatabase`. */
+export interface RepairResult {
+  /** Path the pre-repair database file was renamed to. */
+  backupDbPath: string;
+  /** Directory the EXPORT DATABASE step wrote into. */
+  exportDir: string;
+  /** Elapsed wall-clock time of the repair, in milliseconds. */
+  durationMs: number;
+}
+/**
+ * Repair `dbPath` by exporting its contents and importing into a fresh
+ * file. EXPORT DATABASE reads via sequential scans, not via PK indexes,
+ * so it survives the kind of index corruption that breaks UPDATE/DELETE.
+ * IMPORT DATABASE rebuilds every index from the data, which restores
+ * write integrity.
+ *
+ * Steps:
+ *   1. CHECKPOINT (best-effort) to flush WAL.
+ *   2. EXPORT DATABASE to `<dotDir>/.export-<timestamp>`.
+ *   3. Move `data.duckdb` (and `.wal`) to `data.duckdb.bak-<timestamp>`.
+ *   4. Open a fresh DB at the original path and IMPORT DATABASE.
+ *   5. On success, remove the export dir (best-effort). If step 4 fails,
+ *      discard the partial file, move the original files back into place,
+ *      keep the export dir for inspection, and rethrow.
+ *
+ * The caller is responsible for ensuring no other process holds the DB
+ * (no running workers, no chat session, no TUI).
+ *
+ * @param dbPath Path of the database file to rebuild in place.
+ * @returns The backup path, the export dir path (already removed when
+ *   cleanup succeeded), and the elapsed time.
+ * @throws Whatever EXPORT or IMPORT throws. If the import fails and the
+ *   rollback fails too, an Error naming both failures and the backup path.
+ */
+export async function repairDatabase(dbPath: string): Promise<RepairResult> {
+  const start = Date.now();
+  const dotDir = dirname(dbPath);
+  await mkdir(dotDir, { recursive: true });
+  // ISO timestamp with `:` and `.` replaced so it is safe in file names.
+  const stamp = new Date()
+    .toISOString()
+    .replace(/[:.]/g, "-")
+    .replace(/Z$/, "");
+  const exportDir = join(dotDir, `.export-${stamp}`);
+  // Double any single quote so the path is a valid SQL string literal.
+  const exportDirSql = exportDir.replace(/'/g, "''");
+  const backupDbPath = `${dbPath}.bak-${stamp}`;
+  const walPath = `${dbPath}.wal`;
+  const backupWalPath = `${backupDbPath}.wal`;
+  await withDb(dbPath, async (conn) => {
+    try {
+      await conn.exec("CHECKPOINT");
+    } catch {
+      // CHECKPOINT can fail on an already-invalidated DB; the EXPORT
+      // below is what actually matters.
+    }
+    await conn.exec(`EXPORT DATABASE '${exportDirSql}'`);
+  });
+  await rename(dbPath, backupDbPath);
+  const walMoved = await pathExists(walPath);
+  if (walMoved) {
+    await rename(walPath, backupWalPath);
+  }
+  try {
+    await withDb(dbPath, async (conn) => {
+      await conn.exec(`IMPORT DATABASE '${exportDirSql}'`);
+    });
+  } catch (importError) {
+    // Do not leave a half-imported file where the project expects its
+    // database: put the original (still readable) files back.
+    try {
+      await rm(dbPath, { force: true });
+      await rm(walPath, { force: true });
+      await rename(backupDbPath, dbPath);
+      if (walMoved) {
+        await rename(backupWalPath, walPath);
+      }
+    } catch (rollbackError) {
+      const describe = (e: unknown): string =>
+        e instanceof Error ? e.message : String(e);
+      throw new Error(
+        `IMPORT DATABASE failed (${describe(importError)}) and restoring the original database also failed (${describe(rollbackError)}). Original database: ${backupDbPath}; export: ${exportDir}`,
+      );
+    }
+    throw importError;
+  }
+  // Best-effort cleanup of the export dir. Leave it on failure — the user
+  // still has data.duckdb (rebuilt) and the backup.
+  try {
+    await rm(exportDir, { recursive: true, force: true });
+  } catch {
+    // ignore
+  }
+  return {
+    backupDbPath,
+    exportDir,
+    durationMs: Date.now() - start,
+  };
+}
+/**
+ * Report whether `p` can be `stat`-ed. Any failure (missing path,
+ * permission error, …) is reported as `false` rather than thrown.
+ */
+async function pathExists(p: string): Promise<boolean> {
+  return stat(p).then(
+    () => true,
+    () => false,
+  );
+}
+/**
+ * Return the first line of `s` after trimming surrounding whitespace.
+ * Both `\n` and `\r\n` end a line, so messages with Windows line endings
+ * do not leak a trailing carriage return into terminal output.
+ *
+ * @param s Possibly multi-line text.
+ * @returns Everything before the first line break; `""` for blank input.
+ */
+function firstLine(s: string): string {
+  const [head = ""] = s.trim().split(/\r?\n/, 1);
+  return head;
+}
+/**
+ * Test whether `pid` belongs to a live process by sending it signal 0.
+ *
+ * Returns false for a missing, zero, or negative pid without signalling,
+ * and false whenever the signal call throws — ESRCH (no such process) as
+ * well as any other error, including EPERM, which is conservatively
+ * treated as "not ours, not relevant". The doctor's safety gate uses this
+ * to tell workers that are really running apart from rows still saying
+ * `status = 'running'` because the worker crashed before flipping its row
+ * to `stopped` or `dead`.
+ */
+export function isPidAlive(pid: number): boolean {
+  const plausible = Boolean(pid) && pid >= 1;
+  if (!plausible) {
+    return false;
+  }
+  let alive = true;
+  try {
+    // Signal 0 performs the existence check without delivering a signal.
+    process.kill(pid, 0);
+  } catch {
+    alive = false;
+  }
+  return alive;
+}

package/src/worker/llm.ts CHANGED Viewed

@@ -11,12 +11,17 @@ import { getTask, type Task } from "../db/tasks.ts";
 import { logInteraction } from "../db/threads.ts";
 import { registerAllTools } from "../tools/registry.ts";
 import { getTool, type ToolContext, toAnthropicTools } from "../tools/tool.ts";
+import { logger } from "../utils/logger.ts";
 import { fitToContextWindow, getMaxInputTokens } from "./context.ts";
 import { clearLargeResults, maybeStoreResult } from "./large-results.ts";
 import { createLlmClient } from "./llm-client.ts";
 registerAllTools();
+/** Cap `s` at `max` characters, appending `…` when anything was cut off. */
+function truncate(s: string, max: number): string {
+  if (!(s.length > max)) {
+    return s;
+  }
+  const kept = s.slice(0, max);
+  return `${kept}…`;
+}
 export interface WorkerStreamCallbacks {
   onToken: (text: string) => void;
   onToolStart: (name: string, input: string) => void;
@@ -153,6 +158,9 @@ export async function runAgentLoop(input: {
             tokenCount,
           }),
         );
+        if (!callbacks) {
+          logger.phase("assistant", block.text);
+        }
       }
     }
@@ -175,6 +183,12 @@ export async function runAgentLoop(input: {
     for (const toolUse of toolUseBlocks) {
       const toolInput = JSON.stringify(toolUse.input);
       callbacks?.onToolStart(toolUse.name, toolInput);
+      if (!callbacks) {
+        logger.phase(
+          "tool-call",
+          `${toolUse.name} ${truncate(toolInput, 200)}`,
+        );
+      }
       await withDb(dbPath, (conn) =>
         logInteraction(conn, threadId, {
           role: "assistant",
@@ -222,6 +236,11 @@ export async function runAgentLoop(input: {
           durationMs,
         }),
       );
+      if (!callbacks) {
+        const seconds = (durationMs / 1000).toFixed(1);
+        const status = result.isError ? "err" : "ok";
+        logger.phase("tool-result", `${toolUse.name} ${status} in ${seconds}s`);
+      }
       if (result.terminal && result.agentResult) {
         return result.agentResult;

package/src/worker/tick.ts CHANGED Viewed

@@ -133,6 +133,9 @@ async function runClaimedTask(opts: {
   const { projectDir, dbPath, config, mcpxClient, callbacks, task } = opts;
   logger.info(`Claimed task: ${task.name} (${task.id})`);
+  if (!callbacks && task.description) {
+    logger.dim(task.description);
+  }
   callbacks?.onTaskStart(task);
   const threadId = await withDb(dbPath, (conn) =>