npm - membot - Versions diffs - 0.3.0 → 0.4.0 - Mend

membot 0.3.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (17) hide show

package/README.md +2 -5
package/package.json +1 -1
package/src/config/schemas.ts +7 -0
package/src/context.ts +5 -1
package/src/db/chunks.ts +2 -1
package/src/db/connection.ts +190 -31
package/src/db/migrations.ts +22 -1
package/src/ingest/ingest.ts +74 -10
package/src/ingest/source-resolver.ts +92 -11
package/src/mount/mcp.ts +8 -0
package/src/operations/add.ts +118 -10
package/src/operations/search.ts +26 -4
package/src/output/progress.ts +47 -8
package/src/refresh/scheduler.ts +8 -0
package/src/sdk.ts +4 -2
package/src/search/hybrid.ts +7 -1
package/src/search/keyword.ts +20 -13

package/README.md CHANGED Viewed

@@ -2,8 +2,7 @@
 > Versioned context store with hybrid search for AI agents. Stdio + HTTP MCP server and CLI.
-[![npm](https://img.shields.io/npm/v/membot.svg)](https://www.npmjs.com/package/membot)
-[![license](https://img.shields.io/npm/l/membot.svg)](./LICENSE)
+[![license](https://img.shields.io/github/license/evantahler/membot.svg)](./LICENSE)
 `membot` is a single-binary CLI and MCP server that gives AI agents a persistent, versioned, searchable context store. Files (markdown, PDFs, DOCX, HTML, URLs, agent-authored notes) are ingested, converted to markdown, chunked, embedded **locally** with `@huggingface/transformers` (WASM, no cloud calls), and indexed in DuckDB with hybrid search (semantic vector + BM25). Every change creates a new version — nothing is overwritten in place.
@@ -16,11 +15,9 @@
 ```bash
 bun install -g membot
-# or
-npm install -g membot
 ```
-This pulls in DuckDB's per-platform native bindings alongside membot. The build externalizes `@duckdb/*` (those `.node` bindings can't be embedded by `bun build --compile`), so a global npm/bun install is the supported path.
+This pulls in DuckDB's per-platform native bindings alongside membot. The build externalizes `@duckdb/*` (those `.node` bindings can't be embedded by `bun build --compile`), so a global Bun install is the supported path.
 ## Quick start

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
 	"name": "membot",
-	"version": "0.3.0",
+	"version": "0.4.0",
 	"description": "Versioned context store with hybrid search for AI agents. Stdio + HTTP MCP server and CLI.",
 	"type": "module",
 	"exports": {

package/src/config/schemas.ts CHANGED Viewed

@@ -23,6 +23,12 @@ export const DaemonConfigSchema = z.object({
 	tick_interval_sec: z.number().int().positive().default(DEFAULTS.DAEMON_TICK_SEC),
 });
+/**
+ * Retry tunables used when opening the DuckDB file while another process
+ * holds its lock. Every field must be a positive integer. The defaults
+ * (30 / 100 / 2000) are the same numbers as `DEFAULT_LOCK_RETRY` in
+ * `src/db/connection.ts`; `buildContext` forwards the parsed values to `openDb`.
+ */
+export const DbLockRetryConfigSchema = z.object({
+	/** Total number of open attempts, counting the first one. */
+	max_attempts: z.number().int().positive().default(30),
+	/** Backoff before the first retry, in milliseconds; doubles on each later attempt. */
+	base_delay_ms: z.number().int().positive().default(100),
+	/** Upper bound applied to the exponential backoff, in milliseconds. */
+	max_delay_ms: z.number().int().positive().default(2000),
+});
 export const MembotConfigSchema = z.object({
 	data_dir: z.string().default(defaultMembotHome()),
 	embedding_model: z.string().default(EMBEDDING_MODEL),
@@ -31,6 +37,7 @@ export const MembotConfigSchema = z.object({
 	llm: LlmConfigSchema.default(() => LlmConfigSchema.parse({})),
 	mcpx: McpxConfigSchema.default(() => McpxConfigSchema.parse({})),
 	daemon: DaemonConfigSchema.default(() => DaemonConfigSchema.parse({})),
+	db_lock_retry: DbLockRetryConfigSchema.default(() => DbLockRetryConfigSchema.parse({})),
 	default_refresh_frequency_sec: z.number().int().positive().nullable().default(null),
 });

package/src/context.ts CHANGED Viewed

@@ -39,7 +39,11 @@ export async function buildContext(options: BuildContextOptions = {}): Promise<A
 	const { config, dataDir, configPath } = await loadConfig({ configFlag: options.configFlag });
 	const dbPath = join(dataDir, FILES.INDEX_DUCKDB);
-	const db = await openDb(dbPath);
+	const db = await openDb(dbPath, {
+		maxAttempts: config.db_lock_retry.max_attempts,
+		baseDelayMs: config.db_lock_retry.base_delay_ms,
+		maxDelayMs: config.db_lock_retry.max_delay_ms,
+	});
 	const mcpx = await maybeMcpx(config);

package/src/db/chunks.ts CHANGED Viewed

@@ -140,7 +140,8 @@ export async function rebuildFts(db: DbConnection): Promise<RebuildFtsResult> {
 		await db.exec(
 			`CREATE TABLE _current_chunks_fts AS
 			 SELECT (logical_path || '::' || CAST(version_id AS VARCHAR) || '::' || chunk_index) AS row_key,
-			        logical_path, CAST(version_id AS VARCHAR) AS version_id, chunk_index, search_text
+			        logical_path, CAST(version_id AS VARCHAR) AS version_id, chunk_index,
+			        chunk_content, search_text
 			 FROM current_chunks`,
 		);
 		await db.exec(

package/src/db/connection.ts CHANGED Viewed

@@ -8,6 +8,7 @@ import {
 import { EMBEDDING_DIMENSION } from "../constants.ts";
 import { asHelpful } from "../errors.ts";
+import { logger } from "../output/logger.ts";
 import { applyMigrations } from "./migrations.ts";
 /** Subset of @duckdb/node-api types we feed into / get out of queries. */
@@ -18,25 +19,51 @@ export interface RunResult {
 	changes: number;
 }
+/** Tunables for retrying a `DuckDBInstance.create()` call when another process holds the file lock. */
+export interface LockRetryOptions {
+	/** Total number of attempts, including the first; the retry loop counts from 1 up to this value. */
+	maxAttempts: number;
+	/** Backoff before the first retry, in milliseconds. Doubles on each subsequent attempt. */
+	baseDelayMs: number;
+	/** Cap on the exponential backoff, in milliseconds. Jitter is added after the cap is applied. */
+	maxDelayMs: number;
+}
+/**
+ * Retry settings used whenever a caller does not supply its own: this is the
+ * default argument of the `DbConnection` constructor, `withLockRetry`,
+ * `createInstanceWithRetry`, and `openDb`.
+ */
+export const DEFAULT_LOCK_RETRY: LockRetryOptions = {
+	maxAttempts: 30,
+	baseDelayMs: 100,
+	maxDelayMs: 2000,
+};
 /**
- * Thin async wrapper around a DuckDB connection. Uses ?N placeholders
- * (translated to $N internally) and returns plain JS objects.
+ * Async wrapper around DuckDB with **lazy claim / release** semantics so
+ * concurrent membot processes don't deadlock on the file lock.
+ *
+ * Lifecycle:
+ *  - construct with a path; nothing is opened yet
+ *  - first query call (`exec`/`queryGet`/`queryAll`/`queryRun`) lazily opens
+ *    DuckDB, retrying with backoff on lock conflicts, and runs migrations
+ *  - `release()` closes the underlying DuckDB instance but leaves the
+ *    wrapper reusable — the next query reopens transparently
+ *  - `close()` is permanent: subsequent queries throw
+ *
+ * Long-running flows (MCP server, daemon, multi-file `add`) call `release()`
+ * between units of work so other consumers can grab the lock.
  */
 export class DbConnection {
-	private readonly conn: DuckDBNativeConnection;
-	private readonly instance: DuckDBInstance | null;
 	readonly path: string;
+	private readonly retry: LockRetryOptions;
+	private conn: DuckDBNativeConnection | null = null;
+	private instance: DuckDBInstance | null = null;
 	private closed = false;
+	private opening: Promise<void> | null = null;
-	constructor(conn: DuckDBNativeConnection, instance: DuckDBInstance | null, path: string) {
-		this.conn = conn;
-		this.instance = instance;
+	constructor(path: string, retry: LockRetryOptions = DEFAULT_LOCK_RETRY) {
 		this.path = path;
+		this.retry = retry;
 	}
 	/** Run a parameter-less SQL statement (DDL, PRAGMA, batch SQL). */
 	async exec(sql: string): Promise<void> {
-		await this.conn.run(sql);
+		const conn = await this.ensureOpen();
+		await conn.run(sql);
 	}
 	/** Run a query and return the first row, or null. SQL uses `?N` placeholders. */
@@ -44,7 +71,8 @@ export class DbConnection {
 		sql: string,
 		...params: SqlParam[]
 	): Promise<T | null> {
-		const result = await this.conn.runAndReadAll(translateParams(sql), flattenParams(params) as DuckDBValue[]);
+		const conn = await this.ensureOpen();
+		const result = await conn.runAndReadAll(translateParams(sql), flattenParams(params) as DuckDBValue[]);
 		const rows = (await result.getRowObjectsJS()) as Record<string, unknown>[];
 		if (!rows[0]) return null;
 		return convertRow(rows[0]) as T;
@@ -55,30 +83,169 @@ export class DbConnection {
 		sql: string,
 		...params: SqlParam[]
 	): Promise<T[]> {
-		const result = await this.conn.runAndReadAll(translateParams(sql), flattenParams(params) as DuckDBValue[]);
+		const conn = await this.ensureOpen();
+		const result = await conn.runAndReadAll(translateParams(sql), flattenParams(params) as DuckDBValue[]);
 		const rows = (await result.getRowObjectsJS()) as Record<string, unknown>[];
 		return rows.map(convertRow) as T[];
 	}
 	/** Run a mutation (INSERT/UPDATE/DELETE) and report rows changed. SQL uses `?N` placeholders. */
 	async queryRun(sql: string, ...params: SqlParam[]): Promise<RunResult> {
-		const result = await this.conn.run(translateParams(sql), flattenParams(params) as DuckDBValue[]);
+		const conn = await this.ensureOpen();
+		const result = await conn.run(translateParams(sql), flattenParams(params) as DuckDBValue[]);
 		return { changes: Number(result.rowsChanged) };
 	}
-	/** Disconnect and close the owning DuckDB instance. Idempotent; subsequent calls are no-ops. */
+	/**
+	 * Release the underlying DuckDB instance so other processes can claim
+	 * the lock. The wrapper stays usable: the next query reopens. Idempotent
+	 * — calling it on an already-released wrapper is a no-op, and it does
+	 * nothing at all once `close()` has been called.
+	 *
+	 * NOTE(review): handles are disposed without tracking queries that may
+	 * still be running on this wrapper — callers presumably only release
+	 * between units of work; confirm.
+	 */
+	async release(): Promise<void> {
+		if (this.closed) return;
+		// If an open is in-flight, wait for it so we don't leave a stray instance behind.
+		if (this.opening) {
+			try {
+				await this.opening;
+			} catch {
+				// The failed open already closed its instance and reset
+				// conn/instance (see openOnce), so nothing is left to dispose.
+				return;
+			}
+		}
+		this.disposeHandles();
+	}
+	/**
+	 * Permanently close. Subsequent queries throw.
+	 *
+	 * `closed` is set before anything is awaited, so `ensureOpen` rejects new
+	 * queries immediately. If an open is still in flight we wait for it: when
+	 * it succeeds, its handles are disposed below; when it fails, it has
+	 * already closed its own instance, so we just return. Idempotent.
+	 */
 	async close(): Promise<void> {
 		if (this.closed) return;
 		this.closed = true;
-		this.conn.disconnectSync();
+		if (this.opening) {
+			try {
+				await this.opening;
+			} catch {
+				return;
+			}
+		}
+		this.disposeHandles();
+	}
+	/**
+	 * Synchronously tear down whatever handles are currently held: disconnect
+	 * the connection first, then close the instance. Errors from either step
+	 * are swallowed (best effort) and both fields are reset to `null`, so
+	 * calling this with nothing open is a no-op.
+	 */
+	private disposeHandles(): void {
+		if (this.conn) {
+			try {
+				this.conn.disconnectSync();
+			} catch {
+				// best effort
+			}
+			this.conn = null;
+		}
 		if (this.instance) {
 			try {
 				this.instance.closeSync();
 			} catch {
 				// best effort
 			}
+			this.instance = null;
+		}
+	}
+	/**
+	 * Return the live native connection, opening the database first if needed.
+	 * Callers that arrive while no connection exists share a single in-flight
+	 * `openOnce()` promise via `this.opening`, which is cleared when that
+	 * promise settles.
+	 *
+	 * NOTE(review): `openOnce` assigns `this.conn` before migrations finish, so
+	 * a caller arriving mid-migration takes the early return below and may
+	 * query before the schema is current — confirm whether that matters.
+	 *
+	 * @throws Error if the wrapper was permanently closed, or if the open
+	 *   settled without leaving a connection behind.
+	 */
+	private async ensureOpen(): Promise<DuckDBNativeConnection> {
+		if (this.closed) {
+			throw new Error(`DbConnection at ${this.path} has been closed`);
+		}
+		if (this.conn) return this.conn;
+		if (!this.opening) {
+			this.opening = this.openOnce().finally(() => {
+				this.opening = null;
+			});
+		}
+		await this.opening;
+		// The connection can be gone again by now, e.g. if `release()` was also
+		// waiting on the same open and disposed the handles first.
+		if (!this.conn) {
+			throw new Error(`DbConnection at ${this.path} failed to open`);
+		}
+		return this.conn;
+	}
+	/**
+	 * Perform one open: create the instance (retrying on lock conflicts),
+	 * connect, publish both handles on `this`, then run migrations. On any
+	 * failure after the instance exists, the instance is closed and both
+	 * fields are reset before the error is re-thrown.
+	 */
+	private async openOnce(): Promise<void> {
+		const instance = await createInstanceWithRetry(this.path, this.retry);
+		try {
+			const conn = await instance.connect();
+			// The handles must be assigned before migrating: applyMigrations
+			// issues its statements through this wrapper, and ensureOpen hands
+			// back `this.conn` instead of starting another open.
+			this.instance = instance;
+			this.conn = conn;
+			await applyMigrations(this);
+		} catch (err) {
+			// On any failure after instance creation, release the lock immediately.
+			// NOTE(review): only the instance is closed here; the connection is not
+			// disconnected separately — presumably closing the instance suffices.
+			try {
+				instance.closeSync();
+			} catch {
+				// best effort
+			}
+			this.instance = null;
+			this.conn = null;
+			throw err;
+		}
+	}
+}
+/**
+ * True if the error message looks like DuckDB's lock-conflict shape.
+ *
+ * `Error` instances are matched on `.message`; any other value is stringified
+ * (`null`/`undefined` become the empty string). The check is a
+ * case-insensitive substring test, so prefixed or wrapped messages still match.
+ */
+export function isLockConflictError(err: unknown): boolean {
+	const msg = err instanceof Error ? err.message : String(err ?? "");
+	return /could not set lock on file|conflicting lock|database is locked/i.test(msg);
+}
+/** Sleep helper: returns a promise that resolves once a `setTimeout` of `ms` milliseconds fires; it never rejects. */
+function delay(ms: number): Promise<void> {
+	return new Promise((resolve) => setTimeout(resolve, ms));
+}
+/**
+ * Run an open-the-DB factory with exponential backoff + jitter when the file
+ * lock is held by another process. Non-lock errors are re-thrown immediately
+ * (wrapped as `HelpfulError`) — only lock conflicts are retried. After
+ * exhausting attempts we throw a `HelpfulError` whose hint names the
+ * concurrent-process problem. Exposed (rather than inlined) so tests can
+ * verify the retry behavior with a fake factory.
+ *
+ * If `retry.maxAttempts` is less than 1 the factory is never called and the
+ * final error is thrown with no underlying cause.
+ *
+ * @param factory Called once per attempt; its resolved value is returned as-is.
+ * @param path Database path; used only in log and error text.
+ * @param retry Attempt count and backoff settings.
+ * @returns Whatever `factory` resolves to on the first successful attempt.
+ */
+export async function withLockRetry<T>(
+	factory: () => Promise<T>,
+	path: string,
+	retry: LockRetryOptions = DEFAULT_LOCK_RETRY,
+): Promise<T> {
+	let lastErr: unknown;
+	for (let attempt = 1; attempt <= retry.maxAttempts; attempt++) {
+		try {
+			return await factory();
+		} catch (err) {
+			lastErr = err;
+			if (!isLockConflictError(err)) {
+				throw asHelpful(
+					err,
+					`while opening DuckDB at ${path}`,
+					`Check that ${path} is writable and not held open by another process. Delete the file to start fresh.`,
+					"internal_error",
+				);
+			}
+			// Last attempt failed: skip the sleep and fall through to the final throw.
+			if (attempt === retry.maxAttempts) break;
+			// Exponential backoff (base, 2x base, 4x base, ...) capped at maxDelayMs.
+			const backoff = Math.min(retry.maxDelayMs, retry.baseDelayMs * 2 ** (attempt - 1));
+			// Jitter is below min(baseDelayMs, backoff) and is added on top of the
+			// capped backoff, so the actual wait can exceed maxDelayMs by up to
+			// (just under) baseDelayMs.
+			const jitter = Math.floor(Math.random() * Math.min(retry.baseDelayMs, backoff));
+			const wait = backoff + jitter;
+			logger.debug(`db: lock held on ${path}, retrying in ${wait}ms (attempt ${attempt}/${retry.maxAttempts})`);
+			await delay(wait);
 		}
 	}
+	throw asHelpful(
+		lastErr,
+		`while opening DuckDB at ${path} after ${retry.maxAttempts} attempts`,
+		`Another process is holding the database lock. Stop the conflicting process (check for a running 'membot serve' or open DuckDB CLI session) or delete ${path} to start fresh.`,
+		"internal_error",
+	);
+}
+/**
+ * Open a `DuckDBInstance` for `path`, retrying with backoff on lock conflicts.
+ * Thin wrapper that hands `DuckDBInstance.create(path)` to `withLockRetry`, so
+ * it rejects with whatever `withLockRetry` throws.
+ */
+export function createInstanceWithRetry(
+	path: string,
+	retry: LockRetryOptions = DEFAULT_LOCK_RETRY,
+): Promise<DuckDBInstance> {
+	return withLockRetry(() => DuckDBInstance.create(path), path, retry);
 }
 /** Type guard for the JS values DuckDB returns directly without further coercion. */
@@ -149,25 +316,17 @@ function flattenParams(params: SqlParam[]): unknown[] {
 }
 /**
- * Open a DuckDB-backed connection for the given file path. Runs all migrations
- * against the connection before returning. Pass `:memory:` for in-process tests.
+ * Construct a DuckDB-backed connection for the given file path and open it
+ * once before returning, so lock conflicts, bad paths and migration failures
+ * reject here rather than at the first query in user code. The returned
+ * wrapper is still lazy afterwards: once `release()` has been called on it,
+ * the next query reopens the database transparently.
+ *
+ * @param path DuckDB file path.
+ * @param retry Lock-conflict retry settings handed to the connection.
  */
-export async function openDb(path: string): Promise<DbConnection> {
-	let instance: DuckDBInstance;
-	try {
-		instance = await DuckDBInstance.create(path);
-	} catch (err) {
-		throw asHelpful(
-			err,
-			`while opening DuckDB at ${path}`,
-			`Check that ${path} is writable and not held open by another process. Delete the file to start fresh.`,
-			"internal_error",
-		);
-	}
-	const conn = await instance.connect();
-	const wrapper = new DbConnection(conn, instance, path);
-	await applyMigrations(wrapper);
-	return wrapper;
+export async function openDb(path: string, retry: LockRetryOptions = DEFAULT_LOCK_RETRY): Promise<DbConnection> {
+	const db = new DbConnection(path, retry);
+	// Eager probe so initial open errors (lock conflict, bad path, migration
+	// failure) surface here rather than at the first query in user code.
+	await db.exec("SELECT 1");
+	return db;
 }
 export { EMBEDDING_DIMENSION };

package/src/db/migrations.ts CHANGED Viewed

@@ -16,12 +16,31 @@ export interface Migration {
 const MIGRATIONS: Migration[] = [MIGRATION_001, MIGRATION_002];
+/**
+ * Process-level cache of paths whose migrations have been applied (or
+ * confirmed already-current) in this process. With lazy-claim DB connections,
+ * `applyMigrations` runs on every reopen — caching here keeps the DDL/SELECT
+ * traffic and "migration: applied" log lines off the hot reopen path.
+ * Cleared by `forgetMigrations` so tests can simulate a fresh process.
+ *
+ * NOTE(review): entries are keyed by the path string alone. If two opens
+ * share a path but not the underlying database (presumably the case for
+ * `:memory:`, or for a file that is deleted and recreated while the process
+ * is running), the later open skips migrations entirely — confirm, and call
+ * `forgetMigrations` where that can happen.
+ */
+const checkedPaths = new Set<string>();
+/**
+ * Reset the per-process migration cache. Test-only — production code never calls this.
+ *
+ * @param path When given, forget only that path; when omitted, forget every path.
+ */
+export function forgetMigrations(path?: string): void {
+	if (path === undefined) checkedPaths.clear();
+	else checkedPaths.delete(path);
+}
 /**
  * Apply every unapplied migration in id order. Tracks applied ids in
  * `_migrations`. Each successful run is logged via the shared logger so a
- * user upgrading membot can see exactly what changed in their store.
+ * user upgrading membot can see exactly what changed in their store. The
+ * first call for a given DB path checks the table; subsequent calls in the
+ * same process short-circuit via `checkedPaths`.
  */
 export async function applyMigrations(db: DbConnection): Promise<void> {
+	if (checkedPaths.has(db.path)) return;
 	await db.exec(`CREATE TABLE IF NOT EXISTS _migrations (
 		id INTEGER PRIMARY KEY,
 		name TEXT NOT NULL,
@@ -42,4 +61,6 @@ export async function applyMigrations(db: DbConnection): Promise<void> {
 		await db.queryRun(`INSERT INTO _migrations(id, name) VALUES (?1, ?2)`, migration.id, migration.name);
 		logger.info(`migration: applied  ${String(migration.id).padStart(3, "0")}-${migration.name}`);
 	}
+	checkedPaths.add(db.path);
 }

package/src/ingest/ingest.ts CHANGED Viewed

@@ -44,12 +44,36 @@ export interface IngestResult {
 	failed: number;
 }
+/**
+ * Per-entry hooks invoked while a resolved source is being ingested. Used by
+ * `add` to drive a single shared progress reporter across many sources
+ * without re-resolving anything. `onEntryStart` fires before the pipeline
+ * touches an entry; `onEntryComplete` fires after the result (ok / unchanged
+ * / failed) is known. Both are optional.
+ */
+export interface IngestCallbacks {
+	/**
+	 * Called with a display label for the entry: the logical path for inline
+	 * text, the URL for a URL source, and the path relative to the walk base
+	 * for local files.
+	 */
+	onEntryStart?: (label: string) => void;
+	/** Called with the entry's final result record once its status is settled. */
+	onEntryComplete?: (entry: IngestEntryResult) => void;
+}
+/**
+ * Count how many per-entry results a `ResolvedSource` will produce. Used by
+ * `add` to size a shared progress bar before ingestion starts.
+ *
+ * @returns The number of entries for a `local-files` source (which can be 0),
+ *   and 1 for every other kind (inline text, URL).
+ */
+export function countResolvedEntries(resolved: ResolvedSource): number {
+	if (resolved.kind === "local-files") return resolved.entries.length;
+	return 1;
+}
 /**
  * Top-level ingest orchestrator. Resolves the source arg, dispatches to the
  * right reader (local / remote / inline), runs the pipeline (convert →
  * describe → chunk → embed → write), and returns one entry per matched
  * file. Partial failures are reported per-entry; the entire call doesn't
- * abort because one URL or PDF is bad.
+ * abort because one URL or PDF is bad. Drives `ctx.progress` itself, so
+ * single-source SDK callers get a usable indicator out of the box. When
+ * orchestrating many sources at once (e.g. `add`), call `resolveSource` +
+ * `ingestResolved` directly so one shared progress spans every entry.
  */
 export async function ingest(input: IngestInput, ctx: AppContext): Promise<IngestResult> {
 	const resolved = await resolveSource(input.source, {
@@ -57,17 +81,40 @@ export async function ingest(input: IngestInput, ctx: AppContext): Promise<Inges
 		exclude: input.exclude,
 		followSymlinks: input.follow_symlinks ?? true,
 	});
+	const total = countResolvedEntries(resolved);
+	ctx.progress.start(total, "ingest");
+	const callbacks: IngestCallbacks = {
+		onEntryStart: (label) => ctx.progress.tick(label),
+	};
+	const result = await ingestResolved(resolved, input, ctx, callbacks);
+	const okCount = result.ok;
+	const unchangedSuffix = result.unchanged > 0 ? ` (${result.unchanged} unchanged)` : "";
+	ctx.progress.done(`ingested ${okCount}/${result.total}${unchangedSuffix}`);
+	return result;
+}
+/**
+ * Run the ingest pipeline against a pre-resolved source. Same as `ingest`
+ * but skips the resolve step and delegates progress reporting to the caller
+ * via `callbacks`. This is the entry point used by multi-source orchestrators
+ * (`add`) so a single progress bar can span every entry across every source.
+ *
+ * @param resolved Output of `resolveSource`; its `kind` selects the reader.
+ * @param input Original ingest options. `refresh_frequency` is parsed with
+ *   `parseDuration`, and `force` only counts when it is exactly `true`.
+ * @param ctx Application context passed through to the reader.
+ * @param callbacks Optional per-entry hooks, forwarded unchanged.
+ */
+export async function ingestResolved(
+	resolved: ResolvedSource,
+	input: IngestInput,
+	ctx: AppContext,
+	callbacks?: IngestCallbacks,
+): Promise<IngestResult> {
 	const refreshSec = parseDuration(input.refresh_frequency);
 	const force = input.force === true;
 	if (resolved.kind === "inline") {
-		return ingestInline(resolved.text, input, ctx, refreshSec);
+		// `force` is not passed to the inline reader.
+		return ingestInline(resolved.text, input, ctx, refreshSec, callbacks);
 	}
 	if (resolved.kind === "url") {
-		return ingestUrl(resolved.url, input, ctx, refreshSec, force);
+		return ingestUrl(resolved.url, input, ctx, refreshSec, force, callbacks);
 	}
-	return ingestLocalFiles(resolved, input, ctx, refreshSec, force);
+	return ingestLocalFiles(resolved, input, ctx, refreshSec, force, callbacks);
 }
 /** Ingest a single inline blob (source_type='inline'). */
@@ -76,8 +123,10 @@ async function ingestInline(
 	input: IngestInput,
 	ctx: AppContext,
 	refreshSec: number | null,
+	callbacks?: IngestCallbacks,
 ): Promise<IngestResult> {
 	const logicalPath = input.logical_path ?? defaultInlinePath();
+	callbacks?.onEntryStart?.(logicalPath);
 	const bytes = new TextEncoder().encode(text);
 	const sha = sha256Hex(bytes);
 	const result: IngestEntryResult = {
@@ -113,6 +162,7 @@ async function ingestInline(
 		result.status = "failed";
 		result.error = errorMessage(err);
 	}
+	callbacks?.onEntryComplete?.(result);
 	return summarize([result]);
 }
@@ -123,6 +173,7 @@ async function ingestUrl(
 	ctx: AppContext,
 	refreshSec: number | null,
 	force: boolean,
+	callbacks?: IngestCallbacks,
 ): Promise<IngestResult> {
 	const mcpxAdapter = ctx.mcpx
 		? {
@@ -137,6 +188,7 @@ async function ingestUrl(
 		: null;
 	const logicalPath = input.logical_path ?? defaultLogicalForUrl(url);
+	callbacks?.onEntryStart?.(url);
 	const result: IngestEntryResult = {
 		source_path: url,
 		logical_path: logicalPath,
@@ -160,6 +212,7 @@ async function ingestUrl(
 			if (cur && cur.source_sha256 === fetched.sha256) {
 				result.status = "unchanged";
 				result.version_id = cur.version_id;
+				callbacks?.onEntryComplete?.(result);
 				return summarize([result]);
 			}
 		}
@@ -185,6 +238,7 @@ async function ingestUrl(
 		result.status = "failed";
 		result.error = errorMessage(err);
 	}
+	callbacks?.onEntryComplete?.(result);
 	return summarize([result]);
 }
@@ -195,8 +249,16 @@ async function ingestLocalFiles(
 	ctx: AppContext,
 	refreshSec: number | null,
 	force: boolean,
+	callbacks?: IngestCallbacks,
 ): Promise<IngestResult> {
 	if (resolved.entries.length === 0) {
+		// `filtered: true` means the source resolved successfully but every
+		// entry was dropped by --exclude / --include / DEFAULT_EXCLUDES.
+		// Treat that as a silent no-op: shell-expanded globs commonly hand
+		// us individual files we should skip without aborting the batch.
+		if (resolved.filtered) {
+			return { ingested: [], total: 0, ok: 0, unchanged: 0, failed: 0 };
+		}
 		throw new HelpfulError({
 			kind: "input_error",
 			message: `Glob/path matched 0 files`,
@@ -205,11 +267,10 @@ async function ingestLocalFiles(
 	}
 	const results: IngestEntryResult[] = [];
-	ctx.progress.start(resolved.entries.length, "ingest");
 	const isMulti = resolved.entries.length > 1;
 	for (const entry of resolved.entries) {
-		ctx.progress.tick(entry.relPathFromBase);
+		callbacks?.onEntryStart?.(entry.relPathFromBase);
 		const logicalPath = pickLogicalPath(input.logical_path, entry, isMulti);
 		const result: IngestEntryResult = {
 			source_path: entry.absPath,
@@ -233,6 +294,7 @@ async function ingestLocalFiles(
 					result.status = "unchanged";
 					result.version_id = cur.version_id;
 					results.push(result);
+					callbacks?.onEntryComplete?.(result);
 					continue;
 				}
 			}
@@ -257,13 +319,15 @@ async function ingestLocalFiles(
 		} catch (err) {
 			result.status = "failed";
 			result.error = errorMessage(err);
+		} finally {
+			// Release the DB lock between files in a directory/glob walk so
+			// concurrent processes can claim it mid-batch. The next entry's
+			// first DB call reopens (cheap — same-process reopen).
+			await ctx.db.release();
 		}
 		results.push(result);
+		callbacks?.onEntryComplete?.(result);
 	}
-	const okCount = results.filter((r) => r.status === "ok").length;
-	const unchangedCount = results.filter((r) => r.status === "unchanged").length;
-	const suffix = unchangedCount > 0 ? ` (${unchangedCount} unchanged)` : "";
-	ctx.progress.done(`ingested ${okCount}/${results.length}${suffix}`);
 	return summarize(results);
 }

package/src/ingest/source-resolver.ts CHANGED Viewed

@@ -1,12 +1,26 @@
 import { realpath, stat } from "node:fs/promises";
+import { homedir } from "node:os";
 import { isAbsolute, join, relative, resolve, sep } from "node:path";
 import picomatch from "picomatch";
 import { asHelpful, HelpfulError } from "../errors.ts";
+/**
+ * Expand a leading `~` or `~/` to the user's home directory. The shell does
+ * this for us when the arg is unquoted, but `bun dev add "~/foo/*.md"` passes
+ * the literal `~` through, and `path.resolve("~/foo")` treats `~` as a
+ * regular directory name. We patch it up so quoted args work like users
+ * expect. Inline literals and URLs are caught earlier and never reach here.
+ *
+ * Only a bare `~` or a `~` followed directly by a path separator is expanded;
+ * anything else (including `~name/...` forms) is returned unchanged.
+ */
+function expandHome(p: string): string {
+	if (p === "~") return homedir();
+	// slice(2) drops the `~` together with the separator that follows it.
+	if (p.startsWith("~/") || p.startsWith(`~${sep}`)) return join(homedir(), p.slice(2));
+	return p;
+}
 export type ResolvedSource =
 	| { kind: "inline"; text: string; logicalHint: string | null }
 	| { kind: "url"; url: string; logicalHint: string | null }
-	| { kind: "local-files"; entries: ResolvedLocalEntry[]; basePath: string };
+	| { kind: "local-files"; entries: ResolvedLocalEntry[]; basePath: string; filtered?: boolean };
 export interface ResolvedLocalEntry {
 	/** Absolute filesystem path (post-realpath). */
@@ -28,6 +42,45 @@ export interface ResolveOptions {
 const DEFAULT_EXCLUDES = ["**/node_modules/**", "**/.git/**", "**/.DS_Store", "**/dist/**", "**/.cache/**"];
+/**
+ * Expand a user-supplied include/exclude pattern into a small set of
+ * gitignore-ish equivalents so common spellings all do the intuitive thing.
+ * Examples (all exclude the whole subtree): a bare name like `node_modules`,
+ * a trailing-slash form like `node_modules/`, the shell-style `node_modules`
+ * followed by single-star, the canonical doublestar forms — every spelling
+ * a user would reasonably reach for ends up matching nested files.
+ * Patterns starting with `**`-slash, `/`, or `./` are considered anchored
+ * and are not given an any-depth variant. `DEFAULT_EXCLUDES` are already
+ * canonical and bypass this helper.
+ *
+ * @param p One pattern, as typed by the user.
+ * @returns The original pattern first, followed by any derived variants, in
+ *   the order they were added and without duplicates.
+ */
+export function expandUserPattern(p: string): string[] {
+	const out = new Set<string>([p]);
+	const anchored = p.startsWith("**/") || p.startsWith("/") || p.startsWith("./");
+	const hasSlash = p.includes("/");
+	const hasGlob = /[*?[\]{}!]/.test(p);
+	// Path-like patterns ("foo/bar", "node_modules/*") imply the user is
+	// thinking about a directory tree — match at any depth. Bare globs like
+	// "*.md" are left alone so they keep their anchored top-level meaning.
+	if (hasSlash && !anchored) out.add(`**/${p}`);
+	// "dir/*" → also "dir/**" (the whole subtree), plus "**/dir/**" unless
+	// anchored. Patterns already ending in "/**/*" are left as they are.
+	if (p.endsWith("/*") && !p.endsWith("/**/*")) {
+		const base = p.slice(0, -2);
+		out.add(`${base}/**`);
+		if (!anchored) out.add(`**/${base}/**`);
+	}
+	// "dir/" → same subtree variants as the "dir/*" spelling above.
+	if (p.endsWith("/")) {
+		const base = p.slice(0, -1);
+		out.add(`${base}/**`);
+		if (!anchored) out.add(`**/${base}/**`);
+	}
+	// Bare name with no slashes and no glob chars (e.g. "node_modules",
+	// "dist") → treat as a directory match anywhere in the tree.
+	if (!hasSlash && !hasGlob) {
+		out.add(`**/${p}`);
+		out.add(`**/${p}/**`);
+	}
+	return [...out];
+}
 /**
  * Polymorphic source-arg expander. Accepts:
  *   - "inline:<text>"             → inline literal
@@ -48,20 +101,28 @@ export async function resolveSource(source: string, options: ResolveOptions = {}
 		return { kind: "url", url: source, logicalHint: null };
 	}
+	source = expandHome(source);
 	const followSymlinks = options.followSymlinks !== false;
-	const userIncludes = options.include
+	const userIncludesRaw = options.include
 		? options.include
 				.split(",")
 				.map((g) => g.trim())
 				.filter(Boolean)
 		: [];
-	const excludeMatchers = [
-		...DEFAULT_EXCLUDES,
-		...(options.exclude ?? "")
-			.split(",")
-			.map((g) => g.trim())
-			.filter(Boolean),
-	];
+	const userExcludesRaw = (options.exclude ?? "")
+		.split(",")
+		.map((g) => g.trim())
+		.filter(Boolean);
+	const userIncludesExpanded = userIncludesRaw.flatMap(expandUserPattern);
+	const userExcludesExpanded = userExcludesRaw.flatMap(expandUserPattern);
+	const excludeMatchers = [...DEFAULT_EXCLUDES, ...userExcludesExpanded];
+	// Single-file matchers run against the absolute path so shell-expanded
+	// globs (where each file lands here individually) still honor excludes.
+	const isExcludeAbs = picomatch(excludeMatchers, { dot: false });
+	const isIncludeAbs = userIncludesExpanded.length
+		? picomatch(userIncludesExpanded, { dot: false, nocase: false })
+		: null;
 	if (isGlob(source)) {
 		const base = globBase(source);
@@ -71,7 +132,7 @@ export async function resolveSource(source: string, options: ResolveOptions = {}
 			// Source glob acts as a hard filter; user includes (if any) further
 			// narrow the result via AND. Pass them as a separate matcher so the
 			// two sets aren't picomatch-OR'd together.
-			const extraIncludes = userIncludes.length > 0 ? [userIncludes] : [];
+			const extraIncludes = userIncludesExpanded.length > 0 ? [userIncludesExpanded] : [];
 			return walk(realBase, [remainder], excludeMatchers, followSymlinks, extraIncludes);
 		} catch (err) {
 			throw asHelpful(
@@ -98,6 +159,16 @@ export async function resolveSource(source: string, options: ResolveOptions = {}
 	if (st.isFile()) {
 		const real = await realpath(abs);
+		// Shell-expanded globs (e.g. zsh expanding `~/foo/**/*.md`) deliver
+		// each match here individually, so this branch must enforce both
+		// DEFAULT_EXCLUDES and the user's own --include/--exclude. Otherwise
+		// `node_modules` paths slip through whenever the shell expanded for us.
+		if (isExcludeAbs(real)) {
+			return { kind: "local-files", basePath: real, entries: [], filtered: true };
+		}
+		if (isIncludeAbs && !isIncludeAbs(real)) {
+			return { kind: "local-files", basePath: real, entries: [], filtered: true };
+		}
 		return {
 			kind: "local-files",
 			basePath: real,
@@ -107,7 +178,7 @@ export async function resolveSource(source: string, options: ResolveOptions = {}
 	if (st.isDirectory()) {
 		const realBase = await realpath(abs);
-		const dirIncludes = userIncludes.length > 0 ? userIncludes : ["**/*"];
+		const dirIncludes = userIncludesExpanded.length > 0 ? userIncludesExpanded : ["**/*"];
 		return walk(realBase, dirIncludes, excludeMatchers, followSymlinks);
 	}
@@ -170,6 +241,14 @@ async function walk(
 	const isInclude = picomatch(includes, { dot: false, nocase: false });
 	const extraMatchers = extraIncludeSets.map((set) => picomatch(set, { dot: false, nocase: false }));
 	const isExclude = excludes.length ? picomatch(excludes, { dot: false }) : null;
+	// Directory-prune patterns: derived from excludes by stripping a trailing
+	// `/**` or `/*`. Without this we descend into massive subtrees (e.g.
+	// every `node_modules/` under a workspace) before discarding files one
+	// by one — which on real machines presents as a hang.
+	const dirPrunePatterns = excludes
+		.map((p) => (p.endsWith("/**") ? p.slice(0, -3) : p.endsWith("/*") ? p.slice(0, -2) : p))
+		.filter((p) => p.length > 0);
+	const isExcludeDir = dirPrunePatterns.length ? picomatch(dirPrunePatterns, { dot: false }) : null;
 	const queue: string[] = [base];
 	while (queue.length > 0) {
@@ -191,6 +270,8 @@ async function walk(
 		}
 		if (st.isSymbolicLink() && !followSymlinks) continue;
 		if (st.isDirectory()) {
+			const rel = relative(base, real);
+			if (rel.length > 0 && isExcludeDir?.(rel)) continue;
 			let names: string[];
 			try {
 				names = await readdir(real);

package/src/mount/mcp.ts CHANGED Viewed

@@ -48,6 +48,14 @@ export function mountAsMcpTool<I extends z.ZodObject, O extends z.ZodTypeAny>(
 				};
 			} catch (err) {
 				return renderMcpError(err);
+			} finally {
+				// Drop the DuckDB lock between MCP tool calls so concurrent CLI
+				// or daemon callers can claim it. The next tool call reopens.
+				try {
+					await ctx.db.release();
+				} catch {
+					// best effort — never let release failures mask a tool result
+				}
 			}
 		},
 	);

package/src/operations/add.ts CHANGED Viewed

@@ -1,5 +1,12 @@
 import { z } from "zod";
-import { ingest } from "../ingest/ingest.ts";
+import {
+	countResolvedEntries,
+	type IngestCallbacks,
+	type IngestEntryResult,
+	type IngestResult,
+	ingestResolved,
+} from "../ingest/ingest.ts";
+import { type ResolvedSource, resolveSource } from "../ingest/source-resolver.ts";
 import { colors } from "../output/formatter.ts";
 import { defineOperation } from "./types.ts";
@@ -97,21 +104,122 @@ Pass \`logical_path\` to override. For a multi-source / directory / glob walk it
 	},
 	handler: async (input, ctx) => {
 		const { sources, ...rest } = input;
-		const aggregated = {
-			ingested: [] as Awaited<ReturnType<typeof ingest>>["ingested"],
+		const followSymlinks = rest.follow_symlinks ?? true;
+		// Phase 1: resolve every source upfront so the shared progress bar
+		// knows its total. A resolve failure (bad path, glob with no base) is
+		// captured per-source so one bad arg doesn't abort the whole batch.
+		type ResolveOutcome = { source: string; resolved: ResolvedSource } | { source: string; error: Error };
+		const outcomes: ResolveOutcome[] = [];
+		for (const source of sources) {
+			try {
+				const resolved = await resolveSource(source, {
+					include: rest.include,
+					exclude: rest.exclude,
+					followSymlinks,
+				});
+				outcomes.push({ source, resolved });
+			} catch (err) {
+				outcomes.push({ source, error: err instanceof Error ? err : new Error(String(err)) });
+			}
+		}
+		const total = outcomes.reduce((n, o) => ("error" in o ? n + 1 : n + countResolvedEntries(o.resolved)), 0);
+		const aggregated: IngestResult = {
+			ingested: [],
 			total: 0,
 			ok: 0,
 			unchanged: 0,
 			failed: 0,
 		};
-		for (const source of sources) {
-			const r = await ingest({ ...rest, source }, ctx);
-			aggregated.ingested.push(...r.ingested);
-			aggregated.total += r.total;
-			aggregated.ok += r.ok;
-			aggregated.unchanged += r.unchanged;
-			aggregated.failed += r.failed;
+		ctx.progress.start(total, "ingest");
+		const callbacks: IngestCallbacks = {
+			onEntryStart: (label) => ctx.progress.tick(label),
+			onEntryComplete: (entry) => ctx.progress.entry(formatEntryLine(entry)),
+		};
+		for (const outcome of outcomes) {
+			if ("error" in outcome) {
+				const failed: IngestEntryResult = {
+					source_path: outcome.source,
+					logical_path: outcome.source,
+					version_id: null,
+					status: "failed",
+					error: outcome.error.message,
+					mime_type: null,
+					size_bytes: 0,
+					fetcher: "local",
+					source_sha256: "",
+				};
+				callbacks.onEntryStart?.(outcome.source);
+				callbacks.onEntryComplete?.(failed);
+				aggregated.ingested.push(failed);
+				aggregated.total += 1;
+				aggregated.failed += 1;
+				continue;
+			}
+			try {
+				const r = await ingestResolved(outcome.resolved, { ...rest, source: outcome.source }, ctx, callbacks);
+				aggregated.ingested.push(...r.ingested);
+				aggregated.total += r.total;
+				aggregated.ok += r.ok;
+				aggregated.unchanged += r.unchanged;
+				aggregated.failed += r.failed;
+			} catch (err) {
+				const message = err instanceof Error ? err.message : String(err);
+				const failed: IngestEntryResult = {
+					source_path: outcome.source,
+					logical_path: outcome.source,
+					version_id: null,
+					status: "failed",
+					error: message,
+					mime_type: null,
+					size_bytes: 0,
+					fetcher: "local",
+					source_sha256: "",
+				};
+				callbacks.onEntryStart?.(outcome.source);
+				callbacks.onEntryComplete?.(failed);
+				aggregated.ingested.push(failed);
+				aggregated.total += 1;
+				aggregated.failed += 1;
+			} finally {
+				// Release the DB lock between sources so other consumers (a
+				// concurrent CLI call, the daemon, or a separate MCP server)
+				// can wedge in. The next source's first DB call reopens.
+				await ctx.db.release();
+			}
 		}
+		const summary = formatSummary(aggregated);
+		ctx.progress.done(summary);
 		return aggregated;
 	},
 });
+/**
+ * Render the persistent stderr line shown for one completed entry. Mirrors
+ * the glyphs used by the final `console_formatter` so users see the same
+ * status indicators twice (once during ingest on stderr, once in the final
+ * stdout summary).
+ *
+ * Three shapes, chosen by `entry.status`:
+ * - `"ok"`: green `✓`, cyan logical path, dim `(<fetcher>, <size_bytes>B)`.
+ * - `"unchanged"`: dim `≡`, cyan logical path, dim `(unchanged)`.
+ * - anything else (treated as a failure): red `✗`, the uncoloured
+ *   `source_path`, then the dim error message (empty when `error` is
+ *   null/undefined).
+ *
+ * @param entry - Result record for a single ingested (or failed) entry.
+ * @returns One formatted line; no trailing newline is appended here.
+ */
+function formatEntryLine(entry: IngestEntryResult): string {
+	if (entry.status === "ok") {
+		return `${colors.green("✓")} ${colors.cyan(entry.logical_path)} ${colors.dim(`(${entry.fetcher}, ${entry.size_bytes}B)`)}`;
+	}
+	if (entry.status === "unchanged") {
+		return `${colors.dim("≡")} ${colors.cyan(entry.logical_path)} ${colors.dim("(unchanged)")}`;
+	}
+	return `${colors.red("✗")} ${entry.source_path} ${colors.dim(entry.error ?? "")}`;
+}
+/**
+ * Compose the final spinner-success line summarising the whole batch.
+ *
+ * Always starts with `added <ok>/<total>`; `<n> unchanged` and `<n> failed`
+ * are appended only when the respective counter is above zero. Parts are
+ * joined with `, `.
+ *
+ * @param r - Aggregated counters for the batch.
+ * @returns The summary text, e.g. `added 3/5, 1 unchanged, 1 failed`.
+ */
+function formatSummary(r: IngestResult): string {
+	const parts: string[] = [`added ${r.ok}/${r.total}`];
+	if (r.unchanged > 0) parts.push(`${r.unchanged} unchanged`);
+	if (r.failed > 0) parts.push(`${r.failed} failed`);
+	return parts.join(", ");
+}

package/src/operations/search.ts CHANGED Viewed

@@ -1,4 +1,5 @@
 import { z } from "zod";
+import { HelpfulError } from "../errors.ts";
 import { embedSingle } from "../ingest/embedder.ts";
 import { colors } from "../output/formatter.ts";
 import { fuseRRF } from "../search/hybrid.ts";
@@ -26,9 +27,19 @@ export const searchOperation = defineOperation({
 				version_id: z.string(),
 				chunk_index: z.number(),
 				snippet: z.string(),
-				score: z.number(),
-				semantic_score: z.number().nullable(),
-				keyword_score: z.number().nullable(),
+				score: z
+					.number()
+					.describe(
+						"Normalized fusion score in [0,1]; 1.0 = chunk was top-1 on both semantic and keyword lists, ~0.5 = top-1 on one",
+					),
+				semantic_score: z
+					.number()
+					.nullable()
+					.describe("Cosine similarity from the semantic side (0-1), or null if not matched"),
+				keyword_score: z
+					.number()
+					.nullable()
+					.describe("Raw BM25 score from the keyword side (unbounded), or null if not matched"),
 			}),
 		),
 		mode: z.string(),
@@ -39,7 +50,10 @@ export const searchOperation = defineOperation({
 			return colors.dim(`(no hits in ${result.mode} mode)`);
 		}
 		const blocks = result.hits.map((h) => {
-			const head = `${colors.cyan(h.logical_path)} ${colors.dim(`v=${h.version_id}`)} ${colors.green(`score=${h.score.toFixed(3)}`)}`;
+			const parts = [`score=${h.score.toFixed(3)}`];
+			if (h.semantic_score !== null) parts.push(`sem=${h.semantic_score.toFixed(3)}`);
+			if (h.keyword_score !== null) parts.push(`bm25=${h.keyword_score.toFixed(2)}`);
+			const head = `${colors.cyan(h.logical_path)} ${colors.dim(`v=${h.version_id}`)} ${colors.green(parts.join(" "))}`;
 			const snippet = h.snippet
 				.split("\n")
 				.map((l) => `  ${l}`)
@@ -52,6 +66,14 @@ export const searchOperation = defineOperation({
 		const query = input.query ?? input.pattern ?? "";
 		const pattern = input.pattern ?? input.query ?? "";
+		if (!query.trim() && !pattern.trim()) {
+			throw new HelpfulError({
+				kind: "input_error",
+				message: "search requires a query or pattern",
+				hint: 'Pass a natural-language query (e.g. `membot search "oauth flow"`) or a keyword pattern (e.g. `membot search --pattern OAuth`).',
+			});
+		}
 		const semanticHits =
 			input.mode === "keyword" || !query.trim()
 				? []

package/src/output/progress.ts CHANGED Viewed

@@ -2,21 +2,48 @@ import { logger } from "./logger.ts";
 import { isSilent, useSpinner } from "./tty.ts";
 /**
- * Minimal progress reporter for multi-entry operations (directory/glob ingest,
- * batch refresh). Operations call `start(total)`, then `tick(label)` for each
- * entry, then `done(summary)`.
+ * Progress reporter for multi-entry operations (directory/glob ingest, batch
+ * refresh, multi-source `add`). Operations call `start(total)`, then for each
+ * entry call `tick(label)` (advances the in-flight indicator) and optionally
+ * `entry(line)` (writes a persistent stderr line that survives the spinner),
+ * then `done(summary)`.
  *
- * Interactive: replaces a single spinner line as work happens.
- * Non-interactive: emits `info` lines per entry.
+ * Interactive: replaces a single spinner line as work happens, with a
+ * block-character bar like
+ * `[█████░░░░░░░░░░░░░░░] 4/15 (26%) — relative/path.md`.
+ * Non-interactive: emits `info` lines per `tick` and per `entry`.
  */
 export interface Progress {
 	start(total: number, label?: string): void;
 	tick(label: string): void;
+	entry(line: string): void;
 	done(summary?: string): void;
 	fail(summary?: string): void;
 	info(msg: string): void;
 }
+const BAR_WIDTH = 20; // default number of cells between the brackets of the progress bar
+const LABEL_MAX = 60; // default maximum label length before truncateLabel shortens it
+/**
+ * Render a fixed-width progress bar as `[` + filled cells + empty cells + `]`.
+ * The cells are Unicode block-drawing characters (`█` filled, `░` empty), not
+ * plain ASCII, so the bar reads naturally next to other unicode glyphs.
+ *
+ * The fill ratio `count / total` is clamped to [0, 1] and rounded to the
+ * nearest whole cell. A non-positive `total` yields an all-empty bar, which
+ * also avoids dividing by zero.
+ *
+ * @param count - Units completed so far.
+ * @param total - Units expected overall.
+ * @param width - Number of cells between the brackets (defaults to
+ *   `BAR_WIDTH`). NOTE(review): a negative width makes `String.prototype.repeat`
+ *   throw a RangeError — callers are presumably expected to pass a
+ *   non-negative integer.
+ * @returns The bracketed bar; `width + 2` characters long for an integer width.
+ */
+export function renderBar(count: number, total: number, width = BAR_WIDTH): string {
+	if (total <= 0) return `[${"░".repeat(width)}]`;
+	const ratio = Math.min(1, Math.max(0, count / total));
+	const filled = Math.round(ratio * width);
+	return `[${"█".repeat(filled)}${"░".repeat(width - filled)}]`;
+}
+/**
+ * Truncate a label from the left so the most-specific tail of a long path
+ * stays visible. Helps keep the spinner line on a single terminal row.
+ *
+ * Labels no longer than `max` are returned unchanged. Longer ones become `…`
+ * followed by the last `max - 1` characters, so the result is exactly `max`
+ * UTF-16 code units long (for `max >= 1`).
+ *
+ * @param label - Text to shorten, typically a path.
+ * @param max - Maximum length of the result (defaults to `LABEL_MAX`).
+ * @returns The original label or its left-truncated form.
+ */
+function truncateLabel(label: string, max = LABEL_MAX): string {
+	if (label.length <= max) return label;
+	return `…${label.slice(label.length - max + 1)}`;
+}
 /**
  * Build a `Progress` reporter whose mode is decided once, at call time, from
  * the current TTY state. Use one per multi-entry operation.
@@ -29,26 +56,38 @@ export function createProgress(): Progress {
 	const interactive = useSpinner();
 	const silent = isSilent();
+	const renderSpinnerText = (label: string): string => {
+		const bar = renderBar(count, total);
+		const pct = total > 0 ? Math.floor((count / total) * 100) : 0;
+		const tail = label ? ` — ${truncateLabel(label)}` : "";
+		return `${bar} ${count}/${total} (${pct}%)${tail}`;
+	};
 	return {
 		start(t: number, label?: string) {
 			total = t;
 			count = 0;
 			if (silent) return;
 			if (interactive) {
-				spinner = logger.startSpinner(label ? `${label} (0/${total})` : `0/${total}`);
+				const initial = renderSpinnerText(label ?? "");
+				spinner = logger.startSpinner(initial);
 			} else if (label) {
-				logger.info(label);
+				logger.info(`${label}: 0/${total}`);
 			}
 		},
 		tick(label: string) {
 			count += 1;
 			if (silent) return;
 			if (interactive && spinner) {
-				spinner.update(`${count}/${total} — ${label}`);
+				spinner.update(renderSpinnerText(label));
 			} else {
 				logger.info(`[${count}/${total}] ${label}`);
 			}
 		},
+		entry(line: string) {
+			if (silent) return;
+			logger.info(line);
+		},
 		done(summary?: string) {
 			if (silent) return;
 			if (interactive && spinner) {

package/src/refresh/scheduler.ts CHANGED Viewed

@@ -40,6 +40,14 @@ export function startDaemon(ctx: AppContext, tickSec: number): () => void {
 			await runDueRefreshes(ctx);
 		} catch (err) {
 			logger.warn(`daemon: tick failed (${err instanceof Error ? err.message : String(err)})`);
+		} finally {
+			// Drop the DuckDB lock between ticks so the CLI / MCP server can
+			// run while the daemon is idle. Next tick reopens transparently.
+			try {
+				await ctx.db.release();
+			} catch {
+				// best effort
+			}
 		}
 		if (!stopped) setTimeout(loop, intervalMs);
 	};

package/src/sdk.ts CHANGED Viewed

@@ -14,8 +14,10 @@ export { chunkDeterministic } from "./ingest/chunker.ts";
 export { embed, embedSingle } from "./ingest/embedder.ts";
 export type { FetchedRemote, FetchOptions } from "./ingest/fetcher.ts";
 export { fetchRemote } from "./ingest/fetcher.ts";
-export type { IngestEntryResult, IngestInput, IngestResult } from "./ingest/ingest.ts";
-export { ingest } from "./ingest/ingest.ts";
+export type { IngestCallbacks, IngestEntryResult, IngestInput, IngestResult } from "./ingest/ingest.ts";
+export { countResolvedEntries, ingest, ingestResolved } from "./ingest/ingest.ts";
+export type { ResolvedLocalEntry, ResolvedSource } from "./ingest/source-resolver.ts";
+export { resolveSource } from "./ingest/source-resolver.ts";
 export { buildMcpServer, startHttpServer, startStdioServer } from "./mcp/server.ts";
 export { OPERATIONS } from "./operations/index.ts";
 export type { CliMetadata, Operation } from "./operations/types.ts";

package/src/search/hybrid.ts CHANGED Viewed

@@ -17,6 +17,11 @@ const SNIPPET_MAX = 300;
  * Reciprocal-rank fusion of semantic and keyword hit lists. Each result is
  * keyed by `(logical_path, version_id, chunk_index)` so the same chunk
  * appearing in both lists gets one fused score = sum of its RRF scores.
+ *
+ * The returned `score` is normalized to [0,1] by dividing by the theoretical
+ * max RRF (`2/(k+1)`, achieved when a chunk is rank-0 on both lists). This
+ * preserves ordering — division is monotonic — but makes the displayed value
+ * interpretable: 1.0 = top-1 on both signals, ~0.5 = top-1 on one.
  */
 export function fuseRRF(
 	semantic: SemanticHit[],
@@ -24,6 +29,7 @@ export function fuseRRF(
 	options: { k?: number; limit: number },
 ): FusedHit[] {
 	const k = options.k ?? 60;
+	const maxRrf = 2 / (k + 1);
 	const merged = new Map<
 		string,
 		{
@@ -89,7 +95,7 @@ export function fuseRRF(
 		version_id: h.version_id,
 		chunk_index: h.chunk_index,
 		snippet: h.snippet,
-		score: round(h.rrf),
+		score: round(h.rrf / maxRrf),
 		semantic_score: h.semantic_score,
 		keyword_score: h.keyword_score,
 	}));

package/src/search/keyword.ts CHANGED Viewed

@@ -1,5 +1,6 @@
 import { rebuildFts } from "../db/chunks.ts";
 import type { DbConnection } from "../db/connection.ts";
+import { asHelpful } from "../errors.ts";
 export interface KeywordHit {
 	logical_path: string;
@@ -23,9 +24,10 @@ interface RawKeywordRow {
 /**
  * BM25 keyword search over `chunks.search_text` via the FTS extension.
- * Returns an empty list when FTS isn't available on this platform — the
- * hybrid layer treats missing keyword hits as "no signal" and degrades
- * to semantic-only.
+ * Returns an empty list when FTS isn't available on this platform or the
+ * index is empty — the hybrid layer treats missing keyword hits as "no
+ * signal" and degrades to semantic-only. Genuine SQL/runtime errors are
+ * surfaced as HelpfulError so they don't get silently buried.
  */
 export async function searchKeyword(
 	db: DbConnection,
@@ -36,15 +38,15 @@ export async function searchKeyword(
 	if (result.kind !== "rebuilt") return [];
 	const limit = options.limit ?? 50;
+	const sql = `SELECT row_key, logical_path, version_id, chunk_index,
+	                   chunk_content, search_text,
+	                   fts_main__current_chunks_fts.match_bm25(row_key, ?1) AS bm25_score
+	            FROM _current_chunks_fts
+	           WHERE fts_main__current_chunks_fts.match_bm25(row_key, ?1) IS NOT NULL
+	             ${options.pathPrefix ? "AND logical_path LIKE ?2" : ""}
+	           ORDER BY bm25_score DESC
+	           LIMIT ${Number(limit)}`;
 	try {
-		const sql = `SELECT row_key, logical_path, version_id, chunk_index,
-		                   chunk_content, search_text,
-		                   fts_main__current_chunks_fts.match_bm25(row_key, ?1) AS bm25_score
-		            FROM _current_chunks_fts
-		           WHERE fts_main__current_chunks_fts.match_bm25(row_key, ?1) IS NOT NULL
-		             ${options.pathPrefix ? "AND logical_path LIKE ?2" : ""}
-		           ORDER BY bm25_score DESC
-		           LIMIT ${Number(limit)}`;
 		const rows: RawKeywordRow[] = options.pathPrefix
 			? await db.queryAll<RawKeywordRow>(sql, query, `${options.pathPrefix}%`)
 			: await db.queryAll<RawKeywordRow>(sql, query);
@@ -56,7 +58,12 @@ export async function searchKeyword(
 			search_text: r.search_text,
 			score: Number(r.bm25_score),
 		}));
-	} catch {
-		return [];
+	} catch (e) {
+		throw asHelpful(
+			e,
+			"while running BM25 keyword search",
+			"Run `membot reindex` to rebuild the FTS index, then retry the search.",
+			"internal_error",
+		);
 	}
 }