membot 0.0.1 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78)
  1. package/.claude/skills/membot.md +137 -0
  2. package/.cursor/rules/membot.mdc +137 -0
  3. package/README.md +131 -0
  4. package/package.json +83 -24
  5. package/patches/@huggingface%2Ftransformers@4.2.0.patch +137 -0
  6. package/scripts/apply-transformers-patch.sh +35 -0
  7. package/src/cli.ts +72 -0
  8. package/src/commands/check-update.ts +69 -0
  9. package/src/commands/mcpx.ts +112 -0
  10. package/src/commands/reindex.ts +53 -0
  11. package/src/commands/serve.ts +58 -0
  12. package/src/commands/skill.ts +131 -0
  13. package/src/commands/upgrade.ts +220 -0
  14. package/src/config/loader.ts +100 -0
  15. package/src/config/schemas.ts +39 -0
  16. package/src/constants.ts +42 -0
  17. package/src/context.ts +80 -0
  18. package/src/db/blobs.ts +53 -0
  19. package/src/db/chunks.ts +176 -0
  20. package/src/db/connection.ts +173 -0
  21. package/src/db/files.ts +325 -0
  22. package/src/db/migrations/001-init.ts +63 -0
  23. package/src/db/migrations/002-fts.ts +12 -0
  24. package/src/db/migrations.ts +45 -0
  25. package/src/errors.ts +87 -0
  26. package/src/ingest/chunker.ts +117 -0
  27. package/src/ingest/converter/docx.ts +15 -0
  28. package/src/ingest/converter/html.ts +20 -0
  29. package/src/ingest/converter/image.ts +71 -0
  30. package/src/ingest/converter/index.ts +119 -0
  31. package/src/ingest/converter/llm.ts +66 -0
  32. package/src/ingest/converter/ocr.ts +51 -0
  33. package/src/ingest/converter/pdf.ts +38 -0
  34. package/src/ingest/converter/text.ts +8 -0
  35. package/src/ingest/describer.ts +72 -0
  36. package/src/ingest/embedder.ts +98 -0
  37. package/src/ingest/fetcher.ts +280 -0
  38. package/src/ingest/ingest.ts +444 -0
  39. package/src/ingest/local-reader.ts +64 -0
  40. package/src/ingest/search-text.ts +18 -0
  41. package/src/ingest/source-resolver.ts +186 -0
  42. package/src/mcp/instructions.ts +34 -0
  43. package/src/mcp/server.ts +101 -0
  44. package/src/mount/commander.ts +174 -0
  45. package/src/mount/mcp.ts +111 -0
  46. package/src/mount/zod-to-cli.ts +158 -0
  47. package/src/operations/add.ts +69 -0
  48. package/src/operations/diff.ts +105 -0
  49. package/src/operations/index.ts +38 -0
  50. package/src/operations/info.ts +95 -0
  51. package/src/operations/list.ts +87 -0
  52. package/src/operations/move.ts +83 -0
  53. package/src/operations/prune.ts +80 -0
  54. package/src/operations/read.ts +102 -0
  55. package/src/operations/refresh.ts +72 -0
  56. package/src/operations/remove.ts +35 -0
  57. package/src/operations/search.ts +72 -0
  58. package/src/operations/tree.ts +103 -0
  59. package/src/operations/types.ts +81 -0
  60. package/src/operations/versions.ts +78 -0
  61. package/src/operations/write.ts +77 -0
  62. package/src/output/formatter.ts +68 -0
  63. package/src/output/logger.ts +114 -0
  64. package/src/output/progress.ts +78 -0
  65. package/src/output/tty.ts +91 -0
  66. package/src/refresh/runner.ts +296 -0
  67. package/src/refresh/scheduler.ts +54 -0
  68. package/src/sdk.ts +27 -0
  69. package/src/search/hybrid.ts +100 -0
  70. package/src/search/keyword.ts +62 -0
  71. package/src/search/semantic.ts +56 -0
  72. package/src/types/text-modules.d.ts +9 -0
  73. package/src/update/background.ts +73 -0
  74. package/src/update/cache.ts +40 -0
  75. package/src/update/checker.ts +117 -0
  76. package/.claude/settings.local.json +0 -7
  77. package/CLAUDE.md +0 -139
  78. package/docs/plan.md +0 -905
@@ -0,0 +1,78 @@
1
+ import { logger } from "./logger.ts";
2
+ import { isSilent, useSpinner } from "./tty.ts";
3
+
4
+ /**
5
+ * Minimal progress reporter for multi-entry operations (directory/glob ingest,
6
+ * batch refresh). Operations call `start(total)`, then `tick(label)` for each
7
+ * entry, then `done(summary)`.
8
+ *
9
+ * Interactive: replaces a single spinner line as work happens.
10
+ * Non-interactive: emits `info` lines per entry.
11
+ */
12
+ export interface Progress {
13
+ start(total: number, label?: string): void;
14
+ tick(label: string): void;
15
+ done(summary?: string): void;
16
+ fail(summary?: string): void;
17
+ info(msg: string): void;
18
+ }
19
+
20
+ /**
21
+ * Build a `Progress` reporter whose mode is decided once, at call time, from
22
+ * the current TTY state. Use one per multi-entry operation.
23
+ */
24
+ export function createProgress(): Progress {
25
+ let total = 0;
26
+ let count = 0;
27
+ let spinner: ReturnType<typeof logger.startSpinner> | null = null;
28
+
29
+ const interactive = useSpinner();
30
+ const silent = isSilent();
31
+
32
+ return {
33
+ start(t: number, label?: string) {
34
+ total = t;
35
+ count = 0;
36
+ if (silent) return;
37
+ if (interactive) {
38
+ spinner = logger.startSpinner(label ? `${label} (0/${total})` : `0/${total}`);
39
+ } else if (label) {
40
+ logger.info(label);
41
+ }
42
+ },
43
+ tick(label: string) {
44
+ count += 1;
45
+ if (silent) return;
46
+ if (interactive && spinner) {
47
+ spinner.update(`${count}/${total} — ${label}`);
48
+ } else {
49
+ logger.info(`[${count}/${total}] ${label}`);
50
+ }
51
+ },
52
+ done(summary?: string) {
53
+ if (silent) return;
54
+ if (interactive && spinner) {
55
+ spinner.success(summary ?? `${count}/${total} done`);
56
+ spinner = null;
57
+ } else if (summary) {
58
+ logger.info(summary);
59
+ }
60
+ },
61
+ fail(summary?: string) {
62
+ if (silent) {
63
+ if (summary) logger.warn(summary);
64
+ return;
65
+ }
66
+ if (interactive && spinner) {
67
+ spinner.error(summary ?? `failed at ${count}/${total}`);
68
+ spinner = null;
69
+ } else if (summary) {
70
+ logger.warn(summary);
71
+ }
72
+ },
73
+ info(msg: string) {
74
+ if (silent) return;
75
+ logger.info(msg);
76
+ },
77
+ };
78
+ }
@@ -0,0 +1,91 @@
1
+ /**
2
+ * Single source of truth for whether the CLI is running interactively. All
3
+ * spinner / color / progress decisions go through these helpers — operations
4
+ * never inspect process.stdout themselves.
5
+ *
6
+ * Mode resolution (read once at startup, then frozen via setMode):
7
+ * stdout.isTTY && stderr.isTTY && !json → interactive
8
+ * anything else → non-interactive
9
+ * CI=true → forces non-interactive
10
+ * --no-color or NO_COLOR → disables ANSI even if interactive
11
+ * FORCE_COLOR → forces ANSI on regardless
12
+ * CI=true OR NODE_ENV=test OR → silent (suppresses advisory
13
+ * MEMBOT_SILENT=1 info / progress lines)
14
+ * --verbose → overrides silent
15
+ */
16
+
17
+ export interface OutputMode {
18
+ interactive: boolean;
19
+ color: boolean;
20
+ json: boolean;
21
+ verbose: boolean;
22
+ silent: boolean;
23
+ }
24
+
25
+ let mode: OutputMode | null = null;
26
+
27
+ export interface DetectModeOptions {
28
+ json?: boolean;
29
+ noColor?: boolean;
30
+ forceColor?: boolean;
31
+ verbose?: boolean;
32
+ }
33
+
34
+ /** Compute the active output mode from env + flags. Idempotent. */
35
+ export function detectMode(opts: DetectModeOptions = {}): OutputMode {
36
+ const json = !!opts.json;
37
+ const verbose = !!opts.verbose;
38
+ const stdoutTty = !!(process.stdout.isTTY ?? false);
39
+ const stderrTty = !!(process.stderr.isTTY ?? false);
40
+ const ci = process.env.CI === "true" || process.env.CI === "1";
41
+
42
+ const interactive = !json && !ci && stdoutTty && stderrTty;
43
+
44
+ const noColorEnv = !!process.env.NO_COLOR;
45
+ const forceColor = !!opts.forceColor || !!process.env.FORCE_COLOR;
46
+ const noColorFlag = !!opts.noColor;
47
+
48
+ let color: boolean;
49
+ if (forceColor) color = true;
50
+ else if (noColorFlag || noColorEnv || json) color = false;
51
+ else color = stderrTty; // colors target stderr (logs) and stdout (formatted output)
52
+
53
+ const testEnv = process.env.NODE_ENV === "test";
54
+ const explicitSilent = process.env.MEMBOT_SILENT === "1" || process.env.MEMBOT_SILENT === "true";
55
+ const silent = !verbose && !json && (ci || testEnv || explicitSilent);
56
+
57
+ return { interactive, color, json, verbose, silent };
58
+ }
59
+
60
+ export function setMode(m: OutputMode): void {
61
+ mode = m;
62
+ }
63
+
64
+ export function getMode(): OutputMode {
65
+ if (!mode) mode = detectMode();
66
+ return mode;
67
+ }
68
+
69
+ export function isInteractive(): boolean {
70
+ return getMode().interactive;
71
+ }
72
+
73
+ export function useColor(): boolean {
74
+ return getMode().color;
75
+ }
76
+
77
+ export function useSpinner(): boolean {
78
+ return getMode().interactive && !getMode().verbose;
79
+ }
80
+
81
+ export function isJson(): boolean {
82
+ return getMode().json;
83
+ }
84
+
85
+ export function isVerbose(): boolean {
86
+ return getMode().verbose;
87
+ }
88
+
89
+ export function isSilent(): boolean {
90
+ return getMode().silent;
91
+ }
@@ -0,0 +1,296 @@
1
+ import type { McpxClient } from "@evantahler/mcpx";
2
+ import type { AppContext } from "../context.ts";
3
+ import { upsertBlob } from "../db/blobs.ts";
4
+ import { insertChunksForVersion, rebuildFts } from "../db/chunks.ts";
5
+ import { getCurrent, insertVersion, millisIso, updateRefreshStatus } from "../db/files.ts";
6
+ import { HelpfulError } from "../errors.ts";
7
+ import { chunkDeterministic } from "../ingest/chunker.ts";
8
+ import { convert } from "../ingest/converter/index.ts";
9
+ import { describe } from "../ingest/describer.ts";
10
+ import { embed } from "../ingest/embedder.ts";
11
+ import { fetchRemote } from "../ingest/fetcher.ts";
12
+ import { mimeFromPath, readLocalFile, sha256Hex } from "../ingest/local-reader.ts";
13
+ import { buildSearchText } from "../ingest/search-text.ts";
14
+
15
+ export interface RefreshOutcome {
16
+ logical_path: string;
17
+ status: "ok" | "unchanged" | "failed";
18
+ new_version_id?: string;
19
+ error?: string;
20
+ }
21
+
22
+ /**
23
+ * Refresh one logical_path. Re-reads its source (local stat+sha or remote
24
+ * via the persisted mcpx invocation), and creates a new version only if
25
+ * the source bytes changed. Always updates `refreshed_at` and
26
+ * `last_refresh_status` on the row. Returns a per-path outcome — never
27
+ * throws unless the path doesn't exist.
28
+ */
29
+ export async function refreshOne(ctx: AppContext, logicalPath: string, force = false): Promise<RefreshOutcome> {
30
+ const cur = await getCurrent(ctx.db, logicalPath);
31
+ if (!cur) {
32
+ throw new HelpfulError({
33
+ kind: "not_found",
34
+ message: `no current version for ${logicalPath}`,
35
+ hint: `Run \`membot ls\` to see available paths, or ingest with \`membot add\`.`,
36
+ });
37
+ }
38
+
39
+ if (cur.source_type === "inline") {
40
+ return { logical_path: logicalPath, status: "unchanged" };
41
+ }
42
+
43
+ try {
44
+ if (cur.source_type === "local") {
45
+ return await refreshLocal(ctx, cur, force);
46
+ }
47
+ if (cur.source_type === "remote") {
48
+ return await refreshRemote(ctx, cur, force);
49
+ }
50
+ } catch (err) {
51
+ const message = err instanceof Error ? err.message : String(err);
52
+ await updateRefreshStatus(ctx.db, logicalPath, cur.version_id, {
53
+ refreshed_at: new Date().toISOString(),
54
+ last_refresh_status: `failed:${message}`,
55
+ });
56
+ return { logical_path: logicalPath, status: "failed", error: message };
57
+ }
58
+ return { logical_path: logicalPath, status: "unchanged" };
59
+ }
60
+
61
+ interface CurrentRow {
62
+ logical_path: string;
63
+ version_id: string;
64
+ source_type: string;
65
+ source_path: string | null;
66
+ source_mtime_ms: number | null;
67
+ source_sha256: string | null;
68
+ mime_type: string | null;
69
+ fetcher: string | null;
70
+ fetcher_server: string | null;
71
+ fetcher_tool: string | null;
72
+ fetcher_args: Record<string, unknown> | null;
73
+ refresh_frequency_sec: number | null;
74
+ }
75
+
76
+ /** Local-file refresh: stat-then-sha gate before re-running the pipeline. */
77
+ async function refreshLocal(ctx: AppContext, cur: CurrentRow, force: boolean): Promise<RefreshOutcome> {
78
+ if (!cur.source_path) {
79
+ throw new HelpfulError({
80
+ kind: "input_error",
81
+ message: `local row ${cur.logical_path} has no source_path`,
82
+ hint: "This row likely came from an inline write. Re-ingest with `membot add` if you want refreshing.",
83
+ });
84
+ }
85
+ const local = await readLocalFile(cur.source_path);
86
+
87
+ if (!force && cur.source_sha256 === local.sha256) {
88
+ await updateRefreshStatus(ctx.db, cur.logical_path, cur.version_id, {
89
+ refreshed_at: new Date().toISOString(),
90
+ last_refresh_status: "unchanged",
91
+ });
92
+ return { logical_path: cur.logical_path, status: "unchanged" };
93
+ }
94
+
95
+ const versionId = await runPipelineForRefresh(ctx, {
96
+ logicalPath: cur.logical_path,
97
+ bytes: local.bytes,
98
+ mime: local.mimeType,
99
+ source: cur.source_path,
100
+ sourceType: "local",
101
+ sourcePath: cur.source_path,
102
+ sourceMtimeMs: local.mtimeMs,
103
+ sourceSha: local.sha256,
104
+ fetcher: "local",
105
+ fetcherServer: null,
106
+ fetcherTool: null,
107
+ fetcherArgs: null,
108
+ refreshSec: cur.refresh_frequency_sec,
109
+ });
110
+ return { logical_path: cur.logical_path, status: "ok", new_version_id: versionId };
111
+ }
112
+
113
+ /** Remote refresh: replay the persisted mcpx invocation, or plain HTTP. */
114
+ async function refreshRemote(ctx: AppContext, cur: CurrentRow, force: boolean): Promise<RefreshOutcome> {
115
+ if (!cur.source_path) {
116
+ throw new HelpfulError({
117
+ kind: "input_error",
118
+ message: `remote row ${cur.logical_path} has no source_path`,
119
+ hint: "Inspect with `membot info` and consider re-ingesting.",
120
+ });
121
+ }
122
+ const fetched = await replayFetch(cur, ctx.mcpx);
123
+
124
+ if (!force && cur.source_sha256 === fetched.sha256) {
125
+ await updateRefreshStatus(ctx.db, cur.logical_path, cur.version_id, {
126
+ refreshed_at: new Date().toISOString(),
127
+ last_refresh_status: "unchanged",
128
+ });
129
+ return { logical_path: cur.logical_path, status: "unchanged" };
130
+ }
131
+
132
+ const versionId = await runPipelineForRefresh(ctx, {
133
+ logicalPath: cur.logical_path,
134
+ bytes: fetched.bytes,
135
+ mime: fetched.mimeType,
136
+ source: cur.source_path,
137
+ sourceType: "remote",
138
+ sourcePath: cur.source_path,
139
+ sourceMtimeMs: null,
140
+ sourceSha: fetched.sha256,
141
+ fetcher: cur.fetcher === "mcpx" ? "mcpx" : "http",
142
+ fetcherServer: fetched.fetcherServer,
143
+ fetcherTool: fetched.fetcherTool,
144
+ fetcherArgs: fetched.fetcherArgs,
145
+ refreshSec: cur.refresh_frequency_sec,
146
+ });
147
+ return { logical_path: cur.logical_path, status: "ok", new_version_id: versionId };
148
+ }
149
+
150
+ /**
151
+ * Re-fetch a remote source. When the row recorded an mcpx invocation,
152
+ * call it directly with the same args (no agent re-routing); otherwise
153
+ * fall back to plain HTTP. The choice is deterministic — same row always
154
+ * produces the same fetch path.
155
+ */
156
+ async function replayFetch(
157
+ cur: CurrentRow,
158
+ mcpx: McpxClient | null,
159
+ ): Promise<{
160
+ bytes: Uint8Array;
161
+ sha256: string;
162
+ mimeType: string;
163
+ fetcherServer: string | null;
164
+ fetcherTool: string | null;
165
+ fetcherArgs: Record<string, unknown> | null;
166
+ }> {
167
+ if (cur.fetcher === "mcpx" && cur.fetcher_server && cur.fetcher_tool && mcpx) {
168
+ const args = cur.fetcher_args ?? {};
169
+ const result = await mcpx.exec(cur.fetcher_server, cur.fetcher_tool, args);
170
+ const text = extractText(result);
171
+ const bytes = new TextEncoder().encode(text);
172
+ return {
173
+ bytes,
174
+ sha256: sha256Hex(bytes),
175
+ mimeType: "text/markdown",
176
+ fetcherServer: cur.fetcher_server,
177
+ fetcherTool: cur.fetcher_tool,
178
+ fetcherArgs: args,
179
+ };
180
+ }
181
+ const r = await fetchRemote(cur.source_path ?? "", { hint: "http" });
182
+ return {
183
+ bytes: r.bytes,
184
+ sha256: r.sha256,
185
+ mimeType: r.mimeType,
186
+ fetcherServer: null,
187
+ fetcherTool: null,
188
+ fetcherArgs: null,
189
+ };
190
+ }
191
+
192
+ /** Pull a string out of whatever shape an mcpx tool happens to return. */
193
+ function extractText(result: unknown): string {
194
+ if (typeof result === "string") return result;
195
+ if (result && typeof result === "object") {
196
+ const r = result as Record<string, unknown>;
197
+ if (typeof r.text === "string") return r.text;
198
+ if (typeof r.content === "string") return r.content;
199
+ if (typeof r.markdown === "string") return r.markdown;
200
+ if (Array.isArray(r.content)) {
201
+ const out: string[] = [];
202
+ for (const c of r.content) {
203
+ if (c && typeof c === "object") {
204
+ const inner = c as Record<string, unknown>;
205
+ if (typeof inner.text === "string") out.push(inner.text);
206
+ }
207
+ }
208
+ if (out.length > 0) return out.join("\n\n");
209
+ }
210
+ }
211
+ try {
212
+ return JSON.stringify(result);
213
+ } catch {
214
+ return "";
215
+ }
216
+ }
217
+
218
/** Inputs for one refresh pipeline run — a fully resolved source. */
interface PipelineParams {
  logicalPath: string;
  bytes: Uint8Array; // raw source bytes as read/fetched
  mime: string; // mime type of `bytes`
  source: string; // source string handed to convert()
  sourceType: "local" | "remote";
  sourcePath: string | null;
  sourceMtimeMs: number | null; // local files only; null for remote
  sourceSha: string; // sha256 of `bytes`
  fetcher: "local" | "http" | "mcpx";
  fetcherServer: string | null; // mcpx invocations only
  fetcherTool: string | null; // mcpx invocations only
  fetcherArgs: Record<string, unknown> | null; // mcpx invocations only
  refreshSec: number | null; // carried over from the previous version
}

/**
 * Re-run convert → describe → chunk → embed and write a fresh version
 * row. Mirrors `ingest.ts`'s pipeline; kept separate so refresh-specific
 * fields (`change_note='refresh: source updated'`) aren't accidentally
 * applied to first-time ingests.
 *
 * Returns the new version_id (time-based, via `millisIso`).
 */
async function runPipelineForRefresh(ctx: AppContext, p: PipelineParams): Promise<string> {
  // Persist the raw source bytes, content-addressed by their sha256.
  await upsertBlob(ctx.db, {
    sha256: p.sourceSha,
    mime_type: p.mime,
    size_bytes: p.bytes.byteLength,
    bytes: p.bytes,
  });

  // Convert the bytes to markdown, then derive a description, deterministic
  // chunks, per-chunk search text, and embeddings — in that order.
  const conversion = await convert(p.bytes, p.mime, p.source, ctx.config.llm);
  const markdown = conversion.markdown;
  const description = await describe(p.logicalPath, p.mime, markdown, ctx.config.llm);
  const chunks = chunkDeterministic(markdown, ctx.config.chunker);
  const searchTexts = chunks.map((c) => buildSearchText(p.logicalPath, description, c.content));
  const embeddings = await embed(searchTexts, ctx.config.embedding_model);

  // content_sha256 covers the converted markdown, while blob/source shas
  // cover the original bytes.
  const versionId = millisIso(Date.now());
  const contentSha = sha256Hex(new TextEncoder().encode(markdown));
  await insertVersion(ctx.db, {
    logical_path: p.logicalPath,
    version_id: versionId,
    source_type: p.sourceType,
    source_path: p.sourcePath,
    source_mtime_ms: p.sourceMtimeMs,
    source_sha256: p.sourceSha,
    blob_sha256: p.sourceSha,
    content_sha256: contentSha,
    content: markdown,
    description,
    mime_type: p.mime,
    size_bytes: p.bytes.byteLength,
    fetcher: p.fetcher,
    fetcher_server: p.fetcherServer,
    fetcher_tool: p.fetcherTool,
    fetcher_args: p.fetcherArgs,
    refresh_frequency_sec: p.refreshSec,
    refreshed_at: new Date().toISOString(),
    last_refresh_status: "ok",
    change_note: "refresh: source updated",
  });

  await insertChunksForVersion(
    ctx.db,
    p.logicalPath,
    versionId,
    chunks.map((c, i) => ({
      chunk_index: c.index,
      chunk_content: c.content,
      // Defensive fallbacks: recompute search text / zero-fill the embedding
      // if the parallel arrays ever disagree in length.
      search_text: searchTexts[i] ?? buildSearchText(p.logicalPath, description, c.content),
      embedding: embeddings[i] ?? new Array(embeddings[0]?.length ?? 0).fill(0),
    })),
  );

  // FTS index is rebuilt wholesale after every version write.
  await rebuildFts(ctx.db);
  return versionId;
}

export { mimeFromPath };
@@ -0,0 +1,54 @@
1
+ import type { AppContext } from "../context.ts";
2
+ import { listDueRefreshes } from "../db/files.ts";
3
+ import { logger } from "../output/logger.ts";
4
+ import { type RefreshOutcome, refreshOne } from "./runner.ts";
5
+
6
+ /**
7
+ * One scheduler tick: refresh every row whose `refresh_frequency_sec` has
8
+ * elapsed since `refreshed_at`. Errors on individual rows are logged and
9
+ * the loop continues so one bad source doesn't halt the daemon.
10
+ */
11
+ export async function runDueRefreshes(ctx: AppContext): Promise<RefreshOutcome[]> {
12
+ const due = await listDueRefreshes(ctx.db);
13
+ const out: RefreshOutcome[] = [];
14
+ for (const row of due) {
15
+ try {
16
+ const r = await refreshOne(ctx, row.logical_path);
17
+ out.push(r);
18
+ if (r.status === "ok") logger.info(`refresh: ${row.logical_path} → new version ${r.new_version_id}`);
19
+ } catch (err) {
20
+ const msg = err instanceof Error ? err.message : String(err);
21
+ logger.warn(`refresh: ${row.logical_path} failed (${msg})`);
22
+ out.push({ logical_path: row.logical_path, status: "failed", error: msg });
23
+ }
24
+ }
25
+ return out;
26
+ }
27
+
28
+ /**
29
+ * Long-running daemon loop. Calls `runDueRefreshes` every `tick_interval_sec`
30
+ * (from config). Returns a stop function the caller can use to terminate
31
+ * the daemon (e.g. on SIGINT).
32
+ */
33
+ export function startDaemon(ctx: AppContext, tickSec: number): () => void {
34
+ const intervalMs = Math.max(1, tickSec) * 1000;
35
+ let stopped = false;
36
+
37
+ const loop = async () => {
38
+ if (stopped) return;
39
+ try {
40
+ await runDueRefreshes(ctx);
41
+ } catch (err) {
42
+ logger.warn(`daemon: tick failed (${err instanceof Error ? err.message : String(err)})`);
43
+ }
44
+ if (!stopped) setTimeout(loop, intervalMs);
45
+ };
46
+
47
+ logger.info(`daemon: started, tick interval ${tickSec}s`);
48
+ setTimeout(loop, intervalMs);
49
+
50
+ return () => {
51
+ stopped = true;
52
+ logger.info("daemon: stopping");
53
+ };
54
+ }
package/src/sdk.ts ADDED
@@ -0,0 +1,27 @@
1
+ // SDK entrypoint for embedding membot in other apps. Re-exports the core
2
+ // surfaces — context, errors, operations, search, ingest, refresh — so
3
+ // callers don't need to depend on internal file paths.
4
+
5
+ export { loadConfig, saveConfig } from "./config/loader.ts";
6
+ export type { ChunkerConfig, LlmConfig, MembotConfig } from "./config/schemas.ts";
7
+ export { defaultMembotHome, EMBEDDING_DIMENSION, EMBEDDING_MODEL } from "./constants.ts";
8
+ export type { AppContext, BuildContextOptions } from "./context.ts";
9
+ export { buildContext, closeContext } from "./context.ts";
10
+ export type { ErrorKind, HelpfulErrorArgs } from "./errors.ts";
11
+ export { asHelpful, HelpfulError, isHelpfulError, mapKindToExit } from "./errors.ts";
12
+ export type { Chunk } from "./ingest/chunker.ts";
13
+ export { chunkDeterministic } from "./ingest/chunker.ts";
14
+ export { embed, embedSingle } from "./ingest/embedder.ts";
15
+ export type { FetchedRemote, FetchOptions } from "./ingest/fetcher.ts";
16
+ export { fetchRemote } from "./ingest/fetcher.ts";
17
+ export type { IngestEntryResult, IngestInput, IngestResult } from "./ingest/ingest.ts";
18
+ export { ingest } from "./ingest/ingest.ts";
19
+ export { buildMcpServer, startHttpServer, startStdioServer } from "./mcp/server.ts";
20
+ export { OPERATIONS } from "./operations/index.ts";
21
+ export type { CliMetadata, Operation } from "./operations/types.ts";
22
+ export { composeDescription, defaultCliName, defineOperation } from "./operations/types.ts";
23
+ export { refreshOne } from "./refresh/runner.ts";
24
+ export { runDueRefreshes, startDaemon } from "./refresh/scheduler.ts";
25
+ export { fuseRRF } from "./search/hybrid.ts";
26
+ export { searchKeyword } from "./search/keyword.ts";
27
+ export { searchSemantic } from "./search/semantic.ts";
@@ -0,0 +1,100 @@
1
+ import type { KeywordHit } from "./keyword.ts";
2
+ import type { SemanticHit } from "./semantic.ts";
3
+
4
+ export interface FusedHit {
5
+ logical_path: string;
6
+ version_id: string;
7
+ chunk_index: number;
8
+ snippet: string;
9
+ score: number;
10
+ semantic_score: number | null;
11
+ keyword_score: number | null;
12
+ }
13
+
14
+ const SNIPPET_MAX = 300;
15
+
16
+ /**
17
+ * Reciprocal-rank fusion of semantic and keyword hit lists. Each result is
18
+ * keyed by `(logical_path, version_id, chunk_index)` so the same chunk
19
+ * appearing in both lists gets one fused score = sum of its RRF scores.
20
+ */
21
+ export function fuseRRF(
22
+ semantic: SemanticHit[],
23
+ keyword: KeywordHit[],
24
+ options: { k?: number; limit: number },
25
+ ): FusedHit[] {
26
+ const k = options.k ?? 60;
27
+ const merged = new Map<
28
+ string,
29
+ {
30
+ logical_path: string;
31
+ version_id: string;
32
+ chunk_index: number;
33
+ snippet: string;
34
+ rrf: number;
35
+ semantic_score: number | null;
36
+ keyword_score: number | null;
37
+ }
38
+ >();
39
+
40
+ const keyOf = (lp: string, v: string, ci: number) => `${lp}::${v}::${ci}`;
41
+
42
+ for (let i = 0; i < semantic.length; i++) {
43
+ const hit = semantic[i];
44
+ if (!hit) continue;
45
+ const key = keyOf(hit.logical_path, hit.version_id, hit.chunk_index);
46
+ const rrf = 1 / (k + i + 1);
47
+ const existing = merged.get(key);
48
+ if (existing) {
49
+ existing.rrf += rrf;
50
+ existing.semantic_score = round(hit.score);
51
+ } else {
52
+ merged.set(key, {
53
+ logical_path: hit.logical_path,
54
+ version_id: hit.version_id,
55
+ chunk_index: hit.chunk_index,
56
+ snippet: hit.chunk_content.slice(0, SNIPPET_MAX),
57
+ rrf,
58
+ semantic_score: round(hit.score),
59
+ keyword_score: null,
60
+ });
61
+ }
62
+ }
63
+
64
+ for (let i = 0; i < keyword.length; i++) {
65
+ const hit = keyword[i];
66
+ if (!hit) continue;
67
+ const key = keyOf(hit.logical_path, hit.version_id, hit.chunk_index);
68
+ const rrf = 1 / (k + i + 1);
69
+ const existing = merged.get(key);
70
+ if (existing) {
71
+ existing.rrf += rrf;
72
+ existing.keyword_score = round(hit.score);
73
+ } else {
74
+ merged.set(key, {
75
+ logical_path: hit.logical_path,
76
+ version_id: hit.version_id,
77
+ chunk_index: hit.chunk_index,
78
+ snippet: hit.chunk_content.slice(0, SNIPPET_MAX),
79
+ rrf,
80
+ semantic_score: null,
81
+ keyword_score: round(hit.score),
82
+ });
83
+ }
84
+ }
85
+
86
+ const all = [...merged.values()].sort((a, b) => b.rrf - a.rrf).slice(0, options.limit);
87
+ return all.map((h) => ({
88
+ logical_path: h.logical_path,
89
+ version_id: h.version_id,
90
+ chunk_index: h.chunk_index,
91
+ snippet: h.snippet,
92
+ score: round(h.rrf),
93
+ semantic_score: h.semantic_score,
94
+ keyword_score: h.keyword_score,
95
+ }));
96
+ }
97
+
98
+ function round(n: number): number {
99
+ return Math.round(n * 10000) / 10000;
100
+ }