npm - membot - Versions diffs - 0.0.1 → 0.1.1 - Mend

membot 0.0.1 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (78) hide show

package/.claude/skills/membot.md +137 -0
package/.cursor/rules/membot.mdc +137 -0
package/README.md +131 -0
package/package.json +83 -24
package/patches/@huggingface%2Ftransformers@4.2.0.patch +137 -0
package/scripts/apply-transformers-patch.sh +35 -0
package/src/cli.ts +72 -0
package/src/commands/check-update.ts +69 -0
package/src/commands/mcpx.ts +112 -0
package/src/commands/reindex.ts +53 -0
package/src/commands/serve.ts +58 -0
package/src/commands/skill.ts +131 -0
package/src/commands/upgrade.ts +220 -0
package/src/config/loader.ts +100 -0
package/src/config/schemas.ts +39 -0
package/src/constants.ts +42 -0
package/src/context.ts +80 -0
package/src/db/blobs.ts +53 -0
package/src/db/chunks.ts +176 -0
package/src/db/connection.ts +173 -0
package/src/db/files.ts +325 -0
package/src/db/migrations/001-init.ts +63 -0
package/src/db/migrations/002-fts.ts +12 -0
package/src/db/migrations.ts +45 -0
package/src/errors.ts +87 -0
package/src/ingest/chunker.ts +117 -0
package/src/ingest/converter/docx.ts +15 -0
package/src/ingest/converter/html.ts +20 -0
package/src/ingest/converter/image.ts +71 -0
package/src/ingest/converter/index.ts +119 -0
package/src/ingest/converter/llm.ts +66 -0
package/src/ingest/converter/ocr.ts +51 -0
package/src/ingest/converter/pdf.ts +38 -0
package/src/ingest/converter/text.ts +8 -0
package/src/ingest/describer.ts +72 -0
package/src/ingest/embedder.ts +98 -0
package/src/ingest/fetcher.ts +280 -0
package/src/ingest/ingest.ts +444 -0
package/src/ingest/local-reader.ts +64 -0
package/src/ingest/search-text.ts +18 -0
package/src/ingest/source-resolver.ts +186 -0
package/src/mcp/instructions.ts +34 -0
package/src/mcp/server.ts +101 -0
package/src/mount/commander.ts +174 -0
package/src/mount/mcp.ts +111 -0
package/src/mount/zod-to-cli.ts +158 -0
package/src/operations/add.ts +69 -0
package/src/operations/diff.ts +105 -0
package/src/operations/index.ts +38 -0
package/src/operations/info.ts +95 -0
package/src/operations/list.ts +87 -0
package/src/operations/move.ts +83 -0
package/src/operations/prune.ts +80 -0
package/src/operations/read.ts +102 -0
package/src/operations/refresh.ts +72 -0
package/src/operations/remove.ts +35 -0
package/src/operations/search.ts +72 -0
package/src/operations/tree.ts +103 -0
package/src/operations/types.ts +81 -0
package/src/operations/versions.ts +78 -0
package/src/operations/write.ts +77 -0
package/src/output/formatter.ts +68 -0
package/src/output/logger.ts +114 -0
package/src/output/progress.ts +78 -0
package/src/output/tty.ts +91 -0
package/src/refresh/runner.ts +296 -0
package/src/refresh/scheduler.ts +54 -0
package/src/sdk.ts +27 -0
package/src/search/hybrid.ts +100 -0
package/src/search/keyword.ts +62 -0
package/src/search/semantic.ts +56 -0
package/src/types/text-modules.d.ts +9 -0
package/src/update/background.ts +73 -0
package/src/update/cache.ts +40 -0
package/src/update/checker.ts +117 -0
package/.claude/settings.local.json +0 -7
package/CLAUDE.md +0 -139
package/docs/plan.md +0 -905

package/src/mcp/instructions.ts ADDED Viewed

@@ -0,0 +1,34 @@
+/**
+ * Server-level instructions sent to the LLM when it connects to membot's
+ * MCP server. Frames how the tool surface should be used: discovery →
+ * ingest → consume → write, with explicit notes on versioning and refresh.
+ *
+ * The value is a plain template literal with no interpolation; the escaped
+ * backticks around `version` are literal characters in the emitted text.
+ *
+ * NOTE(review): the text names individual tools (e.g. membot_delete,
+ * membot_prune). Confirm each name matches a registered operation — TODO
+ * verify against the operations index.
+ */
+export const SERVER_INSTRUCTIONS = `You have a persistent context store. Files live as versioned markdown rows
+addressed by logical path (e.g. "research/threat-models/llm.md"). The store
+is a hybrid search index: every file is chunked, embedded locally, and
+indexed with BM25 — so prefer membot_search to membot_read+grep for discovery.
+Workflow:
+  1. membot_tree or membot_search to find what already exists before adding new content.
+  2. membot_add to ingest a local file, a URL, or a remote document. URLs are
+     fetched via mcpx (the chosen invocation is stored so refresh is fast and
+     deterministic).
+  3. membot_read or membot_search hits to consume content.
+  4. membot_write to record agent-authored notes (source_type='inline').
+Versioning:
+  - Every ingest, refresh, or write that changes content creates a NEW
+    version_id (a timestamp). Older versions stay queryable via the
+    \`version\` parameter on membot_read / membot_info / membot_versions / membot_diff.
+  - All other tools default to the current (latest, non-tombstoned) version.
+  - membot_delete is a tombstone — history is preserved unless membot_prune runs.
+Refresh:
+  - Each row has source metadata. membot_refresh re-reads the source, hashes
+    it, and only re-embeds when bytes changed. Safe to call often.
+  - If a file has refresh_frequency_sec set, the daemon refreshes it
+    automatically — you do not need to schedule it yourself.
+When in doubt: search before you read, read before you write, and prefer
+adding the source URL once (with a refresh interval) over copy-pasting
+content that will go stale.`;

package/src/mcp/server.ts ADDED Viewed

@@ -0,0 +1,101 @@
+import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
+import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
+import { StreamableHTTPServerTransport } from "@modelcontextprotocol/sdk/server/streamableHttp.js";
+import { type AppContext, buildContext, closeContext } from "../context.ts";
+import { mountAsMcpTool } from "../mount/mcp.ts";
+import { OPERATIONS } from "../operations/index.ts";
+import { logger } from "../output/logger.ts";
+import { SERVER_INSTRUCTIONS } from "./instructions.ts";
+/**
+ * Options accepted by the server start-up helpers in this module.
+ *
+ * - `configFlag` is forwarded unchanged to `buildContext` when the app
+ *   context is first created.
+ * - `httpPort` is declared here but is not read by the functions visible in
+ *   this file (`startHttpServer` takes its port as a positional argument).
+ */
+export interface McpServerOptions {
+	configFlag?: string;
+	httpPort?: number;
+}
+/**
+ * Build a fresh `McpServer` instance with every Operation mounted as a
+ * tool. The supplied `ctxFactory` is invoked lazily on the first tool
+ * invocation and its result is shared by every tool mounted here — for stdio
+ * servers we share one context across the connection; for HTTP servers we'd
+ * want one context per session, but for now a single lazy-initialized
+ * context is fine.
+ *
+ * If `ctxFactory` rejects, the cached promise is cleared so the next tool
+ * call retries instead of replaying the same failure for the rest of the
+ * process lifetime.
+ *
+ * NOTE(review): the advertised server version is a hard-coded literal —
+ * confirm whether it should track the package version.
+ *
+ * @param ctxFactory Creates the shared `AppContext`; invoked again only after
+ *   a previous attempt failed.
+ * @returns The configured server, not yet connected to any transport.
+ */
+export function buildMcpServer(ctxFactory: () => Promise<AppContext>): McpServer {
+	const server = new McpServer({ name: "membot", version: "0.0.1" }, { instructions: SERVER_INSTRUCTIONS });
+	let ctxPromise: Promise<AppContext> | null = null;
+	const getCtx = async (): Promise<AppContext> => {
+		if (!ctxPromise) {
+			const pending = ctxFactory();
+			ctxPromise = pending;
+			// Forget a failed attempt (only if the cache still points at it) so
+			// a later call can try again. Callers still observe the rejection
+			// through the promise they were handed.
+			pending.catch(() => {
+				if (ctxPromise === pending) ctxPromise = null;
+			});
+		}
+		return ctxPromise;
+	};
+	for (const op of OPERATIONS) {
+		mountAsMcpTool(server, op, getCtx);
+	}
+	return server;
+}
+/**
+ * Run the MCP server over stdin/stdout. This is the default transport for
+ * `membot serve`, letting MCP clients (mcpx, Claude Desktop, etc.) attach
+ * through standard streams.
+ *
+ * The app context is built on demand with `json: true` and remembered so the
+ * returned shutdown function can close it.
+ *
+ * @param options `configFlag` is forwarded to `buildContext`.
+ * @returns An async shutdown function that closes the server and then the
+ *   context, if one was ever built.
+ */
+export async function startStdioServer(options: McpServerOptions = {}): Promise<() => Promise<void>> {
+	let activeCtx: AppContext | null = null;
+	const openContext = async (): Promise<AppContext> => {
+		activeCtx = await buildContext({ configFlag: options.configFlag, json: true });
+		return activeCtx;
+	};
+	const server = buildMcpServer(openContext);
+	await server.connect(new StdioServerTransport());
+	logger.info("membot-mcp: stdio server connected");
+	const shutdown = async (): Promise<void> => {
+		await server.close();
+		if (activeCtx) await closeContext(activeCtx);
+	};
+	return shutdown;
+}
+/**
+ * Start the MCP server in HTTP (streamable) mode. Used by
+ * `membot serve --http <port>` to expose the same tools over HTTP for
+ * browser-based or remote clients.
+ *
+ * Only the `/mcp` path is served; any other path gets a 404. A single
+ * transport and a single lazily-built context are shared by all requests.
+ *
+ * @param port TCP port handed to `Bun.serve`.
+ * @param options `configFlag` is forwarded to `buildContext`.
+ * @returns An async shutdown function that stops the HTTP listener, closes
+ *   the MCP server, and closes the context if one was ever built.
+ */
+export async function startHttpServer(port: number, options: McpServerOptions = {}): Promise<() => Promise<void>> {
+	let ctx: AppContext | null = null;
+	const server = buildMcpServer(async () => {
+		ctx = await buildContext({ configFlag: options.configFlag });
+		return ctx;
+	});
+	const transport = new StreamableHTTPServerTransport({ sessionIdGenerator: () => crypto.randomUUID() });
+	await server.connect(transport);
+	const httpServer = Bun.serve({
+		port,
+		async fetch(req) {
+			const url = new URL(req.url);
+			if (url.pathname !== "/mcp") return new Response("not found", { status: 404 });
+			const body = await req.arrayBuffer();
+			// Adapt Bun's Request → Node-shaped req/res. Streamable HTTP
+			// transport expects a Node IncomingMessage / ServerResponse;
+			// for now the SDK provides handlers for Web's Request directly
+			// in newer versions. Simplest: forward to transport.handleRequest.
+			// NOTE(review): `undefined` is passed where the transport's second
+			// argument is expected, and the raw ArrayBuffer is passed as the
+			// body — confirm the installed SDK version accepts a Web Request
+			// in this shape and actually returns a Response.
+			const resp = await transport.handleRequest(
+				req as unknown as Parameters<typeof transport.handleRequest>[0],
+				undefined as unknown as Parameters<typeof transport.handleRequest>[1],
+				body,
+			);
+			return resp as unknown as Response;
+		},
+	});
+	logger.info(`membot-mcp: http listening on :${port}/mcp`);
+	return async () => {
+		httpServer.stop();
+		await server.close();
+		if (ctx) await closeContext(ctx);
+	};
+}

package/src/mount/commander.ts ADDED Viewed

@@ -0,0 +1,174 @@
+import type { Command } from "commander";
+import type { z } from "zod";
+import { type AppContext, type BuildContextOptions, buildContext, closeContext } from "../context.ts";
+import { asHelpful, HelpfulError, isHelpfulError, mapKindToExit } from "../errors.ts";
+import { composeDescription, defaultCliName, type Operation } from "../operations/types.ts";
+import { colors, renderResult } from "../output/formatter.ts";
+import { logger } from "../output/logger.ts";
+import { isJson } from "../output/tty.ts";
+import { applySchemaToCommand, toKebab } from "./zod-to-cli.ts";
+/**
+ * Mount an Operation as a commander subcommand. The command:
+ *   1. accepts positional + flag args inferred from the zod input schema
+ *   2. validates with the same schema
+ *   3. builds the app context, runs the handler, validates the result against
+ *      the output schema and prints the formatted result to stdout
+ *   4. on any thrown error, renders it via `renderCliError`, closes the
+ *      context if one was built, and exits with the mapped exit code
+ *      (`mapKindToExit` for `HelpfulError`, otherwise 1)
+ *
+ * @param program Parent commander program the subcommand is attached to.
+ * @param op Operation whose schemas, handler and CLI metadata drive the command.
+ * @param getContextOptions Called at action time to obtain the options passed to `buildContext`.
+ */
+export function mountAsCommanderCommand<I extends z.ZodObject, O extends z.ZodTypeAny>(
+	program: Command,
+	op: Operation<I, O>,
+	getContextOptions: () => BuildContextOptions,
+): void {
+	const cmdName = defaultCliName(op);
+	const cmd = program.command(cmdName).description(composeDescription(op));
+	applySchemaToCommand(cmd, op.inputSchema, {
+		positional: (op.cli?.positional as readonly string[] | undefined) ?? [],
+		aliases: op.cli?.aliases as Readonly<Record<string, string>> | undefined,
+	});
+	cmd.action(async (...args: unknown[]) => {
+		// Commander passes positionals first, then the options object, then the Command instance.
+		// The middle option-bag is what we want for flag values.
+		// The loop keeps the LAST argument whose constructor is plain `Object`.
+		let optsObj: Record<string, unknown> = {};
+		for (const a of args) {
+			if (a && typeof a === "object" && !Array.isArray(a) && a.constructor && a.constructor.name === "Object") {
+				optsObj = a as Record<string, unknown>;
+			}
+		}
+		// Leading args map one-to-one onto the declared positional field names.
+		const positionals = args.slice(0, op.cli?.positional?.length ?? 0);
+		const inputObj: Record<string, unknown> = {};
+		const positionalNames = (op.cli?.positional ?? []) as readonly string[];
+		positionalNames.forEach((name, i) => {
+			if (positionals[i] !== undefined) inputObj[name] = positionals[i];
+		});
+		// Remaining schema fields come from the option bag: look up the camelCase
+		// key first, then fall back to the raw field name.
+		for (const fieldName of Object.keys(op.inputSchema.shape)) {
+			if (positionalNames.includes(fieldName)) continue;
+			const camel = kebabToCamel(toKebab(fieldName));
+			const v = optsObj[camel] ?? optsObj[fieldName];
+			if (v !== undefined) inputObj[fieldName] = v;
+		}
+		// stdinField support: read stdin when the field is missing AND stdin is not a TTY.
+		// Empty stdin leaves the field unset so schema validation decides what happens.
+		if (op.cli?.stdinField && inputObj[op.cli.stdinField as string] === undefined && !process.stdin.isTTY) {
+			const stdin = await readStdin();
+			if (stdin.length > 0) inputObj[op.cli.stdinField as string] = stdin;
+		}
+		let ctx: AppContext | null = null;
+		try {
+			// Input is validated before the context is built, so bad arguments
+			// fail without opening a context.
+			const parsedInput = parseInput(op, inputObj);
+			ctx = await buildContext(getContextOptions());
+			const result = await op.handler(parsedInput, ctx);
+			const validated = parseOutput(op, result);
+			process.stdout.write(`${renderResult(validated, { console_formatter: op.console_formatter })}\n`);
+		} catch (err) {
+			renderCliError(err);
+			const exitCode = isHelpfulError(err) ? mapKindToExit(err.kind) : 1;
+			// Close explicitly here: process.exit() below means the close after
+			// the try/catch is never reached on the error path.
+			if (ctx) await closeContext(ctx);
+			process.exit(exitCode);
+		}
+		if (ctx) await closeContext(ctx);
+	});
+}
+/**
+ * Run the raw CLI input through the operation's zod input schema.
+ *
+ * @param op Operation whose `inputSchema` is applied.
+ * @param inputObj Field values gathered from positionals, flags and stdin.
+ * @returns The schema's parsed data on success.
+ * @throws HelpfulError of kind `input_error`, carrying the zod issues as
+ *   `details`, when validation fails.
+ */
+function parseInput<I extends z.ZodObject, O extends z.ZodTypeAny>(
+	op: Operation<I, O>,
+	inputObj: Record<string, unknown>,
+): z.infer<I> {
+	const parsed = op.inputSchema.safeParse(inputObj);
+	if (parsed.success) return parsed.data;
+	const failure = parsed.error;
+	throw new HelpfulError({
+		kind: "input_error",
+		message: `invalid arguments to ${op.name}: ${failure.message}`,
+		hint: `Run \`membot ${defaultCliName(op)} --help\` to see expected arguments.`,
+		details: failure.issues,
+	});
+}
+/**
+ * Check a handler's return value against the operation's output schema.
+ *
+ * @param op Operation whose `outputSchema` is applied.
+ * @param result Raw value returned by the handler.
+ * @returns The schema's parsed data on success.
+ * @throws HelpfulError of kind `internal_error` (zod issues as `details`,
+ *   the zod error as `cause`) when the value does not match the schema.
+ */
+function parseOutput<I extends z.ZodObject, O extends z.ZodTypeAny>(op: Operation<I, O>, result: unknown): z.infer<O> {
+	const checked = op.outputSchema.safeParse(result);
+	if (checked.success) return checked.data;
+	const failure = checked.error;
+	throw new HelpfulError({
+		kind: "internal_error",
+		message: `${op.name} produced an output that doesn't match its declared schema: ${failure.message}`,
+		hint: "This is a membot bug. Re-run with --verbose and report at https://github.com/evantahler/membot/issues.",
+		details: failure.issues,
+		cause: failure,
+	});
+}
+/**
+ * Print an error caught at the CLI mount boundary.
+ *
+ * Anything that is not already a `HelpfulError` is wrapped with `asHelpful()`
+ * as an `internal_error`, so the output always has kind/message/hint. In JSON
+ * mode a single `{ ok: false, error }` line is written to stderr; otherwise
+ * the message, hint and (when present) details go through the logger.
+ *
+ * @param err The thrown value, of any type.
+ */
+export function renderCliError(err: unknown): void {
+	const fallbackHint = "Re-run with --verbose for the underlying message; if it persists this is a bug.";
+	const helpful = isHelpfulError(err) ? err : asHelpful(err, "unexpected error", fallbackHint, "internal_error");
+	if (!isJson()) {
+		logger.error(`✗ ${helpful.message}`);
+		logger.writeRaw(`  ${colors.yellow("hint:")} ${helpful.hint}\n`);
+		if (helpful.details !== undefined) {
+			logger.writeRaw(`  ${colors.dim(`details: ${formatDetails(helpful.details)}`)}\n`);
+		}
+		return;
+	}
+	const error = {
+		kind: helpful.kind,
+		message: helpful.message,
+		hint: helpful.hint,
+		details: helpful.details,
+	};
+	process.stderr.write(`${JSON.stringify({ ok: false, error })}\n`);
+}
+/**
+ * Turn an error's `details` payload into a single-line string for display.
+ *
+ * JSON is preferred. `String(details)` is used when `JSON.stringify` throws
+ * (e.g. circular structures, BigInt) or when it returns `undefined` instead
+ * of a string (functions, symbols), so the result is always a string.
+ *
+ * @param details Arbitrary value attached to an error.
+ * @returns A printable representation of `details`.
+ */
+function formatDetails(details: unknown): string {
+	try {
+		return JSON.stringify(details) ?? String(details);
+	} catch {
+		return String(details);
+	}
+}
+/**
+ * kebab-case → camelCase, used to look up values on the options object that
+ * commander hands to an action.
+ *
+ * Each `-` is dropped and the character after it is upper-cased, whatever
+ * that character is. Digits are therefore joined directly (`top-2` → `top2`)
+ * rather than leaving the dash in place. Underscores are not touched; run
+ * `toKebab` first for snake_case names.
+ *
+ * @param s A kebab-case flag name without the leading `--`.
+ * @returns The camelCase form of `s`.
+ */
+function kebabToCamel(s: string): string {
+	return s.replace(/-([^-])/g, (_match: string, first: string) => first.toUpperCase());
+}
+/**
+ * Read all of stdin and decode it as one string. Called for operations that
+ * declare a `cli.stdinField` when that field was not supplied and stdin is
+ * not a TTY.
+ *
+ * @returns Everything read from stdin, decoded with a default `TextDecoder`.
+ */
+async function readStdin(): Promise<string> {
+	const parts: Uint8Array[] = [];
+	let byteCount = 0;
+	for await (const part of process.stdin as AsyncIterable<Uint8Array>) {
+		parts.push(part);
+		byteCount += part.byteLength;
+	}
+	// Copy every part into one contiguous buffer so decoding happens once,
+	// after all input has arrived.
+	const buffer = new Uint8Array(byteCount);
+	parts.reduce((cursor, part) => {
+		buffer.set(part, cursor);
+		return cursor + part.byteLength;
+	}, 0);
+	return new TextDecoder().decode(buffer);
+}

package/src/mount/mcp.ts ADDED Viewed

@@ -0,0 +1,111 @@
+import type { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
+import type { CallToolResult } from "@modelcontextprotocol/sdk/types.js";
+import type { z } from "zod";
+import type { AppContext } from "../context.ts";
+import { asHelpful, HelpfulError, isHelpfulError } from "../errors.ts";
+import { composeDescription, type Operation } from "../operations/types.ts";
+/**
+ * Register an Operation as a tool on the given MCP server. The tool:
+ *   1. is registered under `op.name`, described by `composeDescription(op)`
+ *   2. advertises the zod input schema's shape as its `inputSchema`
+ *   3. validates input and output through the operation's zod schemas
+ *   4. never throws to the SDK: a failure while parsing input, obtaining the
+ *      context, running the handler or validating output is turned into an
+ *      `isError: true` result by `renderMcpError`
+ *
+ * @param server MCP server to register the tool on.
+ * @param op Operation providing name, schemas and handler.
+ * @param getCtx Supplies the app context; awaited on every invocation, after
+ *   the input has been validated.
+ */
+export function mountAsMcpTool<I extends z.ZodObject, O extends z.ZodTypeAny>(
+	server: McpServer,
+	op: Operation<I, O>,
+	getCtx: () => Promise<AppContext>,
+): void {
+	server.registerTool(
+		op.name,
+		{
+			description: composeDescription(op),
+			inputSchema: op.inputSchema.shape as z.ZodRawShape,
+		},
+		async (rawInput: unknown): Promise<CallToolResult> => {
+			// One guard covers all stages; each stage's failure is rendered the same way.
+			try {
+				const input = parseInput(op, rawInput);
+				const ctx = await getCtx();
+				const output = parseOutput(op, await op.handler(input, ctx));
+				return {
+					content: [{ type: "text", text: jsonOrText(output) }],
+					structuredContent: output as Record<string, unknown>,
+				};
+			} catch (err) {
+				return renderMcpError(err);
+			}
+		},
+	);
+}
+/**
+ * Run the raw MCP tool arguments through the operation's zod input schema.
+ *
+ * @param op Operation whose `inputSchema` is applied.
+ * @param raw Arguments as received from the MCP client.
+ * @returns The schema's parsed data on success.
+ * @throws HelpfulError of kind `input_error`, carrying the zod issues as
+ *   `details`, when validation fails.
+ */
+function parseInput<I extends z.ZodObject, O extends z.ZodTypeAny>(op: Operation<I, O>, raw: unknown): z.infer<I> {
+	const parsed = op.inputSchema.safeParse(raw);
+	if (parsed.success) return parsed.data;
+	const failure = parsed.error;
+	throw new HelpfulError({
+		kind: "input_error",
+		message: `invalid input to ${op.name}: ${failure.message}`,
+		hint: `Check the tool's inputSchema. Common issues: missing required fields, wrong types, unknown fields.`,
+		details: failure.issues,
+	});
+}
+/**
+ * Validate the handler's return value against the operation's output schema.
+ *
+ * The zod error is attached as `cause` (matching the CLI mount's equivalent
+ * check) so the underlying validation failure is not lost when the
+ * `HelpfulError` is logged or inspected.
+ *
+ * @param op Operation whose `outputSchema` is applied.
+ * @param result Raw value returned by the handler.
+ * @returns The schema's parsed data on success.
+ * @throws HelpfulError of kind `internal_error` when the value does not
+ *   match the schema.
+ */
+function parseOutput<I extends z.ZodObject, O extends z.ZodTypeAny>(op: Operation<I, O>, result: unknown): z.infer<O> {
+	const validated = op.outputSchema.safeParse(result);
+	if (!validated.success) {
+		throw new HelpfulError({
+			kind: "internal_error",
+			message: `${op.name} produced output that doesn't match its declared schema: ${validated.error.message}`,
+			hint: "This is a membot bug. Report at https://github.com/evantahler/membot/issues.",
+			details: validated.error.issues,
+			cause: validated.error,
+		});
+	}
+	return validated.data;
+}
+/**
+ * Convert any thrown value into an MCP tool result flagged `isError: true`.
+ *
+ * Values that are not already a `HelpfulError` are wrapped with `asHelpful()`
+ * as an `internal_error`. The hint appears twice on purpose: in the text
+ * content and under `structuredContent.error`, so both views of the result
+ * carry the same guidance. Missing `details` are reported as `null`.
+ *
+ * @param err The thrown value, of any type.
+ * @returns The error-shaped tool result.
+ */
+export function renderMcpError(err: unknown): CallToolResult {
+	const helpful = isHelpfulError(err)
+		? err
+		: asHelpful(err, "unexpected error", "This is a membot bug; check server logs.", "internal_error");
+	const summary = `${helpful.message}\n\nhint: ${helpful.hint}`;
+	const error = {
+		kind: helpful.kind,
+		message: helpful.message,
+		hint: helpful.hint,
+		details: helpful.details ?? null,
+	};
+	return {
+		isError: true,
+		content: [{ type: "text", text: summary }],
+		structuredContent: { error },
+	};
+}
+/**
+ * Serialize an output value to a single text block — strings pass through
+ * unchanged, everything else is pretty-printed JSON (2-space indent).
+ *
+ * `JSON.stringify` returns `undefined` rather than a string for `undefined`,
+ * functions and symbols; `String(value)` is used in that case so the result
+ * is always a string.
+ *
+ * @param value Validated handler output.
+ * @returns Text suitable for a `type: "text"` content item.
+ */
+function jsonOrText(value: unknown): string {
+	if (typeof value === "string") return value;
+	return JSON.stringify(value, null, 2) ?? String(value);
+}

package/src/mount/zod-to-cli.ts ADDED Viewed

@@ -0,0 +1,158 @@
+import { type Command, Option } from "commander";
+import { z } from "zod";
+import { HelpfulError } from "../errors.ts";
+/**
+ * Walk a zod object schema and register its fields onto a commander command.
+ * Each field becomes either a positional `<arg>`/`[arg]` or a `--flag`,
+ * with descriptions sourced from `.describe()` so the same docstring shows
+ * up in `--help` and in the MCP tool's parameter description.
+ *
+ * Boolean fields whose default is `true` additionally get a `--no-<flag>`
+ * option; without it there is no way to pass `false` from the command line.
+ * Commander stores the negated option under the same key as the positive one.
+ *
+ * @param cmd Command to add arguments and options to.
+ * @param schema Zod object schema describing the operation's input.
+ * @param options `positional` lists field names registered as arguments, in
+ *   order (names missing from the schema are skipped); `aliases` maps field
+ *   names to short flags such as `-p`.
+ */
+export function applySchemaToCommand<S extends z.ZodObject>(
+	cmd: Command,
+	schema: S,
+	options: {
+		positional?: readonly string[];
+		aliases?: Readonly<Record<string, string>>;
+	} = {},
+): void {
+	const positionalOrder = options.positional ?? [];
+	const positional = new Set(positionalOrder);
+	const aliases = options.aliases ?? {};
+	const shape = schema.shape;
+	for (const fieldName of positionalOrder) {
+		const fieldSchema = shape[fieldName];
+		if (!fieldSchema) continue;
+		const required = !isOptional(fieldSchema);
+		const label = required ? `<${fieldName}>` : `[${fieldName}]`;
+		cmd.argument(label, describeOf(fieldSchema));
+	}
+	for (const [fieldName, fieldSchemaUnknown] of Object.entries(shape)) {
+		if (positional.has(fieldName)) continue;
+		const fieldSchema = fieldSchemaUnknown as z.ZodTypeAny;
+		const flag = toKebab(fieldName);
+		const desc = describeOf(fieldSchema);
+		const alias = aliases[fieldName];
+		const opt = buildOption(fieldName, flag, desc, fieldSchema, alias);
+		cmd.addOption(opt);
+		// The positive option is registered first so its default stays in
+		// effect; the negated option only adds a way to switch it off.
+		if (unwrap(fieldSchema) instanceof z.ZodBoolean && defaultOf(fieldSchema) === true) {
+			cmd.addOption(new Option(`--no-${flag}`, `Disable --${flag}`));
+		}
+	}
+}
+/**
+ * Build the commander Option for one zod field, chosen by the field's
+ * underlying type once optional/default/nullable wrappers are removed:
+ *   - boolean → a bare `--flag` with no value
+ *   - enum    → `--flag <value>` restricted to the enum's options
+ *   - array   → `--flag <value>` that accumulates each occurrence into a list
+ *   - number  → `--flag <value>` parsed with `Number()`, rejecting NaN
+ *   - other   → `--flag <value>` passed through as given
+ * A static schema default, when present, becomes the option's default.
+ *
+ * @param _fieldName Schema field name (unused).
+ * @param flag Kebab-case long flag name without the leading dashes.
+ * @param desc Help text for the option.
+ * @param schema The field's zod schema, wrappers included.
+ * @param alias Optional short flag (e.g. `-p`) listed before the long flag.
+ * @returns The configured Option, not yet attached to a command.
+ * @throws HelpfulError of kind `input_error` from the number parser, at
+ *   argument-parse time, when the value is not numeric.
+ */
+function buildOption(
+	_fieldName: string,
+	flag: string,
+	desc: string,
+	schema: z.ZodTypeAny,
+	alias: string | undefined,
+): Option {
+	const inner = unwrap(schema);
+	const aliasPrefix = alias ? `${alias}, ` : "";
+	// Shared tail of every branch: copy the schema default onto the option.
+	const withDefault = (option: Option): Option => {
+		const fallback = defaultOf(schema);
+		if (fallback !== undefined) option.default(fallback);
+		return option;
+	};
+	if (inner instanceof z.ZodBoolean) {
+		return withDefault(new Option(`${aliasPrefix}--${flag}`, desc));
+	}
+	const valueFlags = `${aliasPrefix}--${flag} <value>`;
+	if (inner instanceof z.ZodEnum) {
+		const allowed = inner.options as readonly string[];
+		const option = new Option(valueFlags, desc);
+		option.choices(allowed as string[]);
+		return withDefault(option);
+	}
+	if (inner instanceof z.ZodArray) {
+		const option = new Option(valueFlags, `${desc} (repeatable)`);
+		option.argParser((val: string, prev: string[] | undefined) => {
+			const collected = prev ?? [];
+			collected.push(val);
+			return collected;
+		});
+		return withDefault(option);
+	}
+	if (inner instanceof z.ZodNumber) {
+		const option = new Option(valueFlags, desc);
+		option.argParser((v: string) => {
+			const parsed = Number(v);
+			if (!Number.isNaN(parsed)) return parsed;
+			throw new HelpfulError({
+				kind: "input_error",
+				message: `invalid number for --${flag}: ${JSON.stringify(v)}`,
+				hint: `Pass a numeric value, e.g. \`--${flag} 10\`. Run \`membot <command> --help\` to see expected types.`,
+			});
+		});
+		return withDefault(option);
+	}
+	return withDefault(new Option(valueFlags, desc));
+}
+/**
+ * Strip `.optional()`, `.nullable()` and `.default()` wrappers, in any
+ * nesting order, and return the first schema that is none of those.
+ */
+function unwrap(schema: z.ZodTypeAny): z.ZodTypeAny {
+	let current: z.ZodTypeAny = schema;
+	for (;;) {
+		if (current instanceof z.ZodOptional || current instanceof z.ZodNullable) {
+			current = current.unwrap() as z.ZodTypeAny;
+			continue;
+		}
+		if (current instanceof z.ZodDefault) {
+			current = current._def.innerType as z.ZodTypeAny;
+			continue;
+		}
+		return current;
+	}
+}
+/**
+ * Whether the user may omit this field: true when the schema's outermost
+ * layer is an optional, default or nullable wrapper. Only the outermost
+ * layer is inspected.
+ */
+function isOptional(schema: z.ZodTypeAny): boolean {
+	return schema instanceof z.ZodOptional || schema instanceof z.ZodDefault || schema instanceof z.ZodNullable;
+}
+/**
+ * Read the description set via `.describe()`, falling back to an empty string.
+ *
+ * The schema's public `description` getter is consulted first;
+ * `_def.description` is kept as a fallback for schema objects that only
+ * carry it there.
+ *
+ * NOTE(review): presumably needed because zod 4 keeps `.describe()` text in
+ * its metadata registry rather than on `_def` — confirm against the
+ * installed zod version.
+ *
+ * @param schema Any zod schema.
+ * @returns The description text, or `""` when none is set.
+ */
+function describeOf(schema: z.ZodTypeAny): string {
+	const fromGetter = schema.description;
+	if (typeof fromGetter === "string") return fromGetter;
+	const fromDef = (schema._def as { description?: string }).description;
+	return fromDef ?? "";
+}
+/**
+ * Find the value given to `.default()` on a zod schema.
+ *
+ * Outer `.optional()` / `.nullable()` wrappers are peeled first. If what
+ * remains is a default wrapper, its stored default is returned — called
+ * first when it is stored as a function. Otherwise the result is
+ * `undefined`, which leaves the commander option without a default.
+ */
+function defaultOf(schema: z.ZodTypeAny): unknown {
+	let node: z.ZodTypeAny = schema;
+	for (;;) {
+		if (node instanceof z.ZodOptional || node instanceof z.ZodNullable) {
+			node = node.unwrap() as z.ZodTypeAny;
+		} else {
+			break;
+		}
+	}
+	if (!(node instanceof z.ZodDefault)) return undefined;
+	const stored = node._def.defaultValue;
+	if (typeof stored === "function") return stored();
+	return stored;
+}
+/**
+ * Convert a snake_case field name into the kebab-case form used for CLI
+ * flags by turning every underscore into a dash.
+ */
+export function toKebab(name: string): string {
+	return name.split("_").join("-");
+}

package/src/operations/add.ts ADDED Viewed

@@ -0,0 +1,69 @@
+import { z } from "zod";
+import { ingest } from "../ingest/ingest.ts";
+import { colors } from "../output/formatter.ts";
+import { defineOperation } from "./types.ts";
+/** Fetcher kinds reported per ingested entry in the operation's output schema. */
+const FetcherKindEnum = z.enum(["http", "mcpx", "local", "inline"]);
+/**
+ * Operation definition for `membot_add` (CLI name `add`).
+ *
+ * Declares the input/output zod schemas, the CLI mapping (`source` is the
+ * only positional; three fields have short aliases), a console formatter
+ * that prints one line per entry plus a summary, and a handler that
+ * delegates directly to `ingest(input, ctx)`.
+ */
+export const addOperation = defineOperation({
+	name: "membot_add",
+	cliName: "add",
+	description: `Ingest one or many sources into the store. \`source\` accepts:
+  - a local file path
+  - a local directory (recursive walk, symlinks followed)
+  - a glob pattern (e.g. "docs/**/*.md")
+  - a URL (fetched via mcpx if configured, otherwise plain HTTP)
+  - "inline:<text>" literal
+PDF, DOCX, HTML, images, and other binaries are converted to markdown — native libraries first, vision/OCR for images, LLM fallback for messy or scanned input. Original bytes are kept in the blobs table; \`membot_read bytes=true\` returns them. Setting \`refresh_frequency\` enables automatic refresh from the daemon. Each ingested file becomes a NEW version under its own logical_path; existing versions stay queryable via membot_versions. Directory/glob ingests stream one file at a time — partial failures do not abort the rest; the response lists per-entry status.`,
+	inputSchema: z.object({
+		source: z.string().describe("Local path, directory, glob, URL, or `inline:<text>` literal"),
+		logical_path: z.string().optional().describe("Destination logical_path (single source) or prefix (directory/glob)"),
+		include: z.string().optional().describe("Glob include filter (comma-separated for multiple); default `**/*`"),
+		exclude: z.string().optional().describe("Glob exclude filter (comma-separated for multiple)"),
+		follow_symlinks: z
+			.boolean()
+			.default(true)
+			.describe("Follow symlinks during directory walks (cycles broken via realpath)"),
+		refresh_frequency: z.string().optional().describe("Auto-refresh cadence: 5m | 1h | 24h | 7d. Omit to disable."),
+		fetcher_hint: z
+			.string()
+			.optional()
+			.describe("Free-form hint passed to mcpx tool search (e.g. 'firecrawl', 'github', 'google docs', 'http')"),
+		change_note: z.string().optional().describe("Free-text note attached to the new version"),
+	}),
+	// One entry per source, plus aggregate counts.
+	outputSchema: z.object({
+		ingested: z.array(
+			z.object({
+				source_path: z.string(),
+				logical_path: z.string(),
+				version_id: z.string().nullable(),
+				status: z.enum(["ok", "failed"]),
+				error: z.string().optional(),
+				mime_type: z.string().nullable(),
+				size_bytes: z.number(),
+				fetcher: FetcherKindEnum,
+				source_sha256: z.string(),
+			}),
+		),
+		total: z.number(),
+		ok: z.number(),
+		failed: z.number(),
+	}),
+	cli: {
+		positional: ["source"],
+		aliases: { logical_path: "-p", refresh_frequency: "-r", change_note: "-m" },
+	},
+	// Human-readable rendering: a ✓ line (logical path, fetcher, size) for each
+	// successful entry, a ✗ line (source path, error text) for each failure,
+	// then a summary that mentions failures only when there are any.
+	console_formatter: (result) => {
+		const lines = result.ingested.map((e) => {
+			if (e.status === "ok") {
+				return `${colors.green("✓")} ${colors.cyan(e.logical_path)} ${colors.dim(`(${e.fetcher}, ${e.size_bytes}B)`)}`;
+			}
+			return `${colors.red("✗")} ${e.source_path} ${colors.dim(e.error ?? "")}`;
+		});
+		const summary = result.failed
+			? `${colors.green(`added ${result.ok}`)}, ${colors.red(`failed ${result.failed}`)}`
+			: colors.green(`added ${result.ok}`);
+		return `${lines.join("\n")}\n${summary}`;
+	},
+	handler: async (input, ctx) => ingest(input, ctx),
+});