npm - membot - Versions diffs - 0.7.0 → 0.10.0 - Mend

membot 0.7.0 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (23)

package/.claude/skills/membot.md +3 -0
package/.cursor/rules/membot.mdc +3 -0
package/README.md +7 -0
package/package.json +1 -1
package/src/cli.ts +11 -0
package/src/config/schemas.ts +33 -0
package/src/constants.ts +23 -0
package/src/context.ts +24 -0
package/src/ingest/concurrency.ts +60 -0
package/src/ingest/describer.ts +49 -3
package/src/ingest/embed-worker.ts +74 -0
package/src/ingest/embedder-pool.ts +391 -0
package/src/ingest/embedder.ts +40 -2
package/src/ingest/ingest.ts +277 -67
package/src/operations/add.ts +139 -99
package/src/operations/index.ts +2 -0
package/src/operations/refresh.ts +61 -34
package/src/operations/stats.ts +342 -0
package/src/operations/write.ts +48 -40
package/src/output/formatter.ts +21 -0
package/src/output/logger.ts +36 -0
package/src/output/progress.ts +408 -46
package/src/refresh/scheduler.ts +22 -13

package/src/operations/add.ts CHANGED

@@ -1,4 +1,6 @@
 import { z } from "zod";
+import { resolveEmbeddingWorkers } from "../context.ts";
+import { withEmbedderPool } from "../ingest/embedder-pool.ts";
 import {
 	countResolvedEntries,
 	type IngestCallbacks,
@@ -7,7 +9,9 @@ import {
 	ingestResolved,
 } from "../ingest/ingest.ts";
 import { type ResolvedSource, resolveSource } from "../ingest/source-resolver.ts";
-import { colors } from "../output/formatter.ts";
+import { colors, formatBytes } from "../output/formatter.ts";
+import { pieFor } from "../output/progress.ts";
+import { isInteractive } from "../output/tty.ts";
 import { defineOperation } from "./types.ts";
 const FetcherKindEnum = z.enum(["downloader", "local", "inline"]);
@@ -76,6 +80,7 @@ Pass \`logical_path\` to override. For a multi-source / directory / glob walk it
 				error: z.string().optional(),
 				mime_type: z.string().nullable(),
 				size_bytes: z.number(),
+				chunk_count: z.number().nullable(),
 				fetcher: FetcherKindEnum,
 				source_sha256: z.string(),
 			}),
@@ -90,116 +95,145 @@ Pass \`logical_path\` to override. For a multi-source / directory / glob walk it
 		aliases: { logical_path: "-p", refresh_frequency: "-r", change_note: "-m", force: "-f" },
 	},
 	console_formatter: (result) => {
-		const lines = result.ingested.map((e) => {
-			if (e.status === "ok") {
-				return `${colors.green("✓")} ${colors.cyan(e.logical_path)} ${colors.dim(`(${e.fetcher}, ${e.size_bytes}B)`)}`;
-			}
-			if (e.status === "unchanged") {
-				return `${colors.dim("≡")} ${colors.cyan(e.logical_path)} ${colors.dim("(unchanged)")}`;
-			}
-			return `${colors.red("✗")} ${e.source_path} ${colors.dim(e.error ?? "")}`;
-		});
 		const parts: string[] = [colors.green(`added ${result.ok}`)];
 		if (result.unchanged > 0) parts.push(colors.dim(`unchanged ${result.unchanged}`));
 		if (result.failed > 0) parts.push(colors.red(`failed ${result.failed}`));
-		return `${lines.join("\n")}\n${parts.join(", ")}`;
+		const summary = parts.join(", ");
+		// In interactive mode, every entry was already streamed to stderr via
+		// progress.entry() during ingest; printing the same list to stdout
+		// here would just duplicate the scrollback. Non-interactive callers
+		// (JSON, piped stdout, CI) don't see the live stream, so they still
+		// get the full per-entry list as the operation's stdout payload.
+		if (isInteractive()) return summary;
+		const lines = result.ingested.map(formatEntryLine);
+		return `${lines.join("\n")}\n${summary}`;
 	},
 	handler: async (input, ctx) => {
-		const { sources, ...rest } = input;
-		const followSymlinks = rest.follow_symlinks ?? true;
+		// Spin up an ephemeral embedder pool for the whole `add` command —
+		// `withEmbedderPool` handles the workers=1 short-circuit and disposes
+		// the children when the closure returns (see embedder-pool.ts). Inside
+		// the closure, every embed() call from the ingest pipeline transparently
+		// fans out to the subprocess pool.
+		const workers = resolveEmbeddingWorkers(ctx.config.embedding.workers);
+		return withEmbedderPool(workers, ctx.config.embedding_model, async () => {
+			const { sources, ...rest } = input;
+			const followSymlinks = rest.follow_symlinks ?? true;
-		// Phase 1: resolve every source upfront so the shared progress bar
-		// knows its total. A resolve failure (bad path, glob with no base) is
-		// captured per-source so one bad arg doesn't abort the whole batch.
-		type ResolveOutcome = { source: string; resolved: ResolvedSource } | { source: string; error: Error };
-		const outcomes: ResolveOutcome[] = [];
-		for (const source of sources) {
-			try {
-				const resolved = await resolveSource(source, {
-					include: rest.include,
-					exclude: rest.exclude,
-					followSymlinks,
-				});
-				outcomes.push({ source, resolved });
-			} catch (err) {
-				outcomes.push({ source, error: err instanceof Error ? err : new Error(String(err)) });
+			// Phase 1: resolve every source upfront so the shared progress bar
+			// knows its total. A resolve failure (bad path, glob with no base) is
+			// captured per-source so one bad arg doesn't abort the whole batch.
+			type ResolveOutcome = { source: string; resolved: ResolvedSource } | { source: string; error: Error };
+			const outcomes: ResolveOutcome[] = [];
+			for (const source of sources) {
+				try {
+					const resolved = await resolveSource(source, {
+						include: rest.include,
+						exclude: rest.exclude,
+						followSymlinks,
+					});
+					outcomes.push({ source, resolved });
+				} catch (err) {
+					outcomes.push({ source, error: err instanceof Error ? err : new Error(String(err)) });
+				}
 			}
-		}
-		const total = outcomes.reduce((n, o) => ("error" in o ? n + 1 : n + countResolvedEntries(o.resolved)), 0);
+			const total = outcomes.reduce((n, o) => ("error" in o ? n + 1 : n + countResolvedEntries(o.resolved)), 0);
-		const aggregated: IngestResult = {
-			ingested: [],
-			total: 0,
-			ok: 0,
-			unchanged: 0,
-			failed: 0,
-		};
+			const aggregated: IngestResult = {
+				ingested: [],
+				total: 0,
+				ok: 0,
+				unchanged: 0,
+				failed: 0,
+			};
-		ctx.progress.start(total, "ingest");
-		const callbacks: IngestCallbacks = {
-			onEntryStart: (label) => ctx.progress.tick(label),
-			onEntryComplete: (entry) => ctx.progress.entry(formatEntryLine(entry)),
-			onEntryProgress: (_label, sublabel) => ctx.progress.update(sublabel),
-		};
+			ctx.progress.start(total, "ingest");
+			const callbacks: IngestCallbacks = {
+				// Counter advances on COMPLETION so concurrent prep doesn't race the
+				// bar to 100% before any file is fully persisted. The per-worker
+				// status section (one line per active worker) shows file + step in
+				// real time, prefixed with a pie glyph that fills as the per-file
+				// pipeline progresses. `setWorkers(n)` resizes the section whenever
+				// a new ingest source kicks off with its own pool size.
+				onWorkerCount: (n) => ctx.progress.setWorkers(n),
+				onEntryStart: (label, workerId) => {
+					if (workerId !== undefined) ctx.progress.workerSet(workerId, `${pieFor(undefined)} ${label}`);
+					ctx.progress.setLabel(label);
+				},
+				onEntryComplete: (entry, workerId) => {
+					if (workerId !== undefined) ctx.progress.workerSet(workerId, "");
+					ctx.progress.tick(entry.logical_path);
+					ctx.progress.entry(formatEntryLine(entry));
+				},
+				onEntryProgress: (label, sublabel, workerId) => {
+					if (workerId !== undefined) ctx.progress.workerSet(workerId, `${pieFor(sublabel)} ${label} — ${sublabel}`);
+					ctx.progress.update(sublabel);
+				},
+				onChunks: (n) => ctx.progress.addChunks(n),
+			};
-		for (const outcome of outcomes) {
-			if ("error" in outcome) {
-				const failed: IngestEntryResult = {
-					source_path: outcome.source,
-					logical_path: outcome.source,
-					version_id: null,
-					status: "failed",
-					error: outcome.error.message,
-					mime_type: null,
-					size_bytes: 0,
-					fetcher: "local",
-					source_sha256: "",
-				};
-				callbacks.onEntryStart?.(outcome.source);
-				callbacks.onEntryComplete?.(failed);
-				aggregated.ingested.push(failed);
-				aggregated.total += 1;
-				aggregated.failed += 1;
-				continue;
-			}
+			for (const outcome of outcomes) {
+				if ("error" in outcome) {
+					const failed: IngestEntryResult = {
+						source_path: outcome.source,
+						logical_path: outcome.source,
+						version_id: null,
+						status: "failed",
+						error: outcome.error.message,
+						mime_type: null,
+						size_bytes: 0,
+						chunk_count: null,
+						fetcher: "local",
+						source_sha256: "",
+					};
+					callbacks.onEntryStart?.(outcome.source);
+					callbacks.onEntryComplete?.(failed);
+					aggregated.ingested.push(failed);
+					aggregated.total += 1;
+					aggregated.failed += 1;
+					continue;
+				}
-			try {
-				const r = await ingestResolved(outcome.resolved, { ...rest, source: outcome.source }, ctx, callbacks);
-				aggregated.ingested.push(...r.ingested);
-				aggregated.total += r.total;
-				aggregated.ok += r.ok;
-				aggregated.unchanged += r.unchanged;
-				aggregated.failed += r.failed;
-			} catch (err) {
-				const message = err instanceof Error ? err.message : String(err);
-				const failed: IngestEntryResult = {
-					source_path: outcome.source,
-					logical_path: outcome.source,
-					version_id: null,
-					status: "failed",
-					error: message,
-					mime_type: null,
-					size_bytes: 0,
-					fetcher: "local",
-					source_sha256: "",
-				};
-				callbacks.onEntryStart?.(outcome.source);
-				callbacks.onEntryComplete?.(failed);
-				aggregated.ingested.push(failed);
-				aggregated.total += 1;
-				aggregated.failed += 1;
-			} finally {
-				// Release the DB lock between sources so other consumers (a
-				// concurrent CLI call, the daemon, or a separate MCP server)
-				// can wedge in. The next source's first DB call reopens.
-				await ctx.db.release();
+				try {
+					const r = await ingestResolved(outcome.resolved, { ...rest, source: outcome.source }, ctx, callbacks);
+					aggregated.ingested.push(...r.ingested);
+					aggregated.total += r.total;
+					aggregated.ok += r.ok;
+					aggregated.unchanged += r.unchanged;
+					aggregated.failed += r.failed;
+				} catch (err) {
+					const message = err instanceof Error ? err.message : String(err);
+					const failed: IngestEntryResult = {
+						source_path: outcome.source,
+						logical_path: outcome.source,
+						version_id: null,
+						status: "failed",
+						error: message,
+						mime_type: null,
+						size_bytes: 0,
+						chunk_count: null,
+						fetcher: "local",
+						source_sha256: "",
+					};
+					callbacks.onEntryStart?.(outcome.source);
+					callbacks.onEntryComplete?.(failed);
+					aggregated.ingested.push(failed);
+					aggregated.total += 1;
+					aggregated.failed += 1;
+				} finally {
+					// Release the DB lock between sources so other consumers (a
+					// concurrent CLI call, the daemon, or a separate MCP server)
+					// can wedge in. The next source's first DB call reopens.
+					await ctx.db.release();
+				}
 			}
-		}
-		const summary = formatSummary(aggregated);
-		ctx.progress.done(summary);
-		return aggregated;
+			const summary = formatSummary(aggregated);
+			ctx.progress.done(summary);
+			return aggregated;
+		});
 	},
 });
@@ -207,11 +241,17 @@ Pass \`logical_path\` to override. For a multi-source / directory / glob walk it
  * Render the persistent stderr line shown for one completed entry. Mirrors
  * the glyphs used by the final `console_formatter` so users see the same
  * status indicators twice (once during ingest on stderr, once in the final
- * stdout summary).
+ * stdout summary). Successful entries show source kind, humanized byte
+ * size, and chunk count so the user can spot oddly small / oddly large
+ * files at a glance.
  */
 function formatEntryLine(entry: IngestEntryResult): string {
 	if (entry.status === "ok") {
-		return `${colors.green("✓")} ${colors.cyan(entry.logical_path)} ${colors.dim(`(${entry.fetcher}, ${entry.size_bytes}B)`)}`;
+		const parts: string[] = [entry.fetcher, formatBytes(entry.size_bytes)];
+		if (entry.chunk_count !== null) {
+			parts.push(`${entry.chunk_count} chunk${entry.chunk_count === 1 ? "" : "s"}`);
+		}
+		return `${colors.green("✓")} ${colors.cyan(entry.logical_path)} ${colors.dim(`(${parts.join(", ")})`)}`;
 	}
 	if (entry.status === "unchanged") {
 		return `${colors.dim("≡")} ${colors.cyan(entry.logical_path)} ${colors.dim("(unchanged)")}`;

package/src/operations/index.ts CHANGED

@@ -8,6 +8,7 @@ import { readOperation } from "./read.ts";
 import { refreshOperation } from "./refresh.ts";
 import { removeOperation } from "./remove.ts";
 import { searchOperation } from "./search.ts";
+import { statsOperation } from "./stats.ts";
 import { treeOperation } from "./tree.ts";
 import type { Operation } from "./types.ts";
 import { versionsOperation } from "./versions.ts";
@@ -28,6 +29,7 @@ export const OPERATIONS: Operation<any, any>[] = [
 	readOperation,
 	searchOperation,
 	infoOperation,
+	statsOperation,
 	versionsOperation,
 	diffOperation,
 	writeOperation,

package/src/operations/refresh.ts CHANGED

@@ -1,9 +1,31 @@
 import { z } from "zod";
+import { resolveEmbeddingWorkers } from "../context.ts";
 import { listDueRefreshes } from "../db/files.ts";
+import { withEmbedderPool } from "../ingest/embedder-pool.ts";
 import { colors } from "../output/formatter.ts";
+import { isInteractive } from "../output/tty.ts";
 import { refreshOne } from "../refresh/runner.ts";
 import { defineOperation } from "./types.ts";
+interface RefreshEntry {
+	logical_path: string;
+	status: "ok" | "unchanged" | "failed";
+	new_version_id?: string;
+	error?: string;
+}
+/** Render one refresh result as a persistent stderr / final-summary line. */
+function formatEntryLine(p: RefreshEntry): string {
+	if (p.status === "ok") {
+		const ver = p.new_version_id ? colors.dim(`→ ${p.new_version_id}`) : "";
+		return `${colors.green("✓")} ${colors.cyan(p.logical_path)} ${ver}`;
+	}
+	if (p.status === "unchanged") {
+		return `${colors.dim("·")} ${colors.dim(p.logical_path)} ${colors.dim("(unchanged)")}`;
+	}
+	return `${colors.red("✗")} ${p.logical_path} ${colors.dim(p.error ?? "")}`;
+}
 export const refreshOperation = defineOperation({
 	name: "membot_refresh",
 	cliName: "refresh",
@@ -29,44 +51,49 @@ export const refreshOperation = defineOperation({
 		let updated = 0;
 		let unchanged = 0;
 		let failed = 0;
-		const lines = result.processed.map((p) => {
-			if (p.status === "ok") {
-				updated++;
-				const ver = p.new_version_id ? colors.dim(`→ ${p.new_version_id}`) : "";
-				return `${colors.green("✓")} ${colors.cyan(p.logical_path)} ${ver}`;
-			}
-			if (p.status === "unchanged") {
-				unchanged++;
-				return `${colors.dim("·")} ${colors.dim(p.logical_path)} ${colors.dim("(unchanged)")}`;
-			}
-			failed++;
-			return `${colors.red("✗")} ${p.logical_path} ${colors.dim(p.error ?? "")}`;
-		});
+		for (const p of result.processed) {
+			if (p.status === "ok") updated++;
+			else if (p.status === "unchanged") unchanged++;
+			else failed++;
+		}
 		const parts = [colors.green(`updated ${updated}`), colors.dim(`unchanged ${unchanged}`)];
 		if (failed) parts.push(colors.red(`failed ${failed}`));
-		return `${lines.join("\n")}\n${parts.join(", ")}`;
+		const summary = parts.join(", ");
+		// In interactive mode the per-entry results were already streamed to
+		// stderr via progress.entry() during the run; printing the same list
+		// to stdout would just duplicate the scrollback. Non-interactive
+		// callers (JSON, piped, CI) still get the full list.
+		if (isInteractive()) return summary;
+		const lines = result.processed.map(formatEntryLine);
+		return `${lines.join("\n")}\n${summary}`;
 	},
 	handler: async (input, ctx) => {
-		const targets = input.logical_path
-			? [input.logical_path]
-			: (await listDueRefreshes(ctx.db)).map((r) => r.logical_path);
-		const out: Array<{
-			logical_path: string;
-			status: "ok" | "unchanged" | "failed";
-			new_version_id?: string;
-			error?: string;
-		}> = [];
-		ctx.progress.start(targets.length, "refresh");
-		for (const path of targets) {
-			ctx.progress.tick(path);
-			try {
-				const r = await refreshOne(ctx, path, input.force, (sublabel) => ctx.progress.update(sublabel));
-				out.push(r);
-			} catch (err) {
-				out.push({ logical_path: path, status: "failed", error: err instanceof Error ? err.message : String(err) });
+		// Per-command embedder pool: workers come up at the start of the
+		// refresh sweep and are killed before we return, so a manual
+		// `membot refresh` doesn't leave subprocesses around.
+		const workers = resolveEmbeddingWorkers(ctx.config.embedding.workers);
+		return withEmbedderPool(workers, ctx.config.embedding_model, async () => {
+			const targets = input.logical_path
+				? [input.logical_path]
+				: (await listDueRefreshes(ctx.db)).map((r) => r.logical_path);
+			const out: RefreshEntry[] = [];
+			ctx.progress.start(targets.length, "refresh");
+			for (const path of targets) {
+				ctx.progress.setLabel(path);
+				let entry: RefreshEntry;
+				try {
+					entry = await refreshOne(ctx, path, input.force, (sublabel) => ctx.progress.update(sublabel));
+				} catch (err) {
+					entry = { logical_path: path, status: "failed", error: err instanceof Error ? err.message : String(err) };
+				}
+				out.push(entry);
+				ctx.progress.tick(path);
+				ctx.progress.entry(formatEntryLine(entry));
 			}
-		}
-		ctx.progress.done(`refresh: ${out.filter((r) => r.status === "ok").length}/${out.length} updated`);
-		return { processed: out, count: out.length };
+			ctx.progress.done(`refresh: ${out.filter((r) => r.status === "ok").length}/${out.length} updated`);
+			return { processed: out, count: out.length };
+		});
 	},
 });