npm - membot - Versions diffs - 0.8.0 → 0.10.0 - Mend

membot 0.8.0 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12) hide show

package/README.md +3 -0
package/package.json +1 -1
package/src/config/schemas.ts +19 -0
package/src/constants.ts +15 -0
package/src/ingest/concurrency.ts +60 -0
package/src/ingest/describer.ts +49 -3
package/src/ingest/ingest.ts +277 -67
package/src/operations/add.ts +49 -17
package/src/operations/refresh.ts +43 -24
package/src/output/formatter.ts +21 -0
package/src/output/logger.ts +36 -0
package/src/output/progress.ts +408 -46

package/README.md CHANGED Viewed

@@ -10,6 +10,7 @@
 - **Local everything** — embeddings run on your machine; data lives in `~/.membot/index.duckdb`.
 - **One mental model** — every artifact (markdown, PDF, image, audio) becomes a markdown surrogate that flows through the same chunk → embed → search pipeline.
 - **Append-only versioning** — every ingest, refresh, or write creates a new `(logical_path, version_id)` row. History is queryable; nothing is mutated.
+- **Parallel ingest** — directory/glob ingests run a worker pool (default `cpus - 1`, max 8) with a `Bun.Worker` per slot for the WASM embed step, so a `~/notes/**/*.md` import actually uses your cores. The TTY shows one status line per active worker plus an ETA and a running chunk total.
 - **Two surfaces, one source of truth** — every operation is exposed identically as a CLI subcommand and an MCP tool. The agent sees `membot_search`; you see `membot search`.
 ## Install
@@ -139,6 +140,8 @@ Add `--watch` (and optional `--tick <sec>`) to also run the refresh daemon, whic
   membot config set chunker.target_chars 800                    # tweak any nested value
   membot config set embedding.workers 4                         # cap parallel embed workers
   membot config set converters.max_inline_image_captions 50     # raise per-doc cap on vision captions for embedded images
+  membot config set ingest.worker_concurrency 4                 # cap parallel ingest workers (default: cpus-1, max 8)
+  membot config set llm.describer_skip_when_titled false        # always LLM-describe (default true skips when markdown has a clear H1)
   membot config get llm.anthropic_api_key --show-secrets        # reveal the masked key
   membot config unset chunker.target_chars                      # back to schema default
   membot config path                                            # print the absolute config path

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
 	"name": "membot",
-	"version": "0.8.0",
+	"version": "0.10.0",
 	"description": "Versioned context store with hybrid search for AI agents. Stdio + HTTP MCP server and CLI.",
 	"type": "module",
 	"exports": {

package/src/config/schemas.ts CHANGED Viewed

@@ -1,6 +1,18 @@
+import { availableParallelism } from "node:os";
 import { z } from "zod";
 import { DEFAULTS, defaultMembotHome, EMBEDDING_DIMENSION, EMBEDDING_MODEL } from "../constants.ts";
+/**
+ * Default value for `ingest.worker_concurrency`.
+ *
+ * Starts from the available parallelism minus one, leaving a core free so the
+ * orchestrator and any background work still have somewhere to run, then
+ * pins the result into `[1, DEFAULTS.MAX_WORKERS]` so machines with very high
+ * core counts do not flood Anthropic with concurrent describe calls.
+ *
+ * @returns The worker count to use when the config does not set one.
+ */
+function defaultWorkerConcurrency(): number {
+	const spare = availableParallelism() - 1;
+	const atLeastOne = spare < 1 ? 1 : spare;
+	return atLeastOne > DEFAULTS.MAX_WORKERS ? DEFAULTS.MAX_WORKERS : atLeastOne;
+}
 export const ChunkerConfigSchema = z.object({
 	mode: z.enum(["deterministic", "llm"]).default(DEFAULTS.CHUNKER_MODE),
 	target_chars: z.number().int().positive().default(DEFAULTS.CHUNKER_TARGET_CHARS),
@@ -17,6 +29,11 @@ export const LlmConfigSchema = z.object({
 	chunker_model: z.string().default(DEFAULTS.CHUNKER_MODEL),
 	describer_model: z.string().default(DEFAULTS.DESCRIBER_MODEL),
 	vision_model: z.string().default(DEFAULTS.VISION_MODEL),
+	describer_skip_when_titled: z.boolean().default(DEFAULTS.DESCRIBER_SKIP_WHEN_TITLED),
+});
+/**
+ * Settings for bulk ingest. `worker_concurrency` must be a positive integer;
+ * when it is omitted the default is supplied by `defaultWorkerConcurrency`.
+ * NOTE(review): this schema does not itself cap an explicitly configured
+ * value at `DEFAULTS.MAX_WORKERS` — confirm the cap is applied wherever the
+ * value is consumed.
+ */
+export const IngestConfigSchema = z.object({
+	worker_concurrency: z.number().int().positive().default(defaultWorkerConcurrency),
 });
 export const DaemonConfigSchema = z.object({
@@ -61,6 +78,7 @@ export const MembotConfigSchema = z.object({
 	chunker: ChunkerConfigSchema.default(() => ChunkerConfigSchema.parse({})),
 	embedding: EmbeddingConfigSchema.default(() => EmbeddingConfigSchema.parse({})),
 	converters: ConvertersConfigSchema.default(() => ConvertersConfigSchema.parse({})),
+	ingest: IngestConfigSchema.default(() => IngestConfigSchema.parse({})),
 	llm: LlmConfigSchema.default(() => LlmConfigSchema.parse({})),
 	downloaders: DownloadersConfigSchema.default(() => DownloadersConfigSchema.parse({})),
 	daemon: DaemonConfigSchema.default(() => DaemonConfigSchema.parse({})),
@@ -72,6 +90,7 @@ export type MembotConfig = z.infer<typeof MembotConfigSchema>;
 export type ChunkerConfig = z.infer<typeof ChunkerConfigSchema>;
 export type EmbeddingConfig = z.infer<typeof EmbeddingConfigSchema>;
 export type ConvertersConfig = z.infer<typeof ConvertersConfigSchema>;
+export type IngestConfig = z.infer<typeof IngestConfigSchema>;
 export type LlmConfig = z.infer<typeof LlmConfigSchema>;
 export type DownloadersConfig = z.infer<typeof DownloadersConfigSchema>;
 export type LinearDownloaderConfig = z.infer<typeof LinearDownloaderConfigSchema>;

package/src/constants.ts CHANGED Viewed

@@ -55,6 +55,21 @@ export const DEFAULTS = {
 	 * embedded images doesn't fan out into hundreds of vision requests.
 	 */
 	MAX_INLINE_IMAGE_CAPTIONS: 20,
+	/**
+	 * Hard cap for `ingest.worker_concurrency`. The runtime default is
+	 * `cpus - 1` so machines with very high core counts can scale, but we
+	 * clamp here to keep concurrent Anthropic describe calls (and per-worker
+	 * WASM embedder allocations — each pipeline holds the model weights) from
+	 * spiraling out of control.
+	 */
+	MAX_WORKERS: 8,
+	/**
+	 * When true, describe() skips the LLM for self-describing markdown/text
+	 * (a clear H1 within the first 40 non-blank lines) and uses the heading +
+	 * 200-char prefix instead. Avoids paying for an LLM round-trip when the
+	 * file already has a human-written description.
+	 */
+	DESCRIBER_SKIP_WHEN_TITLED: true,
 } as const;
 export const FILES = {

package/src/ingest/concurrency.ts ADDED Viewed

@@ -0,0 +1,60 @@
+/**
+ * Run `worker(item, index, workerId)` over `items` with at most `concurrency`
+ * workers in flight at a time. Each runner has a stable `workerId` in
+ * `[0, concurrency)` for the life of the call — useful when callers want to
+ * address per-worker UI slots. Results come back in input order. Worker
+ * rejections are caught and surfaced as `{ ok: false, error }` entries
+ * instead of aborting the batch; partial failures are normal during bulk
+ * ingest, and the caller decides how to render them per-entry.
+ *
+ * @param items Inputs to process; an empty list resolves to an empty array.
+ * @param concurrency Maximum number of simultaneous runners. Fractions are
+ *   floored, values below 1 are raised to 1, and `NaN` is treated as 1.
+ * @param worker Async callback invoked once per item.
+ * @returns One settled entry per input, at the same index as its input.
+ */
+export async function pMap<T, R>(
+	items: readonly T[],
+	concurrency: number,
+	worker: (item: T, index: number, workerId: number) => Promise<R>,
+): Promise<Array<{ ok: true; value: R } | { ok: false; error: unknown }>> {
+	// `Math.max(1, NaN)` is NaN, which would start zero runners and hand back
+	// an array of empty slots without processing anything. Fall back to a
+	// single runner so every item is still visited.
+	const limit = Number.isNaN(concurrency) ? 1 : Math.max(1, Math.floor(concurrency));
+	const results: Array<{ ok: true; value: R } | { ok: false; error: unknown }> = new Array(items.length);
+	// Shared cursor: each runner claims the next unprocessed index. The claim
+	// is synchronous, so two runners can never take the same item.
+	let next = 0;
+	const runOne = async (workerId: number): Promise<void> => {
+		while (true) {
+			const i = next++;
+			if (i >= items.length) return;
+			const item = items[i] as T;
+			try {
+				const value = await worker(item, i, workerId);
+				results[i] = { ok: true, value };
+			} catch (error) {
+				results[i] = { ok: false, error };
+			}
+		}
+	};
+	// Never start more runners than there are items.
+	const runners = Array.from({ length: Math.min(limit, items.length) }, (_, workerId) => runOne(workerId));
+	await Promise.all(runners);
+	return results;
+}
+/**
+ * One-at-a-time async lock. `lock(fn)` waits for every earlier caller to
+ * finish, runs `fn` exclusively, and resolves (or rejects) with whatever `fn`
+ * produced; waiters are served in the order they called `lock`. It guards
+ * the persist phase of bulk ingest, where all workers share a single DuckDB
+ * connection and DuckDB rejects nested `BEGIN` statements.
+ */
+export class AsyncMutex {
+	/** Settles once the most recently queued holder has released the lock. */
+	private tail: Promise<void> = Promise.resolve();
+	async lock<T>(fn: () => Promise<T>): Promise<T> {
+		// The executor runs synchronously, so `unlock` is bound to this
+		// caller's gate before anyone else can queue behind it.
+		let unlock: () => void = () => {};
+		const gate = new Promise<void>((resolve) => {
+			unlock = resolve;
+		});
+		const predecessor = this.tail;
+		this.tail = gate;
+		await predecessor;
+		try {
+			const outcome = await fn();
+			return outcome;
+		} finally {
+			// Release even when `fn` throws so later waiters are not stranded.
+			unlock();
+		}
+	}
+}

package/src/ingest/describer.ts CHANGED Viewed

@@ -13,9 +13,12 @@ Rules:
 /**
  * Generate a one-paragraph description for the file's surrogate, used
- * as the `<description>` line in chunks.search_text. Falls back to a
- * deterministic heuristic when no API key is configured so the pipeline
- * still produces a non-empty description offline.
+ * as the `<description>` line in chunks.search_text. When the file is
+ * self-describing (markdown/text with a clear H1 in the opening) and the
+ * `describer_skip_when_titled` flag is on, returns the title-derived
+ * description without calling the LLM. Falls back to a deterministic
+ * heuristic when no API key is configured so the pipeline still produces
+ * a non-empty description offline.
  */
 export async function describe(
 	logicalPath: string,
@@ -23,6 +26,13 @@ export async function describe(
 	surrogate: string,
 	llm: LlmConfig,
 ): Promise<string> {
+	if (llm.describer_skip_when_titled) {
+		const titled = tryTitleDescription(mimeType, surrogate);
+		if (titled) {
+			logger.debug(`describer: using title-derived description for ${logicalPath}`);
+			return titled;
+		}
+	}
 	if (!llm.anthropic_api_key || llm.anthropic_api_key.trim() === "") {
 		return deterministicDescription(logicalPath, mimeType, surrogate);
 	}
@@ -52,6 +62,42 @@ export async function describe(
 	}
 }
+const TEXTUAL_MIMES = new Set(["application/json", "application/yaml", "application/x-yaml"]);
+/** ATX level-1 heading on an already-trimmed line; optional closing `#`s are dropped. */
+const H1_LINE = /^#\s+(.+?)\s*#*$/;
+/**
+ * Returns a title-derived description when the surrogate is "self-describing"
+ * markdown/text — a clear H1 within the first 40 non-blank lines, of
+ * reasonable length. Returns null otherwise so the caller falls through to
+ * the LLM. Skipping the LLM for files that already have a human-written
+ * heading is the main throughput win during bulk ingest.
+ *
+ * Lines inside fenced code blocks are never treated as headings (they still
+ * count toward the 40-line window), so a `# comment` in a shell snippet is
+ * not mistaken for the document title.
+ *
+ * @param mimeType MIME type of the surrogate; only `text/*` and the entries
+ *   in `TEXTUAL_MIMES` are considered.
+ * @param surrogate Text to inspect.
+ * @returns `"<heading> — <body prefix>"`, just the heading when nothing else
+ *   remains, or null when no usable heading (5–200 characters) is found.
+ */
+export function tryTitleDescription(mimeType: string, surrogate: string): string | null {
+	if (!mimeType.startsWith("text/") && !TEXTUAL_MIMES.has(mimeType)) return null;
+	let heading: string | null = null;
+	let headingStart = 0;
+	let headingEnd = 0;
+	let nonBlank = 0;
+	let openFence: string | null = null;
+	let cursor = 0;
+	// Walk line by line while tracking offsets, so the exact heading line that
+	// was matched can be cut out later (and so a huge surrogate is not split
+	// in full just to look at its opening lines).
+	while (cursor <= surrogate.length) {
+		const newline = surrogate.indexOf("\n", cursor);
+		const rawEnd = newline === -1 ? surrogate.length : newline;
+		// Leave a trailing "\r" (CRLF input) outside the line's span.
+		const lineEnd = surrogate.endsWith("\r", rawEnd) ? rawEnd - 1 : rawEnd;
+		const trimmed = surrogate.slice(cursor, lineEnd).trim();
+		if (trimmed) {
+			nonBlank += 1;
+			if (nonBlank > 40) break;
+			const marker = trimmed.startsWith("```") ? "`" : trimmed.startsWith("~~~") ? "~" : null;
+			if (marker !== null) {
+				// A fence is closed only by a fence of the same marker character.
+				if (openFence === null) openFence = marker;
+				else if (openFence === marker) openFence = null;
+			} else if (openFence === null) {
+				const m = H1_LINE.exec(trimmed);
+				if (m?.[1]) {
+					heading = m[1].trim();
+					headingStart = cursor;
+					headingEnd = lineEnd;
+					break;
+				}
+			}
+		}
+		if (newline === -1) break;
+		cursor = newline + 1;
+	}
+	if (!heading) return null;
+	if (heading.length < 5 || heading.length > 200) return null;
+	// Remove the matched heading line itself. A column-0 regex would miss an
+	// indented heading, leave it duplicated in the body, and delete a later
+	// H1 instead.
+	const body = (surrogate.slice(0, headingStart) + surrogate.slice(headingEnd))
+		.trim()
+		.slice(0, 200)
+		.replace(/\s+/g, " ")
+		.trim();
+	return body ? `${heading} — ${body}` : heading;
+}
 /**
  * Cheap, deterministic description used when the LLM isn't available.
  * For markdown/text it's the first heading + a 200-char prefix; for