membot 0.8.0 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -10,6 +10,7 @@
10
10
  - **Local everything** — embeddings run on your machine; data lives in `~/.membot/index.duckdb`.
11
11
  - **One mental model** — every artifact (markdown, PDF, image, audio) becomes a markdown surrogate that flows through the same chunk → embed → search pipeline.
12
12
  - **Append-only versioning** — every ingest, refresh, or write creates a new `(logical_path, version_id)` row. History is queryable; nothing is mutated.
13
+ - **Parallel ingest** — directory/glob ingests run a worker pool (default `cpus - 1`, max 8) with a `Bun.Worker` per slot for the WASM embed step, so a `~/notes/**/*.md` import actually uses your cores. The TTY shows one status line per active worker plus an ETA and a running chunk total.
13
14
  - **Two surfaces, one source of truth** — every operation is exposed identically as a CLI subcommand and an MCP tool. The agent sees `membot_search`; you see `membot search`.
14
15
 
15
16
  ## Install
@@ -139,6 +140,8 @@ Add `--watch` (and optional `--tick <sec>`) to also run the refresh daemon, whic
139
140
  membot config set chunker.target_chars 800 # tweak any nested value
140
141
  membot config set embedding.workers 4 # cap parallel embed workers
141
142
  membot config set converters.max_inline_image_captions 50 # raise per-doc cap on vision captions for embedded images
143
+ membot config set ingest.worker_concurrency 4 # cap parallel ingest workers (default: cpus-1, max 8)
144
+ membot config set llm.describer_skip_when_titled false # always LLM-describe (default true skips when markdown has a clear H1)
142
145
  membot config get llm.anthropic_api_key --show-secrets # reveal the masked key
143
146
  membot config unset chunker.target_chars # back to schema default
144
147
  membot config path # print the absolute config path
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "membot",
3
- "version": "0.8.0",
3
+ "version": "0.10.0",
4
4
  "description": "Versioned context store with hybrid search for AI agents. Stdio + HTTP MCP server and CLI.",
5
5
  "type": "module",
6
6
  "exports": {
@@ -1,6 +1,18 @@
1
+ import { availableParallelism } from "node:os";
1
2
  import { z } from "zod";
2
3
  import { DEFAULTS, defaultMembotHome, EMBEDDING_DIMENSION, EMBEDDING_MODEL } from "../constants.ts";
3
4
 
5
/**
 * Default value for `ingest.worker_concurrency`.
 *
 * Takes the parallelism reported by `availableParallelism()`, holds one slot
 * back (so the orchestrator and any background work still have a core), and
 * clamps the result into `[1, DEFAULTS.MAX_WORKERS]` so machines with very
 * high core counts do not fan out into too many concurrent describe calls.
 *
 * @returns The worker count to use when the config does not set one.
 */
function defaultWorkerConcurrency(): number {
  const spare = availableParallelism() - 1;
  // Lower bound: always keep at least one worker, even on a single-core host.
  const atLeastOne = spare < 1 ? 1 : spare;
  // Upper bound: never exceed the project-wide cap.
  return atLeastOne > DEFAULTS.MAX_WORKERS ? DEFAULTS.MAX_WORKERS : atLeastOne;
}
15
+
4
16
  export const ChunkerConfigSchema = z.object({
5
17
  mode: z.enum(["deterministic", "llm"]).default(DEFAULTS.CHUNKER_MODE),
6
18
  target_chars: z.number().int().positive().default(DEFAULTS.CHUNKER_TARGET_CHARS),
@@ -17,6 +29,11 @@ export const LlmConfigSchema = z.object({
17
29
  chunker_model: z.string().default(DEFAULTS.CHUNKER_MODEL),
18
30
  describer_model: z.string().default(DEFAULTS.DESCRIBER_MODEL),
19
31
  vision_model: z.string().default(DEFAULTS.VISION_MODEL),
32
+ describer_skip_when_titled: z.boolean().default(DEFAULTS.DESCRIBER_SKIP_WHEN_TITLED),
33
+ });
34
+
35
+ export const IngestConfigSchema = z.object({
36
+ worker_concurrency: z.number().int().positive().default(defaultWorkerConcurrency),
20
37
  });
21
38
 
22
39
  export const DaemonConfigSchema = z.object({
@@ -61,6 +78,7 @@ export const MembotConfigSchema = z.object({
61
78
  chunker: ChunkerConfigSchema.default(() => ChunkerConfigSchema.parse({})),
62
79
  embedding: EmbeddingConfigSchema.default(() => EmbeddingConfigSchema.parse({})),
63
80
  converters: ConvertersConfigSchema.default(() => ConvertersConfigSchema.parse({})),
81
+ ingest: IngestConfigSchema.default(() => IngestConfigSchema.parse({})),
64
82
  llm: LlmConfigSchema.default(() => LlmConfigSchema.parse({})),
65
83
  downloaders: DownloadersConfigSchema.default(() => DownloadersConfigSchema.parse({})),
66
84
  daemon: DaemonConfigSchema.default(() => DaemonConfigSchema.parse({})),
@@ -72,6 +90,7 @@ export type MembotConfig = z.infer<typeof MembotConfigSchema>;
72
90
  export type ChunkerConfig = z.infer<typeof ChunkerConfigSchema>;
73
91
  export type EmbeddingConfig = z.infer<typeof EmbeddingConfigSchema>;
74
92
  export type ConvertersConfig = z.infer<typeof ConvertersConfigSchema>;
93
+ export type IngestConfig = z.infer<typeof IngestConfigSchema>;
75
94
  export type LlmConfig = z.infer<typeof LlmConfigSchema>;
76
95
  export type DownloadersConfig = z.infer<typeof DownloadersConfigSchema>;
77
96
  export type LinearDownloaderConfig = z.infer<typeof LinearDownloaderConfigSchema>;
package/src/constants.ts CHANGED
@@ -55,6 +55,21 @@ export const DEFAULTS = {
55
55
  * embedded images doesn't fan out into hundreds of vision requests.
56
56
  */
57
57
  MAX_INLINE_IMAGE_CAPTIONS: 20,
58
+ /**
59
+ * Hard cap for `ingest.worker_concurrency`. The runtime default is
60
+ * `cpus - 1` so machines with very high core counts can scale, but we
61
+ * clamp here to keep concurrent Anthropic describe calls (and per-worker
62
+ * WASM embedder allocations — each pipeline holds the model weights) from
63
+ * spiraling out of control.
64
+ */
65
+ MAX_WORKERS: 8,
66
+ /**
67
+ * When true, describe() skips the LLM for self-describing markdown/text
68
+ * (a clear H1 within the first 40 non-blank lines) and uses the heading +
69
+ * 200-char prefix instead. Avoids paying for an LLM round-trip when the
70
+ * file already has a human-written description.
71
+ */
72
+ DESCRIBER_SKIP_WHEN_TITLED: true,
58
73
  } as const;
59
74
 
60
75
  export const FILES = {
@@ -0,0 +1,60 @@
1
/**
 * Run `worker(item, index, workerId)` over `items` with at most `concurrency`
 * workers in flight at a time. Each runner has a stable `workerId` in
 * `[0, concurrency)` for the life of the call — useful when callers want to
 * address per-worker UI slots. Results come back in input order. Worker
 * rejections (and synchronous throws) are caught and surfaced as
 * `{ ok: false, error }` entries instead of aborting the batch; partial
 * failures are normal during bulk ingest, and the caller decides how to
 * render them per-entry.
 *
 * @param items - Inputs to process; never mutated.
 * @param concurrency - Requested number of parallel runners. Fractions are
 *   floored, values below 1 are raised to 1, and `NaN` is treated as 1.
 * @param worker - Async callback invoked once per item.
 * @returns One settled entry per input item, in input order.
 */
export async function pMap<T, R>(
  items: readonly T[],
  concurrency: number,
  worker: (item: T, index: number, workerId: number) => Promise<R>,
): Promise<Array<{ ok: true; value: R } | { ok: false; error: unknown }>> {
  // `Math.max(1, NaN)` is NaN, and `Array.from({ length: NaN })` is empty, so
  // a NaN concurrency would start zero runners and return an array of holes.
  // Fall back to a single runner instead of silently dropping the batch.
  const requested = Math.floor(concurrency);
  const limit = Number.isNaN(requested) ? 1 : Math.max(1, requested);
  const results: Array<{ ok: true; value: R } | { ok: false; error: unknown }> = new Array(items.length);
  // Shared cursor: each runner claims the next unprocessed index.
  let next = 0;

  const runOne = async (workerId: number): Promise<void> => {
    while (true) {
      const i = next++;
      if (i >= items.length) return;
      const item = items[i] as T;
      try {
        const value = await worker(item, i, workerId);
        results[i] = { ok: true, value };
      } catch (error) {
        results[i] = { ok: false, error };
      }
    }
  };

  // Never start more runners than there are items to process.
  const runners = Array.from({ length: Math.min(limit, items.length) }, (_, workerId) => runOne(workerId));
  await Promise.all(runners);
  return results;
}
37
+
38
/**
 * Minimal async mutex with a single slot.
 *
 * `lock(fn)` waits for every previously queued caller to finish, runs `fn`
 * with exclusive access, and resolves (or rejects) with whatever `fn`
 * produced. Callers are served in the order they called `lock`. It exists to
 * serialize the persist phase of bulk ingest: all workers share one DuckDB
 * connection and DuckDB rejects nested `BEGIN` statements.
 */
export class AsyncMutex {
  /** Settles once the most recently queued holder has released the slot. */
  private chain: Promise<void> = Promise.resolve();

  /**
   * Run `fn` once all earlier holders have released the mutex.
   *
   * @param fn - Critical section to execute exclusively.
   * @returns The value `fn` resolves with; a rejection from `fn` propagates
   *   after the mutex has been released.
   */
  async lock<T>(fn: () => Promise<T>): Promise<T> {
    const previous = this.chain;
    // The Promise executor runs synchronously, so `unlock` is always replaced
    // with the real resolver before it can be called.
    let unlock: () => void = () => {};
    const gate = new Promise<void>((resolve) => {
      unlock = resolve;
    });
    // Publish our gate before awaiting so later callers queue behind us.
    this.chain = gate;
    await previous;
    try {
      const outcome = await fn();
      return outcome;
    } finally {
      // Release even when `fn` throws, otherwise the queue would stall.
      unlock();
    }
  }
}
@@ -13,9 +13,12 @@ Rules:
13
13
 
14
14
  /**
15
15
  * Generate a one-paragraph description for the file's surrogate, used
16
- * as the `<description>` line in chunks.search_text. Falls back to a
17
- * deterministic heuristic when no API key is configured so the pipeline
18
- * still produces a non-empty description offline.
16
+ * as the `<description>` line in chunks.search_text. When the file is
17
+ * self-describing (markdown/text with a clear H1 in the opening) and the
18
+ * `describer_skip_when_titled` flag is on, returns the title-derived
19
+ * description without calling the LLM. Falls back to a deterministic
20
+ * heuristic when no API key is configured so the pipeline still produces
21
+ * a non-empty description offline.
19
22
  */
20
23
  export async function describe(
21
24
  logicalPath: string,
@@ -23,6 +26,13 @@ export async function describe(
23
26
  surrogate: string,
24
27
  llm: LlmConfig,
25
28
  ): Promise<string> {
29
+ if (llm.describer_skip_when_titled) {
30
+ const titled = tryTitleDescription(mimeType, surrogate);
31
+ if (titled) {
32
+ logger.debug(`describer: using title-derived description for ${logicalPath}`);
33
+ return titled;
34
+ }
35
+ }
26
36
  if (!llm.anthropic_api_key || llm.anthropic_api_key.trim() === "") {
27
37
  return deterministicDescription(logicalPath, mimeType, surrogate);
28
38
  }
@@ -52,6 +62,42 @@ export async function describe(
52
62
  }
53
63
  }
54
64
 
65
const TEXTUAL_MIMES = new Set(["application/json", "application/yaml", "application/x-yaml"]);

/**
 * Returns a title-derived description when the surrogate is "self-describing"
 * markdown/text — a clear H1 within the first 40 non-blank lines, of
 * reasonable length (5–200 characters). Returns null otherwise so the caller
 * falls through to the LLM. Skipping the LLM for files that already have a
 * human-written heading is the main throughput win during bulk ingest.
 *
 * Heading detection follows the ATX rules that matter here:
 * - lines inside a fenced code block (``` or ~~~) are never headings, so a
 *   shell comment such as `# install deps` is not mistaken for a title;
 * - a closing `#` run is only stripped when it is separated from the title by
 *   whitespace, so `# Learning C#` keeps its trailing `#`.
 *
 * The description is `<heading> — <body>`, where body is the first 200
 * characters of the surrogate with the detected heading line removed and
 * whitespace runs collapsed to single spaces; just `<heading>` when no body
 * text remains.
 *
 * NOTE(review): `TEXTUAL_MIMES` admits YAML, where a leading `# ...` line is a
 * comment rather than a title — confirm that is intended for those surrogates.
 *
 * @param mimeType - MIME type of the source; only `text/*` and the entries in
 *   `TEXTUAL_MIMES` are eligible.
 * @param surrogate - Markdown/text surrogate to inspect.
 * @returns The title-derived description, or null when no usable H1 exists.
 */
export function tryTitleDescription(mimeType: string, surrogate: string): string | null {
  if (!mimeType.startsWith("text/") && !TEXTUAL_MIMES.has(mimeType)) return null;

  const headingPattern = /^#\s+(.+?)(?:\s+#+)?$/;
  const fenceOpenPattern = /^(`{3,}|~{3,})/;
  const fenceClosePattern = /^(`{3,}|~{3,})$/;

  // Splitting on a capturing group keeps the separators: even indices are
  // lines, odd indices are the original line endings. That lets us drop the
  // exact heading line later without disturbing the rest of the text.
  const parts = surrogate.split(/(\r?\n)/);
  let nonBlank = 0;
  let heading: string | null = null;
  let headingIndex = -1;
  let openFence: string | null = null;

  for (let i = 0; i < parts.length; i += 2) {
    const trimmed = (parts[i] ?? "").trim();
    if (!trimmed) continue;
    nonBlank += 1;
    if (nonBlank > 40) break;

    if (openFence !== null) {
      // Inside a code fence: only look for the matching closing fence.
      const closing = fenceClosePattern.exec(trimmed)?.[1];
      if (closing && closing[0] === openFence[0] && closing.length >= openFence.length) {
        openFence = null;
      }
      continue;
    }
    const opening = fenceOpenPattern.exec(trimmed)?.[1];
    if (opening) {
      openFence = opening;
      continue;
    }

    const m = headingPattern.exec(trimmed);
    if (m?.[1]) {
      heading = m[1].trim();
      headingIndex = i;
      break;
    }
  }

  if (!heading) return null;
  if (heading.length < 5 || heading.length > 200) return null;

  // Remove the line we actually matched (not "the first line that looks like
  // a heading"), so an indented H1 is not repeated inside the body.
  parts[headingIndex] = "";
  const body = parts.join("").trim().slice(0, 200).replace(/\s+/g, " ").trim();
  return body ? `${heading} — ${body}` : heading;
}
100
+
55
101
  /**
56
102
  * Cheap, deterministic description used when the LLM isn't available.
57
103
  * For markdown/text it's the first heading + a 200-char prefix; for