membot 0.7.0 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -64,6 +64,7 @@ membot read <logical_path> # current markdown surrogate
64
64
  membot read <logical_path> --bytes # original bytes (base64) — PDF/DOCX/image as ingested
65
65
  membot read <logical_path> --version <ts> # historical snapshot
66
66
  membot info <logical_path> # metadata only (no content)
67
+ membot stats [prefix] # whole-index summary; optional prefix scopes the aggregates
67
68
  membot versions <logical_path> # every version, newest first
68
69
  membot diff <logical_path> --a <ts> [--b <ts>] # unified diff between versions
69
70
  ```
@@ -129,6 +130,7 @@ Tombstones hide a path from `ls` / `tree` / `search` but `versions` and `read --
129
130
  | `membot write <path> --content <txt>` | Write inline agent-authored markdown as a new version |
130
131
  | `membot search <query>` | Hybrid search (semantic + BM25); add `--include-history` to search older versions |
131
132
  | `membot info <path>` | Inspect metadata (source, downloader, refresh schedule, digests) without content |
133
+ | `membot stats [prefix]` | Summarize the index (file/version/chunk/blob counts, on-disk size, refresh health, mime/source/downloader breakdowns); optional prefix scopes |
132
134
  | `membot versions <path>` | List every version newest-first with version_id and change notes |
133
135
  | `membot diff <path> --a <ts>` | Unified diff between two versions |
134
136
  | `membot mv <old> <new>` | Rename a logical_path (history preserved) |
@@ -161,4 +163,5 @@ Tombstones hide a path from `ls` / `tree` / `search` but `versions` and `read --
161
163
 
162
164
  - Data lives in `~/.membot/index.duckdb` (override via `MEMBOT_HOME`).
163
165
  - Optional `ANTHROPIC_API_KEY` enables LLM fallback for messy/binary input. Without it, conversion degrades to deterministic native output.
166
+ - `embedding.workers` (config key) caps the per-command embed-worker subprocess pool spawned at the top of `add` / `refresh` / `write`. Default `null` resolves to `max(1, cpus()-1)`; set `1` to disable the pool.
164
167
  - Config file: `~/.membot/config.json` (see `membot --help` for the global flags).
@@ -64,6 +64,7 @@ membot read <logical_path> # current markdown surrogate
64
64
  membot read <logical_path> --bytes # original bytes (base64) — PDF/DOCX/image as ingested
65
65
  membot read <logical_path> --version <ts> # historical snapshot
66
66
  membot info <logical_path> # metadata only (no content)
67
+ membot stats [prefix] # whole-index summary; optional prefix scopes the aggregates
67
68
  membot versions <logical_path> # every version, newest first
68
69
  membot diff <logical_path> --a <ts> [--b <ts>] # unified diff between versions
69
70
  ```
@@ -129,6 +130,7 @@ Tombstones hide a path from `ls` / `tree` / `search` but `versions` and `read --
129
130
  | `membot write <path> --content <txt>` | Write inline agent-authored markdown as a new version |
130
131
  | `membot search <query>` | Hybrid search (semantic + BM25); add `--include-history` to search older versions |
131
132
  | `membot info <path>` | Inspect metadata (source, downloader, refresh schedule, digests) without content |
133
+ | `membot stats [prefix]` | Summarize the index (file/version/chunk/blob counts, on-disk size, refresh health, mime/source/downloader breakdowns); optional prefix scopes |
132
134
  | `membot versions <path>` | List every version newest-first with version_id and change notes |
133
135
  | `membot diff <path> --a <ts>` | Unified diff between two versions |
134
136
  | `membot mv <old> <new>` | Rename a logical_path (history preserved) |
@@ -161,4 +163,5 @@ Tombstones hide a path from `ls` / `tree` / `search` but `versions` and `read --
161
163
 
162
164
  - Data lives in `~/.membot/index.duckdb` (override via `MEMBOT_HOME`).
163
165
  - Optional `ANTHROPIC_API_KEY` enables LLM fallback for messy/binary input. Without it, conversion degrades to deterministic native output.
166
+ - `embedding.workers` (config key) caps the per-command embed-worker subprocess pool spawned at the top of `add` / `refresh` / `write`. Default `null` resolves to `max(1, cpus()-1)`; set `1` to disable the pool.
164
167
  - Config file: `~/.membot/config.json` (see `membot --help` for the global flags).
package/README.md CHANGED
@@ -10,6 +10,7 @@
10
10
  - **Local everything** — embeddings run on your machine; data lives in `~/.membot/index.duckdb`.
11
11
  - **One mental model** — every artifact (markdown, PDF, image, audio) becomes a markdown surrogate that flows through the same chunk → embed → search pipeline.
12
12
  - **Append-only versioning** — every ingest, refresh, or write creates a new `(logical_path, version_id)` row. History is queryable; nothing is mutated.
13
+ - **Parallel ingest** — directory/glob ingests run a worker pool (default `cpus - 1`, max 8) with an embed-worker subprocess per slot for the WASM embed step, so a `~/notes/**/*.md` import actually uses your cores. The TTY shows one status line per active worker plus an ETA and a running chunk total.
13
14
  - **Two surfaces, one source of truth** — every operation is exposed identically as a CLI subcommand and an MCP tool. The agent sees `membot_search`; you see `membot search`.
14
15
 
15
16
  ## Install
@@ -83,6 +84,7 @@ The skill files describe the discover → ingest → search → read → write w
83
84
  | `membot read <path>` | Read the markdown surrogate (or `--bytes` for original bytes, base64) |
84
85
  | `membot search <query>` | Hybrid search (semantic + BM25); `--include-history` searches older versions |
85
86
  | `membot info <path>` | Inspect metadata (source, fetcher, schedule, digests) without content |
87
+ | `membot stats [prefix]` | Summarize the index (file/version/chunk/blob counts, on-disk size, refresh health, mime/source/downloader breakdowns); optional prefix scopes the aggregates |
86
88
  | `membot versions <path>` | List every version newest-first |
87
89
  | `membot diff <path> <a> [b]` | Unified diff between two versions |
88
90
  | `membot write <path>` | Write inline agent-authored markdown as a new version |
@@ -136,12 +138,17 @@ Add `--watch` (and optional `--tick <sec>`) to also run the refresh daemon, whic
136
138
  membot config list # show every value (secrets masked)
137
139
  membot config set llm.anthropic_api_key sk-ant-... # enable LLM-fallback paths
138
140
  membot config set chunker.target_chars 800 # tweak any nested value
141
+ membot config set embedding.workers 4 # cap parallel embed workers
139
142
  membot config set converters.max_inline_image_captions 50 # raise per-doc cap on vision captions for embedded images
143
+ membot config set ingest.worker_concurrency 4 # cap parallel ingest workers (default: cpus-1, max 8)
144
+ membot config set llm.describer_skip_when_titled false # always LLM-describe (default true skips when markdown has a clear H1)
140
145
  membot config get llm.anthropic_api_key --show-secrets # reveal the masked key
141
146
  membot config unset chunker.target_chars # back to schema default
142
147
  membot config path # print the absolute config path
143
148
  ```
144
149
 
150
+ **Parallel embedding:** `embedding.workers` (default `null` → `max(1, cpus()-1)`) controls how many subprocess workers fan out the WASM embedding work. The pool is **per-command** — spawned at the start of `add` / `refresh` / `write` and killed before the command returns, so membot doesn't keep idle workers around between invocations. Each worker loads its own ~50MB copy of the model, so on RAM-constrained machines drop it to a small fixed number (e.g. `4`); set `1` to disable the pool entirely and embed inline.
151
+
145
152
  Values are written with file mode `0600`. `ANTHROPIC_API_KEY` set in the environment still wins on read, so existing env-var setups keep working.
146
153
  - **Browser session:** `~/.membot/auth/browser-profile/` (Playwright persistent profile — cookies, localStorage, IndexedDB). Captured by `membot login`; cookie-based downloaders (Google) reuse it on every fetch. Delete the directory to force a fresh login.
147
154
  - **API keys:** stored under `downloaders.<service>.api_key` in `~/.membot/config.json`. Read by API-based downloaders (GitHub, Linear).
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "membot",
3
- "version": "0.7.0",
3
+ "version": "0.10.0",
4
4
  "description": "Versioned context store with hybrid search for AI agents. Stdio + HTTP MCP server and CLI.",
5
5
  "type": "module",
6
6
  "exports": {
package/src/cli.ts CHANGED
@@ -10,12 +10,23 @@ import { registerReindexCommand } from "./commands/reindex.ts";
10
10
  import { registerServeCommand } from "./commands/serve.ts";
11
11
  import { registerSkillCommand } from "./commands/skill.ts";
12
12
  import { registerUpgradeCommand } from "./commands/upgrade.ts";
13
+ import { EMBED_WORKER_SENTINEL } from "./constants.ts";
13
14
  import type { BuildContextOptions } from "./context.ts";
15
+ import { runEmbedWorker } from "./ingest/embed-worker.ts";
14
16
  import { mountAsCommanderCommand } from "./mount/commander.ts";
15
17
  import { OPERATIONS } from "./operations/index.ts";
16
18
  import { logger } from "./output/logger.ts";
17
19
  import { maybeCheckForUpdate } from "./update/background.ts";
18
20
 
21
+ // Hidden worker mode: the EmbedderPool re-execs this binary with the sentinel
22
+ // as argv[2] (or argv[1] when `bun run src/cli.ts <sentinel>` is invoked
23
+ // directly during tests). We bypass commander entirely and run the worker
24
+ // stdin/stdout protocol loop instead.
25
+ if (process.argv.includes(EMBED_WORKER_SENTINEL)) {
26
+ await runEmbedWorker();
27
+ process.exit(0);
28
+ }
29
+
19
30
  program
20
31
  .name("membot")
21
32
  .description("Versioned context store with hybrid search for AI agents. Stdio + HTTP MCP server and CLI.")
@@ -1,6 +1,18 @@
1
+ import { availableParallelism } from "node:os";
1
2
  import { z } from "zod";
2
3
  import { DEFAULTS, defaultMembotHome, EMBEDDING_DIMENSION, EMBEDDING_MODEL } from "../constants.ts";
3
4
 
5
+ /**
6
+ * Compute the default ingest worker count: one fewer than the available CPUs
7
+ * (so the orchestrator and any background work still has a core), clamped to
8
+ * `[1, MAX_WORKERS]` to avoid hammering Anthropic with too many concurrent
9
+ * describe calls on machines with very high core counts.
10
+ */
11
+ function defaultWorkerConcurrency(): number {
12
+ const cpus = availableParallelism();
13
+ return Math.min(DEFAULTS.MAX_WORKERS, Math.max(1, cpus - 1));
14
+ }
15
+
4
16
  export const ChunkerConfigSchema = z.object({
5
17
  mode: z.enum(["deterministic", "llm"]).default(DEFAULTS.CHUNKER_MODE),
6
18
  target_chars: z.number().int().positive().default(DEFAULTS.CHUNKER_TARGET_CHARS),
@@ -17,12 +29,29 @@ export const LlmConfigSchema = z.object({
17
29
  chunker_model: z.string().default(DEFAULTS.CHUNKER_MODEL),
18
30
  describer_model: z.string().default(DEFAULTS.DESCRIBER_MODEL),
19
31
  vision_model: z.string().default(DEFAULTS.VISION_MODEL),
32
+ describer_skip_when_titled: z.boolean().default(DEFAULTS.DESCRIBER_SKIP_WHEN_TITLED),
33
+ });
34
+
35
+ export const IngestConfigSchema = z.object({
36
+ worker_concurrency: z.number().int().positive().default(defaultWorkerConcurrency),
20
37
  });
21
38
 
22
39
  export const DaemonConfigSchema = z.object({
23
40
  tick_interval_sec: z.number().int().positive().default(DEFAULTS.DAEMON_TICK_SEC),
24
41
  });
25
42
 
43
+ /**
44
+ * Embedding parallelism. `workers = null` (the default) resolves to
45
+ * `max(1, cpus()-1)` at context-build time so the pool grows with the host
46
+ * machine. Setting `workers = 1` disables the subprocess pool entirely
47
+ * and runs embedding inline in the parent (the original single-thread
48
+ * behaviour). Each worker loads its own copy of the WASM model
49
+ * (~50MB resident), so cap this on RAM-constrained machines.
50
+ */
51
+ export const EmbeddingConfigSchema = z.object({
52
+ workers: z.number().int().min(1).nullable().default(null),
53
+ });
54
+
26
55
  export const LinearDownloaderConfigSchema = z.object({
27
56
  api_key: z.string().meta({ secret: true }).default(""),
28
57
  });
@@ -47,7 +76,9 @@ export const MembotConfigSchema = z.object({
47
76
  embedding_model: z.string().default(EMBEDDING_MODEL),
48
77
  embedding_dimension: z.number().int().positive().default(EMBEDDING_DIMENSION),
49
78
  chunker: ChunkerConfigSchema.default(() => ChunkerConfigSchema.parse({})),
79
+ embedding: EmbeddingConfigSchema.default(() => EmbeddingConfigSchema.parse({})),
50
80
  converters: ConvertersConfigSchema.default(() => ConvertersConfigSchema.parse({})),
81
+ ingest: IngestConfigSchema.default(() => IngestConfigSchema.parse({})),
51
82
  llm: LlmConfigSchema.default(() => LlmConfigSchema.parse({})),
52
83
  downloaders: DownloadersConfigSchema.default(() => DownloadersConfigSchema.parse({})),
53
84
  daemon: DaemonConfigSchema.default(() => DaemonConfigSchema.parse({})),
@@ -57,7 +88,9 @@ export const MembotConfigSchema = z.object({
57
88
 
58
89
  export type MembotConfig = z.infer<typeof MembotConfigSchema>;
59
90
  export type ChunkerConfig = z.infer<typeof ChunkerConfigSchema>;
91
+ export type EmbeddingConfig = z.infer<typeof EmbeddingConfigSchema>;
60
92
  export type ConvertersConfig = z.infer<typeof ConvertersConfigSchema>;
93
+ export type IngestConfig = z.infer<typeof IngestConfigSchema>;
61
94
  export type LlmConfig = z.infer<typeof LlmConfigSchema>;
62
95
  export type DownloadersConfig = z.infer<typeof DownloadersConfigSchema>;
63
96
  export type LinearDownloaderConfig = z.infer<typeof LinearDownloaderConfigSchema>;
package/src/constants.ts CHANGED
@@ -28,6 +28,14 @@ export const EMBEDDING_DIMENSION = 384;
28
28
  */
29
29
  export const EMBEDDING_BATCH_SIZE = 16;
30
30
 
31
+ /**
32
+ * Hidden first-arg sentinel that re-execs the membot binary as an embed
33
+ * worker. The pool spawns `process.execPath <sentinel>` so the same compiled
34
+ * binary serves both the user-facing CLI and the worker subprocess; cli.ts
35
+ * checks this argv slot before commander sees it.
36
+ */
37
+ export const EMBED_WORKER_SENTINEL = "__embed_worker";
38
+
31
39
  export const DEFAULTS = {
32
40
  CHUNKER_MODE: "deterministic" as const,
33
41
  CHUNKER_TARGET_CHARS: 4_000,
@@ -47,6 +55,21 @@ export const DEFAULTS = {
47
55
  * embedded images doesn't fan out into hundreds of vision requests.
48
56
  */
49
57
  MAX_INLINE_IMAGE_CAPTIONS: 20,
58
+ /**
59
+ * Hard cap for `ingest.worker_concurrency`. The runtime default is
60
+ * `cpus - 1` so machines with very high core counts can scale, but we
61
+ * clamp here to keep concurrent Anthropic describe calls (and per-worker
62
+ * WASM embedder allocations — each pipeline holds the model weights) from
63
+ * spiraling out of control.
64
+ */
65
+ MAX_WORKERS: 8,
66
+ /**
67
+ * When true, describe() skips the LLM for self-describing markdown/text
68
+ * (a clear H1 within the first 40 lines of body) and uses the heading +
69
+ * 200-char prefix instead. Avoids paying for an LLM round-trip when the
70
+ * file already has a human-written description.
71
+ */
72
+ DESCRIBER_SKIP_WHEN_TITLED: true,
50
73
  } as const;
51
74
 
52
75
  export const FILES = {
package/src/context.ts CHANGED
@@ -1,3 +1,4 @@
1
+ import { cpus } from "node:os";
1
2
  import { join } from "node:path";
2
3
  import { loadConfig } from "./config/loader.ts";
3
4
  import type { MembotConfig } from "./config/schemas.ts";
@@ -25,11 +26,34 @@ export interface BuildContextOptions {
25
26
  noInteractive?: boolean;
26
27
  }
27
28
 
29
+ /**
30
+ * Resolve `config.embedding.workers` to a concrete worker count. Precedence:
31
+ * 1. An explicit numeric value in the config wins (user opt-in).
32
+ * 2. `MEMBOT_EMBEDDING_WORKERS` env var, if set to a positive integer.
33
+ * The test harness sets this to `1` so unit tests doing tiny writes
34
+ * don't pay the per-pool subprocess-spawn cost on slow CI runners.
35
+ * 3. Otherwise `null`/missing → `max(1, cpus()-1)`. The minus-one leaves
36
+ * a core for the parent process (DB writes, IO, the spinner).
37
+ */
38
+ export function resolveEmbeddingWorkers(configured: number | null | undefined): number {
39
+ if (typeof configured === "number" && configured >= 1) return configured;
40
+ const envOverride = process.env.MEMBOT_EMBEDDING_WORKERS;
41
+ if (envOverride) {
42
+ const n = Number(envOverride);
43
+ if (Number.isFinite(n) && n >= 1) return Math.floor(n);
44
+ }
45
+ return Math.max(1, cpus().length - 1);
46
+ }
47
+
28
48
  /**
29
49
  * Build the AppContext used by every operation handler. Initializes:
30
50
  * - output mode (TTY/JSON/color detection — frozen for the rest of the run)
31
51
  * - config (~/.membot/config.json with env overrides)
32
52
  * - DuckDB connection (~/.membot/index.duckdb), running migrations on first open
53
+ *
54
+ * The embedder worker pool is NOT created here — it's per-command,
55
+ * spawned by `withEmbedderPool()` at the top of bulk-embedding handlers
56
+ * (`add`, `refresh`, `write`) and disposed before they return.
33
57
  */
34
58
  export async function buildContext(options: BuildContextOptions = {}): Promise<AppContext> {
35
59
  setMode(detectMode({ json: options.json, verbose: options.verbose, noColor: options.noColor }));
@@ -0,0 +1,60 @@
1
+ /**
2
+ * Run `worker(item, index, workerId)` over `items` with at most `concurrency`
3
+ * workers in flight at a time. Each runner has a stable `workerId` in
4
+ * `[0, concurrency)` for the life of the call — useful when callers want to
5
+ * address per-worker UI slots. Results come back in input order. Worker
6
+ * rejections are caught and surfaced as `{ ok: false, error }` entries
7
+ * instead of aborting the batch; partial failures are normal during bulk
8
+ * ingest, and the caller decides how to render them per-entry.
9
+ */
10
+ export async function pMap<T, R>(
11
+ items: readonly T[],
12
+ concurrency: number,
13
+ worker: (item: T, index: number, workerId: number) => Promise<R>,
14
+ ): Promise<Array<{ ok: true; value: R } | { ok: false; error: unknown }>> {
15
+ const limit = Math.max(1, Math.floor(concurrency));
16
+ const results: Array<{ ok: true; value: R } | { ok: false; error: unknown }> = new Array(items.length);
17
+ let next = 0;
18
+
19
+ const runOne = async (workerId: number): Promise<void> => {
20
+ while (true) {
21
+ const i = next++;
22
+ if (i >= items.length) return;
23
+ const item = items[i] as T;
24
+ try {
25
+ const value = await worker(item, i, workerId);
26
+ results[i] = { ok: true, value };
27
+ } catch (error) {
28
+ results[i] = { ok: false, error };
29
+ }
30
+ }
31
+ };
32
+
33
+ const runners = Array.from({ length: Math.min(limit, items.length) }, (_, workerId) => runOne(workerId));
34
+ await Promise.all(runners);
35
+ return results;
36
+ }
37
+
38
+ /**
39
+ * Single-slot async mutex. `lock(fn)` runs `fn` with exclusive access and
40
+ * returns its result; queued callers run in FIFO order. Used to gate the
41
+ * persist phase of bulk ingest because all workers share a single DuckDB
42
+ * connection and DuckDB rejects nested `BEGIN` statements.
43
+ */
44
+ export class AsyncMutex {
45
+ private chain: Promise<void> = Promise.resolve();
46
+
47
+ async lock<T>(fn: () => Promise<T>): Promise<T> {
48
+ const prev = this.chain;
49
+ let release!: () => void;
50
+ this.chain = new Promise<void>((r) => {
51
+ release = r;
52
+ });
53
+ await prev;
54
+ try {
55
+ return await fn();
56
+ } finally {
57
+ release();
58
+ }
59
+ }
60
+ }
@@ -13,9 +13,12 @@ Rules:
13
13
 
14
14
  /**
15
15
  * Generate a one-paragraph description for the file's surrogate, used
16
- * as the `<description>` line in chunks.search_text. Falls back to a
17
- * deterministic heuristic when no API key is configured so the pipeline
18
- * still produces a non-empty description offline.
16
+ * as the `<description>` line in chunks.search_text. When the file is
17
+ * self-describing (markdown/text with a clear H1 in the opening) and the
18
+ * `describer_skip_when_titled` flag is on, returns the title-derived
19
+ * description without calling the LLM. Falls back to a deterministic
20
+ * heuristic when no API key is configured so the pipeline still produces
21
+ * a non-empty description offline.
19
22
  */
20
23
  export async function describe(
21
24
  logicalPath: string,
@@ -23,6 +26,13 @@ export async function describe(
23
26
  surrogate: string,
24
27
  llm: LlmConfig,
25
28
  ): Promise<string> {
29
+ if (llm.describer_skip_when_titled) {
30
+ const titled = tryTitleDescription(mimeType, surrogate);
31
+ if (titled) {
32
+ logger.debug(`describer: using title-derived description for ${logicalPath}`);
33
+ return titled;
34
+ }
35
+ }
26
36
  if (!llm.anthropic_api_key || llm.anthropic_api_key.trim() === "") {
27
37
  return deterministicDescription(logicalPath, mimeType, surrogate);
28
38
  }
@@ -52,6 +62,42 @@ export async function describe(
52
62
  }
53
63
  }
54
64
 
65
+ const TEXTUAL_MIMES = new Set(["application/json", "application/yaml", "application/x-yaml"]);
66
+
67
+ /**
68
+ * Returns a title-derived description when the surrogate is "self-describing"
69
+ * markdown/text — a clear H1 within the first 40 non-blank lines, of
70
+ * reasonable length. Returns null otherwise so the caller falls through to
71
+ * the LLM. Skipping the LLM for files that already have a human-written
72
+ * heading is the main throughput win during bulk ingest.
73
+ */
74
+ export function tryTitleDescription(mimeType: string, surrogate: string): string | null {
75
+ if (!mimeType.startsWith("text/") && !TEXTUAL_MIMES.has(mimeType)) return null;
76
+ const lines = surrogate.split(/\r?\n/);
77
+ let nonBlank = 0;
78
+ let heading: string | null = null;
79
+ for (const line of lines) {
80
+ const trimmed = line.trim();
81
+ if (!trimmed) continue;
82
+ nonBlank += 1;
83
+ if (nonBlank > 40) break;
84
+ const m = trimmed.match(/^#\s+(.+?)\s*#*$/);
85
+ if (m?.[1]) {
86
+ heading = m[1].trim();
87
+ break;
88
+ }
89
+ }
90
+ if (!heading) return null;
91
+ if (heading.length < 5 || heading.length > 200) return null;
92
+ const body = surrogate
93
+ .replace(/^#\s+.+$/m, "")
94
+ .trim()
95
+ .slice(0, 200)
96
+ .replace(/\s+/g, " ")
97
+ .trim();
98
+ return body ? `${heading} — ${body}` : heading;
99
+ }
100
+
55
101
  /**
56
102
  * Cheap, deterministic description used when the LLM isn't available.
57
103
  * For markdown/text it's the first heading + a 200-char prefix; for
@@ -0,0 +1,74 @@
1
+ import { createInterface } from "node:readline";
2
+ import { asHelpful, isHelpfulError } from "../errors.ts";
3
+ import { embed } from "./embedder.ts";
4
+
5
+ /**
6
+ * Wire-format message exchanged between the parent EmbedderPool and a worker
7
+ * subprocess over the worker's stdin/stdout. Each message is a single JSON
8
+ * object terminated by `\n` — a robust, language-agnostic encoding that
9
+ * survives partial reads on either end. There is no init/ready handshake;
10
+ * the worker lazy-loads the WASM pipeline on its first `embed` request.
11
+ */
12
+ interface EmbedRequest {
13
+ type: "embed";
14
+ id: number;
15
+ model: string;
16
+ texts: string[];
17
+ }
18
+
19
+ interface EmbedResponse {
20
+ type: "embed-response";
21
+ id: number;
22
+ vectors?: number[][];
23
+ error?: { kind: string; message: string; hint: string };
24
+ }
25
+
26
+ /** Atomic JSON-line write to stdout — the protocol channel back to the parent. */
27
+ function send(msg: EmbedResponse): void {
28
+ process.stdout.write(`${JSON.stringify(msg)}\n`);
29
+ }
30
+
31
+ /**
32
+ * Drive the embed-worker subprocess: read newline-delimited JSON requests
33
+ * from stdin, dispatch them to the local `embed()` (which uses the WASM
34
+ * pipeline), and write JSON responses to stdout. Diagnostics (logger output)
35
+ * go to stderr, so the protocol channel on stdout stays clean.
36
+ *
37
+ * The worker exits naturally when stdin closes (parent died or sent EOF).
38
+ */
39
+ export async function runEmbedWorker(): Promise<void> {
40
+ const rl = createInterface({ input: process.stdin, crlfDelay: Number.POSITIVE_INFINITY });
41
+ for await (const line of rl) {
42
+ const trimmed = line.trim();
43
+ if (!trimmed) continue;
44
+ let req: EmbedRequest;
45
+ try {
46
+ req = JSON.parse(trimmed) as EmbedRequest;
47
+ } catch {
48
+ // A malformed line on stdin is almost certainly a bug in the parent —
49
+ // log to stderr and keep the worker alive so the parent's other
50
+ // requests still get served.
51
+ process.stderr.write(`embed-worker: ignoring malformed stdin line: ${trimmed.slice(0, 200)}\n`);
52
+ continue;
53
+ }
54
+ if (req.type !== "embed") continue;
55
+ await handleEmbed(req);
56
+ }
57
+ }
58
+
59
+ /** Run one embed request and reply with vectors or a serialisable error. */
60
+ async function handleEmbed(req: EmbedRequest): Promise<void> {
61
+ try {
62
+ const vectors = await embed(req.texts, req.model);
63
+ send({ type: "embed-response", id: req.id, vectors });
64
+ } catch (err) {
65
+ const helpful = isHelpfulError(err)
66
+ ? err
67
+ : asHelpful(err, "in embed worker", "Inspect the parent process stderr for the full stack trace.");
68
+ send({
69
+ type: "embed-response",
70
+ id: req.id,
71
+ error: { kind: helpful.kind, message: helpful.message, hint: helpful.hint },
72
+ });
73
+ }
74
+ }