membot 0.7.0 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/skills/membot.md +3 -0
- package/.cursor/rules/membot.mdc +3 -0
- package/README.md +4 -0
- package/package.json +1 -1
- package/src/cli.ts +11 -0
- package/src/config/schemas.ts +14 -0
- package/src/constants.ts +8 -0
- package/src/context.ts +24 -0
- package/src/ingest/embed-worker.ts +74 -0
- package/src/ingest/embedder-pool.ts +391 -0
- package/src/ingest/embedder.ts +40 -2
- package/src/operations/add.ts +94 -86
- package/src/operations/index.ts +2 -0
- package/src/operations/refresh.ts +28 -20
- package/src/operations/stats.ts +342 -0
- package/src/operations/write.ts +48 -40
- package/src/refresh/scheduler.ts +22 -13
package/.claude/skills/membot.md
CHANGED
|
@@ -64,6 +64,7 @@ membot read <logical_path> # current markdown surrogate
|
|
|
64
64
|
membot read <logical_path> --bytes # original bytes (base64) — PDF/DOCX/image as ingested
|
|
65
65
|
membot read <logical_path> --version <ts> # historical snapshot
|
|
66
66
|
membot info <logical_path> # metadata only (no content)
|
|
67
|
+
membot stats [prefix] # whole-index summary; optional prefix scopes the aggregates
|
|
67
68
|
membot versions <logical_path> # every version, newest first
|
|
68
69
|
membot diff <logical_path> --a <ts> [--b <ts>] # unified diff between versions
|
|
69
70
|
```
|
|
@@ -129,6 +130,7 @@ Tombstones hide a path from `ls` / `tree` / `search` but `versions` and `read --
|
|
|
129
130
|
| `membot write <path> --content <txt>` | Write inline agent-authored markdown as a new version |
|
|
130
131
|
| `membot search <query>` | Hybrid search (semantic + BM25); add `--include-history` to search older versions |
|
|
131
132
|
| `membot info <path>` | Inspect metadata (source, downloader, refresh schedule, digests) without content |
|
|
133
|
+
| `membot stats [prefix]` | Summarize the index (file/version/chunk/blob counts, on-disk size, refresh health, mime/source/downloader breakdowns); optional prefix scopes the aggregates |
|
|
132
134
|
| `membot versions <path>` | List every version newest-first with version_id and change notes |
|
|
133
135
|
| `membot diff <path> --a <ts>` | Unified diff between two versions |
|
|
134
136
|
| `membot mv <old> <new>` | Rename a logical_path (history preserved) |
|
|
@@ -161,4 +163,5 @@ Tombstones hide a path from `ls` / `tree` / `search` but `versions` and `read --
|
|
|
161
163
|
|
|
162
164
|
- Data lives in `~/.membot/index.duckdb` (override via `MEMBOT_HOME`).
|
|
163
165
|
- Optional `ANTHROPIC_API_KEY` enables LLM fallback for messy/binary input. Without it, conversion degrades to deterministic native output.
|
|
166
|
+
- `embedding.workers` (config key) caps the per-command embed-worker subprocess pool spawned at the top of `add` / `refresh` / `write`. Default `null` resolves to `max(1, cpus()-1)`; set `1` to disable the pool and embed inline.
|
|
164
167
|
- Config file: `~/.membot/config.json` (see `membot --help` for the global flags).
|
package/.cursor/rules/membot.mdc
CHANGED
|
@@ -64,6 +64,7 @@ membot read <logical_path> # current markdown surrogate
|
|
|
64
64
|
membot read <logical_path> --bytes # original bytes (base64) — PDF/DOCX/image as ingested
|
|
65
65
|
membot read <logical_path> --version <ts> # historical snapshot
|
|
66
66
|
membot info <logical_path> # metadata only (no content)
|
|
67
|
+
membot stats [prefix] # whole-index summary; optional prefix scopes the aggregates
|
|
67
68
|
membot versions <logical_path> # every version, newest first
|
|
68
69
|
membot diff <logical_path> --a <ts> [--b <ts>] # unified diff between versions
|
|
69
70
|
```
|
|
@@ -129,6 +130,7 @@ Tombstones hide a path from `ls` / `tree` / `search` but `versions` and `read --
|
|
|
129
130
|
| `membot write <path> --content <txt>` | Write inline agent-authored markdown as a new version |
|
|
130
131
|
| `membot search <query>` | Hybrid search (semantic + BM25); add `--include-history` to search older versions |
|
|
131
132
|
| `membot info <path>` | Inspect metadata (source, downloader, refresh schedule, digests) without content |
|
|
133
|
+
| `membot stats [prefix]` | Summarize the index (file/version/chunk/blob counts, on-disk size, refresh health, mime/source/downloader breakdowns); optional prefix scopes the aggregates |
|
|
132
134
|
| `membot versions <path>` | List every version newest-first with version_id and change notes |
|
|
133
135
|
| `membot diff <path> --a <ts>` | Unified diff between two versions |
|
|
134
136
|
| `membot mv <old> <new>` | Rename a logical_path (history preserved) |
|
|
@@ -161,4 +163,5 @@ Tombstones hide a path from `ls` / `tree` / `search` but `versions` and `read --
|
|
|
161
163
|
|
|
162
164
|
- Data lives in `~/.membot/index.duckdb` (override via `MEMBOT_HOME`).
|
|
163
165
|
- Optional `ANTHROPIC_API_KEY` enables LLM fallback for messy/binary input. Without it, conversion degrades to deterministic native output.
|
|
166
|
+
- `embedding.workers` (config key) caps the per-command embed-worker subprocess pool spawned at the top of `add` / `refresh` / `write`. Default `null` resolves to `max(1, cpus()-1)`; set `1` to disable the pool and embed inline.
|
|
164
167
|
- Config file: `~/.membot/config.json` (see `membot --help` for the global flags).
|
package/README.md
CHANGED
|
@@ -83,6 +83,7 @@ The skill files describe the discover → ingest → search → read → write w
|
|
|
83
83
|
| `membot read <path>` | Read the markdown surrogate (or `--bytes` for original bytes, base64) |
|
|
84
84
|
| `membot search <query>` | Hybrid search (semantic + BM25); `--include-history` searches older versions |
|
|
85
85
|
| `membot info <path>` | Inspect metadata (source, fetcher, schedule, digests) without content |
|
|
86
|
+
| `membot stats [prefix]` | Summarize the index (file/version/chunk/blob counts, on-disk size, refresh health, mime/source/downloader breakdowns); optional prefix scopes the aggregates |
|
|
86
87
|
| `membot versions <path>` | List every version newest-first |
|
|
87
88
|
| `membot diff <path> <a> [b]` | Unified diff between two versions |
|
|
88
89
|
| `membot write <path>` | Write inline agent-authored markdown as a new version |
|
|
@@ -136,12 +137,15 @@ Add `--watch` (and optional `--tick <sec>`) to also run the refresh daemon, whic
|
|
|
136
137
|
membot config list # show every value (secrets masked)
|
|
137
138
|
membot config set llm.anthropic_api_key sk-ant-... # enable LLM-fallback paths
|
|
138
139
|
membot config set chunker.target_chars 800 # tweak any nested value
|
|
140
|
+
membot config set embedding.workers 4 # cap parallel embed workers
|
|
139
141
|
membot config set converters.max_inline_image_captions 50 # raise per-doc cap on vision captions for embedded images
|
|
140
142
|
membot config get llm.anthropic_api_key --show-secrets # reveal the masked key
|
|
141
143
|
membot config unset chunker.target_chars # back to schema default
|
|
142
144
|
membot config path # print the absolute config path
|
|
143
145
|
```
|
|
144
146
|
|
|
147
|
+
**Parallel embedding:** `embedding.workers` (default `null` → `max(1, cpus()-1)`) controls how many subprocess workers fan out the WASM embedding work. The pool is **per-command** — spawned at the start of `add` / `refresh` / `write` and killed before the command returns, so membot doesn't keep idle workers around between invocations. Each worker loads its own ~50MB copy of the model, so on RAM-constrained machines drop it to a small fixed number (e.g. `4`); set `1` to disable the pool entirely and embed inline.
|
|
148
|
+
|
|
145
149
|
Values are written with file mode `0600`. `ANTHROPIC_API_KEY` set in the environment still wins on read, so existing env-var setups keep working.
|
|
146
150
|
- **Browser session:** `~/.membot/auth/browser-profile/` (Playwright persistent profile — cookies, localStorage, IndexedDB). Captured by `membot login`; cookie-based downloaders (Google) reuse it on every fetch. Delete the directory to force a fresh login.
|
|
147
151
|
- **API keys:** stored under `downloaders.<service>.api_key` in `~/.membot/config.json`. Read by API-based downloaders (GitHub, Linear).
|
package/package.json
CHANGED
package/src/cli.ts
CHANGED
|
@@ -10,12 +10,23 @@ import { registerReindexCommand } from "./commands/reindex.ts";
|
|
|
10
10
|
import { registerServeCommand } from "./commands/serve.ts";
|
|
11
11
|
import { registerSkillCommand } from "./commands/skill.ts";
|
|
12
12
|
import { registerUpgradeCommand } from "./commands/upgrade.ts";
|
|
13
|
+
import { EMBED_WORKER_SENTINEL } from "./constants.ts";
|
|
13
14
|
import type { BuildContextOptions } from "./context.ts";
|
|
15
|
+
import { runEmbedWorker } from "./ingest/embed-worker.ts";
|
|
14
16
|
import { mountAsCommanderCommand } from "./mount/commander.ts";
|
|
15
17
|
import { OPERATIONS } from "./operations/index.ts";
|
|
16
18
|
import { logger } from "./output/logger.ts";
|
|
17
19
|
import { maybeCheckForUpdate } from "./update/background.ts";
|
|
18
20
|
|
|
21
|
+
// Hidden worker mode: the EmbedderPool re-execs this binary with the sentinel
|
|
22
|
+
// as argv[2] (or argv[1] when `bun run src/cli.ts <sentinel>` is invoked
|
|
23
|
+
// directly during tests). We bypass commander entirely and run the worker
|
|
24
|
+
// stdin/stdout protocol loop instead.
|
|
25
|
+
if (process.argv.includes(EMBED_WORKER_SENTINEL)) {
|
|
26
|
+
await runEmbedWorker();
|
|
27
|
+
process.exit(0);
|
|
28
|
+
}
|
|
29
|
+
|
|
19
30
|
program
|
|
20
31
|
.name("membot")
|
|
21
32
|
.description("Versioned context store with hybrid search for AI agents. Stdio + HTTP MCP server and CLI.")
|
package/src/config/schemas.ts
CHANGED
|
@@ -23,6 +23,18 @@ export const DaemonConfigSchema = z.object({
|
|
|
23
23
|
tick_interval_sec: z.number().int().positive().default(DEFAULTS.DAEMON_TICK_SEC),
|
|
24
24
|
});
|
|
25
25
|
|
|
26
|
+
/**
|
|
27
|
+
* Embedding parallelism. `workers = null` (the default) resolves to
|
|
28
|
+
* `max(1, cpus()-1)` at context-build time so the pool grows with the host
|
|
29
|
+
* machine. Setting `workers = 1` disables the subprocess pool entirely
|
|
30
|
+
* and runs embedding inline in the parent (the original single-thread
|
|
31
|
+
* behaviour). Each worker loads its own copy of the WASM model
|
|
32
|
+
* (~50MB resident), so cap this on RAM-constrained machines.
|
|
33
|
+
*/
|
|
34
|
+
export const EmbeddingConfigSchema = z.object({
|
|
35
|
+
workers: z.number().int().min(1).nullable().default(null),
|
|
36
|
+
});
|
|
37
|
+
|
|
26
38
|
export const LinearDownloaderConfigSchema = z.object({
|
|
27
39
|
api_key: z.string().meta({ secret: true }).default(""),
|
|
28
40
|
});
|
|
@@ -47,6 +59,7 @@ export const MembotConfigSchema = z.object({
|
|
|
47
59
|
embedding_model: z.string().default(EMBEDDING_MODEL),
|
|
48
60
|
embedding_dimension: z.number().int().positive().default(EMBEDDING_DIMENSION),
|
|
49
61
|
chunker: ChunkerConfigSchema.default(() => ChunkerConfigSchema.parse({})),
|
|
62
|
+
embedding: EmbeddingConfigSchema.default(() => EmbeddingConfigSchema.parse({})),
|
|
50
63
|
converters: ConvertersConfigSchema.default(() => ConvertersConfigSchema.parse({})),
|
|
51
64
|
llm: LlmConfigSchema.default(() => LlmConfigSchema.parse({})),
|
|
52
65
|
downloaders: DownloadersConfigSchema.default(() => DownloadersConfigSchema.parse({})),
|
|
@@ -57,6 +70,7 @@ export const MembotConfigSchema = z.object({
|
|
|
57
70
|
|
|
58
71
|
export type MembotConfig = z.infer<typeof MembotConfigSchema>;
|
|
59
72
|
export type ChunkerConfig = z.infer<typeof ChunkerConfigSchema>;
|
|
73
|
+
export type EmbeddingConfig = z.infer<typeof EmbeddingConfigSchema>;
|
|
60
74
|
export type ConvertersConfig = z.infer<typeof ConvertersConfigSchema>;
|
|
61
75
|
export type LlmConfig = z.infer<typeof LlmConfigSchema>;
|
|
62
76
|
export type DownloadersConfig = z.infer<typeof DownloadersConfigSchema>;
|
package/src/constants.ts
CHANGED
|
@@ -28,6 +28,14 @@ export const EMBEDDING_DIMENSION = 384;
|
|
|
28
28
|
*/
|
|
29
29
|
export const EMBEDDING_BATCH_SIZE = 16;
|
|
30
30
|
|
|
31
|
+
/**
|
|
32
|
+
* Hidden first-arg sentinel that re-execs the membot binary as an embed
|
|
33
|
+
* worker. The pool spawns `process.execPath <sentinel>` so the same compiled
|
|
34
|
+
* binary serves both the user-facing CLI and the worker subprocess; cli.ts
|
|
35
|
+
* checks argv for it before commander sees it.
|
|
36
|
+
*/
|
|
37
|
+
export const EMBED_WORKER_SENTINEL = "__embed_worker";
|
|
38
|
+
|
|
31
39
|
export const DEFAULTS = {
|
|
32
40
|
CHUNKER_MODE: "deterministic" as const,
|
|
33
41
|
CHUNKER_TARGET_CHARS: 4_000,
|
package/src/context.ts
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import { cpus } from "node:os";
|
|
1
2
|
import { join } from "node:path";
|
|
2
3
|
import { loadConfig } from "./config/loader.ts";
|
|
3
4
|
import type { MembotConfig } from "./config/schemas.ts";
|
|
@@ -25,11 +26,34 @@ export interface BuildContextOptions {
|
|
|
25
26
|
noInteractive?: boolean;
|
|
26
27
|
}
|
|
27
28
|
|
|
29
|
+
/**
|
|
30
|
+
* Resolve `config.embedding.workers` to a concrete worker count. Precedence:
|
|
31
|
+
* 1. An explicit numeric value in the config wins (user opt-in).
|
|
32
|
+
* 2. `MEMBOT_EMBEDDING_WORKERS` env var, if set to a positive integer.
|
|
33
|
+
* The test harness sets this to `1` so unit tests doing tiny writes
|
|
34
|
+
* don't pay the per-pool subprocess-spawn cost on slow CI runners.
|
|
35
|
+
* 3. Otherwise `null`/missing → `max(1, cpus()-1)`. The minus-one leaves
|
|
36
|
+
* a core for the parent process (DB writes, IO, the spinner).
|
|
37
|
+
*/
|
|
38
|
+
export function resolveEmbeddingWorkers(configured: number | null | undefined): number {
|
|
39
|
+
if (typeof configured === "number" && configured >= 1) return configured;
|
|
40
|
+
const envOverride = process.env.MEMBOT_EMBEDDING_WORKERS;
|
|
41
|
+
if (envOverride) {
|
|
42
|
+
const n = Number(envOverride);
|
|
43
|
+
if (Number.isFinite(n) && n >= 1) return Math.floor(n);
|
|
44
|
+
}
|
|
45
|
+
return Math.max(1, cpus().length - 1);
|
|
46
|
+
}
|
|
47
|
+
|
|
28
48
|
/**
|
|
29
49
|
* Build the AppContext used by every operation handler. Initializes:
|
|
30
50
|
* - output mode (TTY/JSON/color detection — frozen for the rest of the run)
|
|
31
51
|
* - config (~/.membot/config.json with env overrides)
|
|
32
52
|
* - DuckDB connection (~/.membot/index.duckdb), running migrations on first open
|
|
53
|
+
*
|
|
54
|
+
* The embedder worker pool is NOT created here — it's per-command,
|
|
55
|
+
* spawned by `withEmbedderPool()` at the top of bulk-embedding handlers
|
|
56
|
+
* (`add`, `refresh`, `write`) and disposed before they return.
|
|
33
57
|
*/
|
|
34
58
|
export async function buildContext(options: BuildContextOptions = {}): Promise<AppContext> {
|
|
35
59
|
setMode(detectMode({ json: options.json, verbose: options.verbose, noColor: options.noColor }));
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
import { createInterface } from "node:readline";
|
|
2
|
+
import { asHelpful, isHelpfulError } from "../errors.ts";
|
|
3
|
+
import { embed } from "./embedder.ts";
|
|
4
|
+
|
|
5
|
+
/**
|
|
6
|
+
* Wire-format message exchanged between the parent EmbedderPool and a worker
|
|
7
|
+
* subprocess over the worker's stdin/stdout. Each message is a single JSON
|
|
8
|
+
* object terminated by `\n` — a robust, language-agnostic encoding that
|
|
9
|
+
* survives partial reads on either end. There is no init/ready handshake;
|
|
10
|
+
* the worker lazy-loads the WASM pipeline on its first `embed` request.
|
|
11
|
+
*/
|
|
12
|
+
interface EmbedRequest {
|
|
13
|
+
type: "embed";
|
|
14
|
+
id: number;
|
|
15
|
+
model: string;
|
|
16
|
+
texts: string[];
|
|
17
|
+
}
|
|
18
|
+
|
|
19
|
+
interface EmbedResponse {
|
|
20
|
+
type: "embed-response";
|
|
21
|
+
id: number;
|
|
22
|
+
vectors?: number[][];
|
|
23
|
+
error?: { kind: string; message: string; hint: string };
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
/** Write one newline-terminated JSON message to stdout — the protocol channel back to the parent. */
|
|
27
|
+
function send(msg: EmbedResponse): void {
|
|
28
|
+
process.stdout.write(`${JSON.stringify(msg)}\n`);
|
|
29
|
+
}
|
|
30
|
+
|
|
31
|
+
/**
|
|
32
|
+
* Drive the embed-worker subprocess: read newline-delimited JSON requests
|
|
33
|
+
* from stdin, dispatch them to the local `embed()` (which uses the WASM
|
|
34
|
+
* pipeline), and write JSON responses to stdout. Diagnostics (logger output)
|
|
35
|
+
* go to stderr, so the protocol channel on stdout stays clean.
|
|
36
|
+
*
|
|
37
|
+
* The worker exits naturally when stdin closes (parent died or sent EOF).
|
|
38
|
+
*/
|
|
39
|
+
export async function runEmbedWorker(): Promise<void> {
|
|
40
|
+
const rl = createInterface({ input: process.stdin, crlfDelay: Number.POSITIVE_INFINITY });
|
|
41
|
+
for await (const line of rl) {
|
|
42
|
+
const trimmed = line.trim();
|
|
43
|
+
if (!trimmed) continue;
|
|
44
|
+
let req: EmbedRequest;
|
|
45
|
+
try {
|
|
46
|
+
req = JSON.parse(trimmed) as EmbedRequest;
|
|
47
|
+
} catch {
|
|
48
|
+
// A malformed line on stdin is almost certainly a bug in the parent —
|
|
49
|
+
// log to stderr and keep the worker alive so the parent's other
|
|
50
|
+
// requests still get served.
|
|
51
|
+
process.stderr.write(`embed-worker: ignoring malformed stdin line: ${trimmed.slice(0, 200)}\n`);
|
|
52
|
+
continue;
|
|
53
|
+
}
|
|
54
|
+
if (req.type !== "embed") continue;
|
|
55
|
+
await handleEmbed(req);
|
|
56
|
+
}
|
|
57
|
+
}
|
|
58
|
+
|
|
59
|
+
/** Run one embed request and reply with vectors or a serialisable error. */
|
|
60
|
+
async function handleEmbed(req: EmbedRequest): Promise<void> {
|
|
61
|
+
try {
|
|
62
|
+
const vectors = await embed(req.texts, req.model);
|
|
63
|
+
send({ type: "embed-response", id: req.id, vectors });
|
|
64
|
+
} catch (err) {
|
|
65
|
+
const helpful = isHelpfulError(err)
|
|
66
|
+
? err
|
|
67
|
+
: asHelpful(err, "in embed worker", "Inspect the parent process stderr for the full stack trace.");
|
|
68
|
+
send({
|
|
69
|
+
type: "embed-response",
|
|
70
|
+
id: req.id,
|
|
71
|
+
error: { kind: helpful.kind, message: helpful.message, hint: helpful.hint },
|
|
72
|
+
});
|
|
73
|
+
}
|
|
74
|
+
}
|
|
@@ -0,0 +1,391 @@
|
|
|
1
|
+
import type { Subprocess } from "bun";
|
|
2
|
+
import { EMBED_WORKER_SENTINEL, EMBEDDING_BATCH_SIZE, EMBEDDING_MODEL } from "../constants.ts";
|
|
3
|
+
import { asHelpful, HelpfulError } from "../errors.ts";
|
|
4
|
+
import { logger } from "../output/logger.ts";
|
|
5
|
+
import { type EmbedOptions, setEmbedderPool } from "./embedder.ts";
|
|
6
|
+
|
|
7
|
+
interface PendingRequest {
|
|
8
|
+
id: number;
|
|
9
|
+
resolve: (vectors: number[][]) => void;
|
|
10
|
+
reject: (err: unknown) => void;
|
|
11
|
+
}
|
|
12
|
+
|
|
13
|
+
interface Worker {
|
|
14
|
+
proc: Subprocess<"pipe", "pipe", "inherit">;
|
|
15
|
+
busy: boolean;
|
|
16
|
+
pending: PendingRequest | null;
|
|
17
|
+
exited: boolean;
|
|
18
|
+
}
|
|
19
|
+
|
|
20
|
+
interface EmbedResponseLine {
|
|
21
|
+
type: "embed-response";
|
|
22
|
+
id: number;
|
|
23
|
+
vectors?: number[][];
|
|
24
|
+
error?: { kind: string; message: string; hint: string };
|
|
25
|
+
}
|
|
26
|
+
|
|
27
|
+
/**
|
|
28
|
+
* A short-lived pool of embed-worker subprocesses. Created at the start of
|
|
29
|
+
* a bulk-embedding command (`add` / `refresh` / `write`), kept alive only
|
|
30
|
+
* for the duration of that command, and disposed before the command
|
|
31
|
+
* returns. Workers are spawned up front but don't pre-load the WASM pipeline;
|
|
32
|
+
* the model is loaded on-demand inside the worker the first time a batch
|
|
33
|
+
* arrives. Each worker holds its own ~50MB WASM heap, so the parallelism
|
|
34
|
+
* comes for free in CPU but costs proportional RAM while the command runs.
|
|
35
|
+
*
|
|
36
|
+
* The pool is plugged in via `setEmbedderPool()` so the existing `embed()`
|
|
37
|
+
* call sites in the ingest pipeline transparently fan out without code
|
|
38
|
+
* changes.
|
|
39
|
+
*/
|
|
40
|
+
export class EmbedderPool {
|
|
41
|
+
private readonly workerCount: number;
|
|
42
|
+
private readonly model: string;
|
|
43
|
+
private workers: Worker[] = [];
|
|
44
|
+
private acquireQueue: Array<(w: Worker) => void> = [];
|
|
45
|
+
private nextRequestId = 1;
|
|
46
|
+
private spawned = false;
|
|
47
|
+
private disposed = false;
|
|
48
|
+
|
|
49
|
+
constructor(workerCount: number, model: string = EMBEDDING_MODEL) {
|
|
50
|
+
if (workerCount < 1 || !Number.isInteger(workerCount)) {
|
|
51
|
+
throw new HelpfulError({
|
|
52
|
+
kind: "input_error",
|
|
53
|
+
message: `EmbedderPool worker count must be a positive integer, got ${workerCount}`,
|
|
54
|
+
hint: "Set config.embedding.workers to a positive integer (or null for auto = cpus-1).",
|
|
55
|
+
});
|
|
56
|
+
}
|
|
57
|
+
this.workerCount = workerCount;
|
|
58
|
+
this.model = model;
|
|
59
|
+
}
|
|
60
|
+
|
|
61
|
+
/** Number of worker subprocesses this pool manages. */
|
|
62
|
+
get size(): number {
|
|
63
|
+
return this.workerCount;
|
|
64
|
+
}
|
|
65
|
+
|
|
66
|
+
/**
|
|
67
|
+
* Spawn the worker subprocesses. Returns immediately — workers load the
|
|
68
|
+
* WASM model lazily when the first batch arrives, so this is a cheap
|
|
69
|
+
* operation. The first batch a worker receives pays the ~hundreds-of-ms
|
|
70
|
+
* load cost; subsequent batches in the same worker are fast.
|
|
71
|
+
*/
|
|
72
|
+
spawn(): void {
|
|
73
|
+
if (this.spawned) return;
|
|
74
|
+
this.spawned = true;
|
|
75
|
+
logger.info(`embedder-pool: spawning ${this.workerCount} workers (model=${this.model})`);
|
|
76
|
+
for (let i = 0; i < this.workerCount; i++) {
|
|
77
|
+
this.workers.push(this.spawnWorker(i));
|
|
78
|
+
}
|
|
79
|
+
}
|
|
80
|
+
|
|
81
|
+
/**
|
|
82
|
+
* Embed `texts` using the worker pool. Splits into batches of
|
|
83
|
+
* `EMBEDDING_BATCH_SIZE`, dispatches each batch to whichever worker is
|
|
84
|
+
* free, and reassembles vectors in original order. `opts.onProgress` is
|
|
85
|
+
* called once per completed batch with `(done, total)` chunk counts.
|
|
86
|
+
*/
|
|
87
|
+
async embed(texts: string[], model?: string, opts: EmbedOptions = {}): Promise<number[][]> {
|
|
88
|
+
if (this.disposed) {
|
|
89
|
+
throw new HelpfulError({
|
|
90
|
+
kind: "internal_error",
|
|
91
|
+
message: "EmbedderPool: embed() called after dispose()",
|
|
92
|
+
hint: "The pool is per-command — wrap your work in `withEmbedderPool()` so a fresh pool is created.",
|
|
93
|
+
});
|
|
94
|
+
}
|
|
95
|
+
if (!this.spawned) this.spawn();
|
|
96
|
+
if (texts.length === 0) return [];
|
|
97
|
+
|
|
98
|
+
const targetModel = model ?? this.model;
|
|
99
|
+
const out = new Array<number[]>(texts.length);
|
|
100
|
+
let done = 0;
|
|
101
|
+
|
|
102
|
+
const batches: Array<{ start: number; texts: string[] }> = [];
|
|
103
|
+
for (let i = 0; i < texts.length; i += EMBEDDING_BATCH_SIZE) {
|
|
104
|
+
batches.push({ start: i, texts: texts.slice(i, i + EMBEDDING_BATCH_SIZE) });
|
|
105
|
+
}
|
|
106
|
+
|
|
107
|
+
await Promise.all(
|
|
108
|
+
batches.map(async (batch) => {
|
|
109
|
+
const vectors = await this.dispatchBatch(batch.texts, targetModel);
|
|
110
|
+
for (let i = 0; i < vectors.length; i++) {
|
|
111
|
+
const vec = vectors[i];
|
|
112
|
+
if (!vec) {
|
|
113
|
+
throw new HelpfulError({
|
|
114
|
+
kind: "internal_error",
|
|
115
|
+
message: `embedder-pool: worker returned undefined vector at batch index ${i}`,
|
|
116
|
+
hint: "Re-run with --verbose; check the worker stderr for a transformers/WASM error.",
|
|
117
|
+
});
|
|
118
|
+
}
|
|
119
|
+
out[batch.start + i] = vec;
|
|
120
|
+
}
|
|
121
|
+
done += vectors.length;
|
|
122
|
+
opts.onProgress?.(done, texts.length);
|
|
123
|
+
}),
|
|
124
|
+
);
|
|
125
|
+
return out;
|
|
126
|
+
}
|
|
127
|
+
|
|
128
|
+
/**
|
|
129
|
+
* Tear down every worker subprocess. Idempotent. Pending requests are
|
|
130
|
+
* rejected so any in-flight `embed()` callers see a HelpfulError instead
|
|
131
|
+
* of hanging forever.
|
|
132
|
+
*/
|
|
133
|
+
async dispose(): Promise<void> {
|
|
134
|
+
if (this.disposed) return;
|
|
135
|
+
this.disposed = true;
|
|
136
|
+
if (this.spawned) {
|
|
137
|
+
logger.info(`embedder-pool: tearing down ${this.workers.length} workers`);
|
|
138
|
+
}
|
|
139
|
+
const disposeError = () =>
|
|
140
|
+
new HelpfulError({
|
|
141
|
+
kind: "internal_error",
|
|
142
|
+
message: "EmbedderPool disposed while a request was in flight",
|
|
143
|
+
hint: "This is usually fine on shutdown; if it appears mid-run, file an issue with the preceding stderr.",
|
|
144
|
+
});
|
|
145
|
+
for (const w of this.workers) {
|
|
146
|
+
if (w.pending) {
|
|
147
|
+
w.pending.reject(disposeError());
|
|
148
|
+
w.pending = null;
|
|
149
|
+
}
|
|
150
|
+
try {
|
|
151
|
+
w.proc.stdin.end();
|
|
152
|
+
} catch {
|
|
153
|
+
// stdin may already be closed; ignore.
|
|
154
|
+
}
|
|
155
|
+
try {
|
|
156
|
+
w.proc.kill();
|
|
157
|
+
} catch {
|
|
158
|
+
// process may already be dead; ignore.
|
|
159
|
+
}
|
|
160
|
+
}
|
|
161
|
+
// Anyone waiting on acquire() will never get a worker — release them.
|
|
162
|
+
const queue = this.acquireQueue;
|
|
163
|
+
this.acquireQueue = [];
|
|
164
|
+
for (const resolver of queue) {
|
|
165
|
+
// Fabricate an "already exited" worker so dispatchBatch's disposed
|
|
166
|
+
// guard fires and rejects with a clear error.
|
|
167
|
+
resolver({
|
|
168
|
+
proc: null as unknown as Subprocess<"pipe", "pipe", "inherit">,
|
|
169
|
+
busy: true,
|
|
170
|
+
pending: null,
|
|
171
|
+
exited: true,
|
|
172
|
+
});
|
|
173
|
+
}
|
|
174
|
+
await Promise.all(
|
|
175
|
+
this.workers.map(async (w) => {
|
|
176
|
+
try {
|
|
177
|
+
await w.proc.exited;
|
|
178
|
+
} catch {
|
|
179
|
+
// best effort
|
|
180
|
+
}
|
|
181
|
+
}),
|
|
182
|
+
);
|
|
183
|
+
this.workers = [];
|
|
184
|
+
}
|
|
185
|
+
|
|
186
|
+
/** Send one batch to a free worker and resolve with its vectors. */
|
|
187
|
+
private async dispatchBatch(texts: string[], model: string): Promise<number[][]> {
|
|
188
|
+
const worker = await this.acquire();
|
|
189
|
+
try {
|
|
190
|
+
if (this.disposed || worker.exited) {
|
|
191
|
+
throw new HelpfulError({
|
|
192
|
+
kind: "internal_error",
|
|
193
|
+
message: "EmbedderPool disposed before batch could be dispatched",
|
|
194
|
+
hint: "The pool was torn down mid-call — wrap your work in `withEmbedderPool()` for a fresh per-command pool.",
|
|
195
|
+
});
|
|
196
|
+
}
|
|
197
|
+
const id = this.nextRequestId++;
|
|
198
|
+
return await new Promise<number[][]>((resolve, reject) => {
|
|
199
|
+
worker.pending = { id, resolve, reject };
|
|
200
|
+
try {
|
|
201
|
+
worker.proc.stdin.write(`${JSON.stringify({ type: "embed", id, model, texts })}\n`);
|
|
202
|
+
worker.proc.stdin.flush();
|
|
203
|
+
} catch (err) {
|
|
204
|
+
worker.pending = null;
|
|
205
|
+
reject(
|
|
206
|
+
asHelpful(
|
|
207
|
+
err,
|
|
208
|
+
"while writing to embed worker stdin",
|
|
209
|
+
"The worker subprocess likely crashed. Set config.embedding.workers=1 to bypass the pool while debugging.",
|
|
210
|
+
),
|
|
211
|
+
);
|
|
212
|
+
}
|
|
213
|
+
});
|
|
214
|
+
} finally {
|
|
215
|
+
this.release(worker);
|
|
216
|
+
}
|
|
217
|
+
}
|
|
218
|
+
|
|
219
|
+
/** Wait for a free worker; first-come, first-served via the acquireQueue. */
|
|
220
|
+
private acquire(): Promise<Worker> {
|
|
221
|
+
const free = this.workers.find((w) => !w.exited && !w.busy);
|
|
222
|
+
if (free) {
|
|
223
|
+
free.busy = true;
|
|
224
|
+
return Promise.resolve(free);
|
|
225
|
+
}
|
|
226
|
+
return new Promise((resolve) => {
|
|
227
|
+
this.acquireQueue.push((w) => {
|
|
228
|
+
w.busy = true;
|
|
229
|
+
resolve(w);
|
|
230
|
+
});
|
|
231
|
+
});
|
|
232
|
+
}
|
|
233
|
+
|
|
234
|
+
/**
|
|
235
|
+
* Hand a finished worker to the next waiter, or mark it idle. Called from
|
|
236
|
+
* `dispatchBatch`'s finally block so it runs whether the request resolved
|
|
237
|
+
* or rejected.
|
|
238
|
+
*/
|
|
239
|
+
private release(w: Worker): void {
|
|
240
|
+
w.pending = null;
|
|
241
|
+
w.busy = false;
|
|
242
|
+
if (w.exited) return;
|
|
243
|
+
const next = this.acquireQueue.shift();
|
|
244
|
+
if (next) next(w);
|
|
245
|
+
}
|
|
246
|
+
|
|
247
|
+
/**
|
|
248
|
+
* Build the spawn command for one worker. Two regimes:
|
|
249
|
+
* - Compiled binary (`./dist/membot`): `process.execPath` is the membot
|
|
250
|
+
* binary itself, so we just hand it the sentinel arg and the early
|
|
251
|
+
* branch in `cli.ts` takes over before commander sees it.
|
|
252
|
+
* - Bun dev / `bun add -g`: `process.execPath` is the `bun` binary; we
|
|
253
|
+
* must point it at `cli.ts` explicitly. Resolve the path relative to
|
|
254
|
+
* this module so it survives whatever working directory the user
|
|
255
|
+
* invoked membot from.
|
|
256
|
+
*/
|
|
257
|
+
private resolveSpawnCommand(): string[] {
|
|
258
|
+
const exec = process.execPath;
|
|
259
|
+
const isBun = /[\\/]bunx?(\.exe)?$/.test(exec);
|
|
260
|
+
if (!isBun) {
|
|
261
|
+
return [exec, EMBED_WORKER_SENTINEL];
|
|
262
|
+
}
|
|
263
|
+
const cliPath = new URL("../cli.ts", import.meta.url).pathname;
|
|
264
|
+
return [exec, cliPath, EMBED_WORKER_SENTINEL];
|
|
265
|
+
}
|
|
266
|
+
|
|
267
|
+
/**
|
|
268
|
+
* Spawn one worker subprocess and start its stdout reader. The worker
|
|
269
|
+
* lazy-loads the WASM pipeline on its first `embed` request, so spawn is
|
|
270
|
+
* cheap (no init handshake, no preload).
|
|
271
|
+
*/
|
|
272
|
+
private spawnWorker(index: number): Worker {
|
|
273
|
+
const proc = Bun.spawn(this.resolveSpawnCommand(), {
|
|
274
|
+
stdio: ["pipe", "pipe", "inherit"],
|
|
275
|
+
}) as Subprocess<"pipe", "pipe", "inherit">;
|
|
276
|
+
|
|
277
|
+
const worker: Worker = {
|
|
278
|
+
proc,
|
|
279
|
+
busy: false,
|
|
280
|
+
pending: null,
|
|
281
|
+
exited: false,
|
|
282
|
+
};
|
|
283
|
+
|
|
284
|
+
// Watch for premature exit and surface it to any in-flight request.
|
|
285
|
+
void proc.exited
|
|
286
|
+
.then((code) => {
|
|
287
|
+
worker.exited = true;
|
|
288
|
+
if (worker.pending) {
|
|
289
|
+
worker.pending.reject(
|
|
290
|
+
new HelpfulError({
|
|
291
|
+
kind: "internal_error",
|
|
292
|
+
message: `embed worker ${index} exited (code=${code}) with a request in flight`,
|
|
293
|
+
hint: "Run with --verbose; the worker's stderr was inherited and should explain the crash.",
|
|
294
|
+
}),
|
|
295
|
+
);
|
|
296
|
+
worker.pending = null;
|
|
297
|
+
}
|
|
298
|
+
})
|
|
299
|
+
.catch(() => {
|
|
300
|
+
// Bun's exited promise shouldn't reject, but guard anyway.
|
|
301
|
+
});
|
|
302
|
+
|
|
303
|
+
void this.readWorker(worker, index);
|
|
304
|
+
return worker;
|
|
305
|
+
}
|
|
306
|
+
|
|
307
|
+
/**
|
|
308
|
+
* Newline-delimited JSON reader for one worker's stdout. Matches every
|
|
309
|
+
* `{type:"embed-response", id}` to the worker's single pending request (the `id` is not compared).
|
|
310
|
+
*/
|
|
311
|
+
private async readWorker(worker: Worker, index: number): Promise<void> {
|
|
312
|
+
const reader = worker.proc.stdout.getReader();
|
|
313
|
+
const decoder = new TextDecoder();
|
|
314
|
+
let buffer = "";
|
|
315
|
+
try {
|
|
316
|
+
while (true) {
|
|
317
|
+
const { done, value } = await reader.read();
|
|
318
|
+
if (done) break;
|
|
319
|
+
buffer += decoder.decode(value, { stream: true });
|
|
320
|
+
while (true) {
|
|
321
|
+
const nl = buffer.indexOf("\n");
|
|
322
|
+
if (nl === -1) break;
|
|
323
|
+
const line = buffer.slice(0, nl);
|
|
324
|
+
buffer = buffer.slice(nl + 1);
|
|
325
|
+
if (!line.trim()) continue;
|
|
326
|
+
this.handleWorkerLine(worker, index, line);
|
|
327
|
+
}
|
|
328
|
+
}
|
|
329
|
+
} catch (err) {
|
|
330
|
+
logger.debug(`embedder-pool: worker ${index} stdout read failed: ${(err as Error).message}`);
|
|
331
|
+
}
|
|
332
|
+
}
|
|
333
|
+
|
|
334
|
+
/** Parse + dispatch one JSON line emitted by a worker. */
|
|
335
|
+
private handleWorkerLine(worker: Worker, index: number, line: string): void {
|
|
336
|
+
let parsed: EmbedResponseLine;
|
|
337
|
+
try {
|
|
338
|
+
parsed = JSON.parse(line) as EmbedResponseLine;
|
|
339
|
+
} catch {
|
|
340
|
+
logger.debug(`embedder-pool: worker ${index} emitted unparseable line: ${line.slice(0, 200)}`);
|
|
341
|
+
return;
|
|
342
|
+
}
|
|
343
|
+
if (parsed.type !== "embed-response") return;
|
|
344
|
+
const pending = worker.pending;
|
|
345
|
+
if (!pending) {
|
|
346
|
+
logger.debug(`embedder-pool: worker ${index} returned response with no pending request`);
|
|
347
|
+
return;
|
|
348
|
+
}
|
|
349
|
+
if (parsed.error) {
|
|
350
|
+
pending.reject(
|
|
351
|
+
new HelpfulError({
|
|
352
|
+
kind: "internal_error",
|
|
353
|
+
message: `embed worker ${index} failed: ${parsed.error.message}`,
|
|
354
|
+
hint: parsed.error.hint || "Inspect parent stderr for the full error.",
|
|
355
|
+
}),
|
|
356
|
+
);
|
|
357
|
+
} else if (parsed.vectors) {
|
|
358
|
+
pending.resolve(parsed.vectors);
|
|
359
|
+
} else {
|
|
360
|
+
pending.reject(
|
|
361
|
+
new HelpfulError({
|
|
362
|
+
kind: "internal_error",
|
|
363
|
+
message: `embed worker ${index} returned response with neither vectors nor error`,
|
|
364
|
+
hint: "This is a worker protocol bug — file an issue with the preceding stderr.",
|
|
365
|
+
}),
|
|
366
|
+
);
|
|
367
|
+
}
|
|
368
|
+
}
|
|
369
|
+
}
|
|
370
|
+
|
|
371
|
+
/**
|
|
372
|
+
* Run `fn` with a fresh `EmbedderPool` registered as the global embedder. The
|
|
373
|
+
* pool is created, plugged in via `setEmbedderPool()`, and disposed
|
|
374
|
+
* (subprocesses killed) before the returned promise settles — so the workers only
|
|
375
|
+
* exist for the duration of one bulk-embedding command (`add` / `refresh` /
|
|
376
|
+
* `write` / a daemon tick). When `workerCount <= 1` the helper short-circuits
|
|
377
|
+
* and runs `fn` inline against the single-process embedder, with no spawn
|
|
378
|
+
* overhead.
|
|
379
|
+
*/
|
|
380
|
+
export async function withEmbedderPool<T>(workerCount: number, model: string, fn: () => Promise<T>): Promise<T> {
|
|
381
|
+
if (workerCount <= 1) return fn();
|
|
382
|
+
const pool = new EmbedderPool(workerCount, model);
|
|
383
|
+
pool.spawn();
|
|
384
|
+
setEmbedderPool(pool);
|
|
385
|
+
try {
|
|
386
|
+
return await fn();
|
|
387
|
+
} finally {
|
|
388
|
+
setEmbedderPool(null);
|
|
389
|
+
await pool.dispose();
|
|
390
|
+
}
|
|
391
|
+
}
|