membot 0.4.0 → 0.4.1

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "membot",
3
- "version": "0.4.0",
3
+ "version": "0.4.1",
4
4
  "description": "Versioned context store with hybrid search for AI agents. Stdio + HTTP MCP server and CLI.",
5
5
  "type": "module",
6
6
  "exports": {
package/src/constants.ts CHANGED
@@ -20,6 +20,15 @@ export const ENV = {
20
20
  export const EMBEDDING_MODEL = "Xenova/bge-small-en-v1.5";
21
21
  export const EMBEDDING_DIMENSION = 384;
22
22
 
23
+ /**
24
+ * Max chunks fed to the feature-extraction pipeline in one forward pass.
25
+ * ONNX/WASM allocates activations linearly with batch size, so a single
26
+ * unbounded call OOMs (`std::bad_alloc`) on large files — a 168-chunk file
27
+ * was the original repro. 16 is comfortably within the WASM heap for
28
+ * bge-small-en-v1.5 at 512 tokens and still amortizes the per-call overhead.
29
+ */
30
+ export const EMBEDDING_BATCH_SIZE = 16;
31
+
23
32
  export const DEFAULTS = {
24
33
  CHUNKER_MODE: "deterministic" as const,
25
34
  CHUNKER_TARGET_CHARS: 4_000,
@@ -1,7 +1,7 @@
1
1
  import { existsSync } from "node:fs";
2
2
  import { join } from "node:path";
3
3
  import { env, type FeatureExtractionPipeline, pipeline } from "@huggingface/transformers";
4
- import { EMBEDDING_DIMENSION, EMBEDDING_MODEL } from "../constants.ts";
4
+ import { EMBEDDING_BATCH_SIZE, EMBEDDING_DIMENSION, EMBEDDING_MODEL } from "../constants.ts";
5
5
  import { HelpfulError } from "../errors.ts";
6
6
  import { logger } from "../output/logger.ts";
7
7
 
@@ -67,20 +67,29 @@ async function getPipeline(model: string): Promise<FeatureExtractionPipeline> {
67
67
  * Embed an array of texts to L2-normalized vectors with the configured
68
68
  * model. Throws a HelpfulError when the model's dimension doesn't match
69
69
  * EMBEDDING_DIMENSION (the value baked into the DB schema).
70
+ *
71
+ * Inputs are sliced into windows of EMBEDDING_BATCH_SIZE so a single
72
+ * forward pass never has to allocate activations for arbitrarily many
73
+ * chunks — large files (hundreds of chunks) otherwise OOM the WASM heap.
70
74
  */
71
75
  export async function embed(texts: string[], model: string = EMBEDDING_MODEL): Promise<number[][]> {
72
76
  if (texts.length === 0) return [];
73
77
  const extractor = await getPipeline(model);
74
- const output = await extractor(texts, { pooling: "mean", normalize: true });
75
- const data = output.tolist() as number[][];
76
- if (data[0] && data[0].length !== EMBEDDING_DIMENSION) {
77
- throw new HelpfulError({
78
- kind: "internal_error",
79
- message: `embedding model ${model} returned ${data[0].length}-dim vectors, expected ${EMBEDDING_DIMENSION}`,
80
- hint: `Set config.embedding_model to a ${EMBEDDING_DIMENSION}-dim model (default: ${EMBEDDING_MODEL}).`,
81
- });
78
+ const out: number[][] = [];
79
+ for (let i = 0; i < texts.length; i += EMBEDDING_BATCH_SIZE) {
80
+ const slice = texts.slice(i, i + EMBEDDING_BATCH_SIZE);
81
+ const output = await extractor(slice, { pooling: "mean", normalize: true });
82
+ const data = output.tolist() as number[][];
83
+ if (out.length === 0 && data[0] && data[0].length !== EMBEDDING_DIMENSION) {
84
+ throw new HelpfulError({
85
+ kind: "internal_error",
86
+ message: `embedding model ${model} returned ${data[0].length}-dim vectors, expected ${EMBEDDING_DIMENSION}`,
87
+ hint: `Set config.embedding_model to a ${EMBEDDING_DIMENSION}-dim model (default: ${EMBEDDING_MODEL}).`,
88
+ });
89
+ }
90
+ for (const vec of data) out.push(vec);
82
91
  }
83
- return data;
92
+ return out;
84
93
  }
85
94
 
86
95
  /** Embed a single text — convenience wrapper for query-time embedding. */