membot 0.4.0 → 0.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/constants.ts +9 -0
- package/src/ingest/embedder.ts +19 -10
package/package.json
CHANGED
package/src/constants.ts
CHANGED
|
@@ -20,6 +20,15 @@ export const ENV = {
|
|
|
20
20
|
export const EMBEDDING_MODEL = "Xenova/bge-small-en-v1.5";
|
|
21
21
|
export const EMBEDDING_DIMENSION = 384;
|
|
22
22
|
|
|
23
|
+
/**
|
|
24
|
+
* Max chunks fed to the feature-extraction pipeline in one forward pass.
|
|
25
|
+
* ONNX/WASM allocates activations linearly with batch size, so a single
|
|
26
|
+
* unbounded call OOMs (`std::bad_alloc`) on large files — a 168-chunk file
|
|
27
|
+
* was the original repro. 16 is comfortably within the WASM heap for
|
|
28
|
+
* bge-small-en-v1.5 at 512 tokens and still amortizes the per-call overhead.
|
|
29
|
+
*/
|
|
30
|
+
export const EMBEDDING_BATCH_SIZE = 16;
|
|
31
|
+
|
|
23
32
|
export const DEFAULTS = {
|
|
24
33
|
CHUNKER_MODE: "deterministic" as const,
|
|
25
34
|
CHUNKER_TARGET_CHARS: 4_000,
|
package/src/ingest/embedder.ts
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import { existsSync } from "node:fs";
|
|
2
2
|
import { join } from "node:path";
|
|
3
3
|
import { env, type FeatureExtractionPipeline, pipeline } from "@huggingface/transformers";
|
|
4
|
-
import { EMBEDDING_DIMENSION, EMBEDDING_MODEL } from "../constants.ts";
|
|
4
|
+
import { EMBEDDING_BATCH_SIZE, EMBEDDING_DIMENSION, EMBEDDING_MODEL } from "../constants.ts";
|
|
5
5
|
import { HelpfulError } from "../errors.ts";
|
|
6
6
|
import { logger } from "../output/logger.ts";
|
|
7
7
|
|
|
@@ -67,20 +67,29 @@ async function getPipeline(model: string): Promise<FeatureExtractionPipeline> {
|
|
|
67
67
|
* Embed an array of texts to L2-normalized vectors with the configured
|
|
68
68
|
* model. Throws a HelpfulError when the model's dimension doesn't match
|
|
69
69
|
* EMBEDDING_DIMENSION (the value baked into the DB schema).
|
|
70
|
+
*
|
|
71
|
+
* Inputs are sliced into windows of EMBEDDING_BATCH_SIZE so a single
|
|
72
|
+
* forward pass never has to allocate activations for arbitrarily many
|
|
73
|
+
* chunks — large files (hundreds of chunks) otherwise OOM the WASM heap.
|
|
70
74
|
*/
|
|
71
75
|
export async function embed(texts: string[], model: string = EMBEDDING_MODEL): Promise<number[][]> {
  // Nothing to embed: skip loading the pipeline entirely.
  if (texts.length === 0) return [];

  const extractor = await getPipeline(model);
  const vectors: number[][] = [];

  // Walk the input in windows of EMBEDDING_BATCH_SIZE so each extractor call
  // only ever receives a bounded number of texts.
  let start = 0;
  while (start < texts.length) {
    const batch = texts.slice(start, start + EMBEDDING_BATCH_SIZE);
    const tensor = await extractor(batch, { pooling: "mean", normalize: true });
    const rows = tensor.tolist() as number[][];

    // The dimension is validated only while no vectors have been collected yet
    // (i.e. on the first batch that produces output), using its first row.
    const first = rows[0];
    if (vectors.length === 0 && first && first.length !== EMBEDDING_DIMENSION) {
      throw new HelpfulError({
        kind: "internal_error",
        message: `embedding model ${model} returned ${first.length}-dim vectors, expected ${EMBEDDING_DIMENSION}`,
        hint: `Set config.embedding_model to a ${EMBEDDING_DIMENSION}-dim model (default: ${EMBEDDING_MODEL}).`,
      });
    }

    // Append this batch's vectors, preserving input order.
    vectors.push(...rows);
    start += EMBEDDING_BATCH_SIZE;
  }

  return vectors;
}
|
|
85
94
|
|
|
86
95
|
/** Embed a single text — convenience wrapper for query-time embedding. */
|