@loreai/core 0.17.0 → 0.18.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/bun/agents-file.d.ts +4 -0
- package/dist/bun/agents-file.d.ts.map +1 -1
- package/dist/bun/config.d.ts +2 -0
- package/dist/bun/config.d.ts.map +1 -1
- package/dist/bun/curator.d.ts +45 -0
- package/dist/bun/curator.d.ts.map +1 -1
- package/dist/bun/data-dir.d.ts +18 -0
- package/dist/bun/data-dir.d.ts.map +1 -0
- package/dist/bun/db.d.ts +12 -0
- package/dist/bun/db.d.ts.map +1 -1
- package/dist/bun/distillation.d.ts.map +1 -1
- package/dist/bun/embedding-vendor.d.ts +22 -38
- package/dist/bun/embedding-vendor.d.ts.map +1 -1
- package/dist/bun/embedding-worker-types.d.ts +17 -12
- package/dist/bun/embedding-worker-types.d.ts.map +1 -1
- package/dist/bun/embedding-worker.d.ts +9 -2
- package/dist/bun/embedding-worker.d.ts.map +1 -1
- package/dist/bun/embedding-worker.js +38864 -33
- package/dist/bun/embedding-worker.js.map +4 -4
- package/dist/bun/embedding.d.ts +30 -22
- package/dist/bun/embedding.d.ts.map +1 -1
- package/dist/bun/gradient.d.ts +8 -1
- package/dist/bun/gradient.d.ts.map +1 -1
- package/dist/bun/import/detect.d.ts +14 -0
- package/dist/bun/import/detect.d.ts.map +1 -0
- package/dist/bun/import/extract.d.ts +43 -0
- package/dist/bun/import/extract.d.ts.map +1 -0
- package/dist/bun/import/history.d.ts +40 -0
- package/dist/bun/import/history.d.ts.map +1 -0
- package/dist/bun/import/index.d.ts +17 -0
- package/dist/bun/import/index.d.ts.map +1 -0
- package/dist/bun/import/providers/aider.d.ts +2 -0
- package/dist/bun/import/providers/aider.d.ts.map +1 -0
- package/dist/bun/import/providers/claude-code.d.ts +2 -0
- package/dist/bun/import/providers/claude-code.d.ts.map +1 -0
- package/dist/bun/import/providers/cline.d.ts +2 -0
- package/dist/bun/import/providers/cline.d.ts.map +1 -0
- package/dist/bun/import/providers/codex.d.ts +2 -0
- package/dist/bun/import/providers/codex.d.ts.map +1 -0
- package/dist/bun/import/providers/continue.d.ts +2 -0
- package/dist/bun/import/providers/continue.d.ts.map +1 -0
- package/dist/bun/import/providers/index.d.ts +19 -0
- package/dist/bun/import/providers/index.d.ts.map +1 -0
- package/dist/bun/import/providers/opencode.d.ts +2 -0
- package/dist/bun/import/providers/opencode.d.ts.map +1 -0
- package/dist/bun/import/providers/pi.d.ts +2 -0
- package/dist/bun/import/providers/pi.d.ts.map +1 -0
- package/dist/bun/import/types.d.ts +82 -0
- package/dist/bun/import/types.d.ts.map +1 -0
- package/dist/bun/index.d.ts +4 -1
- package/dist/bun/index.d.ts.map +1 -1
- package/dist/bun/index.js +2217 -224
- package/dist/bun/index.js.map +4 -4
- package/dist/bun/instruction-detect.d.ts +66 -0
- package/dist/bun/instruction-detect.d.ts.map +1 -0
- package/dist/bun/log.d.ts +9 -0
- package/dist/bun/log.d.ts.map +1 -1
- package/dist/bun/ltm.d.ts +40 -0
- package/dist/bun/ltm.d.ts.map +1 -1
- package/dist/bun/pattern-extract.d.ts +7 -0
- package/dist/bun/pattern-extract.d.ts.map +1 -1
- package/dist/bun/prompt.d.ts +1 -1
- package/dist/bun/prompt.d.ts.map +1 -1
- package/dist/bun/recall.d.ts.map +1 -1
- package/dist/bun/search.d.ts +5 -3
- package/dist/bun/search.d.ts.map +1 -1
- package/dist/bun/temporal.d.ts.map +1 -1
- package/dist/bun/types.d.ts +1 -1
- package/dist/node/agents-file.d.ts +4 -0
- package/dist/node/agents-file.d.ts.map +1 -1
- package/dist/node/config.d.ts +2 -0
- package/dist/node/config.d.ts.map +1 -1
- package/dist/node/curator.d.ts +45 -0
- package/dist/node/curator.d.ts.map +1 -1
- package/dist/node/data-dir.d.ts +18 -0
- package/dist/node/data-dir.d.ts.map +1 -0
- package/dist/node/db.d.ts +12 -0
- package/dist/node/db.d.ts.map +1 -1
- package/dist/node/distillation.d.ts.map +1 -1
- package/dist/node/embedding-vendor.d.ts +22 -38
- package/dist/node/embedding-vendor.d.ts.map +1 -1
- package/dist/node/embedding-worker-types.d.ts +17 -12
- package/dist/node/embedding-worker-types.d.ts.map +1 -1
- package/dist/node/embedding-worker.d.ts +9 -2
- package/dist/node/embedding-worker.d.ts.map +1 -1
- package/dist/node/embedding-worker.js +38864 -33
- package/dist/node/embedding-worker.js.map +4 -4
- package/dist/node/embedding.d.ts +30 -22
- package/dist/node/embedding.d.ts.map +1 -1
- package/dist/node/gradient.d.ts +8 -1
- package/dist/node/gradient.d.ts.map +1 -1
- package/dist/node/import/detect.d.ts +14 -0
- package/dist/node/import/detect.d.ts.map +1 -0
- package/dist/node/import/extract.d.ts +43 -0
- package/dist/node/import/extract.d.ts.map +1 -0
- package/dist/node/import/history.d.ts +40 -0
- package/dist/node/import/history.d.ts.map +1 -0
- package/dist/node/import/index.d.ts +17 -0
- package/dist/node/import/index.d.ts.map +1 -0
- package/dist/node/import/providers/aider.d.ts +2 -0
- package/dist/node/import/providers/aider.d.ts.map +1 -0
- package/dist/node/import/providers/claude-code.d.ts +2 -0
- package/dist/node/import/providers/claude-code.d.ts.map +1 -0
- package/dist/node/import/providers/cline.d.ts +2 -0
- package/dist/node/import/providers/cline.d.ts.map +1 -0
- package/dist/node/import/providers/codex.d.ts +2 -0
- package/dist/node/import/providers/codex.d.ts.map +1 -0
- package/dist/node/import/providers/continue.d.ts +2 -0
- package/dist/node/import/providers/continue.d.ts.map +1 -0
- package/dist/node/import/providers/index.d.ts +19 -0
- package/dist/node/import/providers/index.d.ts.map +1 -0
- package/dist/node/import/providers/opencode.d.ts +2 -0
- package/dist/node/import/providers/opencode.d.ts.map +1 -0
- package/dist/node/import/providers/pi.d.ts +2 -0
- package/dist/node/import/providers/pi.d.ts.map +1 -0
- package/dist/node/import/types.d.ts +82 -0
- package/dist/node/import/types.d.ts.map +1 -0
- package/dist/node/index.d.ts +4 -1
- package/dist/node/index.d.ts.map +1 -1
- package/dist/node/index.js +2217 -224
- package/dist/node/index.js.map +4 -4
- package/dist/node/instruction-detect.d.ts +66 -0
- package/dist/node/instruction-detect.d.ts.map +1 -0
- package/dist/node/log.d.ts +9 -0
- package/dist/node/log.d.ts.map +1 -1
- package/dist/node/ltm.d.ts +40 -0
- package/dist/node/ltm.d.ts.map +1 -1
- package/dist/node/pattern-extract.d.ts +7 -0
- package/dist/node/pattern-extract.d.ts.map +1 -1
- package/dist/node/prompt.d.ts +1 -1
- package/dist/node/prompt.d.ts.map +1 -1
- package/dist/node/recall.d.ts.map +1 -1
- package/dist/node/search.d.ts +5 -3
- package/dist/node/search.d.ts.map +1 -1
- package/dist/node/temporal.d.ts.map +1 -1
- package/dist/node/types.d.ts +1 -1
- package/dist/types/agents-file.d.ts +4 -0
- package/dist/types/agents-file.d.ts.map +1 -1
- package/dist/types/config.d.ts +2 -0
- package/dist/types/config.d.ts.map +1 -1
- package/dist/types/curator.d.ts +45 -0
- package/dist/types/curator.d.ts.map +1 -1
- package/dist/types/data-dir.d.ts +18 -0
- package/dist/types/data-dir.d.ts.map +1 -0
- package/dist/types/db.d.ts +12 -0
- package/dist/types/db.d.ts.map +1 -1
- package/dist/types/distillation.d.ts.map +1 -1
- package/dist/types/embedding-vendor.d.ts +22 -38
- package/dist/types/embedding-vendor.d.ts.map +1 -1
- package/dist/types/embedding-worker-types.d.ts +17 -12
- package/dist/types/embedding-worker-types.d.ts.map +1 -1
- package/dist/types/embedding-worker.d.ts +9 -2
- package/dist/types/embedding-worker.d.ts.map +1 -1
- package/dist/types/embedding.d.ts +30 -22
- package/dist/types/embedding.d.ts.map +1 -1
- package/dist/types/gradient.d.ts +8 -1
- package/dist/types/gradient.d.ts.map +1 -1
- package/dist/types/import/detect.d.ts +14 -0
- package/dist/types/import/detect.d.ts.map +1 -0
- package/dist/types/import/extract.d.ts +43 -0
- package/dist/types/import/extract.d.ts.map +1 -0
- package/dist/types/import/history.d.ts +40 -0
- package/dist/types/import/history.d.ts.map +1 -0
- package/dist/types/import/index.d.ts +17 -0
- package/dist/types/import/index.d.ts.map +1 -0
- package/dist/types/import/providers/aider.d.ts +2 -0
- package/dist/types/import/providers/aider.d.ts.map +1 -0
- package/dist/types/import/providers/claude-code.d.ts +2 -0
- package/dist/types/import/providers/claude-code.d.ts.map +1 -0
- package/dist/types/import/providers/cline.d.ts +2 -0
- package/dist/types/import/providers/cline.d.ts.map +1 -0
- package/dist/types/import/providers/codex.d.ts +2 -0
- package/dist/types/import/providers/codex.d.ts.map +1 -0
- package/dist/types/import/providers/continue.d.ts +2 -0
- package/dist/types/import/providers/continue.d.ts.map +1 -0
- package/dist/types/import/providers/index.d.ts +19 -0
- package/dist/types/import/providers/index.d.ts.map +1 -0
- package/dist/types/import/providers/opencode.d.ts +2 -0
- package/dist/types/import/providers/opencode.d.ts.map +1 -0
- package/dist/types/import/providers/pi.d.ts +2 -0
- package/dist/types/import/providers/pi.d.ts.map +1 -0
- package/dist/types/import/types.d.ts +82 -0
- package/dist/types/import/types.d.ts.map +1 -0
- package/dist/types/index.d.ts +4 -1
- package/dist/types/index.d.ts.map +1 -1
- package/dist/types/instruction-detect.d.ts +66 -0
- package/dist/types/instruction-detect.d.ts.map +1 -0
- package/dist/types/log.d.ts +9 -0
- package/dist/types/log.d.ts.map +1 -1
- package/dist/types/ltm.d.ts +40 -0
- package/dist/types/ltm.d.ts.map +1 -1
- package/dist/types/pattern-extract.d.ts +7 -0
- package/dist/types/pattern-extract.d.ts.map +1 -1
- package/dist/types/prompt.d.ts +1 -1
- package/dist/types/prompt.d.ts.map +1 -1
- package/dist/types/recall.d.ts.map +1 -1
- package/dist/types/search.d.ts +5 -3
- package/dist/types/search.d.ts.map +1 -1
- package/dist/types/temporal.d.ts.map +1 -1
- package/dist/types/types.d.ts +1 -1
- package/package.json +2 -4
- package/src/agents-file.ts +41 -13
- package/src/config.ts +31 -18
- package/src/curator.ts +111 -75
- package/src/data-dir.ts +76 -0
- package/src/db.ts +110 -11
- package/src/distillation.ts +10 -2
- package/src/embedding-vendor.ts +23 -40
- package/src/embedding-worker-types.ts +19 -11
- package/src/embedding-worker.ts +111 -47
- package/src/embedding.ts +196 -171
- package/src/gradient.ts +9 -1
- package/src/import/detect.ts +37 -0
- package/src/import/extract.ts +137 -0
- package/src/import/history.ts +99 -0
- package/src/import/index.ts +45 -0
- package/src/import/providers/aider.ts +207 -0
- package/src/import/providers/claude-code.ts +339 -0
- package/src/import/providers/cline.ts +324 -0
- package/src/import/providers/codex.ts +369 -0
- package/src/import/providers/continue.ts +304 -0
- package/src/import/providers/index.ts +32 -0
- package/src/import/providers/opencode.ts +272 -0
- package/src/import/providers/pi.ts +332 -0
- package/src/import/types.ts +91 -0
- package/src/index.ts +5 -0
- package/src/instruction-detect.ts +275 -0
- package/src/log.ts +91 -3
- package/src/ltm.ts +316 -3
- package/src/pattern-extract.ts +41 -0
- package/src/prompt.ts +7 -1
- package/src/recall.ts +43 -5
- package/src/search.ts +7 -5
- package/src/temporal.ts +8 -6
- package/src/types.ts +1 -1
|
@@ -1,10 +1,11 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Shared message types for the embedding worker thread.
|
|
3
3
|
*
|
|
4
|
-
* The embedding worker (`embedding-worker.ts`) runs
|
|
5
|
-
* in a separate `node:worker_threads` Worker
|
|
6
|
-
* stays free during inference. This file
|
|
7
|
-
* the main thread (`LocalProvider`
|
|
4
|
+
* The embedding worker (`embedding-worker.ts`) runs ONNX inference via
|
|
5
|
+
* `@huggingface/transformers` in a separate `node:worker_threads` Worker
|
|
6
|
+
* so the main thread's event loop stays free during inference. This file
|
|
7
|
+
* defines the message protocol between the main thread (`LocalProvider`
|
|
8
|
+
* in `embedding.ts`) and the worker.
|
|
8
9
|
*
|
|
9
10
|
* Imported by both sides — keep this file free of runtime dependencies.
|
|
10
11
|
*/
|
|
@@ -18,7 +19,7 @@ export interface EmbedRequest {
|
|
|
18
19
|
type: "embed";
|
|
19
20
|
/** Monotonic request ID for correlating responses. */
|
|
20
21
|
id: number;
|
|
21
|
-
/** Texts to embed. */
|
|
22
|
+
/** Texts to embed (already prefixed with task instruction by the caller). */
|
|
22
23
|
texts: string[];
|
|
23
24
|
/** "document" for storage, "query" for search. */
|
|
24
25
|
inputType: "document" | "query";
|
|
@@ -72,11 +73,18 @@ export type WorkerOutbound = EmbedResult | EmbedError | InitError;
|
|
|
72
73
|
|
|
73
74
|
/** Passed to the worker via `workerData` at construction time. */
|
|
74
75
|
export interface WorkerInitData {
|
|
75
|
-
/**
|
|
76
|
-
|
|
76
|
+
/** HuggingFace model ID, e.g. "nomic-ai/nomic-embed-text-v1.5". */
|
|
77
|
+
modelId: string;
|
|
78
|
+
/** Target embedding dimensions. For Nomic v1.5 with Matryoshka,
|
|
79
|
+
* this controls how many leading dims to keep (64–768). */
|
|
80
|
+
dimensions: number;
|
|
77
81
|
/** Vendored model info for binary mode, or null for npm mode.
|
|
78
|
-
*
|
|
79
|
-
*
|
|
80
|
-
*
|
|
81
|
-
vendorModel: {
|
|
82
|
+
* In binary mode, model files are pre-extracted to a local dir
|
|
83
|
+
* and we point transformers.js at that path instead of downloading
|
|
84
|
+
* from HuggingFace Hub. */
|
|
85
|
+
vendorModel: {
|
|
86
|
+
/** Absolute path to the dir containing model files
|
|
87
|
+
* (config.json, tokenizer.json, onnx/model_quantized.onnx, …). */
|
|
88
|
+
localModelPath: string;
|
|
89
|
+
} | null;
|
|
82
90
|
}
|
package/src/embedding-worker.ts
CHANGED
|
@@ -1,8 +1,9 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Embedding worker thread — runs
|
|
2
|
+
* Embedding worker thread — runs ONNX inference via @huggingface/transformers
|
|
3
|
+
* off the main thread.
|
|
3
4
|
*
|
|
4
5
|
* This file is the entry point for a `node:worker_threads` Worker spawned by
|
|
5
|
-
* `LocalProvider` in `embedding.ts`. It owns the
|
|
6
|
+
* `LocalProvider` in `embedding.ts`. It owns the transformers.js pipeline
|
|
6
7
|
* and processes embed requests sequentially from a priority queue. Moving
|
|
7
8
|
* inference here keeps the main thread's event loop free — HTTP requests,
|
|
8
9
|
* SSE streams, and session APIs are no longer blocked during embedding.
|
|
@@ -10,6 +11,12 @@
|
|
|
10
11
|
* Communication uses `parentPort` message passing with structured clone.
|
|
11
12
|
* Float32Array vectors are sent back directly (Bun preserves identity).
|
|
12
13
|
*
|
|
14
|
+
* The worker applies Nomic's recommended post-processing:
|
|
15
|
+
* 1. Mean pooling (via pipeline option)
|
|
16
|
+
* 2. Layer normalization
|
|
17
|
+
* 3. Matryoshka dimension truncation (if dimensions < full 768)
|
|
18
|
+
* 4. L2 normalization
|
|
19
|
+
*
|
|
13
20
|
* @see embedding-worker-types.ts for the message protocol.
|
|
14
21
|
*/
|
|
15
22
|
|
|
@@ -25,53 +32,72 @@ import type {
|
|
|
25
32
|
// workerData
|
|
26
33
|
// ---------------------------------------------------------------------------
|
|
27
34
|
|
|
28
|
-
const {
|
|
35
|
+
const { modelId, dimensions, vendorModel } = workerData as WorkerInitData;
|
|
29
36
|
|
|
30
37
|
// ---------------------------------------------------------------------------
|
|
31
38
|
// Model lifecycle — lazy init on first embed request
|
|
32
39
|
// ---------------------------------------------------------------------------
|
|
33
40
|
|
|
34
|
-
/** The
|
|
35
|
-
type
|
|
36
|
-
|
|
37
|
-
|
|
41
|
+
/** The transformers.js pipeline instance, typed loosely since the exact
|
|
42
|
+
* return type depends on the pipeline task. */
|
|
43
|
+
type FeatureExtractionPipeline = {
|
|
44
|
+
(texts: string[], options?: Record<string, unknown>): Promise<{
|
|
45
|
+
dims: number[];
|
|
46
|
+
data: Float32Array;
|
|
47
|
+
tolist(): number[][];
|
|
48
|
+
}>;
|
|
49
|
+
dispose?(): Promise<void>;
|
|
38
50
|
};
|
|
39
51
|
|
|
40
|
-
let
|
|
52
|
+
let pipe: FeatureExtractionPipeline | null = null;
|
|
53
|
+
let layerNormFn: ((input: unknown, normalized_shape: number[]) => {
|
|
54
|
+
dims: number[];
|
|
55
|
+
data: Float32Array;
|
|
56
|
+
normalize(p: number, dim: number): { tolist(): number[][]; data: Float32Array; dims: number[] };
|
|
57
|
+
slice(...args: unknown[]): { normalize(p: number, dim: number): { tolist(): number[][]; data: Float32Array; dims: number[] } };
|
|
58
|
+
}) | null = null;
|
|
41
59
|
let initPromise: Promise<void> | null = null;
|
|
42
60
|
let initFailed = false;
|
|
43
61
|
let initError: string | null = null;
|
|
44
62
|
|
|
45
63
|
/**
|
|
46
|
-
* Ensure the
|
|
47
|
-
* dynamic import +
|
|
64
|
+
* Ensure the transformers.js pipeline is loaded. Lazy — first call triggers
|
|
65
|
+
* the dynamic import + pipeline creation, subsequent calls return immediately.
|
|
48
66
|
* On failure, marks the worker as permanently broken and posts `init-error`.
|
|
49
67
|
*/
|
|
50
|
-
async function
|
|
51
|
-
if (
|
|
52
|
-
if (initFailed) throw new Error(initError ?? "
|
|
68
|
+
async function ensurePipeline(): Promise<void> {
|
|
69
|
+
if (pipe) return;
|
|
70
|
+
if (initFailed) throw new Error(initError ?? "pipeline init previously failed");
|
|
53
71
|
|
|
54
72
|
if (!initPromise) {
|
|
55
73
|
initPromise = (async () => {
|
|
56
|
-
const
|
|
57
|
-
const {
|
|
74
|
+
const transformers = await import("@huggingface/transformers");
|
|
75
|
+
const { pipeline, env, layer_norm } = transformers;
|
|
76
|
+
|
|
77
|
+
// Configure transformers.js environment
|
|
78
|
+
env.allowRemoteModels = !vendorModel;
|
|
79
|
+
env.allowLocalModels = true;
|
|
58
80
|
|
|
59
|
-
let m: unknown;
|
|
60
81
|
if (vendorModel) {
|
|
61
|
-
// Binary mode:
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
modelAbsoluteDirPath: vendorModel.modelAbsoluteDirPath,
|
|
65
|
-
modelName: vendorModel.modelName,
|
|
66
|
-
});
|
|
67
|
-
} else {
|
|
68
|
-
// npm mode: resolve model name against fastembed's enum.
|
|
69
|
-
const enumValue = (EmbeddingModel as Record<string, string>)[modelName];
|
|
70
|
-
m = await FlagEmbedding.init({
|
|
71
|
-
model: enumValue ?? modelName,
|
|
72
|
-
} as { model: typeof EmbeddingModel.BGESmallENV15 });
|
|
82
|
+
// Binary mode: point at pre-extracted model files on disk.
|
|
83
|
+
env.localModelPath = vendorModel.localModelPath;
|
|
84
|
+
env.allowRemoteModels = false;
|
|
73
85
|
}
|
|
74
|
-
|
|
86
|
+
|
|
87
|
+
// Create feature-extraction pipeline with ONNX quantized model.
|
|
88
|
+
// dtype: 'q8' selects the INT8 quantized ONNX variant (model_quantized.onnx)
|
|
89
|
+
// which is ~137MB for Nomic v1.5 vs ~547MB for the full FP32 model.
|
|
90
|
+
//
|
|
91
|
+
// device: "cpu" — in npm mode, transformers.js uses onnxruntime-node
|
|
92
|
+
// (native CPU). In the compiled binary, onnxruntime-node is redirected
|
|
93
|
+
// to onnxruntime-web by the build plugin, which handles "cpu" via its
|
|
94
|
+
// WASM+SIMD backend (API-compatible, ~2x faster on batch workloads).
|
|
95
|
+
pipe = (await pipeline("feature-extraction", modelId, {
|
|
96
|
+
dtype: "q8",
|
|
97
|
+
device: "cpu",
|
|
98
|
+
})) as unknown as FeatureExtractionPipeline;
|
|
99
|
+
|
|
100
|
+
layerNormFn = layer_norm as typeof layerNormFn;
|
|
75
101
|
})().catch((err) => {
|
|
76
102
|
initFailed = true;
|
|
77
103
|
initError = err instanceof Error ? err.message : String(err);
|
|
@@ -83,8 +109,7 @@ async function ensureModel(): Promise<FastembedModel> {
|
|
|
83
109
|
}
|
|
84
110
|
|
|
85
111
|
await initPromise;
|
|
86
|
-
if (!
|
|
87
|
-
return model;
|
|
112
|
+
if (!pipe) throw new Error("pipeline init completed but pipe is null");
|
|
88
113
|
}
|
|
89
114
|
|
|
90
115
|
// ---------------------------------------------------------------------------
|
|
@@ -135,31 +160,70 @@ async function drain(): Promise<void> {
|
|
|
135
160
|
// Embed processing
|
|
136
161
|
// ---------------------------------------------------------------------------
|
|
137
162
|
|
|
163
|
+
/**
 * Detect ONNX runtime out-of-memory errors from an error message.
 *
 * The runtime can throw opaque numeric error codes (e.g. "287180544") for
 * allocation failures rather than a readable message, so two signals are
 * checked:
 *
 *   1. The whole message (ignoring surrounding whitespace) is a number of
 *      six or more digits.
 *   2. The message contains a known OOM phrase: "out of memory" (any single
 *      separator character), "alloc… fail…", or the standalone token "OOM".
 *
 * The "OOM" token must not be embedded in a longer alphabetic word, so
 * unrelated text such as "classroom" or "zoom" (for example inside a file
 * path) is not misclassified as an out-of-memory failure.
 *
 * @param msg - Raw error message taken from the thrown value.
 * @returns `true` when the message looks like an allocation failure.
 */
function isOomError(msg: string): boolean {
  // Pure numeric error codes ≥ 6 digits are treated as ORT allocation
  // failures. Trim first so a trailing newline does not defeat the anchors.
  if (/^\d{6,}$/.test(msg.trim())) return true;

  // Explicit OOM phrases from various ONNX backends.
  if (/out.of.memory|alloc.*fail/i.test(msg)) return true;

  // Standalone "oom" in any case ("OOM", "oom-killer", "oom_kill"), but not
  // as part of a longer word ("room", "zoom", "oomph").
  if (/(?<![a-z])oom(?![a-z])/i.test(msg)) return true;

  // Upper-case "OOM" used as a camel-case prefix, e.g. "OOMKilled".
  if (/(?<![A-Za-z])OOM(?![a-z])/.test(msg)) return true;

  return false;
}
|
|
176
|
+
|
|
138
177
|
async function processEmbed(req: EmbedRequest): Promise<void> {
|
|
139
178
|
try {
|
|
140
|
-
|
|
179
|
+
await ensurePipeline();
|
|
180
|
+
|
|
181
|
+
// Run feature extraction with mean pooling.
|
|
182
|
+
// truncation: true caps each text at the model's max length (8192 tokens
|
|
183
|
+
// for Nomic v1.5), preventing oversized inputs from causing OOM.
|
|
184
|
+
const output = await pipe!(req.texts, { pooling: "mean", truncation: true });
|
|
185
|
+
|
|
186
|
+
// Post-process following Nomic's recipe:
|
|
187
|
+
// 1. Layer normalization over the full hidden dimension
|
|
188
|
+
// 2. Matryoshka truncation to target dimensions
|
|
189
|
+
// 3. L2 normalization
|
|
190
|
+
const fullDim = output.dims[output.dims.length - 1]; // 768 for Nomic v1.5
|
|
191
|
+
const truncate = dimensions < fullDim;
|
|
192
|
+
|
|
193
|
+
let normalized: { tolist(): number[][]; data: Float32Array; dims: number[] };
|
|
194
|
+
if (truncate) {
|
|
195
|
+
// layer_norm → slice → L2 normalize
|
|
196
|
+
normalized = layerNormFn!(output, [fullDim])
|
|
197
|
+
.slice(null, [0, dimensions])
|
|
198
|
+
.normalize(2, -1);
|
|
199
|
+
} else {
|
|
200
|
+
// layer_norm → L2 normalize (no truncation)
|
|
201
|
+
normalized = layerNormFn!(output, [fullDim])
|
|
202
|
+
.normalize(2, -1);
|
|
203
|
+
}
|
|
141
204
|
|
|
142
|
-
|
|
205
|
+
// Extract per-text vectors from the batched tensor.
|
|
206
|
+
const numTexts = req.texts.length;
|
|
207
|
+
const vectors: Float32Array[] = [];
|
|
208
|
+
const dim = truncate ? dimensions : fullDim;
|
|
143
209
|
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
const vec =
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
// Batch document embedding via async generator.
|
|
150
|
-
vectors = [];
|
|
151
|
-
for await (const batch of m.passageEmbed(req.texts)) {
|
|
152
|
-
for (const vec of batch) {
|
|
153
|
-
vectors.push(new Float32Array(vec));
|
|
154
|
-
}
|
|
155
|
-
}
|
|
210
|
+
for (let i = 0; i < numTexts; i++) {
|
|
211
|
+
const start = i * dim;
|
|
212
|
+
const vec = new Float32Array(dim);
|
|
213
|
+
vec.set(normalized.data.subarray(start, start + dim));
|
|
214
|
+
vectors.push(vec);
|
|
156
215
|
}
|
|
157
216
|
|
|
158
217
|
post({ type: "result", id: req.id, vectors });
|
|
159
218
|
} catch (err) {
|
|
160
|
-
// Don't re-post init-error — it was already sent in
|
|
219
|
+
// Don't re-post init-error — it was already sent in ensurePipeline().
|
|
161
220
|
if (!initFailed) {
|
|
162
|
-
const
|
|
221
|
+
const raw = err instanceof Error ? err.message : String(err);
|
|
222
|
+
const msg = isOomError(raw)
|
|
223
|
+
? `ONNX runtime out of memory (batch=${req.texts.length}, ` +
|
|
224
|
+
`longest≈${Math.max(...req.texts.map((t) => t.length))} chars). ` +
|
|
225
|
+
`Try reducing batch size. Raw: ${raw}`
|
|
226
|
+
: raw;
|
|
163
227
|
post({ type: "error", id: req.id, error: msg });
|
|
164
228
|
}
|
|
165
229
|
}
|