@loreai/core 0.17.0 → 0.18.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (235)
  1. package/dist/bun/agents-file.d.ts +4 -0
  2. package/dist/bun/agents-file.d.ts.map +1 -1
  3. package/dist/bun/config.d.ts +2 -0
  4. package/dist/bun/config.d.ts.map +1 -1
  5. package/dist/bun/curator.d.ts +45 -0
  6. package/dist/bun/curator.d.ts.map +1 -1
  7. package/dist/bun/data-dir.d.ts +18 -0
  8. package/dist/bun/data-dir.d.ts.map +1 -0
  9. package/dist/bun/db.d.ts +12 -0
  10. package/dist/bun/db.d.ts.map +1 -1
  11. package/dist/bun/distillation.d.ts.map +1 -1
  12. package/dist/bun/embedding-vendor.d.ts +22 -38
  13. package/dist/bun/embedding-vendor.d.ts.map +1 -1
  14. package/dist/bun/embedding-worker-types.d.ts +17 -12
  15. package/dist/bun/embedding-worker-types.d.ts.map +1 -1
  16. package/dist/bun/embedding-worker.d.ts +9 -2
  17. package/dist/bun/embedding-worker.d.ts.map +1 -1
  18. package/dist/bun/embedding-worker.js +38864 -33
  19. package/dist/bun/embedding-worker.js.map +4 -4
  20. package/dist/bun/embedding.d.ts +30 -22
  21. package/dist/bun/embedding.d.ts.map +1 -1
  22. package/dist/bun/gradient.d.ts +8 -1
  23. package/dist/bun/gradient.d.ts.map +1 -1
  24. package/dist/bun/import/detect.d.ts +14 -0
  25. package/dist/bun/import/detect.d.ts.map +1 -0
  26. package/dist/bun/import/extract.d.ts +43 -0
  27. package/dist/bun/import/extract.d.ts.map +1 -0
  28. package/dist/bun/import/history.d.ts +40 -0
  29. package/dist/bun/import/history.d.ts.map +1 -0
  30. package/dist/bun/import/index.d.ts +17 -0
  31. package/dist/bun/import/index.d.ts.map +1 -0
  32. package/dist/bun/import/providers/aider.d.ts +2 -0
  33. package/dist/bun/import/providers/aider.d.ts.map +1 -0
  34. package/dist/bun/import/providers/claude-code.d.ts +2 -0
  35. package/dist/bun/import/providers/claude-code.d.ts.map +1 -0
  36. package/dist/bun/import/providers/cline.d.ts +2 -0
  37. package/dist/bun/import/providers/cline.d.ts.map +1 -0
  38. package/dist/bun/import/providers/codex.d.ts +2 -0
  39. package/dist/bun/import/providers/codex.d.ts.map +1 -0
  40. package/dist/bun/import/providers/continue.d.ts +2 -0
  41. package/dist/bun/import/providers/continue.d.ts.map +1 -0
  42. package/dist/bun/import/providers/index.d.ts +19 -0
  43. package/dist/bun/import/providers/index.d.ts.map +1 -0
  44. package/dist/bun/import/providers/opencode.d.ts +2 -0
  45. package/dist/bun/import/providers/opencode.d.ts.map +1 -0
  46. package/dist/bun/import/providers/pi.d.ts +2 -0
  47. package/dist/bun/import/providers/pi.d.ts.map +1 -0
  48. package/dist/bun/import/types.d.ts +82 -0
  49. package/dist/bun/import/types.d.ts.map +1 -0
  50. package/dist/bun/index.d.ts +4 -1
  51. package/dist/bun/index.d.ts.map +1 -1
  52. package/dist/bun/index.js +2217 -224
  53. package/dist/bun/index.js.map +4 -4
  54. package/dist/bun/instruction-detect.d.ts +66 -0
  55. package/dist/bun/instruction-detect.d.ts.map +1 -0
  56. package/dist/bun/log.d.ts +9 -0
  57. package/dist/bun/log.d.ts.map +1 -1
  58. package/dist/bun/ltm.d.ts +40 -0
  59. package/dist/bun/ltm.d.ts.map +1 -1
  60. package/dist/bun/pattern-extract.d.ts +7 -0
  61. package/dist/bun/pattern-extract.d.ts.map +1 -1
  62. package/dist/bun/prompt.d.ts +1 -1
  63. package/dist/bun/prompt.d.ts.map +1 -1
  64. package/dist/bun/recall.d.ts.map +1 -1
  65. package/dist/bun/search.d.ts +5 -3
  66. package/dist/bun/search.d.ts.map +1 -1
  67. package/dist/bun/temporal.d.ts.map +1 -1
  68. package/dist/bun/types.d.ts +1 -1
  69. package/dist/node/agents-file.d.ts +4 -0
  70. package/dist/node/agents-file.d.ts.map +1 -1
  71. package/dist/node/config.d.ts +2 -0
  72. package/dist/node/config.d.ts.map +1 -1
  73. package/dist/node/curator.d.ts +45 -0
  74. package/dist/node/curator.d.ts.map +1 -1
  75. package/dist/node/data-dir.d.ts +18 -0
  76. package/dist/node/data-dir.d.ts.map +1 -0
  77. package/dist/node/db.d.ts +12 -0
  78. package/dist/node/db.d.ts.map +1 -1
  79. package/dist/node/distillation.d.ts.map +1 -1
  80. package/dist/node/embedding-vendor.d.ts +22 -38
  81. package/dist/node/embedding-vendor.d.ts.map +1 -1
  82. package/dist/node/embedding-worker-types.d.ts +17 -12
  83. package/dist/node/embedding-worker-types.d.ts.map +1 -1
  84. package/dist/node/embedding-worker.d.ts +9 -2
  85. package/dist/node/embedding-worker.d.ts.map +1 -1
  86. package/dist/node/embedding-worker.js +38864 -33
  87. package/dist/node/embedding-worker.js.map +4 -4
  88. package/dist/node/embedding.d.ts +30 -22
  89. package/dist/node/embedding.d.ts.map +1 -1
  90. package/dist/node/gradient.d.ts +8 -1
  91. package/dist/node/gradient.d.ts.map +1 -1
  92. package/dist/node/import/detect.d.ts +14 -0
  93. package/dist/node/import/detect.d.ts.map +1 -0
  94. package/dist/node/import/extract.d.ts +43 -0
  95. package/dist/node/import/extract.d.ts.map +1 -0
  96. package/dist/node/import/history.d.ts +40 -0
  97. package/dist/node/import/history.d.ts.map +1 -0
  98. package/dist/node/import/index.d.ts +17 -0
  99. package/dist/node/import/index.d.ts.map +1 -0
  100. package/dist/node/import/providers/aider.d.ts +2 -0
  101. package/dist/node/import/providers/aider.d.ts.map +1 -0
  102. package/dist/node/import/providers/claude-code.d.ts +2 -0
  103. package/dist/node/import/providers/claude-code.d.ts.map +1 -0
  104. package/dist/node/import/providers/cline.d.ts +2 -0
  105. package/dist/node/import/providers/cline.d.ts.map +1 -0
  106. package/dist/node/import/providers/codex.d.ts +2 -0
  107. package/dist/node/import/providers/codex.d.ts.map +1 -0
  108. package/dist/node/import/providers/continue.d.ts +2 -0
  109. package/dist/node/import/providers/continue.d.ts.map +1 -0
  110. package/dist/node/import/providers/index.d.ts +19 -0
  111. package/dist/node/import/providers/index.d.ts.map +1 -0
  112. package/dist/node/import/providers/opencode.d.ts +2 -0
  113. package/dist/node/import/providers/opencode.d.ts.map +1 -0
  114. package/dist/node/import/providers/pi.d.ts +2 -0
  115. package/dist/node/import/providers/pi.d.ts.map +1 -0
  116. package/dist/node/import/types.d.ts +82 -0
  117. package/dist/node/import/types.d.ts.map +1 -0
  118. package/dist/node/index.d.ts +4 -1
  119. package/dist/node/index.d.ts.map +1 -1
  120. package/dist/node/index.js +2217 -224
  121. package/dist/node/index.js.map +4 -4
  122. package/dist/node/instruction-detect.d.ts +66 -0
  123. package/dist/node/instruction-detect.d.ts.map +1 -0
  124. package/dist/node/log.d.ts +9 -0
  125. package/dist/node/log.d.ts.map +1 -1
  126. package/dist/node/ltm.d.ts +40 -0
  127. package/dist/node/ltm.d.ts.map +1 -1
  128. package/dist/node/pattern-extract.d.ts +7 -0
  129. package/dist/node/pattern-extract.d.ts.map +1 -1
  130. package/dist/node/prompt.d.ts +1 -1
  131. package/dist/node/prompt.d.ts.map +1 -1
  132. package/dist/node/recall.d.ts.map +1 -1
  133. package/dist/node/search.d.ts +5 -3
  134. package/dist/node/search.d.ts.map +1 -1
  135. package/dist/node/temporal.d.ts.map +1 -1
  136. package/dist/node/types.d.ts +1 -1
  137. package/dist/types/agents-file.d.ts +4 -0
  138. package/dist/types/agents-file.d.ts.map +1 -1
  139. package/dist/types/config.d.ts +2 -0
  140. package/dist/types/config.d.ts.map +1 -1
  141. package/dist/types/curator.d.ts +45 -0
  142. package/dist/types/curator.d.ts.map +1 -1
  143. package/dist/types/data-dir.d.ts +18 -0
  144. package/dist/types/data-dir.d.ts.map +1 -0
  145. package/dist/types/db.d.ts +12 -0
  146. package/dist/types/db.d.ts.map +1 -1
  147. package/dist/types/distillation.d.ts.map +1 -1
  148. package/dist/types/embedding-vendor.d.ts +22 -38
  149. package/dist/types/embedding-vendor.d.ts.map +1 -1
  150. package/dist/types/embedding-worker-types.d.ts +17 -12
  151. package/dist/types/embedding-worker-types.d.ts.map +1 -1
  152. package/dist/types/embedding-worker.d.ts +9 -2
  153. package/dist/types/embedding-worker.d.ts.map +1 -1
  154. package/dist/types/embedding.d.ts +30 -22
  155. package/dist/types/embedding.d.ts.map +1 -1
  156. package/dist/types/gradient.d.ts +8 -1
  157. package/dist/types/gradient.d.ts.map +1 -1
  158. package/dist/types/import/detect.d.ts +14 -0
  159. package/dist/types/import/detect.d.ts.map +1 -0
  160. package/dist/types/import/extract.d.ts +43 -0
  161. package/dist/types/import/extract.d.ts.map +1 -0
  162. package/dist/types/import/history.d.ts +40 -0
  163. package/dist/types/import/history.d.ts.map +1 -0
  164. package/dist/types/import/index.d.ts +17 -0
  165. package/dist/types/import/index.d.ts.map +1 -0
  166. package/dist/types/import/providers/aider.d.ts +2 -0
  167. package/dist/types/import/providers/aider.d.ts.map +1 -0
  168. package/dist/types/import/providers/claude-code.d.ts +2 -0
  169. package/dist/types/import/providers/claude-code.d.ts.map +1 -0
  170. package/dist/types/import/providers/cline.d.ts +2 -0
  171. package/dist/types/import/providers/cline.d.ts.map +1 -0
  172. package/dist/types/import/providers/codex.d.ts +2 -0
  173. package/dist/types/import/providers/codex.d.ts.map +1 -0
  174. package/dist/types/import/providers/continue.d.ts +2 -0
  175. package/dist/types/import/providers/continue.d.ts.map +1 -0
  176. package/dist/types/import/providers/index.d.ts +19 -0
  177. package/dist/types/import/providers/index.d.ts.map +1 -0
  178. package/dist/types/import/providers/opencode.d.ts +2 -0
  179. package/dist/types/import/providers/opencode.d.ts.map +1 -0
  180. package/dist/types/import/providers/pi.d.ts +2 -0
  181. package/dist/types/import/providers/pi.d.ts.map +1 -0
  182. package/dist/types/import/types.d.ts +82 -0
  183. package/dist/types/import/types.d.ts.map +1 -0
  184. package/dist/types/index.d.ts +4 -1
  185. package/dist/types/index.d.ts.map +1 -1
  186. package/dist/types/instruction-detect.d.ts +66 -0
  187. package/dist/types/instruction-detect.d.ts.map +1 -0
  188. package/dist/types/log.d.ts +9 -0
  189. package/dist/types/log.d.ts.map +1 -1
  190. package/dist/types/ltm.d.ts +40 -0
  191. package/dist/types/ltm.d.ts.map +1 -1
  192. package/dist/types/pattern-extract.d.ts +7 -0
  193. package/dist/types/pattern-extract.d.ts.map +1 -1
  194. package/dist/types/prompt.d.ts +1 -1
  195. package/dist/types/prompt.d.ts.map +1 -1
  196. package/dist/types/recall.d.ts.map +1 -1
  197. package/dist/types/search.d.ts +5 -3
  198. package/dist/types/search.d.ts.map +1 -1
  199. package/dist/types/temporal.d.ts.map +1 -1
  200. package/dist/types/types.d.ts +1 -1
  201. package/package.json +2 -4
  202. package/src/agents-file.ts +41 -13
  203. package/src/config.ts +31 -18
  204. package/src/curator.ts +111 -75
  205. package/src/data-dir.ts +76 -0
  206. package/src/db.ts +110 -11
  207. package/src/distillation.ts +10 -2
  208. package/src/embedding-vendor.ts +23 -40
  209. package/src/embedding-worker-types.ts +19 -11
  210. package/src/embedding-worker.ts +111 -47
  211. package/src/embedding.ts +196 -171
  212. package/src/gradient.ts +9 -1
  213. package/src/import/detect.ts +37 -0
  214. package/src/import/extract.ts +137 -0
  215. package/src/import/history.ts +99 -0
  216. package/src/import/index.ts +45 -0
  217. package/src/import/providers/aider.ts +207 -0
  218. package/src/import/providers/claude-code.ts +339 -0
  219. package/src/import/providers/cline.ts +324 -0
  220. package/src/import/providers/codex.ts +369 -0
  221. package/src/import/providers/continue.ts +304 -0
  222. package/src/import/providers/index.ts +32 -0
  223. package/src/import/providers/opencode.ts +272 -0
  224. package/src/import/providers/pi.ts +332 -0
  225. package/src/import/types.ts +91 -0
  226. package/src/index.ts +5 -0
  227. package/src/instruction-detect.ts +275 -0
  228. package/src/log.ts +91 -3
  229. package/src/ltm.ts +316 -3
  230. package/src/pattern-extract.ts +41 -0
  231. package/src/prompt.ts +7 -1
  232. package/src/recall.ts +43 -5
  233. package/src/search.ts +7 -5
  234. package/src/temporal.ts +8 -6
  235. package/src/types.ts +1 -1
@@ -1,10 +1,11 @@
1
1
  /**
2
2
  * Shared message types for the embedding worker thread.
3
3
  *
4
- * The embedding worker (`embedding-worker.ts`) runs fastembed/ONNX inference
5
- * in a separate `node:worker_threads` Worker so the main thread's event loop
6
- * stays free during inference. This file defines the message protocol between
7
- * the main thread (`LocalProvider` in `embedding.ts`) and the worker.
4
+ * The embedding worker (`embedding-worker.ts`) runs ONNX inference via
5
+ * `@huggingface/transformers` in a separate `node:worker_threads` Worker
6
+ * so the main thread's event loop stays free during inference. This file
7
+ * defines the message protocol between the main thread (`LocalProvider`
8
+ * in `embedding.ts`) and the worker.
8
9
  *
9
10
  * Imported by both sides — keep this file free of runtime dependencies.
10
11
  */
@@ -18,7 +19,7 @@ export interface EmbedRequest {
18
19
  type: "embed";
19
20
  /** Monotonic request ID for correlating responses. */
20
21
  id: number;
21
- /** Texts to embed. */
22
+ /** Texts to embed (already prefixed with task instruction by the caller). */
22
23
  texts: string[];
23
24
  /** "document" for storage, "query" for search. */
24
25
  inputType: "document" | "query";
@@ -72,11 +73,18 @@ export type WorkerOutbound = EmbedResult | EmbedError | InitError;
72
73
 
73
74
  /** Passed to the worker via `workerData` at construction time. */
74
75
  export interface WorkerInitData {
75
- /** fastembed model name, e.g. "BGESmallENV15". */
76
- modelName: string;
76
+ /** HuggingFace model ID, e.g. "nomic-ai/nomic-embed-text-v1.5". */
77
+ modelId: string;
78
+ /** Target embedding dimensions. For Nomic v1.5 with Matryoshka,
79
+ * this controls how many leading dims to keep (64–768). */
80
+ dimensions: number;
77
81
  /** Vendored model info for binary mode, or null for npm mode.
78
- * Mirrors the `globalThis.__LORE_VENDOR_MODEL__` registration which
79
- * only exists on the main thread — passed explicitly so the worker
80
- * can hand it to `FlagEmbedding.init()`. */
81
- vendorModel: { modelAbsoluteDirPath: string; modelName: string } | null;
82
+ * In binary mode, model files are pre-extracted to a local dir
83
+ * and we point transformers.js at that path instead of downloading
84
+ * from HuggingFace Hub. */
85
+ vendorModel: {
86
+ /** Absolute path to the dir containing model files
87
+ * (config.json, tokenizer.json, onnx/model_quantized.onnx, …). */
88
+ localModelPath: string;
89
+ } | null;
82
90
  }
@@ -1,8 +1,9 @@
1
1
  /**
2
- * Embedding worker thread — runs fastembed/ONNX inference off the main thread.
2
+ * Embedding worker thread — runs ONNX inference via @huggingface/transformers
3
+ * off the main thread.
3
4
  *
4
5
  * This file is the entry point for a `node:worker_threads` Worker spawned by
5
- * `LocalProvider` in `embedding.ts`. It owns the `FlagEmbedding` ONNX model
6
+ * `LocalProvider` in `embedding.ts`. It owns the transformers.js pipeline
6
7
  * and processes embed requests sequentially from a priority queue. Moving
7
8
  * inference here keeps the main thread's event loop free — HTTP requests,
8
9
  * SSE streams, and session APIs are no longer blocked during embedding.
@@ -10,6 +11,12 @@
10
11
  * Communication uses `parentPort` message passing with structured clone.
11
12
  * Float32Array vectors are sent back directly (Bun preserves identity).
12
13
  *
14
+ * The worker applies Nomic's recommended post-processing:
15
+ * 1. Mean pooling (via pipeline option)
16
+ * 2. Layer normalization
17
+ * 3. Matryoshka dimension truncation (if dimensions < full 768)
18
+ * 4. L2 normalization
19
+ *
13
20
  * @see embedding-worker-types.ts for the message protocol.
14
21
  */
15
22
 
@@ -25,53 +32,72 @@ import type {
25
32
  // workerData
26
33
  // ---------------------------------------------------------------------------
27
34
 
28
- const { modelName, vendorModel } = workerData as WorkerInitData;
35
+ const { modelId, dimensions, vendorModel } = workerData as WorkerInitData;
29
36
 
30
37
  // ---------------------------------------------------------------------------
31
38
  // Model lifecycle — lazy init on first embed request
32
39
  // ---------------------------------------------------------------------------
33
40
 
34
- /** The fastembed model, typed to the subset of methods we use. */
35
- type FastembedModel = {
36
- queryEmbed(text: string): Promise<number[]>;
37
- passageEmbed(texts: string[], batchSize?: number): AsyncGenerator<number[][]>;
41
+ /** The transformers.js pipeline instance, typed loosely since the exact
42
+ * return type depends on the pipeline task. */
43
+ type FeatureExtractionPipeline = {
44
+ (texts: string[], options?: Record<string, unknown>): Promise<{
45
+ dims: number[];
46
+ data: Float32Array;
47
+ tolist(): number[][];
48
+ }>;
49
+ dispose?(): Promise<void>;
38
50
  };
39
51
 
40
- let model: FastembedModel | null = null;
52
+ let pipe: FeatureExtractionPipeline | null = null;
53
+ let layerNormFn: ((input: unknown, normalized_shape: number[]) => {
54
+ dims: number[];
55
+ data: Float32Array;
56
+ normalize(p: number, dim: number): { tolist(): number[][]; data: Float32Array; dims: number[] };
57
+ slice(...args: unknown[]): { normalize(p: number, dim: number): { tolist(): number[][]; data: Float32Array; dims: number[] } };
58
+ }) | null = null;
41
59
  let initPromise: Promise<void> | null = null;
42
60
  let initFailed = false;
43
61
  let initError: string | null = null;
44
62
 
45
63
  /**
46
- * Ensure the fastembed model is loaded. Lazy — first call triggers the
47
- * dynamic import + FlagEmbedding.init(), subsequent calls return immediately.
64
+ * Ensure the transformers.js pipeline is loaded. Lazy — first call triggers
65
+ * the dynamic import + pipeline creation, subsequent calls return immediately.
48
66
  * On failure, marks the worker as permanently broken and posts `init-error`.
49
67
  */
50
- async function ensureModel(): Promise<FastembedModel> {
51
- if (model) return model;
52
- if (initFailed) throw new Error(initError ?? "fastembed init previously failed");
68
+ async function ensurePipeline(): Promise<void> {
69
+ if (pipe) return;
70
+ if (initFailed) throw new Error(initError ?? "pipeline init previously failed");
53
71
 
54
72
  if (!initPromise) {
55
73
  initPromise = (async () => {
56
- const fastembed = await import("fastembed");
57
- const { EmbeddingModel, FlagEmbedding } = fastembed;
74
+ const transformers = await import("@huggingface/transformers");
75
+ const { pipeline, env, layer_norm } = transformers;
76
+
77
+ // Configure transformers.js environment
78
+ env.allowRemoteModels = !vendorModel;
79
+ env.allowLocalModels = true;
58
80
 
59
- let m: unknown;
60
81
  if (vendorModel) {
61
- // Binary mode: use pre-extracted model files.
62
- m = await FlagEmbedding.init({
63
- model: EmbeddingModel.CUSTOM,
64
- modelAbsoluteDirPath: vendorModel.modelAbsoluteDirPath,
65
- modelName: vendorModel.modelName,
66
- });
67
- } else {
68
- // npm mode: resolve model name against fastembed's enum.
69
- const enumValue = (EmbeddingModel as Record<string, string>)[modelName];
70
- m = await FlagEmbedding.init({
71
- model: enumValue ?? modelName,
72
- } as { model: typeof EmbeddingModel.BGESmallENV15 });
82
+ // Binary mode: point at pre-extracted model files on disk.
83
+ env.localModelPath = vendorModel.localModelPath;
84
+ env.allowRemoteModels = false;
73
85
  }
74
- model = m as FastembedModel;
86
+
87
+ // Create feature-extraction pipeline with ONNX quantized model.
88
+ // dtype: 'q8' selects the INT8 quantized ONNX variant (model_quantized.onnx)
89
+ // which is ~137MB for Nomic v1.5 vs ~547MB for the full FP32 model.
90
+ //
91
+ // device: "cpu" — in npm mode, transformers.js uses onnxruntime-node
92
+ // (native CPU). In the compiled binary, onnxruntime-node is redirected
93
+ // to onnxruntime-web by the build plugin, which handles "cpu" via its
94
+ // WASM+SIMD backend (API-compatible, ~2x faster on batch workloads).
95
+ pipe = (await pipeline("feature-extraction", modelId, {
96
+ dtype: "q8",
97
+ device: "cpu",
98
+ })) as unknown as FeatureExtractionPipeline;
99
+
100
+ layerNormFn = layer_norm as typeof layerNormFn;
75
101
  })().catch((err) => {
76
102
  initFailed = true;
77
103
  initError = err instanceof Error ? err.message : String(err);
@@ -83,8 +109,7 @@ async function ensureModel(): Promise<FastembedModel> {
83
109
  }
84
110
 
85
111
  await initPromise;
86
- if (!model) throw new Error("model init completed but model is null");
87
- return model;
112
+ if (!pipe) throw new Error("pipeline init completed but pipe is null");
88
113
  }
89
114
 
90
115
  // ---------------------------------------------------------------------------
@@ -135,31 +160,70 @@ async function drain(): Promise<void> {
135
160
  // Embed processing
136
161
  // ---------------------------------------------------------------------------
137
162
 
163
+ /**
164
+ * Detect ONNX runtime out-of-memory errors. The runtime throws opaque
165
+ * numeric error codes (e.g. "287180544") for allocation failures rather
166
+ * than a readable message. We match on large numeric-only strings and
167
+ * known OOM patterns.
168
+ */
169
+ function isOomError(msg: string): boolean {
170
+ // Purely numeric messages of six or more digits are treated as ORT allocation failures
171
+ if (/^\d{6,}$/.test(msg)) return true;
172
+ // Explicit OOM messages from various ONNX backends
173
+ if (/out.of.memory|alloc.*fail|oom/i.test(msg)) return true;
174
+ return false;
175
+ }
176
+
138
177
  async function processEmbed(req: EmbedRequest): Promise<void> {
139
178
  try {
140
- const m = await ensureModel();
179
+ await ensurePipeline();
180
+
181
+ // Run feature extraction with mean pooling.
182
+ // truncation: true caps each text at the model's max length (8192 tokens
183
+ // for Nomic v1.5), preventing oversized inputs from causing OOM.
184
+ const output = await pipe!(req.texts, { pooling: "mean", truncation: true });
185
+
186
+ // Post-process following Nomic's recipe:
187
+ // 1. Layer normalization over the full hidden dimension
188
+ // 2. Matryoshka truncation to target dimensions
189
+ // 3. L2 normalization
190
+ const fullDim = output.dims[output.dims.length - 1]; // 768 for Nomic v1.5
191
+ const truncate = dimensions < fullDim;
192
+
193
+ let normalized: { tolist(): number[][]; data: Float32Array; dims: number[] };
194
+ if (truncate) {
195
+ // layer_norm → slice → L2 normalize
196
+ normalized = layerNormFn!(output, [fullDim])
197
+ .slice(null, [0, dimensions])
198
+ .normalize(2, -1);
199
+ } else {
200
+ // layer_norm → L2 normalize (no truncation)
201
+ normalized = layerNormFn!(output, [fullDim])
202
+ .normalize(2, -1);
203
+ }
141
204
 
142
- let vectors: Float32Array[];
205
+ // Extract per-text vectors from the batched tensor.
206
+ const numTexts = req.texts.length;
207
+ const vectors: Float32Array[] = [];
208
+ const dim = truncate ? dimensions : fullDim;
143
209
 
144
- if (req.inputType === "query" && req.texts.length === 1) {
145
- // Single query — use queryEmbed for better quality.
146
- const vec = await m.queryEmbed(req.texts[0]);
147
- vectors = [new Float32Array(vec)];
148
- } else {
149
- // Batch document embedding via async generator.
150
- vectors = [];
151
- for await (const batch of m.passageEmbed(req.texts)) {
152
- for (const vec of batch) {
153
- vectors.push(new Float32Array(vec));
154
- }
155
- }
210
+ for (let i = 0; i < numTexts; i++) {
211
+ const start = i * dim;
212
+ const vec = new Float32Array(dim);
213
+ vec.set(normalized.data.subarray(start, start + dim));
214
+ vectors.push(vec);
156
215
  }
157
216
 
158
217
  post({ type: "result", id: req.id, vectors });
159
218
  } catch (err) {
160
- // Don't re-post init-error — it was already sent in ensureModel().
219
+ // Don't re-post init-error — it was already sent in ensurePipeline().
161
220
  if (!initFailed) {
162
- const msg = err instanceof Error ? err.message : String(err);
221
+ const raw = err instanceof Error ? err.message : String(err);
222
+ const msg = isOomError(raw)
223
+ ? `ONNX runtime out of memory (batch=${req.texts.length}, ` +
224
+ `longest≈${Math.max(...req.texts.map((t) => t.length))} chars). ` +
225
+ `Try reducing batch size. Raw: ${raw}`
226
+ : raw;
163
227
  post({ type: "error", id: req.id, error: msg });
164
228
  }
165
229
  }