@loreai/core 0.17.1 → 0.19.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (248)
  1. package/dist/bun/agents-file.d.ts +4 -0
  2. package/dist/bun/agents-file.d.ts.map +1 -1
  3. package/dist/bun/config.d.ts +2 -0
  4. package/dist/bun/config.d.ts.map +1 -1
  5. package/dist/bun/curator.d.ts +45 -0
  6. package/dist/bun/curator.d.ts.map +1 -1
  7. package/dist/bun/data-dir.d.ts +18 -0
  8. package/dist/bun/data-dir.d.ts.map +1 -0
  9. package/dist/bun/db.d.ts +85 -0
  10. package/dist/bun/db.d.ts.map +1 -1
  11. package/dist/bun/distillation.d.ts +2 -13
  12. package/dist/bun/distillation.d.ts.map +1 -1
  13. package/dist/bun/embedding-vendor.d.ts +22 -38
  14. package/dist/bun/embedding-vendor.d.ts.map +1 -1
  15. package/dist/bun/embedding-worker-types.d.ts +17 -12
  16. package/dist/bun/embedding-worker-types.d.ts.map +1 -1
  17. package/dist/bun/embedding-worker.d.ts +9 -2
  18. package/dist/bun/embedding-worker.d.ts.map +1 -1
  19. package/dist/bun/embedding-worker.js +38864 -33
  20. package/dist/bun/embedding-worker.js.map +4 -4
  21. package/dist/bun/embedding.d.ts +35 -23
  22. package/dist/bun/embedding.d.ts.map +1 -1
  23. package/dist/bun/gradient.d.ts +17 -1
  24. package/dist/bun/gradient.d.ts.map +1 -1
  25. package/dist/bun/import/detect.d.ts +14 -0
  26. package/dist/bun/import/detect.d.ts.map +1 -0
  27. package/dist/bun/import/extract.d.ts +43 -0
  28. package/dist/bun/import/extract.d.ts.map +1 -0
  29. package/dist/bun/import/history.d.ts +40 -0
  30. package/dist/bun/import/history.d.ts.map +1 -0
  31. package/dist/bun/import/index.d.ts +17 -0
  32. package/dist/bun/import/index.d.ts.map +1 -0
  33. package/dist/bun/import/providers/aider.d.ts +2 -0
  34. package/dist/bun/import/providers/aider.d.ts.map +1 -0
  35. package/dist/bun/import/providers/claude-code.d.ts +2 -0
  36. package/dist/bun/import/providers/claude-code.d.ts.map +1 -0
  37. package/dist/bun/import/providers/cline.d.ts +2 -0
  38. package/dist/bun/import/providers/cline.d.ts.map +1 -0
  39. package/dist/bun/import/providers/codex.d.ts +2 -0
  40. package/dist/bun/import/providers/codex.d.ts.map +1 -0
  41. package/dist/bun/import/providers/continue.d.ts +2 -0
  42. package/dist/bun/import/providers/continue.d.ts.map +1 -0
  43. package/dist/bun/import/providers/index.d.ts +19 -0
  44. package/dist/bun/import/providers/index.d.ts.map +1 -0
  45. package/dist/bun/import/providers/opencode.d.ts +2 -0
  46. package/dist/bun/import/providers/opencode.d.ts.map +1 -0
  47. package/dist/bun/import/providers/pi.d.ts +2 -0
  48. package/dist/bun/import/providers/pi.d.ts.map +1 -0
  49. package/dist/bun/import/types.d.ts +82 -0
  50. package/dist/bun/import/types.d.ts.map +1 -0
  51. package/dist/bun/index.d.ts +5 -2
  52. package/dist/bun/index.d.ts.map +1 -1
  53. package/dist/bun/index.js +3150 -439
  54. package/dist/bun/index.js.map +4 -4
  55. package/dist/bun/instruction-detect.d.ts +66 -0
  56. package/dist/bun/instruction-detect.d.ts.map +1 -0
  57. package/dist/bun/log.d.ts +9 -0
  58. package/dist/bun/log.d.ts.map +1 -1
  59. package/dist/bun/ltm.d.ts +139 -5
  60. package/dist/bun/ltm.d.ts.map +1 -1
  61. package/dist/bun/pattern-extract.d.ts +7 -0
  62. package/dist/bun/pattern-extract.d.ts.map +1 -1
  63. package/dist/bun/prompt.d.ts +1 -1
  64. package/dist/bun/prompt.d.ts.map +1 -1
  65. package/dist/bun/recall.d.ts.map +1 -1
  66. package/dist/bun/search.d.ts +5 -3
  67. package/dist/bun/search.d.ts.map +1 -1
  68. package/dist/bun/session-limiter.d.ts +26 -0
  69. package/dist/bun/session-limiter.d.ts.map +1 -0
  70. package/dist/bun/temporal.d.ts +2 -0
  71. package/dist/bun/temporal.d.ts.map +1 -1
  72. package/dist/bun/types.d.ts +1 -1
  73. package/dist/node/agents-file.d.ts +4 -0
  74. package/dist/node/agents-file.d.ts.map +1 -1
  75. package/dist/node/config.d.ts +2 -0
  76. package/dist/node/config.d.ts.map +1 -1
  77. package/dist/node/curator.d.ts +45 -0
  78. package/dist/node/curator.d.ts.map +1 -1
  79. package/dist/node/data-dir.d.ts +18 -0
  80. package/dist/node/data-dir.d.ts.map +1 -0
  81. package/dist/node/db.d.ts +85 -0
  82. package/dist/node/db.d.ts.map +1 -1
  83. package/dist/node/distillation.d.ts +2 -13
  84. package/dist/node/distillation.d.ts.map +1 -1
  85. package/dist/node/embedding-vendor.d.ts +22 -38
  86. package/dist/node/embedding-vendor.d.ts.map +1 -1
  87. package/dist/node/embedding-worker-types.d.ts +17 -12
  88. package/dist/node/embedding-worker-types.d.ts.map +1 -1
  89. package/dist/node/embedding-worker.d.ts +9 -2
  90. package/dist/node/embedding-worker.d.ts.map +1 -1
  91. package/dist/node/embedding-worker.js +38864 -33
  92. package/dist/node/embedding-worker.js.map +4 -4
  93. package/dist/node/embedding.d.ts +35 -23
  94. package/dist/node/embedding.d.ts.map +1 -1
  95. package/dist/node/gradient.d.ts +17 -1
  96. package/dist/node/gradient.d.ts.map +1 -1
  97. package/dist/node/import/detect.d.ts +14 -0
  98. package/dist/node/import/detect.d.ts.map +1 -0
  99. package/dist/node/import/extract.d.ts +43 -0
  100. package/dist/node/import/extract.d.ts.map +1 -0
  101. package/dist/node/import/history.d.ts +40 -0
  102. package/dist/node/import/history.d.ts.map +1 -0
  103. package/dist/node/import/index.d.ts +17 -0
  104. package/dist/node/import/index.d.ts.map +1 -0
  105. package/dist/node/import/providers/aider.d.ts +2 -0
  106. package/dist/node/import/providers/aider.d.ts.map +1 -0
  107. package/dist/node/import/providers/claude-code.d.ts +2 -0
  108. package/dist/node/import/providers/claude-code.d.ts.map +1 -0
  109. package/dist/node/import/providers/cline.d.ts +2 -0
  110. package/dist/node/import/providers/cline.d.ts.map +1 -0
  111. package/dist/node/import/providers/codex.d.ts +2 -0
  112. package/dist/node/import/providers/codex.d.ts.map +1 -0
  113. package/dist/node/import/providers/continue.d.ts +2 -0
  114. package/dist/node/import/providers/continue.d.ts.map +1 -0
  115. package/dist/node/import/providers/index.d.ts +19 -0
  116. package/dist/node/import/providers/index.d.ts.map +1 -0
  117. package/dist/node/import/providers/opencode.d.ts +2 -0
  118. package/dist/node/import/providers/opencode.d.ts.map +1 -0
  119. package/dist/node/import/providers/pi.d.ts +2 -0
  120. package/dist/node/import/providers/pi.d.ts.map +1 -0
  121. package/dist/node/import/types.d.ts +82 -0
  122. package/dist/node/import/types.d.ts.map +1 -0
  123. package/dist/node/index.d.ts +5 -2
  124. package/dist/node/index.d.ts.map +1 -1
  125. package/dist/node/index.js +3150 -439
  126. package/dist/node/index.js.map +4 -4
  127. package/dist/node/instruction-detect.d.ts +66 -0
  128. package/dist/node/instruction-detect.d.ts.map +1 -0
  129. package/dist/node/log.d.ts +9 -0
  130. package/dist/node/log.d.ts.map +1 -1
  131. package/dist/node/ltm.d.ts +139 -5
  132. package/dist/node/ltm.d.ts.map +1 -1
  133. package/dist/node/pattern-extract.d.ts +7 -0
  134. package/dist/node/pattern-extract.d.ts.map +1 -1
  135. package/dist/node/prompt.d.ts +1 -1
  136. package/dist/node/prompt.d.ts.map +1 -1
  137. package/dist/node/recall.d.ts.map +1 -1
  138. package/dist/node/search.d.ts +5 -3
  139. package/dist/node/search.d.ts.map +1 -1
  140. package/dist/node/session-limiter.d.ts +26 -0
  141. package/dist/node/session-limiter.d.ts.map +1 -0
  142. package/dist/node/temporal.d.ts +2 -0
  143. package/dist/node/temporal.d.ts.map +1 -1
  144. package/dist/node/types.d.ts +1 -1
  145. package/dist/types/agents-file.d.ts +4 -0
  146. package/dist/types/agents-file.d.ts.map +1 -1
  147. package/dist/types/config.d.ts +2 -0
  148. package/dist/types/config.d.ts.map +1 -1
  149. package/dist/types/curator.d.ts +45 -0
  150. package/dist/types/curator.d.ts.map +1 -1
  151. package/dist/types/data-dir.d.ts +18 -0
  152. package/dist/types/data-dir.d.ts.map +1 -0
  153. package/dist/types/db.d.ts +85 -0
  154. package/dist/types/db.d.ts.map +1 -1
  155. package/dist/types/distillation.d.ts +2 -13
  156. package/dist/types/distillation.d.ts.map +1 -1
  157. package/dist/types/embedding-vendor.d.ts +22 -38
  158. package/dist/types/embedding-vendor.d.ts.map +1 -1
  159. package/dist/types/embedding-worker-types.d.ts +17 -12
  160. package/dist/types/embedding-worker-types.d.ts.map +1 -1
  161. package/dist/types/embedding-worker.d.ts +9 -2
  162. package/dist/types/embedding-worker.d.ts.map +1 -1
  163. package/dist/types/embedding.d.ts +35 -23
  164. package/dist/types/embedding.d.ts.map +1 -1
  165. package/dist/types/gradient.d.ts +17 -1
  166. package/dist/types/gradient.d.ts.map +1 -1
  167. package/dist/types/import/detect.d.ts +14 -0
  168. package/dist/types/import/detect.d.ts.map +1 -0
  169. package/dist/types/import/extract.d.ts +43 -0
  170. package/dist/types/import/extract.d.ts.map +1 -0
  171. package/dist/types/import/history.d.ts +40 -0
  172. package/dist/types/import/history.d.ts.map +1 -0
  173. package/dist/types/import/index.d.ts +17 -0
  174. package/dist/types/import/index.d.ts.map +1 -0
  175. package/dist/types/import/providers/aider.d.ts +2 -0
  176. package/dist/types/import/providers/aider.d.ts.map +1 -0
  177. package/dist/types/import/providers/claude-code.d.ts +2 -0
  178. package/dist/types/import/providers/claude-code.d.ts.map +1 -0
  179. package/dist/types/import/providers/cline.d.ts +2 -0
  180. package/dist/types/import/providers/cline.d.ts.map +1 -0
  181. package/dist/types/import/providers/codex.d.ts +2 -0
  182. package/dist/types/import/providers/codex.d.ts.map +1 -0
  183. package/dist/types/import/providers/continue.d.ts +2 -0
  184. package/dist/types/import/providers/continue.d.ts.map +1 -0
  185. package/dist/types/import/providers/index.d.ts +19 -0
  186. package/dist/types/import/providers/index.d.ts.map +1 -0
  187. package/dist/types/import/providers/opencode.d.ts +2 -0
  188. package/dist/types/import/providers/opencode.d.ts.map +1 -0
  189. package/dist/types/import/providers/pi.d.ts +2 -0
  190. package/dist/types/import/providers/pi.d.ts.map +1 -0
  191. package/dist/types/import/types.d.ts +82 -0
  192. package/dist/types/import/types.d.ts.map +1 -0
  193. package/dist/types/index.d.ts +5 -2
  194. package/dist/types/index.d.ts.map +1 -1
  195. package/dist/types/instruction-detect.d.ts +66 -0
  196. package/dist/types/instruction-detect.d.ts.map +1 -0
  197. package/dist/types/log.d.ts +9 -0
  198. package/dist/types/log.d.ts.map +1 -1
  199. package/dist/types/ltm.d.ts +139 -5
  200. package/dist/types/ltm.d.ts.map +1 -1
  201. package/dist/types/pattern-extract.d.ts +7 -0
  202. package/dist/types/pattern-extract.d.ts.map +1 -1
  203. package/dist/types/prompt.d.ts +1 -1
  204. package/dist/types/prompt.d.ts.map +1 -1
  205. package/dist/types/recall.d.ts.map +1 -1
  206. package/dist/types/search.d.ts +5 -3
  207. package/dist/types/search.d.ts.map +1 -1
  208. package/dist/types/session-limiter.d.ts +26 -0
  209. package/dist/types/session-limiter.d.ts.map +1 -0
  210. package/dist/types/temporal.d.ts +2 -0
  211. package/dist/types/temporal.d.ts.map +1 -1
  212. package/dist/types/types.d.ts +1 -1
  213. package/package.json +3 -4
  214. package/src/agents-file.ts +41 -13
  215. package/src/config.ts +31 -18
  216. package/src/curator.ts +163 -75
  217. package/src/data-dir.ts +76 -0
  218. package/src/db.ts +457 -11
  219. package/src/distillation.ts +65 -16
  220. package/src/embedding-vendor.ts +23 -40
  221. package/src/embedding-worker-types.ts +19 -11
  222. package/src/embedding-worker.ts +111 -47
  223. package/src/embedding.ts +224 -174
  224. package/src/gradient.ts +192 -75
  225. package/src/import/detect.ts +37 -0
  226. package/src/import/extract.ts +137 -0
  227. package/src/import/history.ts +99 -0
  228. package/src/import/index.ts +45 -0
  229. package/src/import/providers/aider.ts +207 -0
  230. package/src/import/providers/claude-code.ts +339 -0
  231. package/src/import/providers/cline.ts +324 -0
  232. package/src/import/providers/codex.ts +369 -0
  233. package/src/import/providers/continue.ts +304 -0
  234. package/src/import/providers/index.ts +32 -0
  235. package/src/import/providers/opencode.ts +272 -0
  236. package/src/import/providers/pi.ts +332 -0
  237. package/src/import/types.ts +91 -0
  238. package/src/index.ts +13 -0
  239. package/src/instruction-detect.ts +275 -0
  240. package/src/log.ts +91 -3
  241. package/src/ltm.ts +789 -41
  242. package/src/pattern-extract.ts +41 -0
  243. package/src/prompt.ts +7 -1
  244. package/src/recall.ts +43 -5
  245. package/src/search.ts +7 -5
  246. package/src/session-limiter.ts +47 -0
  247. package/src/temporal.ts +18 -6
  248. package/src/types.ts +1 -1
@@ -14,6 +14,7 @@ import {
14
14
  } from "./prompt";
15
15
  import { toolStripAnnotation } from "./gradient";
16
16
  import { workerSessionIDs } from "./worker";
17
+ import { distillLimiter } from "./session-limiter";
17
18
  import type { LLMClient } from "./types";
18
19
 
19
20
  // Re-export for backwards compat — index.ts and others may still import from here.
@@ -610,8 +611,23 @@ function resetOrphans(projectPath: string, sessionID: string): number {
610
611
  return orphans.length;
611
612
  }
612
613
 
613
- // Main distillation entry point — called on session.idle or when urgent
614
+ // Main distillation entry point — called on session.idle or when urgent.
615
+ // Serialized per session via p-limit(1) to prevent concurrent runs from
616
+ // reading the same undistilled messages and producing duplicate rows.
614
617
  export async function run(input: {
618
+ llm: LLMClient;
619
+ projectPath: string;
620
+ sessionID: string;
621
+ model?: { providerID: string; modelID: string };
622
+ force?: boolean;
623
+ skipMeta?: boolean;
624
+ urgent?: boolean;
625
+ callType?: "batch" | "direct";
626
+ }): Promise<{ rounds: number; distilled: number }> {
627
+ return distillLimiter.get(input.sessionID)(() => runInner(input));
628
+ }
629
+
630
+ async function runInner(input: {
615
631
  llm: LLMClient;
616
632
  projectPath: string;
617
633
  sessionID: string;
@@ -697,7 +713,8 @@ export async function run(input: {
697
713
  gen0Count(input.projectPath, input.sessionID) >=
698
714
  cfg.distillation.metaThreshold
699
715
  ) {
700
- await metaDistill({
716
+ // Call inner directly — we're already under the per-session limiter.
717
+ await metaDistillInner({
701
718
  llm: input.llm,
702
719
  projectPath: input.projectPath,
703
720
  sessionID: input.sessionID,
@@ -776,17 +793,29 @@ async function distillSegment(input: {
776
793
  return null;
777
794
  }
778
795
 
779
- const distillId = storeDistillation({
780
- projectPath: input.projectPath,
781
- sessionID: input.sessionID,
782
- observations: result.observations,
783
- sourceIDs: input.messages.map((m) => m.id),
784
- generation: 0,
785
- rCompression: rComp,
786
- cNorm,
787
- callType: input.callType,
788
- });
789
- temporal.markDistilled(input.messages.map((m) => m.id));
796
+ // Atomic: store distillation + mark source messages as distilled in one
797
+ // transaction. Without this, a crash between the two statements would leave
798
+ // messages undistilled but with an existing distillation row, causing
799
+ // re-processing on restart and duplicate distillation content.
800
+ let distillId: string;
801
+ db().exec("BEGIN IMMEDIATE");
802
+ try {
803
+ distillId = storeDistillation({
804
+ projectPath: input.projectPath,
805
+ sessionID: input.sessionID,
806
+ observations: result.observations,
807
+ sourceIDs: input.messages.map((m) => m.id),
808
+ generation: 0,
809
+ rCompression: rComp,
810
+ cNorm,
811
+ callType: input.callType,
812
+ });
813
+ temporal.markDistilled(input.messages.map((m) => m.id));
814
+ db().exec("COMMIT");
815
+ } catch (e) {
816
+ db().exec("ROLLBACK");
817
+ throw e;
818
+ }
790
819
 
791
820
  log.info(
792
821
  `distill segment: ${input.messages.length} msgs, ` +
@@ -811,7 +840,8 @@ async function distillSegment(input: {
811
840
 
812
841
  // Fire-and-forget: extract decision/preference patterns → knowledge entries
813
842
  if (config().knowledge.enabled) {
814
- for (const pat of extractPatterns(result.observations)) {
843
+ const patterns = extractPatterns(result.observations);
844
+ for (const pat of patterns) {
815
845
  try {
816
846
  ltm.create({
817
847
  projectPath: input.projectPath,
@@ -825,6 +855,9 @@ async function distillSegment(input: {
825
855
  // Dedup guard in ltm.create() handles duplicates — swallow errors
826
856
  }
827
857
  }
858
+ if (patterns.length > 0) {
859
+ log.info(`pattern extraction: ${patterns.length} entries from distillation`);
860
+ }
828
861
  }
829
862
 
830
863
  return result;
@@ -836,7 +869,8 @@ async function distillSegment(input: {
836
869
  * via `<previous-meta-summary>` so the LLM updates in place rather than
837
870
  * re-deriving from scratch.
838
871
  *
839
- * Exported for tests; `run()` is the production entry point.
872
+ * Serialized per session via the same p-limit(1) as `run()`. Exported for
873
+ * the idle handler which calls metaDistill() independently of run().
840
874
  */
841
875
  export async function metaDistill(input: {
842
876
  llm: LLMClient;
@@ -845,6 +879,17 @@ export async function metaDistill(input: {
845
879
  model?: { providerID: string; modelID: string };
846
880
  urgent?: boolean;
847
881
  callType?: "batch" | "direct";
882
+ }): Promise<DistillationResult | null> {
883
+ return distillLimiter.get(input.sessionID)(() => metaDistillInner(input));
884
+ }
885
+
886
+ async function metaDistillInner(input: {
887
+ llm: LLMClient;
888
+ projectPath: string;
889
+ sessionID: string;
890
+ model?: { providerID: string; modelID: string };
891
+ urgent?: boolean;
892
+ callType?: "batch" | "direct";
848
893
  }): Promise<DistillationResult | null> {
849
894
  const existing = loadGen0(input.projectPath, input.sessionID);
850
895
 
@@ -928,7 +973,8 @@ export async function metaDistill(input: {
928
973
 
929
974
  // Fire-and-forget: extract decision/preference patterns → knowledge entries
930
975
  if (config().knowledge.enabled) {
931
- for (const pat of extractPatterns(result.observations)) {
976
+ const patterns = extractPatterns(result.observations);
977
+ for (const pat of patterns) {
932
978
  try {
933
979
  ltm.create({
934
980
  projectPath: input.projectPath,
@@ -942,6 +988,9 @@ export async function metaDistill(input: {
942
988
  // Dedup guard in ltm.create() handles duplicates — swallow errors
943
989
  }
944
990
  }
991
+ if (patterns.length > 0) {
992
+ log.info(`pattern extraction: ${patterns.length} entries from meta-distillation`);
993
+ }
945
994
  }
946
995
 
947
996
  return result;
@@ -1,31 +1,19 @@
1
1
  /**
2
- * Vendored bge-small registration for the standalone Lore binary.
2
+ * Vendored model registration for the standalone Lore binary.
3
3
  *
4
- * The Bun-compiled `lore` binary uses `bun build --compile` to bundle
5
- * `fastembed` + `onnxruntime-node` + `@anush008/tokenizers-<platform>`
6
- * directly into the executable — including the platform-specific
7
- * `.node` addons, which Bun embeds and dlopens from `$bunfs` at runtime.
4
+ * The Bun-compiled `lore` binary bundles `@huggingface/transformers`
5
+ * (which includes ONNX Runtime) into the executable. The model weights
6
+ * and tokenizer files (nomic-embed-text-v1.5, INT8 quantized) are
7
+ * embedded as Bun assets and extracted to a cache dir on first run.
8
8
  *
9
- * Two pieces don't fit into Bun's automatic bundling and need our help:
10
- *
11
- * 1. **Side-load shared libraries**. `onnxruntime_binding.node` does a
12
- * runtime `dlopen("libonnxruntime.so.1")` (or the .dylib / .dll
13
- * equivalent) for the actual ONNX Runtime computation library. Bun
14
- * doesn't follow this kind of dependency. The binary's wrapper
15
- * pre-loads these libs via `bun:ffi` *before* fastembed evaluates,
16
- * so when the addon's dlopen fires it finds the cached handle.
17
- *
18
- * 2. **Model weights + tokenizer**. fastembed downloads from the HF
19
- * Hub on first use; we want zero network on first run. The wrapper
20
- * embeds the bge-small INT8 files as Bun assets, writes them to a
21
- * real disk dir on first run, and sets `globalThis.__LORE_VENDOR_MODEL__`
22
- * to that path. This module exposes that registration to the
23
- * LocalProvider so it can hand the path to fastembed's CUSTOM-mode
24
- * init (`modelAbsoluteDirPath` + `modelName`).
9
+ * The binary's wrapper sets `globalThis.__LORE_VENDOR_MODEL__` to the
10
+ * extraction path. This module exposes that registration to the
11
+ * `LocalProvider` so it can configure the transformers.js `env` to
12
+ * load from the local path instead of downloading from HuggingFace Hub.
25
13
  *
26
14
  * In npm-mode usage from `@loreai/opencode` / `@loreai/pi` the global
27
- * is unset and `vendorModelInfo()` returns `null`, so the LocalProvider
28
- * falls through to fastembed's default Qdrant repo + cache.
15
+ * is unset and `vendorModelInfo()` returns `null`, so transformers.js
16
+ * downloads the model from HF Hub on first use and caches it locally.
29
17
  */
30
18
 
31
19
  // ---------------------------------------------------------------------------
@@ -34,13 +22,10 @@
34
22
 
35
23
  /** What the binary wrapper writes to globalThis after extracting model files. */
36
24
  export interface VendorRegistration {
37
- /** Absolute path to the dir containing the bge-small files
38
- * (config.json, tokenizer.json, model_quantized.onnx, …). Pass to
39
- * fastembed as `modelAbsoluteDirPath` in CUSTOM init. */
40
- modelAbsoluteDirPath: string;
41
- /** Filename of the ONNX weights inside that dir. Pass to fastembed
42
- * as `modelName` in CUSTOM init. */
43
- modelName: string;
25
+ /** Absolute path to the dir containing the model files in HF layout
26
+ * (config.json, tokenizer.json, onnx/model_quantized.onnx, …).
27
+ * Passed to transformers.js as `env.localModelPath`. */
28
+ localModelPath: string;
44
29
  /** Target identifier the binary was built for, e.g. "linux-x64".
45
30
  * Diagnostic only — the runtime doesn't branch on it. */
46
31
  target: string;
@@ -69,25 +54,23 @@ export function _setVendorRegistration(reg: VendorRegistration | null): void {
69
54
  // Public entry
70
55
  // ---------------------------------------------------------------------------
71
56
 
72
- /** Subset of the registration fastembed needs. Stripped of the
73
- * diagnostic fields so the LocalProvider has exactly what it should
74
- * hand to `FlagEmbedding.init`. */
57
+ /** Subset of the registration the LocalProvider needs. */
75
58
  export interface VendorModelInfo {
76
- modelAbsoluteDirPath: string;
77
- modelName: string;
59
+ /** Absolute path to the dir containing the model in HF layout.
60
+ * Set as `env.localModelPath` in the worker thread. */
61
+ localModelPath: string;
78
62
  }
79
63
 
80
64
  /**
81
- * Resolve the bundled-model arguments for fastembed CUSTOM init. Returns
82
- * `null` when no vendor is registered (npm-mode), so the caller can fall
83
- * through to fastembed's default cacheDir/HF Hub flow.
65
+ * Resolve the vendored model path for transformers.js local loading.
66
+ * Returns `null` when no vendor is registered (npm-mode), so the caller
67
+ * falls through to transformers.js's default HF Hub download + cache.
84
68
  */
85
69
  export function vendorModelInfo(): VendorModelInfo | null {
86
70
  const reg = getRegistration();
87
71
  if (!reg) return null;
88
72
  return {
89
- modelAbsoluteDirPath: reg.modelAbsoluteDirPath,
90
- modelName: reg.modelName,
73
+ localModelPath: reg.localModelPath,
91
74
  };
92
75
  }
93
76
 
@@ -1,10 +1,11 @@
1
1
  /**
2
2
  * Shared message types for the embedding worker thread.
3
3
  *
4
- * The embedding worker (`embedding-worker.ts`) runs fastembed/ONNX inference
5
- * in a separate `node:worker_threads` Worker so the main thread's event loop
6
- * stays free during inference. This file defines the message protocol between
7
- * the main thread (`LocalProvider` in `embedding.ts`) and the worker.
4
+ * The embedding worker (`embedding-worker.ts`) runs ONNX inference via
5
+ * `@huggingface/transformers` in a separate `node:worker_threads` Worker
6
+ * so the main thread's event loop stays free during inference. This file
7
+ * defines the message protocol between the main thread (`LocalProvider`
8
+ * in `embedding.ts`) and the worker.
8
9
  *
9
10
  * Imported by both sides — keep this file free of runtime dependencies.
10
11
  */
@@ -18,7 +19,7 @@ export interface EmbedRequest {
18
19
  type: "embed";
19
20
  /** Monotonic request ID for correlating responses. */
20
21
  id: number;
21
- /** Texts to embed. */
22
+ /** Texts to embed (already prefixed with task instruction by the caller). */
22
23
  texts: string[];
23
24
  /** "document" for storage, "query" for search. */
24
25
  inputType: "document" | "query";
@@ -72,11 +73,18 @@ export type WorkerOutbound = EmbedResult | EmbedError | InitError;
72
73
 
73
74
  /** Passed to the worker via `workerData` at construction time. */
74
75
  export interface WorkerInitData {
75
- /** fastembed model name, e.g. "BGESmallENV15". */
76
- modelName: string;
76
+ /** HuggingFace model ID, e.g. "nomic-ai/nomic-embed-text-v1.5". */
77
+ modelId: string;
78
+ /** Target embedding dimensions. For Nomic v1.5 with Matryoshka,
79
+ * this controls how many leading dims to keep (64–768). */
80
+ dimensions: number;
77
81
  /** Vendored model info for binary mode, or null for npm mode.
78
- * Mirrors the `globalThis.__LORE_VENDOR_MODEL__` registration which
79
- * only exists on the main thread — passed explicitly so the worker
80
- * can hand it to `FlagEmbedding.init()`. */
81
- vendorModel: { modelAbsoluteDirPath: string; modelName: string } | null;
82
+ * In binary mode, model files are pre-extracted to a local dir
83
+ * and we point transformers.js at that path instead of downloading
84
+ * from HuggingFace Hub. */
85
+ vendorModel: {
86
+ /** Absolute path to the dir containing model files
87
+ * (config.json, tokenizer.json, onnx/model_quantized.onnx, …). */
88
+ localModelPath: string;
89
+ } | null;
82
90
  }
@@ -1,8 +1,9 @@
1
1
  /**
2
- * Embedding worker thread — runs fastembed/ONNX inference off the main thread.
2
+ * Embedding worker thread — runs ONNX inference via @huggingface/transformers
3
+ * off the main thread.
3
4
  *
4
5
  * This file is the entry point for a `node:worker_threads` Worker spawned by
5
- * `LocalProvider` in `embedding.ts`. It owns the `FlagEmbedding` ONNX model
6
+ * `LocalProvider` in `embedding.ts`. It owns the transformers.js pipeline
6
7
  * and processes embed requests sequentially from a priority queue. Moving
7
8
  * inference here keeps the main thread's event loop free — HTTP requests,
8
9
  * SSE streams, and session APIs are no longer blocked during embedding.
@@ -10,6 +11,12 @@
10
11
  * Communication uses `parentPort` message passing with structured clone.
11
12
  * Float32Array vectors are sent back directly (Bun preserves identity).
12
13
  *
14
+ * The worker applies Nomic's recommended post-processing:
15
+ * 1. Mean pooling (via pipeline option)
16
+ * 2. Layer normalization
17
+ * 3. Matryoshka dimension truncation (if dimensions < full 768)
18
+ * 4. L2 normalization
19
+ *
13
20
  * @see embedding-worker-types.ts for the message protocol.
14
21
  */
15
22
 
@@ -25,53 +32,72 @@ import type {
25
32
  // workerData
26
33
  // ---------------------------------------------------------------------------
27
34
 
28
- const { modelName, vendorModel } = workerData as WorkerInitData;
35
+ const { modelId, dimensions, vendorModel } = workerData as WorkerInitData;
29
36
 
30
37
  // ---------------------------------------------------------------------------
31
38
  // Model lifecycle — lazy init on first embed request
32
39
  // ---------------------------------------------------------------------------
33
40
 
34
- /** The fastembed model, typed to the subset of methods we use. */
35
- type FastembedModel = {
36
- queryEmbed(text: string): Promise<number[]>;
37
- passageEmbed(texts: string[], batchSize?: number): AsyncGenerator<number[][]>;
41
+ /** The transformers.js pipeline instance, typed loosely since the exact
42
+ * return type depends on the pipeline task. */
43
+ type FeatureExtractionPipeline = {
44
+ (texts: string[], options?: Record<string, unknown>): Promise<{
45
+ dims: number[];
46
+ data: Float32Array;
47
+ tolist(): number[][];
48
+ }>;
49
+ dispose?(): Promise<void>;
38
50
  };
39
51
 
40
- let model: FastembedModel | null = null;
52
+ let pipe: FeatureExtractionPipeline | null = null;
53
+ let layerNormFn: ((input: unknown, normalized_shape: number[]) => {
54
+ dims: number[];
55
+ data: Float32Array;
56
+ normalize(p: number, dim: number): { tolist(): number[][]; data: Float32Array; dims: number[] };
57
+ slice(...args: unknown[]): { normalize(p: number, dim: number): { tolist(): number[][]; data: Float32Array; dims: number[] } };
58
+ }) | null = null;
41
59
  let initPromise: Promise<void> | null = null;
42
60
  let initFailed = false;
43
61
  let initError: string | null = null;
44
62
 
45
63
  /**
46
- * Ensure the fastembed model is loaded. Lazy — first call triggers the
47
- * dynamic import + FlagEmbedding.init(), subsequent calls return immediately.
64
+ * Ensure the transformers.js pipeline is loaded. Lazy — first call triggers
65
+ * the dynamic import + pipeline creation, subsequent calls return immediately.
48
66
  * On failure, marks the worker as permanently broken and posts `init-error`.
49
67
  */
50
- async function ensureModel(): Promise<FastembedModel> {
51
- if (model) return model;
52
- if (initFailed) throw new Error(initError ?? "fastembed init previously failed");
68
+ async function ensurePipeline(): Promise<void> {
69
+ if (pipe) return;
70
+ if (initFailed) throw new Error(initError ?? "pipeline init previously failed");
53
71
 
54
72
  if (!initPromise) {
55
73
  initPromise = (async () => {
56
- const fastembed = await import("fastembed");
57
- const { EmbeddingModel, FlagEmbedding } = fastembed;
74
+ const transformers = await import("@huggingface/transformers");
75
+ const { pipeline, env, layer_norm } = transformers;
76
+
77
+ // Configure transformers.js environment
78
+ env.allowRemoteModels = !vendorModel;
79
+ env.allowLocalModels = true;
58
80
 
59
- let m: unknown;
60
81
  if (vendorModel) {
61
- // Binary mode: use pre-extracted model files.
62
- m = await FlagEmbedding.init({
63
- model: EmbeddingModel.CUSTOM,
64
- modelAbsoluteDirPath: vendorModel.modelAbsoluteDirPath,
65
- modelName: vendorModel.modelName,
66
- });
67
- } else {
68
- // npm mode: resolve model name against fastembed's enum.
69
- const enumValue = (EmbeddingModel as Record<string, string>)[modelName];
70
- m = await FlagEmbedding.init({
71
- model: enumValue ?? modelName,
72
- } as { model: typeof EmbeddingModel.BGESmallENV15 });
82
+ // Binary mode: point at pre-extracted model files on disk.
83
+ env.localModelPath = vendorModel.localModelPath;
84
+ env.allowRemoteModels = false;
73
85
  }
74
- model = m as FastembedModel;
86
+
87
+ // Create feature-extraction pipeline with ONNX quantized model.
88
+ // dtype: 'q8' selects the INT8 quantized ONNX variant (model_quantized.onnx)
89
+ // which is ~137MB for Nomic v1.5 vs ~547MB for the full FP32 model.
90
+ //
91
+ // device: "cpu" — in npm mode, transformers.js uses onnxruntime-node
92
+ // (native CPU). In the compiled binary, onnxruntime-node is redirected
93
+ // to onnxruntime-web by the build plugin, which handles "cpu" via its
94
+ // WASM+SIMD backend (API-compatible, ~2x faster on batch workloads).
95
+ pipe = (await pipeline("feature-extraction", modelId, {
96
+ dtype: "q8",
97
+ device: "cpu",
98
+ })) as unknown as FeatureExtractionPipeline;
99
+
100
+ layerNormFn = layer_norm as typeof layerNormFn;
75
101
  })().catch((err) => {
76
102
  initFailed = true;
77
103
  initError = err instanceof Error ? err.message : String(err);
@@ -83,8 +109,7 @@ async function ensureModel(): Promise<FastembedModel> {
83
109
  }
84
110
 
85
111
  await initPromise;
86
- if (!model) throw new Error("model init completed but model is null");
87
- return model;
112
+ if (!pipe) throw new Error("pipeline init completed but pipe is null");
88
113
  }
89
114
 
90
115
  // ---------------------------------------------------------------------------
@@ -135,31 +160,70 @@ async function drain(): Promise<void> {
135
160
  // Embed processing
136
161
  // ---------------------------------------------------------------------------
137
162
 
163
+ /**
164
+ * Detect ONNX runtime out-of-memory errors. The runtime throws opaque
165
+ * numeric error codes (e.g. "287180544") for allocation failures rather
166
+ * than a readable message. We match on large numeric-only strings and
167
+ * known OOM patterns.
168
+ */
169
+ function isOomError(msg: string): boolean {
170
+ // Pure numeric error codes ≥ 6 digits are ORT allocation failures
171
+ if (/^\d{6,}$/.test(msg)) return true;
172
+ // Explicit OOM messages from various ONNX backends
173
+ if (/out.of.memory|alloc.*fail|oom/i.test(msg)) return true;
174
+ return false;
175
+ }
176
+
138
177
  async function processEmbed(req: EmbedRequest): Promise<void> {
139
178
  try {
140
- const m = await ensureModel();
179
+ await ensurePipeline();
180
+
181
+ // Run feature extraction with mean pooling.
182
+ // truncation: true caps each text at the model's max length (8192 tokens
183
+ // for Nomic v1.5), preventing oversized inputs from causing OOM.
184
+ const output = await pipe!(req.texts, { pooling: "mean", truncation: true });
185
+
186
+ // Post-process following Nomic's recipe:
187
+ // 1. Layer normalization over the full hidden dimension
188
+ // 2. Matryoshka truncation to target dimensions
189
+ // 3. L2 normalization
190
+ const fullDim = output.dims[output.dims.length - 1]; // 768 for Nomic v1.5
191
+ const truncate = dimensions < fullDim;
192
+
193
+ let normalized: { tolist(): number[][]; data: Float32Array; dims: number[] };
194
+ if (truncate) {
195
+ // layer_norm → slice → L2 normalize
196
+ normalized = layerNormFn!(output, [fullDim])
197
+ .slice(null, [0, dimensions])
198
+ .normalize(2, -1);
199
+ } else {
200
+ // layer_norm → L2 normalize (no truncation)
201
+ normalized = layerNormFn!(output, [fullDim])
202
+ .normalize(2, -1);
203
+ }
141
204
 
142
- let vectors: Float32Array[];
205
+ // Extract per-text vectors from the batched tensor.
206
+ const numTexts = req.texts.length;
207
+ const vectors: Float32Array[] = [];
208
+ const dim = truncate ? dimensions : fullDim;
143
209
 
144
- if (req.inputType === "query" && req.texts.length === 1) {
145
- // Single query use queryEmbed for better quality.
146
- const vec = await m.queryEmbed(req.texts[0]);
147
- vectors = [new Float32Array(vec)];
148
- } else {
149
- // Batch document embedding via async generator.
150
- vectors = [];
151
- for await (const batch of m.passageEmbed(req.texts)) {
152
- for (const vec of batch) {
153
- vectors.push(new Float32Array(vec));
154
- }
155
- }
210
+ for (let i = 0; i < numTexts; i++) {
211
+ const start = i * dim;
212
+ const vec = new Float32Array(dim);
213
+ vec.set(normalized.data.subarray(start, start + dim));
214
+ vectors.push(vec);
156
215
  }
157
216
 
158
217
  post({ type: "result", id: req.id, vectors });
159
218
  } catch (err) {
160
- // Don't re-post init-error — it was already sent in ensureModel().
219
+ // Don't re-post init-error — it was already sent in ensurePipeline().
161
220
  if (!initFailed) {
162
- const msg = err instanceof Error ? err.message : String(err);
221
+ const raw = err instanceof Error ? err.message : String(err);
222
+ const msg = isOomError(raw)
223
+ ? `ONNX runtime out of memory (batch=${req.texts.length}, ` +
224
+ `longest≈${Math.max(...req.texts.map((t) => t.length))} chars). ` +
225
+ `Try reducing batch size. Raw: ${raw}`
226
+ : raw;
163
227
  post({ type: "error", id: req.id, error: msg });
164
228
  }
165
229
  }