@loreai/core 0.17.1 → 0.19.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (248) hide show
  1. package/dist/bun/agents-file.d.ts +4 -0
  2. package/dist/bun/agents-file.d.ts.map +1 -1
  3. package/dist/bun/config.d.ts +2 -0
  4. package/dist/bun/config.d.ts.map +1 -1
  5. package/dist/bun/curator.d.ts +45 -0
  6. package/dist/bun/curator.d.ts.map +1 -1
  7. package/dist/bun/data-dir.d.ts +18 -0
  8. package/dist/bun/data-dir.d.ts.map +1 -0
  9. package/dist/bun/db.d.ts +85 -0
  10. package/dist/bun/db.d.ts.map +1 -1
  11. package/dist/bun/distillation.d.ts +2 -13
  12. package/dist/bun/distillation.d.ts.map +1 -1
  13. package/dist/bun/embedding-vendor.d.ts +22 -38
  14. package/dist/bun/embedding-vendor.d.ts.map +1 -1
  15. package/dist/bun/embedding-worker-types.d.ts +17 -12
  16. package/dist/bun/embedding-worker-types.d.ts.map +1 -1
  17. package/dist/bun/embedding-worker.d.ts +9 -2
  18. package/dist/bun/embedding-worker.d.ts.map +1 -1
  19. package/dist/bun/embedding-worker.js +38864 -33
  20. package/dist/bun/embedding-worker.js.map +4 -4
  21. package/dist/bun/embedding.d.ts +35 -23
  22. package/dist/bun/embedding.d.ts.map +1 -1
  23. package/dist/bun/gradient.d.ts +17 -1
  24. package/dist/bun/gradient.d.ts.map +1 -1
  25. package/dist/bun/import/detect.d.ts +14 -0
  26. package/dist/bun/import/detect.d.ts.map +1 -0
  27. package/dist/bun/import/extract.d.ts +43 -0
  28. package/dist/bun/import/extract.d.ts.map +1 -0
  29. package/dist/bun/import/history.d.ts +40 -0
  30. package/dist/bun/import/history.d.ts.map +1 -0
  31. package/dist/bun/import/index.d.ts +17 -0
  32. package/dist/bun/import/index.d.ts.map +1 -0
  33. package/dist/bun/import/providers/aider.d.ts +2 -0
  34. package/dist/bun/import/providers/aider.d.ts.map +1 -0
  35. package/dist/bun/import/providers/claude-code.d.ts +2 -0
  36. package/dist/bun/import/providers/claude-code.d.ts.map +1 -0
  37. package/dist/bun/import/providers/cline.d.ts +2 -0
  38. package/dist/bun/import/providers/cline.d.ts.map +1 -0
  39. package/dist/bun/import/providers/codex.d.ts +2 -0
  40. package/dist/bun/import/providers/codex.d.ts.map +1 -0
  41. package/dist/bun/import/providers/continue.d.ts +2 -0
  42. package/dist/bun/import/providers/continue.d.ts.map +1 -0
  43. package/dist/bun/import/providers/index.d.ts +19 -0
  44. package/dist/bun/import/providers/index.d.ts.map +1 -0
  45. package/dist/bun/import/providers/opencode.d.ts +2 -0
  46. package/dist/bun/import/providers/opencode.d.ts.map +1 -0
  47. package/dist/bun/import/providers/pi.d.ts +2 -0
  48. package/dist/bun/import/providers/pi.d.ts.map +1 -0
  49. package/dist/bun/import/types.d.ts +82 -0
  50. package/dist/bun/import/types.d.ts.map +1 -0
  51. package/dist/bun/index.d.ts +5 -2
  52. package/dist/bun/index.d.ts.map +1 -1
  53. package/dist/bun/index.js +3150 -439
  54. package/dist/bun/index.js.map +4 -4
  55. package/dist/bun/instruction-detect.d.ts +66 -0
  56. package/dist/bun/instruction-detect.d.ts.map +1 -0
  57. package/dist/bun/log.d.ts +9 -0
  58. package/dist/bun/log.d.ts.map +1 -1
  59. package/dist/bun/ltm.d.ts +139 -5
  60. package/dist/bun/ltm.d.ts.map +1 -1
  61. package/dist/bun/pattern-extract.d.ts +7 -0
  62. package/dist/bun/pattern-extract.d.ts.map +1 -1
  63. package/dist/bun/prompt.d.ts +1 -1
  64. package/dist/bun/prompt.d.ts.map +1 -1
  65. package/dist/bun/recall.d.ts.map +1 -1
  66. package/dist/bun/search.d.ts +5 -3
  67. package/dist/bun/search.d.ts.map +1 -1
  68. package/dist/bun/session-limiter.d.ts +26 -0
  69. package/dist/bun/session-limiter.d.ts.map +1 -0
  70. package/dist/bun/temporal.d.ts +2 -0
  71. package/dist/bun/temporal.d.ts.map +1 -1
  72. package/dist/bun/types.d.ts +1 -1
  73. package/dist/node/agents-file.d.ts +4 -0
  74. package/dist/node/agents-file.d.ts.map +1 -1
  75. package/dist/node/config.d.ts +2 -0
  76. package/dist/node/config.d.ts.map +1 -1
  77. package/dist/node/curator.d.ts +45 -0
  78. package/dist/node/curator.d.ts.map +1 -1
  79. package/dist/node/data-dir.d.ts +18 -0
  80. package/dist/node/data-dir.d.ts.map +1 -0
  81. package/dist/node/db.d.ts +85 -0
  82. package/dist/node/db.d.ts.map +1 -1
  83. package/dist/node/distillation.d.ts +2 -13
  84. package/dist/node/distillation.d.ts.map +1 -1
  85. package/dist/node/embedding-vendor.d.ts +22 -38
  86. package/dist/node/embedding-vendor.d.ts.map +1 -1
  87. package/dist/node/embedding-worker-types.d.ts +17 -12
  88. package/dist/node/embedding-worker-types.d.ts.map +1 -1
  89. package/dist/node/embedding-worker.d.ts +9 -2
  90. package/dist/node/embedding-worker.d.ts.map +1 -1
  91. package/dist/node/embedding-worker.js +38864 -33
  92. package/dist/node/embedding-worker.js.map +4 -4
  93. package/dist/node/embedding.d.ts +35 -23
  94. package/dist/node/embedding.d.ts.map +1 -1
  95. package/dist/node/gradient.d.ts +17 -1
  96. package/dist/node/gradient.d.ts.map +1 -1
  97. package/dist/node/import/detect.d.ts +14 -0
  98. package/dist/node/import/detect.d.ts.map +1 -0
  99. package/dist/node/import/extract.d.ts +43 -0
  100. package/dist/node/import/extract.d.ts.map +1 -0
  101. package/dist/node/import/history.d.ts +40 -0
  102. package/dist/node/import/history.d.ts.map +1 -0
  103. package/dist/node/import/index.d.ts +17 -0
  104. package/dist/node/import/index.d.ts.map +1 -0
  105. package/dist/node/import/providers/aider.d.ts +2 -0
  106. package/dist/node/import/providers/aider.d.ts.map +1 -0
  107. package/dist/node/import/providers/claude-code.d.ts +2 -0
  108. package/dist/node/import/providers/claude-code.d.ts.map +1 -0
  109. package/dist/node/import/providers/cline.d.ts +2 -0
  110. package/dist/node/import/providers/cline.d.ts.map +1 -0
  111. package/dist/node/import/providers/codex.d.ts +2 -0
  112. package/dist/node/import/providers/codex.d.ts.map +1 -0
  113. package/dist/node/import/providers/continue.d.ts +2 -0
  114. package/dist/node/import/providers/continue.d.ts.map +1 -0
  115. package/dist/node/import/providers/index.d.ts +19 -0
  116. package/dist/node/import/providers/index.d.ts.map +1 -0
  117. package/dist/node/import/providers/opencode.d.ts +2 -0
  118. package/dist/node/import/providers/opencode.d.ts.map +1 -0
  119. package/dist/node/import/providers/pi.d.ts +2 -0
  120. package/dist/node/import/providers/pi.d.ts.map +1 -0
  121. package/dist/node/import/types.d.ts +82 -0
  122. package/dist/node/import/types.d.ts.map +1 -0
  123. package/dist/node/index.d.ts +5 -2
  124. package/dist/node/index.d.ts.map +1 -1
  125. package/dist/node/index.js +3150 -439
  126. package/dist/node/index.js.map +4 -4
  127. package/dist/node/instruction-detect.d.ts +66 -0
  128. package/dist/node/instruction-detect.d.ts.map +1 -0
  129. package/dist/node/log.d.ts +9 -0
  130. package/dist/node/log.d.ts.map +1 -1
  131. package/dist/node/ltm.d.ts +139 -5
  132. package/dist/node/ltm.d.ts.map +1 -1
  133. package/dist/node/pattern-extract.d.ts +7 -0
  134. package/dist/node/pattern-extract.d.ts.map +1 -1
  135. package/dist/node/prompt.d.ts +1 -1
  136. package/dist/node/prompt.d.ts.map +1 -1
  137. package/dist/node/recall.d.ts.map +1 -1
  138. package/dist/node/search.d.ts +5 -3
  139. package/dist/node/search.d.ts.map +1 -1
  140. package/dist/node/session-limiter.d.ts +26 -0
  141. package/dist/node/session-limiter.d.ts.map +1 -0
  142. package/dist/node/temporal.d.ts +2 -0
  143. package/dist/node/temporal.d.ts.map +1 -1
  144. package/dist/node/types.d.ts +1 -1
  145. package/dist/types/agents-file.d.ts +4 -0
  146. package/dist/types/agents-file.d.ts.map +1 -1
  147. package/dist/types/config.d.ts +2 -0
  148. package/dist/types/config.d.ts.map +1 -1
  149. package/dist/types/curator.d.ts +45 -0
  150. package/dist/types/curator.d.ts.map +1 -1
  151. package/dist/types/data-dir.d.ts +18 -0
  152. package/dist/types/data-dir.d.ts.map +1 -0
  153. package/dist/types/db.d.ts +85 -0
  154. package/dist/types/db.d.ts.map +1 -1
  155. package/dist/types/distillation.d.ts +2 -13
  156. package/dist/types/distillation.d.ts.map +1 -1
  157. package/dist/types/embedding-vendor.d.ts +22 -38
  158. package/dist/types/embedding-vendor.d.ts.map +1 -1
  159. package/dist/types/embedding-worker-types.d.ts +17 -12
  160. package/dist/types/embedding-worker-types.d.ts.map +1 -1
  161. package/dist/types/embedding-worker.d.ts +9 -2
  162. package/dist/types/embedding-worker.d.ts.map +1 -1
  163. package/dist/types/embedding.d.ts +35 -23
  164. package/dist/types/embedding.d.ts.map +1 -1
  165. package/dist/types/gradient.d.ts +17 -1
  166. package/dist/types/gradient.d.ts.map +1 -1
  167. package/dist/types/import/detect.d.ts +14 -0
  168. package/dist/types/import/detect.d.ts.map +1 -0
  169. package/dist/types/import/extract.d.ts +43 -0
  170. package/dist/types/import/extract.d.ts.map +1 -0
  171. package/dist/types/import/history.d.ts +40 -0
  172. package/dist/types/import/history.d.ts.map +1 -0
  173. package/dist/types/import/index.d.ts +17 -0
  174. package/dist/types/import/index.d.ts.map +1 -0
  175. package/dist/types/import/providers/aider.d.ts +2 -0
  176. package/dist/types/import/providers/aider.d.ts.map +1 -0
  177. package/dist/types/import/providers/claude-code.d.ts +2 -0
  178. package/dist/types/import/providers/claude-code.d.ts.map +1 -0
  179. package/dist/types/import/providers/cline.d.ts +2 -0
  180. package/dist/types/import/providers/cline.d.ts.map +1 -0
  181. package/dist/types/import/providers/codex.d.ts +2 -0
  182. package/dist/types/import/providers/codex.d.ts.map +1 -0
  183. package/dist/types/import/providers/continue.d.ts +2 -0
  184. package/dist/types/import/providers/continue.d.ts.map +1 -0
  185. package/dist/types/import/providers/index.d.ts +19 -0
  186. package/dist/types/import/providers/index.d.ts.map +1 -0
  187. package/dist/types/import/providers/opencode.d.ts +2 -0
  188. package/dist/types/import/providers/opencode.d.ts.map +1 -0
  189. package/dist/types/import/providers/pi.d.ts +2 -0
  190. package/dist/types/import/providers/pi.d.ts.map +1 -0
  191. package/dist/types/import/types.d.ts +82 -0
  192. package/dist/types/import/types.d.ts.map +1 -0
  193. package/dist/types/index.d.ts +5 -2
  194. package/dist/types/index.d.ts.map +1 -1
  195. package/dist/types/instruction-detect.d.ts +66 -0
  196. package/dist/types/instruction-detect.d.ts.map +1 -0
  197. package/dist/types/log.d.ts +9 -0
  198. package/dist/types/log.d.ts.map +1 -1
  199. package/dist/types/ltm.d.ts +139 -5
  200. package/dist/types/ltm.d.ts.map +1 -1
  201. package/dist/types/pattern-extract.d.ts +7 -0
  202. package/dist/types/pattern-extract.d.ts.map +1 -1
  203. package/dist/types/prompt.d.ts +1 -1
  204. package/dist/types/prompt.d.ts.map +1 -1
  205. package/dist/types/recall.d.ts.map +1 -1
  206. package/dist/types/search.d.ts +5 -3
  207. package/dist/types/search.d.ts.map +1 -1
  208. package/dist/types/session-limiter.d.ts +26 -0
  209. package/dist/types/session-limiter.d.ts.map +1 -0
  210. package/dist/types/temporal.d.ts +2 -0
  211. package/dist/types/temporal.d.ts.map +1 -1
  212. package/dist/types/types.d.ts +1 -1
  213. package/package.json +3 -4
  214. package/src/agents-file.ts +41 -13
  215. package/src/config.ts +31 -18
  216. package/src/curator.ts +163 -75
  217. package/src/data-dir.ts +76 -0
  218. package/src/db.ts +457 -11
  219. package/src/distillation.ts +65 -16
  220. package/src/embedding-vendor.ts +23 -40
  221. package/src/embedding-worker-types.ts +19 -11
  222. package/src/embedding-worker.ts +111 -47
  223. package/src/embedding.ts +224 -174
  224. package/src/gradient.ts +192 -75
  225. package/src/import/detect.ts +37 -0
  226. package/src/import/extract.ts +137 -0
  227. package/src/import/history.ts +99 -0
  228. package/src/import/index.ts +45 -0
  229. package/src/import/providers/aider.ts +207 -0
  230. package/src/import/providers/claude-code.ts +339 -0
  231. package/src/import/providers/cline.ts +324 -0
  232. package/src/import/providers/codex.ts +369 -0
  233. package/src/import/providers/continue.ts +304 -0
  234. package/src/import/providers/index.ts +32 -0
  235. package/src/import/providers/opencode.ts +272 -0
  236. package/src/import/providers/pi.ts +332 -0
  237. package/src/import/types.ts +91 -0
  238. package/src/index.ts +13 -0
  239. package/src/instruction-detect.ts +275 -0
  240. package/src/log.ts +91 -3
  241. package/src/ltm.ts +789 -41
  242. package/src/pattern-extract.ts +41 -0
  243. package/src/prompt.ts +7 -1
  244. package/src/recall.ts +43 -5
  245. package/src/search.ts +7 -5
  246. package/src/session-limiter.ts +47 -0
  247. package/src/temporal.ts +18 -6
  248. package/src/types.ts +1 -1
package/src/embedding.ts CHANGED
@@ -1,17 +1,22 @@
1
1
  /**
2
2
  * Embedding integration for vector search.
3
3
  *
4
- * Supports multiple embedding providers (Voyage AI, OpenAI) behind a common
5
- * interface. Provides embedding generation, pure-JS cosine similarity, and
6
- * vector search over the knowledge and distillation tables. All operations
7
- * are gated behind `search.embeddings.enabled` config + the provider's API
8
- * key env var — falls back silently to FTS-only when unavailable.
4
+ * Supports multiple embedding providers behind a common interface:
5
+ * - "local" (default): @huggingface/transformers + nomic-embed-text-v1.5
6
+ * (768 dims, Matryoshka-capable). Runs ONNX inference in a worker thread.
7
+ * - "voyage": Voyage AI API (voyage-code-3, 1024 dims)
8
+ * - "openai": OpenAI API (text-embedding-3-small, 1536 dims)
9
+ *
10
+ * Provides embedding generation, pure-JS cosine similarity, and vector search
11
+ * over the knowledge and distillation tables. All operations are gated behind
12
+ * `search.embeddings.enabled` config + the provider's API key env var — falls
13
+ * back silently to FTS-only when unavailable.
9
14
  */
10
15
 
11
16
  import { db } from "./db";
12
17
  import { config } from "./config";
13
18
  import * as log from "./log";
14
- import { isVendoredBinary, vendorModelInfo } from "./embedding-vendor";
19
+ import { vendorModelInfo } from "./embedding-vendor";
15
20
  import type {
16
21
  WorkerInbound,
17
22
  WorkerOutbound,
@@ -139,152 +144,76 @@ class OpenAIProvider implements EmbeddingProvider {
139
144
  }
140
145
 
141
146
  // ---------------------------------------------------------------------------
142
- // Local provider (fastembed + ONNX Runtime)
147
+ // Local provider (@huggingface/transformers + nomic-embed-text-v1.5)
143
148
  // ---------------------------------------------------------------------------
144
149
 
145
150
  /**
146
- * Thrown when `LocalProvider` is requested but `fastembed` cannot be loaded.
147
- * `fastembed` is an optionalDependency of `@loreai/core`: if its postinstall
148
- * fails (e.g. CUDA 13 hits the upstream `onnxruntime-node` bug — see #185),
149
- * the package install still succeeds but local embeddings are disabled.
150
- * Callers in `recall.ts` / `ltm.ts` / `distillation.ts` already gate on
151
- * `isAvailable()`, which flips to `false` after this error fires once.
151
+ * Thrown when `LocalProvider` cannot initialize (e.g. ONNX runtime fails
152
+ * to load). Callers in `recall.ts` / `ltm.ts` / `distillation.ts` gate
153
+ * on `isAvailable()`, which flips to `false` after this error fires once.
152
154
  */
153
155
  export class LocalProviderUnavailableError extends Error {
154
156
  constructor(cause?: unknown) {
155
157
  super(
156
- "Local embedding provider unavailable: 'fastembed' is not installed. " +
158
+ "Local embedding provider unavailable: '@huggingface/transformers' failed to initialize. " +
157
159
  "Configure search.embeddings.provider to 'voyage' or 'openai', or " +
158
- "reinstall with ONNXRUNTIME_NODE_INSTALL_CUDA=skip to retry the optional fastembed install.",
160
+ "set VOYAGE_API_KEY/OPENAI_API_KEY for automatic remote fallback.",
159
161
  );
160
162
  this.name = "LocalProviderUnavailableError";
161
163
  if (cause !== undefined) (this as Error & { cause?: unknown }).cause = cause;
162
164
  }
163
165
  }
164
166
 
165
- /** Cache of the fastembed module-load probe.
166
- * null = not yet probed; module = imported successfully; false = import failed. */
167
- let fastembedModule: typeof import("fastembed") | null = null;
168
- let fastembedProbed: boolean = false;
169
- let fastembedAvailable: boolean = false;
170
- let fastembedLogged: boolean = false;
171
-
172
- /** For tests: reset the fastembed probe cache. */
173
- export function _resetFastembedProbe(): void {
174
- fastembedModule = null;
175
- fastembedProbed = false;
176
- fastembedAvailable = false;
177
- fastembedLogged = false;
178
- }
167
+ /** Tracks whether the local provider has been probed and found unavailable.
168
+ * Set to true after the first worker init failure so subsequent calls
169
+ * to `isAvailable()` short-circuit. */
170
+ let localProviderKnownBroken = false;
171
+ let localProviderErrorLogged = false;
179
172
 
180
- /** For tests: simulate fastembed being unresolvable, without mocking the
181
- * dynamic import. After this call, `tryLoadFastembed()` short-circuits to
182
- * `null` and `isAvailable()` returns false for the local provider. */
183
- export function _markFastembedUnavailable(): void {
184
- fastembedModule = null;
185
- fastembedProbed = true;
186
- fastembedAvailable = false;
187
- fastembedLogged = true; // suppress the info log in tests
188
- }
189
-
190
- /**
191
- * Probe `fastembed` once. Returns the module on success, `null` on failure.
192
- * Logs an info-level note exactly once on the first failure so users know
193
- * how to recover (switch provider, fix the install, or rely on the
194
- * VOYAGE/OPENAI auto-fallback in `embed()`).
195
- *
196
- * In binary mode `import("fastembed")` resolves to the bundle Bun packed
197
- * at compile time (the binary's wrapper has already preloaded the
198
- * side-load `libonnxruntime` lib so the addon's dlopen succeeds). In
199
- * npm mode it goes through standard module resolution and may fail if
200
- * the optional postinstall didn't run.
201
- */
202
- async function tryLoadFastembed(): Promise<typeof import("fastembed") | null> {
203
- if (fastembedProbed) return fastembedAvailable ? fastembedModule : null;
204
- try {
205
- const mod = await loadFastembedModule();
206
- // Re-check after the async boundary: another caller (e.g. a test helper
207
- // like _markFastembedUnavailable) may have set the probe while we were
208
- // awaiting. Their decision takes priority — don't overwrite it.
209
- if (fastembedProbed) return fastembedAvailable ? fastembedModule : null;
210
- fastembedModule = mod;
211
- fastembedAvailable = true;
212
- } catch (err) {
213
- if (fastembedProbed) return fastembedAvailable ? fastembedModule : null;
214
- fastembedAvailable = false;
215
- if (!fastembedLogged) {
216
- fastembedLogged = true;
217
- const msg = err instanceof Error ? err.message : String(err);
218
- // Binary mode: a load failure here is a real bug (everything was
219
- // bundled at build time). npm mode: the optional dep didn't
220
- // install — point the user at the standard recovery options.
221
- const remediation = isVendoredBinary()
222
- ? "this is a bug in the lore binary; please file an issue. " +
223
- "Set VOYAGE_API_KEY/OPENAI_API_KEY for automatic remote fallback in the meantime"
224
- : "set search.embeddings.provider to 'voyage' or 'openai', " +
225
- "set VOYAGE_API_KEY/OPENAI_API_KEY for automatic remote fallback, " +
226
- "or reinstall fastembed with ONNXRUNTIME_NODE_INSTALL_CUDA=skip";
227
- log.info(
228
- `local embedding provider unavailable (fastembed not installed: ${msg}) — ${remediation}`,
229
- );
230
- }
231
- } finally {
232
- fastembedProbed = true;
233
- }
234
- return fastembedAvailable ? fastembedModule : null;
173
+ /** For tests: reset the local provider probe state. */
174
+ export function _resetLocalProviderProbe(): void {
175
+ localProviderKnownBroken = false;
176
+ localProviderErrorLogged = false;
235
177
  }
236
178
 
237
- /**
238
- * Resolve and import the fastembed module.
239
- *
240
- * One bare import covers both modes:
241
- *
242
- * - Binary mode: `bun build --compile` resolves "fastembed" against the
243
- * per-target staging `node_modules/` at build time and bundles it
244
- * (plus its transitive deps and `.node` addons) into the binary. The
245
- * side-load `libonnxruntime.so.1` / `.dylib` / `.dll` is preloaded
246
- * by the binary's wrapper before this import evaluates, so the
247
- * bundled `onnxruntime_binding.node`'s dlopen finds the cached
248
- * handle instead of failing with "shared object not found".
249
- *
250
- * - npm mode: standard Node/Bun resolution — works for `@loreai/core`
251
- * consumers whose `npm install` cleanly installed the optional dep.
252
- * If the postinstall failed (CUDA-13 hosts), the import throws here
253
- * and the caller logs + falls back to a remote provider.
254
- */
255
- async function loadFastembedModule(): Promise<typeof import("fastembed")> {
256
- return (await import("fastembed")) as typeof import("fastembed");
179
+ /** For tests: simulate the local provider being unavailable, without
180
+ * actually spawning a worker. After this call, `isAvailable()` returns
181
+ * false for the local provider. */
182
+ export function _markLocalProviderUnavailable(): void {
183
+ localProviderKnownBroken = true;
184
+ localProviderErrorLogged = true; // suppress the info log in tests
257
185
  }
258
186
 
259
- /** True iff the fastembed probe has run and reported the module missing. */
260
- function fastembedKnownUnavailable(): boolean {
261
- return fastembedProbed && !fastembedAvailable;
187
+ /** True iff the local provider has been probed and found broken. */
188
+ function localProviderKnownUnavailable(): boolean {
189
+ return localProviderKnownBroken;
262
190
  }
263
191
 
264
192
  /**
265
- * Local embedding provider using fastembed (bge-small-en-v1.5 by default).
193
+ * Local embedding provider using @huggingface/transformers with
194
+ * nomic-embed-text-v1.5 by default.
266
195
  *
267
196
  * No API key required — runs entirely on-device via ONNX Runtime.
268
- * Model files are downloaded on first use (~33MB) and cached in
269
- * `~/.cache/fastembed`. Subsequent inits load from disk in ~350ms.
197
+ * Model files are downloaded on first use (~137MB for INT8 quantized)
198
+ * and cached locally. Subsequent inits load from cache.
270
199
  *
271
200
  * ONNX inference runs in a dedicated `node:worker_threads` Worker so the
272
201
  * main thread's event loop stays free. This class is a thin RPC client —
273
202
  * it posts `{ texts, inputType }` to the worker and awaits a reply.
274
- * The worker owns the `FlagEmbedding` model and processes requests
203
+ * The worker owns the transformers.js pipeline and processes requests
275
204
  * sequentially from a priority queue (recall queries jump ahead of
276
205
  * backfill batches).
277
206
  *
278
- * Uses dynamic import so the module is only loaded when the "local"
279
- * provider is actually selected — avoids startup cost and allows
280
- * graceful fallback when the optional `fastembed` peer isn't installed
281
- * (its native onnxruntime-node may fail to build, e.g. on CUDA 13).
207
+ * Task instruction prefixes are prepended automatically:
208
+ * - "document" → "search_document: <text>"
209
+ * - "query" → "search_query: <text>"
282
210
  */
283
211
  class LocalProvider implements EmbeddingProvider {
284
212
  // With inference off the main thread, large batches no longer block
285
213
  // the event loop. 256 maximises throughput per round-trip to the
286
- // worker. Backfill callers use a smaller BACKFILL_CHUNK_SIZE to give
287
- // the worker's priority queue breathing room for recall queries.
214
+ // worker. Backfill callers use token-budget-based batching (see
215
+ // nextBatch) to give the worker's priority queue breathing room
216
+ // for recall queries and prevent OOM on long texts.
288
217
  readonly maxBatchSize = 256;
289
218
 
290
219
  private worker: import("node:worker_threads").Worker | null = null;
@@ -296,16 +225,16 @@ class LocalProvider implements EmbeddingProvider {
296
225
  >();
297
226
  private nextRequestId = 0;
298
227
  private initPromise: Promise<void> | null = null;
299
- private modelName: string;
228
+ private modelId: string;
229
+ private dimensions: number;
300
230
 
301
- constructor(modelName: string) {
302
- this.modelName = modelName;
231
+ constructor(modelId: string, dimensions: number) {
232
+ this.modelId = modelId;
233
+ this.dimensions = dimensions;
303
234
  }
304
235
 
305
236
  /**
306
- * Ensure the worker thread is running. Probes fastembed on the main
307
- * thread first (fast, cached) as a fast-fail gate — the worker is only
308
- * spawned if the module is known-loadable. Worker startup failure is
237
+ * Ensure the worker thread is running. Worker startup failure is
309
238
  * surfaced as `LocalProviderUnavailableError` to trigger the existing
310
239
  * auto-fallback to remote providers.
311
240
  */
@@ -315,10 +244,8 @@ class LocalProvider implements EmbeddingProvider {
315
244
  if (this.initPromise) return this.initPromise;
316
245
 
317
246
  this.initPromise = (async () => {
318
- // Fast-fail: probe fastembed on the main thread. This is cached
319
- // after the first call and preserves the existing error flow.
320
- const fastembed = await tryLoadFastembed();
321
- if (!fastembed) throw new LocalProviderUnavailableError();
247
+ // Fast-fail if a previous attempt already marked local broken.
248
+ if (localProviderKnownBroken) throw new LocalProviderUnavailableError();
322
249
 
323
250
  const { Worker } = await import("node:worker_threads");
324
251
 
@@ -334,17 +261,10 @@ class LocalProvider implements EmbeddingProvider {
334
261
  // In dev (Bun running .ts directly): embedding-worker.ts
335
262
  // In dist (esbuild bundle): embedding-worker.js
336
263
  const vendorWorkerUrl = (globalThis as Record<string, unknown>).__LORE_VENDOR_WORKER_URL__ as string | undefined;
337
- // On Windows, new Worker() with a file:// URL pointing to $bunfs
338
- // fails with ENOENT. Pass the raw path instead (B:\~BUN\root\...).
339
- // On macOS/Linux the file:// URL works fine with $bunfs paths.
340
264
  let workerUrl: string | URL;
341
265
  if (vendorWorkerUrl) {
342
266
  if (process.platform === "win32") {
343
- // On Windows, new Worker() with a file:// URL pointing to $bunfs
344
- // fails with ENOENT (Bun bug). Extract the raw path instead.
345
- // URL.pathname keeps %7E encoded; decodeURIComponent restores ~.
346
267
  workerUrl = decodeURIComponent(new URL(vendorWorkerUrl).pathname);
347
- // URL.pathname on Windows: /B:/~BUN/root/wrapper.js → strip leading /
348
268
  if (/^\/[A-Za-z]:/.test(workerUrl)) {
349
269
  workerUrl = workerUrl.slice(1);
350
270
  }
@@ -352,14 +272,29 @@ class LocalProvider implements EmbeddingProvider {
352
272
  workerUrl = vendorWorkerUrl;
353
273
  }
354
274
  } else {
355
- workerUrl = new URL(`./embedding-worker${import.meta.url.endsWith(".ts") ? ".ts" : ".js"}`, import.meta.url);
275
+ // In CJS bundles (gateway npm package), esbuild shims import.meta as
276
+ // an empty object {}, so import.meta.url is undefined. Fall back to
277
+ // __filename which esbuild defines in CJS output.
278
+ const selfUrl = typeof import.meta.url === "string" ? import.meta.url : undefined;
279
+ if (selfUrl) {
280
+ workerUrl = new URL(
281
+ `./embedding-worker${selfUrl.endsWith(".ts") ? ".ts" : ".js"}`,
282
+ selfUrl,
283
+ );
284
+ } else {
285
+ // CJS fallback: __filename is defined by esbuild's CJS output.
286
+ // The embedding-worker.cjs is built alongside the main bundle.
287
+ const { pathToFileURL } = await import("node:url");
288
+ workerUrl = new URL("./embedding-worker.cjs", pathToFileURL(__filename));
289
+ }
356
290
  }
357
291
 
358
292
  const vendor = vendorModelInfo();
359
293
  const workerInitData: WorkerInitData = {
360
- modelName: this.modelName,
294
+ modelId: this.modelId,
295
+ dimensions: this.dimensions,
361
296
  vendorModel: vendor
362
- ? { modelAbsoluteDirPath: vendor.modelAbsoluteDirPath, modelName: vendor.modelName }
297
+ ? { localModelPath: vendor.localModelPath }
363
298
  : null,
364
299
  };
365
300
 
@@ -394,6 +329,14 @@ class LocalProvider implements EmbeddingProvider {
394
329
  // LocalProviderUnavailableError on all pending + future requests.
395
330
  this.workerInitError = msg.error;
396
331
  this.workerReady = false;
332
+ localProviderKnownBroken = true;
333
+ if (!localProviderErrorLogged) {
334
+ localProviderErrorLogged = true;
335
+ log.info(
336
+ `local embedding provider failed to init: ${msg.error}. ` +
337
+ `Set VOYAGE_API_KEY/OPENAI_API_KEY for automatic remote fallback.`,
338
+ );
339
+ }
397
340
  for (const [, p] of this.pendingRequests) {
398
341
  p.reject(new LocalProviderUnavailableError(msg.error));
399
342
  }
@@ -453,6 +396,10 @@ class LocalProvider implements EmbeddingProvider {
453
396
  async embed(texts: string[], inputType: "document" | "query"): Promise<Float32Array[]> {
454
397
  await this.ensureWorker();
455
398
 
399
+ // Prepend Nomic task instruction prefix.
400
+ const prefix = inputType === "document" ? "search_document: " : "search_query: ";
401
+ const prefixed = texts.map((t) => prefix + t);
402
+
456
403
  const id = this.nextRequestId++;
457
404
  // Recall queries (single query-type texts) get high priority so they
458
405
  // jump ahead of any queued backfill batches in the worker.
@@ -464,7 +411,7 @@ class LocalProvider implements EmbeddingProvider {
464
411
  this.worker!.postMessage({
465
412
  type: "embed",
466
413
  id,
467
- texts,
414
+ texts: prefixed,
468
415
  inputType,
469
416
  priority,
470
417
  } satisfies WorkerInbound);
@@ -473,8 +420,6 @@ class LocalProvider implements EmbeddingProvider {
473
420
 
474
421
  /** Shut down the worker thread. Called by `resetProvider()` on config change.
475
422
  * Sends a shutdown message so the worker calls `process.exit(0)` internally.
476
- * We avoid `worker.terminate()` because Bun's forced termination triggers a
477
- * NAPI fatal error when tearing down onnxruntime's native bindings.
478
423
  *
479
424
  * Returns a promise that resolves once the worker has fully exited. Callers
480
425
  * that need a clean teardown (tests, config change) should await the result.
@@ -507,7 +452,7 @@ class LocalProvider implements EmbeddingProvider {
507
452
 
508
453
  /** Default models per provider — used when config doesn't override. */
509
454
  const PROVIDER_DEFAULTS: Record<string, { model: string; dimensions: number }> = {
510
- local: { model: "BGESmallENV15", dimensions: 384 },
455
+ local: { model: "nomic-ai/nomic-embed-text-v1.5", dimensions: 768 },
511
456
  voyage: { model: "voyage-code-3", dimensions: 1024 },
512
457
  openai: { model: "text-embedding-3-small", dimensions: 1536 },
513
458
  };
@@ -539,12 +484,11 @@ function getProvider(): EmbeddingProvider | null {
539
484
 
540
485
  switch (providerName) {
541
486
  case "local": {
542
- // `fastembed` is an optionalDependency. We construct the provider
543
- // optimistically here; the import + ONNX init happens lazily in
544
- // `LocalProvider.getModel()`, which throws `LocalProviderUnavailableError`
545
- // if the optional dep isn't installed. After that first failure
546
- // `isAvailable()` short-circuits to false and callers fall back to FTS.
547
- cachedProvider = new LocalProvider(model);
487
+ // Construct the provider optimistically — the ONNX model init
488
+ // happens lazily in the worker thread on first `embed()` call.
489
+ // If it fails, `LocalProviderUnavailableError` triggers the
490
+ // auto-fallback to a remote provider or FTS-only search.
491
+ cachedProvider = new LocalProvider(model, cfg.dimensions);
548
492
  break;
549
493
  }
550
494
  case "voyage": {
@@ -619,7 +563,7 @@ export function _saveAndClearProvider(): unknown {
619
563
  /** Restore a provider previously saved by `_saveAndClearProvider()`. Any
620
564
  * provider created between save and restore is discarded (callers must
621
565
  * ensure it's not a LocalProvider with a live worker — those suites only
622
- * use `_markFastembedUnavailable()` so no worker is spawned). */
566
+ * use `_markLocalProviderUnavailable()` so no worker is spawned). */
623
567
  export function _restoreProvider(token: unknown): void {
624
568
  const saved = token as { provider: EmbeddingProvider | null | undefined; remoteFallbackLogged: boolean };
625
569
  cachedProvider = saved.provider;
@@ -669,13 +613,13 @@ export function pickRemoteFallback(): {
669
613
  * Active when the configured provider's API key is set, unless explicitly
670
614
  * disabled via `search.embeddings.enabled: false` in .lore.json.
671
615
  *
672
- * For the `local` provider, also returns false once we've discovered the
673
- * optional `fastembed` peer is missing — callers (recall, ltm, distillation)
674
- * use this gate to skip embedding work and fall back to FTS-only search. */
616
+ * For the `local` provider, also returns false once the worker has reported
617
+ * an init failure — callers (recall, ltm, distillation) use this gate to
618
+ * skip embedding work and fall back to FTS-only search. */
675
619
  export function isAvailable(): boolean {
676
620
  const provider = getProvider();
677
621
  if (!provider) return false;
678
- if (provider instanceof LocalProvider && fastembedKnownUnavailable()) return false;
622
+ if (provider instanceof LocalProvider && localProviderKnownUnavailable()) return false;
679
623
  return true;
680
624
  }
681
625
 
@@ -686,7 +630,7 @@ export function isAvailable(): boolean {
686
630
  /**
687
631
  * Generate embeddings for the given texts using the configured provider.
688
632
  *
689
- * If the configured provider is `local` and `fastembed` turns out to be
633
+ * If the configured provider is `local` and it turns out to be
690
634
  * unavailable at runtime (failed install, vendor extraction blocked, etc.),
691
635
  * automatically swap to a remote provider when `VOYAGE_API_KEY` or
692
636
  * `OPENAI_API_KEY` is set in env. The swap is permanent for the rest of
@@ -717,7 +661,7 @@ export async function embed(
717
661
  if (!remoteFallbackLogged) {
718
662
  remoteFallbackLogged = true;
719
663
  log.info(
720
- `fastembed unavailable; auto-switching to ${fallback.name} ` +
664
+ `local embedding provider unavailable; auto-switching to ${fallback.name} ` +
721
665
  `(set search.embeddings.provider in .lore.json to silence this)`,
722
666
  );
723
667
  }
@@ -776,14 +720,25 @@ type VectorHit = { id: string; similarity: number };
776
720
  * Search all knowledge entries with embeddings by cosine similarity.
777
721
  * Returns top-k entries sorted by similarity descending.
778
722
  * Pure brute-force — fine for <100 entries (microseconds).
723
+ *
724
+ * @param excludeCategories Optional category names to exclude from results.
725
+ * Useful when preferences are injected in a separate system block and
726
+ * shouldn't compete for vector search slots with context-bound entries.
779
727
  */
780
728
  export function vectorSearch(
781
729
  queryEmbedding: Float32Array,
782
730
  limit = 10,
731
+ excludeCategories?: string[],
783
732
  ): VectorHit[] {
733
+ let sql = "SELECT id, embedding FROM knowledge WHERE embedding IS NOT NULL AND confidence > 0.2";
734
+ const params: string[] = [];
735
+ if (excludeCategories?.length) {
736
+ sql += ` AND category NOT IN (${excludeCategories.map(() => "?").join(",")})`;
737
+ params.push(...excludeCategories);
738
+ }
784
739
  const rows = db()
785
- .query("SELECT id, embedding FROM knowledge WHERE embedding IS NOT NULL AND confidence > 0.2")
786
- .all() as Array<{ id: string; embedding: Buffer }>;
740
+ .query(sql)
741
+ .all(...params) as Array<{ id: string; embedding: Buffer }>;
787
742
 
788
743
  const scored: VectorHit[] = [];
789
744
  for (const row of rows) {
@@ -826,6 +781,53 @@ export function vectorSearchDistillations(
826
781
  return scored.slice(0, limit);
827
782
  }
828
783
 
784
+ // ---------------------------------------------------------------------------
785
+ // Vector search — all distillations (including archived)
786
+ // ---------------------------------------------------------------------------
787
+
788
+ export type DistillationVectorHit = {
789
+ id: string;
790
+ session_id: string;
791
+ similarity: number;
792
+ };
793
+
794
+ /**
795
+ * Search ALL distillations (including archived) with embeddings by cosine
796
+ * similarity, scoped to a single project. Returns session_id alongside
797
+ * similarity for cross-session counting.
798
+ *
799
+ * Unlike vectorSearchDistillations() which filters to non-archived only,
800
+ * this searches the full distillation archive — necessary for detecting
801
+ * repeated instructions across sessions where older distillations have
802
+ * been archived after meta-distillation.
803
+ *
804
+ * Pure brute-force — fine for ~200 entries per project. Safety-capped
805
+ * at 500 rows to prevent excessive CPU on long-running projects.
806
+ */
807
+ const MAX_DISTILLATION_VECTOR_ROWS = 500;
808
+
809
+ export function vectorSearchAllDistillations(
810
+ queryEmbedding: Float32Array,
811
+ projectId: string,
812
+ limit = 20,
813
+ ): DistillationVectorHit[] {
814
+ const rows = db()
815
+ .query(
816
+ "SELECT id, session_id, embedding FROM distillations WHERE embedding IS NOT NULL AND project_id = ? ORDER BY created_at DESC LIMIT ?",
817
+ )
818
+ .all(projectId, MAX_DISTILLATION_VECTOR_ROWS) as Array<{ id: string; session_id: string; embedding: Buffer }>;
819
+
820
+ const scored: DistillationVectorHit[] = [];
821
+ for (const row of rows) {
822
+ const vec = fromBlob(row.embedding);
823
+ const sim = cosineSimilarity(queryEmbedding, vec);
824
+ scored.push({ id: row.id, session_id: row.session_id, similarity: sim });
825
+ }
826
+
827
+ scored.sort((a, b) => b.similarity - a.similarity);
828
+ return scored.slice(0, limit);
829
+ }
830
+
829
831
  // ---------------------------------------------------------------------------
830
832
  // Fire-and-forget embedding
831
833
  // ---------------------------------------------------------------------------
@@ -1107,14 +1109,52 @@ export async function runStartupBackfill(): Promise<void> {
1107
1109
  // ---------------------------------------------------------------------------
1108
1110
 
1109
1111
  /**
1110
- * Chunk size for backfill embed requests. Each chunk becomes a separate
1111
- * message to the embedding worker. Keeping chunks small (32) gives the
1112
- * worker's priority queue natural gaps to interleave high-priority recall
1113
- * queries between backfill batches. The provider's `maxBatchSize` (256)
1114
- * is the upper limit for any single embed call; this is intentionally
1115
- * smaller for backfill-vs-live interleaving.
1112
+ * Maximum chunk size for backfill embed requests. Each chunk becomes a
1113
+ * separate message to the embedding worker. Keeping chunks small gives
1114
+ * the worker's priority queue natural gaps to interleave high-priority
1115
+ * recall queries between backfill batches.
1116
+ */
1117
+ const MAX_BACKFILL_CHUNK = 8;
1118
+
1119
+ /**
1120
+ * Maximum total "token area" (batch_size × max_sequence_length) per
1121
+ * backfill batch. ONNX runtime pads all texts to the longest sequence,
1122
+ * so the peak tensor size is proportional to this product. A budget of
1123
+ * 4096 tokens allows e.g. 8 × 512-token texts, or 2 × 2048-token texts.
1124
+ * Prevents OOM on batches with long distillation observations (~4000+
1125
+ * chars) that were blowing up at fixed batch sizes.
1126
+ */
1127
+ const MAX_BATCH_TOKEN_AREA = 4096;
1128
+
1129
+ /**
1130
+ * Rough chars-per-token ratio for budget estimation. Nomic v1.5 uses a
1131
+ * WordPiece tokenizer; English text averages ~4 chars/token.
1132
+ */
1133
+ const CHARS_PER_TOKEN = 4;
1134
+
1135
+ /**
1136
+ * Partition `rows` into batches that respect both MAX_BACKFILL_CHUNK and
1137
+ * MAX_BATCH_TOKEN_AREA. Each batch's estimated token area is
1138
+ * `batch.length × max_tokens_in_batch`. We greedily add rows until the
1139
+ * next row would push the area over budget; the first row is always taken, so an oversized text forms a batch of one.
1116
1140
  */
1117
- const BACKFILL_CHUNK_SIZE = 32;
1141
+ function nextBatch<T extends { text: string }>(rows: T[], start: number): T[] {
1142
+ const batch: T[] = [];
1143
+ let maxTokens = 0;
1144
+
1145
+ for (let i = start; i < rows.length && batch.length < MAX_BACKFILL_CHUNK; i++) {
1146
+ const estTokens = Math.ceil(rows[i].text.length / CHARS_PER_TOKEN);
1147
+ const newMax = Math.max(maxTokens, estTokens);
1148
+ const newArea = (batch.length + 1) * newMax;
1149
+
1150
+ if (batch.length > 0 && newArea > MAX_BATCH_TOKEN_AREA) break;
1151
+
1152
+ batch.push(rows[i]);
1153
+ maxTokens = newMax;
1154
+ }
1155
+
1156
+ return batch;
1157
+ }
1118
1158
 
1119
1159
  /**
1120
1160
  * Embed all knowledge entries that are missing embeddings.
@@ -1136,14 +1176,18 @@ export async function backfillEmbeddings(): Promise<number> {
1136
1176
 
1137
1177
  if (!rows.length) return 0;
1138
1178
 
1179
+ // Pre-compute text for token-budget batching
1180
+ const items = rows.map((r) => ({ ...r, text: `${r.title}\n${r.content}` }));
1181
+
1139
1182
  let embedded = 0;
1183
+ let i = 0;
1140
1184
 
1141
- for (let i = 0; i < rows.length; i += BACKFILL_CHUNK_SIZE) {
1142
- const batch = rows.slice(i, i + BACKFILL_CHUNK_SIZE);
1143
- const texts = batch.map((r) => `${r.title}\n${r.content}`);
1185
+ while (i < items.length) {
1186
+ const batch = nextBatch(items, i);
1187
+ i += batch.length;
1144
1188
 
1145
1189
  try {
1146
- const vectors = await embed(texts, "document");
1190
+ const vectors = await embed(batch.map((b) => b.text), "document");
1147
1191
  const update = db().prepare(
1148
1192
  "UPDATE knowledge SET embedding = ? WHERE id = ?",
1149
1193
  );
@@ -1153,7 +1197,8 @@ export async function backfillEmbeddings(): Promise<number> {
1153
1197
  embedded++;
1154
1198
  }
1155
1199
  } catch (err) {
1156
- log.info(`embedding backfill batch ${i}-${i + batch.length} failed:`, err);
1200
+ // log.error sends to Sentry via captureException
1201
+ log.error(`embedding backfill batch failed (${batch.length} items):`, err);
1157
1202
  }
1158
1203
  // No yieldToEventLoop() needed — embed() is truly async (worker thread).
1159
1204
  }
@@ -1188,17 +1233,21 @@ export async function backfillDistillationEmbeddings(): Promise<number> {
1188
1233
  let embedded = 0;
1189
1234
 
1190
1235
  // Progress logging: heartbeat every PROGRESS_INTERVAL embedded so a long
1191
- // backfill (e.g. 1000+ pending after a fastembed reinstall) doesn't look
1236
+ // backfill (e.g. 1000+ pending after a model change) doesn't look
1192
1237
  // like a silent hang. Without this, only the final tally was logged.
1193
1238
  const PROGRESS_INTERVAL = 256;
1194
1239
  let nextProgressAt = PROGRESS_INTERVAL;
1195
1240
 
1196
- for (let i = 0; i < rows.length; i += BACKFILL_CHUNK_SIZE) {
1197
- const batch = rows.slice(i, i + BACKFILL_CHUNK_SIZE);
1198
- const texts = batch.map((r) => r.observations);
1241
+ // Pre-compute text for token-budget batching
1242
+ const items = rows.map((r) => ({ ...r, text: r.observations }));
1243
+ let i = 0;
1244
+
1245
+ while (i < items.length) {
1246
+ const batch = nextBatch(items, i);
1247
+ i += batch.length;
1199
1248
 
1200
1249
  try {
1201
- const vectors = await embed(texts, "document");
1250
+ const vectors = await embed(batch.map((b) => b.text), "document");
1202
1251
  const update = db().prepare(
1203
1252
  "UPDATE distillations SET embedding = ? WHERE id = ?",
1204
1253
  );
@@ -1208,7 +1257,8 @@ export async function backfillDistillationEmbeddings(): Promise<number> {
1208
1257
  embedded++;
1209
1258
  }
1210
1259
  } catch (err) {
1211
- log.info(`distillation embedding backfill batch ${i}-${i + batch.length} failed:`, err);
1260
+ // log.error sends to Sentry via captureException
1261
+ log.error(`distillation embedding backfill batch failed (${batch.length} items):`, err);
1212
1262
  }
1213
1263
 
1214
1264
  if (embedded >= nextProgressAt) {