@loreai/core 0.17.1 → 0.18.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (235) hide show
  1. package/dist/bun/agents-file.d.ts +4 -0
  2. package/dist/bun/agents-file.d.ts.map +1 -1
  3. package/dist/bun/config.d.ts +2 -0
  4. package/dist/bun/config.d.ts.map +1 -1
  5. package/dist/bun/curator.d.ts +45 -0
  6. package/dist/bun/curator.d.ts.map +1 -1
  7. package/dist/bun/data-dir.d.ts +18 -0
  8. package/dist/bun/data-dir.d.ts.map +1 -0
  9. package/dist/bun/db.d.ts +12 -0
  10. package/dist/bun/db.d.ts.map +1 -1
  11. package/dist/bun/distillation.d.ts.map +1 -1
  12. package/dist/bun/embedding-vendor.d.ts +22 -38
  13. package/dist/bun/embedding-vendor.d.ts.map +1 -1
  14. package/dist/bun/embedding-worker-types.d.ts +17 -12
  15. package/dist/bun/embedding-worker-types.d.ts.map +1 -1
  16. package/dist/bun/embedding-worker.d.ts +9 -2
  17. package/dist/bun/embedding-worker.d.ts.map +1 -1
  18. package/dist/bun/embedding-worker.js +38864 -33
  19. package/dist/bun/embedding-worker.js.map +4 -4
  20. package/dist/bun/embedding.d.ts +30 -22
  21. package/dist/bun/embedding.d.ts.map +1 -1
  22. package/dist/bun/gradient.d.ts +8 -1
  23. package/dist/bun/gradient.d.ts.map +1 -1
  24. package/dist/bun/import/detect.d.ts +14 -0
  25. package/dist/bun/import/detect.d.ts.map +1 -0
  26. package/dist/bun/import/extract.d.ts +43 -0
  27. package/dist/bun/import/extract.d.ts.map +1 -0
  28. package/dist/bun/import/history.d.ts +40 -0
  29. package/dist/bun/import/history.d.ts.map +1 -0
  30. package/dist/bun/import/index.d.ts +17 -0
  31. package/dist/bun/import/index.d.ts.map +1 -0
  32. package/dist/bun/import/providers/aider.d.ts +2 -0
  33. package/dist/bun/import/providers/aider.d.ts.map +1 -0
  34. package/dist/bun/import/providers/claude-code.d.ts +2 -0
  35. package/dist/bun/import/providers/claude-code.d.ts.map +1 -0
  36. package/dist/bun/import/providers/cline.d.ts +2 -0
  37. package/dist/bun/import/providers/cline.d.ts.map +1 -0
  38. package/dist/bun/import/providers/codex.d.ts +2 -0
  39. package/dist/bun/import/providers/codex.d.ts.map +1 -0
  40. package/dist/bun/import/providers/continue.d.ts +2 -0
  41. package/dist/bun/import/providers/continue.d.ts.map +1 -0
  42. package/dist/bun/import/providers/index.d.ts +19 -0
  43. package/dist/bun/import/providers/index.d.ts.map +1 -0
  44. package/dist/bun/import/providers/opencode.d.ts +2 -0
  45. package/dist/bun/import/providers/opencode.d.ts.map +1 -0
  46. package/dist/bun/import/providers/pi.d.ts +2 -0
  47. package/dist/bun/import/providers/pi.d.ts.map +1 -0
  48. package/dist/bun/import/types.d.ts +82 -0
  49. package/dist/bun/import/types.d.ts.map +1 -0
  50. package/dist/bun/index.d.ts +4 -1
  51. package/dist/bun/index.d.ts.map +1 -1
  52. package/dist/bun/index.js +2217 -224
  53. package/dist/bun/index.js.map +4 -4
  54. package/dist/bun/instruction-detect.d.ts +66 -0
  55. package/dist/bun/instruction-detect.d.ts.map +1 -0
  56. package/dist/bun/log.d.ts +9 -0
  57. package/dist/bun/log.d.ts.map +1 -1
  58. package/dist/bun/ltm.d.ts +40 -0
  59. package/dist/bun/ltm.d.ts.map +1 -1
  60. package/dist/bun/pattern-extract.d.ts +7 -0
  61. package/dist/bun/pattern-extract.d.ts.map +1 -1
  62. package/dist/bun/prompt.d.ts +1 -1
  63. package/dist/bun/prompt.d.ts.map +1 -1
  64. package/dist/bun/recall.d.ts.map +1 -1
  65. package/dist/bun/search.d.ts +5 -3
  66. package/dist/bun/search.d.ts.map +1 -1
  67. package/dist/bun/temporal.d.ts.map +1 -1
  68. package/dist/bun/types.d.ts +1 -1
  69. package/dist/node/agents-file.d.ts +4 -0
  70. package/dist/node/agents-file.d.ts.map +1 -1
  71. package/dist/node/config.d.ts +2 -0
  72. package/dist/node/config.d.ts.map +1 -1
  73. package/dist/node/curator.d.ts +45 -0
  74. package/dist/node/curator.d.ts.map +1 -1
  75. package/dist/node/data-dir.d.ts +18 -0
  76. package/dist/node/data-dir.d.ts.map +1 -0
  77. package/dist/node/db.d.ts +12 -0
  78. package/dist/node/db.d.ts.map +1 -1
  79. package/dist/node/distillation.d.ts.map +1 -1
  80. package/dist/node/embedding-vendor.d.ts +22 -38
  81. package/dist/node/embedding-vendor.d.ts.map +1 -1
  82. package/dist/node/embedding-worker-types.d.ts +17 -12
  83. package/dist/node/embedding-worker-types.d.ts.map +1 -1
  84. package/dist/node/embedding-worker.d.ts +9 -2
  85. package/dist/node/embedding-worker.d.ts.map +1 -1
  86. package/dist/node/embedding-worker.js +38864 -33
  87. package/dist/node/embedding-worker.js.map +4 -4
  88. package/dist/node/embedding.d.ts +30 -22
  89. package/dist/node/embedding.d.ts.map +1 -1
  90. package/dist/node/gradient.d.ts +8 -1
  91. package/dist/node/gradient.d.ts.map +1 -1
  92. package/dist/node/import/detect.d.ts +14 -0
  93. package/dist/node/import/detect.d.ts.map +1 -0
  94. package/dist/node/import/extract.d.ts +43 -0
  95. package/dist/node/import/extract.d.ts.map +1 -0
  96. package/dist/node/import/history.d.ts +40 -0
  97. package/dist/node/import/history.d.ts.map +1 -0
  98. package/dist/node/import/index.d.ts +17 -0
  99. package/dist/node/import/index.d.ts.map +1 -0
  100. package/dist/node/import/providers/aider.d.ts +2 -0
  101. package/dist/node/import/providers/aider.d.ts.map +1 -0
  102. package/dist/node/import/providers/claude-code.d.ts +2 -0
  103. package/dist/node/import/providers/claude-code.d.ts.map +1 -0
  104. package/dist/node/import/providers/cline.d.ts +2 -0
  105. package/dist/node/import/providers/cline.d.ts.map +1 -0
  106. package/dist/node/import/providers/codex.d.ts +2 -0
  107. package/dist/node/import/providers/codex.d.ts.map +1 -0
  108. package/dist/node/import/providers/continue.d.ts +2 -0
  109. package/dist/node/import/providers/continue.d.ts.map +1 -0
  110. package/dist/node/import/providers/index.d.ts +19 -0
  111. package/dist/node/import/providers/index.d.ts.map +1 -0
  112. package/dist/node/import/providers/opencode.d.ts +2 -0
  113. package/dist/node/import/providers/opencode.d.ts.map +1 -0
  114. package/dist/node/import/providers/pi.d.ts +2 -0
  115. package/dist/node/import/providers/pi.d.ts.map +1 -0
  116. package/dist/node/import/types.d.ts +82 -0
  117. package/dist/node/import/types.d.ts.map +1 -0
  118. package/dist/node/index.d.ts +4 -1
  119. package/dist/node/index.d.ts.map +1 -1
  120. package/dist/node/index.js +2217 -224
  121. package/dist/node/index.js.map +4 -4
  122. package/dist/node/instruction-detect.d.ts +66 -0
  123. package/dist/node/instruction-detect.d.ts.map +1 -0
  124. package/dist/node/log.d.ts +9 -0
  125. package/dist/node/log.d.ts.map +1 -1
  126. package/dist/node/ltm.d.ts +40 -0
  127. package/dist/node/ltm.d.ts.map +1 -1
  128. package/dist/node/pattern-extract.d.ts +7 -0
  129. package/dist/node/pattern-extract.d.ts.map +1 -1
  130. package/dist/node/prompt.d.ts +1 -1
  131. package/dist/node/prompt.d.ts.map +1 -1
  132. package/dist/node/recall.d.ts.map +1 -1
  133. package/dist/node/search.d.ts +5 -3
  134. package/dist/node/search.d.ts.map +1 -1
  135. package/dist/node/temporal.d.ts.map +1 -1
  136. package/dist/node/types.d.ts +1 -1
  137. package/dist/types/agents-file.d.ts +4 -0
  138. package/dist/types/agents-file.d.ts.map +1 -1
  139. package/dist/types/config.d.ts +2 -0
  140. package/dist/types/config.d.ts.map +1 -1
  141. package/dist/types/curator.d.ts +45 -0
  142. package/dist/types/curator.d.ts.map +1 -1
  143. package/dist/types/data-dir.d.ts +18 -0
  144. package/dist/types/data-dir.d.ts.map +1 -0
  145. package/dist/types/db.d.ts +12 -0
  146. package/dist/types/db.d.ts.map +1 -1
  147. package/dist/types/distillation.d.ts.map +1 -1
  148. package/dist/types/embedding-vendor.d.ts +22 -38
  149. package/dist/types/embedding-vendor.d.ts.map +1 -1
  150. package/dist/types/embedding-worker-types.d.ts +17 -12
  151. package/dist/types/embedding-worker-types.d.ts.map +1 -1
  152. package/dist/types/embedding-worker.d.ts +9 -2
  153. package/dist/types/embedding-worker.d.ts.map +1 -1
  154. package/dist/types/embedding.d.ts +30 -22
  155. package/dist/types/embedding.d.ts.map +1 -1
  156. package/dist/types/gradient.d.ts +8 -1
  157. package/dist/types/gradient.d.ts.map +1 -1
  158. package/dist/types/import/detect.d.ts +14 -0
  159. package/dist/types/import/detect.d.ts.map +1 -0
  160. package/dist/types/import/extract.d.ts +43 -0
  161. package/dist/types/import/extract.d.ts.map +1 -0
  162. package/dist/types/import/history.d.ts +40 -0
  163. package/dist/types/import/history.d.ts.map +1 -0
  164. package/dist/types/import/index.d.ts +17 -0
  165. package/dist/types/import/index.d.ts.map +1 -0
  166. package/dist/types/import/providers/aider.d.ts +2 -0
  167. package/dist/types/import/providers/aider.d.ts.map +1 -0
  168. package/dist/types/import/providers/claude-code.d.ts +2 -0
  169. package/dist/types/import/providers/claude-code.d.ts.map +1 -0
  170. package/dist/types/import/providers/cline.d.ts +2 -0
  171. package/dist/types/import/providers/cline.d.ts.map +1 -0
  172. package/dist/types/import/providers/codex.d.ts +2 -0
  173. package/dist/types/import/providers/codex.d.ts.map +1 -0
  174. package/dist/types/import/providers/continue.d.ts +2 -0
  175. package/dist/types/import/providers/continue.d.ts.map +1 -0
  176. package/dist/types/import/providers/index.d.ts +19 -0
  177. package/dist/types/import/providers/index.d.ts.map +1 -0
  178. package/dist/types/import/providers/opencode.d.ts +2 -0
  179. package/dist/types/import/providers/opencode.d.ts.map +1 -0
  180. package/dist/types/import/providers/pi.d.ts +2 -0
  181. package/dist/types/import/providers/pi.d.ts.map +1 -0
  182. package/dist/types/import/types.d.ts +82 -0
  183. package/dist/types/import/types.d.ts.map +1 -0
  184. package/dist/types/index.d.ts +4 -1
  185. package/dist/types/index.d.ts.map +1 -1
  186. package/dist/types/instruction-detect.d.ts +66 -0
  187. package/dist/types/instruction-detect.d.ts.map +1 -0
  188. package/dist/types/log.d.ts +9 -0
  189. package/dist/types/log.d.ts.map +1 -1
  190. package/dist/types/ltm.d.ts +40 -0
  191. package/dist/types/ltm.d.ts.map +1 -1
  192. package/dist/types/pattern-extract.d.ts +7 -0
  193. package/dist/types/pattern-extract.d.ts.map +1 -1
  194. package/dist/types/prompt.d.ts +1 -1
  195. package/dist/types/prompt.d.ts.map +1 -1
  196. package/dist/types/recall.d.ts.map +1 -1
  197. package/dist/types/search.d.ts +5 -3
  198. package/dist/types/search.d.ts.map +1 -1
  199. package/dist/types/temporal.d.ts.map +1 -1
  200. package/dist/types/types.d.ts +1 -1
  201. package/package.json +2 -4
  202. package/src/agents-file.ts +41 -13
  203. package/src/config.ts +31 -18
  204. package/src/curator.ts +111 -75
  205. package/src/data-dir.ts +76 -0
  206. package/src/db.ts +110 -11
  207. package/src/distillation.ts +10 -2
  208. package/src/embedding-vendor.ts +23 -40
  209. package/src/embedding-worker-types.ts +19 -11
  210. package/src/embedding-worker.ts +111 -47
  211. package/src/embedding.ts +196 -171
  212. package/src/gradient.ts +9 -1
  213. package/src/import/detect.ts +37 -0
  214. package/src/import/extract.ts +137 -0
  215. package/src/import/history.ts +99 -0
  216. package/src/import/index.ts +45 -0
  217. package/src/import/providers/aider.ts +207 -0
  218. package/src/import/providers/claude-code.ts +339 -0
  219. package/src/import/providers/cline.ts +324 -0
  220. package/src/import/providers/codex.ts +369 -0
  221. package/src/import/providers/continue.ts +304 -0
  222. package/src/import/providers/index.ts +32 -0
  223. package/src/import/providers/opencode.ts +272 -0
  224. package/src/import/providers/pi.ts +332 -0
  225. package/src/import/types.ts +91 -0
  226. package/src/index.ts +5 -0
  227. package/src/instruction-detect.ts +275 -0
  228. package/src/log.ts +91 -3
  229. package/src/ltm.ts +316 -3
  230. package/src/pattern-extract.ts +41 -0
  231. package/src/prompt.ts +7 -1
  232. package/src/recall.ts +43 -5
  233. package/src/search.ts +7 -5
  234. package/src/temporal.ts +8 -6
  235. package/src/types.ts +1 -1
package/src/embedding.ts CHANGED
@@ -1,17 +1,22 @@
1
1
  /**
2
2
  * Embedding integration for vector search.
3
3
  *
4
- * Supports multiple embedding providers (Voyage AI, OpenAI) behind a common
5
- * interface. Provides embedding generation, pure-JS cosine similarity, and
6
- * vector search over the knowledge and distillation tables. All operations
7
- * are gated behind `search.embeddings.enabled` config + the provider's API
8
- * key env var — falls back silently to FTS-only when unavailable.
4
+ * Supports multiple embedding providers behind a common interface:
5
+ * - "local" (default): @huggingface/transformers + nomic-embed-text-v1.5
6
+ * (768 dims, Matryoshka-capable). Runs ONNX inference in a worker thread.
7
+ * - "voyage": Voyage AI API (voyage-code-3, 1024 dims)
8
+ * - "openai": OpenAI API (text-embedding-3-small, 1536 dims)
9
+ *
10
+ * Provides embedding generation, pure-JS cosine similarity, and vector search
11
+ * over the knowledge and distillation tables. All operations are gated behind
12
+ * `search.embeddings.enabled` config + the provider's API key env var — falls
13
+ * back silently to FTS-only when unavailable.
9
14
  */
10
15
 
11
16
  import { db } from "./db";
12
17
  import { config } from "./config";
13
18
  import * as log from "./log";
14
- import { isVendoredBinary, vendorModelInfo } from "./embedding-vendor";
19
+ import { vendorModelInfo } from "./embedding-vendor";
15
20
  import type {
16
21
  WorkerInbound,
17
22
  WorkerOutbound,
@@ -139,152 +144,76 @@ class OpenAIProvider implements EmbeddingProvider {
139
144
  }
140
145
 
141
146
  // ---------------------------------------------------------------------------
142
- // Local provider (fastembed + ONNX Runtime)
147
+ // Local provider (@huggingface/transformers + nomic-embed-text-v1.5)
143
148
  // ---------------------------------------------------------------------------
144
149
 
145
150
  /**
146
- * Thrown when `LocalProvider` is requested but `fastembed` cannot be loaded.
147
- * `fastembed` is an optionalDependency of `@loreai/core`: if its postinstall
148
- * fails (e.g. CUDA 13 hits the upstream `onnxruntime-node` bug see #185),
149
- * the package install still succeeds but local embeddings are disabled.
150
- * Callers in `recall.ts` / `ltm.ts` / `distillation.ts` already gate on
151
- * `isAvailable()`, which flips to `false` after this error fires once.
151
+ * Thrown when `LocalProvider` cannot initialize (e.g. ONNX runtime fails
152
+ * to load). Callers in `recall.ts` / `ltm.ts` / `distillation.ts` gate
153
+ * on `isAvailable()`, which flips to `false` after this error fires once.
152
154
  */
153
155
  export class LocalProviderUnavailableError extends Error {
154
156
  constructor(cause?: unknown) {
155
157
  super(
156
- "Local embedding provider unavailable: 'fastembed' is not installed. " +
158
+ "Local embedding provider unavailable: '@huggingface/transformers' failed to initialize. " +
157
159
  "Configure search.embeddings.provider to 'voyage' or 'openai', or " +
158
- "reinstall with ONNXRUNTIME_NODE_INSTALL_CUDA=skip to retry the optional fastembed install.",
160
+ "set VOYAGE_API_KEY/OPENAI_API_KEY for automatic remote fallback.",
159
161
  );
160
162
  this.name = "LocalProviderUnavailableError";
161
163
  if (cause !== undefined) (this as Error & { cause?: unknown }).cause = cause;
162
164
  }
163
165
  }
164
166
 
165
- /** Cache of the fastembed module-load probe.
166
- * null = not yet probed; module = imported successfully; false = import failed. */
167
- let fastembedModule: typeof import("fastembed") | null = null;
168
- let fastembedProbed: boolean = false;
169
- let fastembedAvailable: boolean = false;
170
- let fastembedLogged: boolean = false;
171
-
172
- /** For tests: reset the fastembed probe cache. */
173
- export function _resetFastembedProbe(): void {
174
- fastembedModule = null;
175
- fastembedProbed = false;
176
- fastembedAvailable = false;
177
- fastembedLogged = false;
178
- }
179
-
180
- /** For tests: simulate fastembed being unresolvable, without mocking the
181
- * dynamic import. After this call, `tryLoadFastembed()` short-circuits to
182
- * `null` and `isAvailable()` returns false for the local provider. */
183
- export function _markFastembedUnavailable(): void {
184
- fastembedModule = null;
185
- fastembedProbed = true;
186
- fastembedAvailable = false;
187
- fastembedLogged = true; // suppress the info log in tests
188
- }
167
+ /** Tracks whether the local provider has been probed and found unavailable.
168
+ * Set to true after the first worker init failure so subsequent calls
169
+ * to `isAvailable()` short-circuit. */
170
+ let localProviderKnownBroken = false;
171
+ let localProviderErrorLogged = false;
189
172
 
190
- /**
191
- * Probe `fastembed` once. Returns the module on success, `null` on failure.
192
- * Logs an info-level note exactly once on the first failure so users know
193
- * how to recover (switch provider, fix the install, or rely on the
194
- * VOYAGE/OPENAI auto-fallback in `embed()`).
195
- *
196
- * In binary mode `import("fastembed")` resolves to the bundle Bun packed
197
- * at compile time (the binary's wrapper has already preloaded the
198
- * side-load `libonnxruntime` lib so the addon's dlopen succeeds). In
199
- * npm mode it goes through standard module resolution and may fail if
200
- * the optional postinstall didn't run.
201
- */
202
- async function tryLoadFastembed(): Promise<typeof import("fastembed") | null> {
203
- if (fastembedProbed) return fastembedAvailable ? fastembedModule : null;
204
- try {
205
- const mod = await loadFastembedModule();
206
- // Re-check after the async boundary: another caller (e.g. a test helper
207
- // like _markFastembedUnavailable) may have set the probe while we were
208
- // awaiting. Their decision takes priority — don't overwrite it.
209
- if (fastembedProbed) return fastembedAvailable ? fastembedModule : null;
210
- fastembedModule = mod;
211
- fastembedAvailable = true;
212
- } catch (err) {
213
- if (fastembedProbed) return fastembedAvailable ? fastembedModule : null;
214
- fastembedAvailable = false;
215
- if (!fastembedLogged) {
216
- fastembedLogged = true;
217
- const msg = err instanceof Error ? err.message : String(err);
218
- // Binary mode: a load failure here is a real bug (everything was
219
- // bundled at build time). npm mode: the optional dep didn't
220
- // install — point the user at the standard recovery options.
221
- const remediation = isVendoredBinary()
222
- ? "this is a bug in the lore binary; please file an issue. " +
223
- "Set VOYAGE_API_KEY/OPENAI_API_KEY for automatic remote fallback in the meantime"
224
- : "set search.embeddings.provider to 'voyage' or 'openai', " +
225
- "set VOYAGE_API_KEY/OPENAI_API_KEY for automatic remote fallback, " +
226
- "or reinstall fastembed with ONNXRUNTIME_NODE_INSTALL_CUDA=skip";
227
- log.info(
228
- `local embedding provider unavailable (fastembed not installed: ${msg}) — ${remediation}`,
229
- );
230
- }
231
- } finally {
232
- fastembedProbed = true;
233
- }
234
- return fastembedAvailable ? fastembedModule : null;
173
+ /** For tests: reset the local provider probe state. */
174
+ export function _resetLocalProviderProbe(): void {
175
+ localProviderKnownBroken = false;
176
+ localProviderErrorLogged = false;
235
177
  }
236
178
 
237
- /**
238
- * Resolve and import the fastembed module.
239
- *
240
- * One bare import covers both modes:
241
- *
242
- * - Binary mode: `bun build --compile` resolves "fastembed" against the
243
- * per-target staging `node_modules/` at build time and bundles it
244
- * (plus its transitive deps and `.node` addons) into the binary. The
245
- * side-load `libonnxruntime.so.1` / `.dylib` / `.dll` is preloaded
246
- * by the binary's wrapper before this import evaluates, so the
247
- * bundled `onnxruntime_binding.node`'s dlopen finds the cached
248
- * handle instead of failing with "shared object not found".
249
- *
250
- * - npm mode: standard Node/Bun resolution — works for `@loreai/core`
251
- * consumers whose `npm install` cleanly installed the optional dep.
252
- * If the postinstall failed (CUDA-13 hosts), the import throws here
253
- * and the caller logs + falls back to a remote provider.
254
- */
255
- async function loadFastembedModule(): Promise<typeof import("fastembed")> {
256
- return (await import("fastembed")) as typeof import("fastembed");
179
+ /** For tests: simulate the local provider being unavailable, without
180
+ * actually spawning a worker. After this call, `isAvailable()` returns
181
+ * false for the local provider. */
182
+ export function _markLocalProviderUnavailable(): void {
183
+ localProviderKnownBroken = true;
184
+ localProviderErrorLogged = true; // suppress the info log in tests
257
185
  }
258
186
 
259
- /** True iff the fastembed probe has run and reported the module missing. */
260
- function fastembedKnownUnavailable(): boolean {
261
- return fastembedProbed && !fastembedAvailable;
187
+ /** True iff the local provider has been probed and found broken. */
188
+ function localProviderKnownUnavailable(): boolean {
189
+ return localProviderKnownBroken;
262
190
  }
263
191
 
264
192
  /**
265
- * Local embedding provider using fastembed (bge-small-en-v1.5 by default).
193
+ * Local embedding provider using @huggingface/transformers with
194
+ * nomic-embed-text-v1.5 by default.
266
195
  *
267
196
  * No API key required — runs entirely on-device via ONNX Runtime.
268
- * Model files are downloaded on first use (~33MB) and cached in
269
- * `~/.cache/fastembed`. Subsequent inits load from disk in ~350ms.
197
+ * Model files are downloaded on first use (~137MB for INT8 quantized)
198
+ * and cached locally. Subsequent inits load from cache.
270
199
  *
271
200
  * ONNX inference runs in a dedicated `node:worker_threads` Worker so the
272
201
  * main thread's event loop stays free. This class is a thin RPC client —
273
202
  * it posts `{ texts, inputType }` to the worker and awaits a reply.
274
- * The worker owns the `FlagEmbedding` model and processes requests
203
+ * The worker owns the transformers.js pipeline and processes requests
275
204
  * sequentially from a priority queue (recall queries jump ahead of
276
205
  * backfill batches).
277
206
  *
278
- * Uses dynamic import so the module is only loaded when the "local"
279
- * provider is actually selected — avoids startup cost and allows
280
- * graceful fallback when the optional `fastembed` peer isn't installed
281
- * (its native onnxruntime-node may fail to build, e.g. on CUDA 13).
207
+ * Task instruction prefixes are prepended automatically:
208
+ * - "document" → "search_document: <text>"
209
+ * - "query" → "search_query: <text>"
282
210
  */
283
211
  class LocalProvider implements EmbeddingProvider {
284
212
  // With inference off the main thread, large batches no longer block
285
213
  // the event loop. 256 maximises throughput per round-trip to the
286
- // worker. Backfill callers use a smaller BACKFILL_CHUNK_SIZE to give
287
- // the worker's priority queue breathing room for recall queries.
214
+ // worker. Backfill callers use token-budget-based batching (see
215
+ // nextBatch) to give the worker's priority queue breathing room
216
+ // for recall queries and prevent OOM on long texts.
288
217
  readonly maxBatchSize = 256;
289
218
 
290
219
  private worker: import("node:worker_threads").Worker | null = null;
@@ -296,16 +225,16 @@ class LocalProvider implements EmbeddingProvider {
296
225
  >();
297
226
  private nextRequestId = 0;
298
227
  private initPromise: Promise<void> | null = null;
299
- private modelName: string;
228
+ private modelId: string;
229
+ private dimensions: number;
300
230
 
301
- constructor(modelName: string) {
302
- this.modelName = modelName;
231
+ constructor(modelId: string, dimensions: number) {
232
+ this.modelId = modelId;
233
+ this.dimensions = dimensions;
303
234
  }
304
235
 
305
236
  /**
306
- * Ensure the worker thread is running. Probes fastembed on the main
307
- * thread first (fast, cached) as a fast-fail gate — the worker is only
308
- * spawned if the module is known-loadable. Worker startup failure is
237
+ * Ensure the worker thread is running. Worker startup failure is
309
238
  * surfaced as `LocalProviderUnavailableError` to trigger the existing
310
239
  * auto-fallback to remote providers.
311
240
  */
@@ -315,10 +244,8 @@ class LocalProvider implements EmbeddingProvider {
315
244
  if (this.initPromise) return this.initPromise;
316
245
 
317
246
  this.initPromise = (async () => {
318
- // Fast-fail: probe fastembed on the main thread. This is cached
319
- // after the first call and preserves the existing error flow.
320
- const fastembed = await tryLoadFastembed();
321
- if (!fastembed) throw new LocalProviderUnavailableError();
247
+ // Fast-fail if a previous attempt already marked local broken.
248
+ if (localProviderKnownBroken) throw new LocalProviderUnavailableError();
322
249
 
323
250
  const { Worker } = await import("node:worker_threads");
324
251
 
@@ -334,17 +261,10 @@ class LocalProvider implements EmbeddingProvider {
334
261
  // In dev (Bun running .ts directly): embedding-worker.ts
335
262
  // In dist (esbuild bundle): embedding-worker.js
336
263
  const vendorWorkerUrl = (globalThis as Record<string, unknown>).__LORE_VENDOR_WORKER_URL__ as string | undefined;
337
- // On Windows, new Worker() with a file:// URL pointing to $bunfs
338
- // fails with ENOENT. Pass the raw path instead (B:\~BUN\root\...).
339
- // On macOS/Linux the file:// URL works fine with $bunfs paths.
340
264
  let workerUrl: string | URL;
341
265
  if (vendorWorkerUrl) {
342
266
  if (process.platform === "win32") {
343
- // On Windows, new Worker() with a file:// URL pointing to $bunfs
344
- // fails with ENOENT (Bun bug). Extract the raw path instead.
345
- // URL.pathname keeps %7E encoded; decodeURIComponent restores ~.
346
267
  workerUrl = decodeURIComponent(new URL(vendorWorkerUrl).pathname);
347
- // URL.pathname on Windows: /B:/~BUN/root/wrapper.js → strip leading /
348
268
  if (/^\/[A-Za-z]:/.test(workerUrl)) {
349
269
  workerUrl = workerUrl.slice(1);
350
270
  }
@@ -357,9 +277,10 @@ class LocalProvider implements EmbeddingProvider {
357
277
 
358
278
  const vendor = vendorModelInfo();
359
279
  const workerInitData: WorkerInitData = {
360
- modelName: this.modelName,
280
+ modelId: this.modelId,
281
+ dimensions: this.dimensions,
361
282
  vendorModel: vendor
362
- ? { modelAbsoluteDirPath: vendor.modelAbsoluteDirPath, modelName: vendor.modelName }
283
+ ? { localModelPath: vendor.localModelPath }
363
284
  : null,
364
285
  };
365
286
 
@@ -394,6 +315,14 @@ class LocalProvider implements EmbeddingProvider {
394
315
  // LocalProviderUnavailableError on all pending + future requests.
395
316
  this.workerInitError = msg.error;
396
317
  this.workerReady = false;
318
+ localProviderKnownBroken = true;
319
+ if (!localProviderErrorLogged) {
320
+ localProviderErrorLogged = true;
321
+ log.info(
322
+ `local embedding provider failed to init: ${msg.error}. ` +
323
+ `Set VOYAGE_API_KEY/OPENAI_API_KEY for automatic remote fallback.`,
324
+ );
325
+ }
397
326
  for (const [, p] of this.pendingRequests) {
398
327
  p.reject(new LocalProviderUnavailableError(msg.error));
399
328
  }
@@ -453,6 +382,10 @@ class LocalProvider implements EmbeddingProvider {
453
382
  async embed(texts: string[], inputType: "document" | "query"): Promise<Float32Array[]> {
454
383
  await this.ensureWorker();
455
384
 
385
+ // Prepend Nomic task instruction prefix.
386
+ const prefix = inputType === "document" ? "search_document: " : "search_query: ";
387
+ const prefixed = texts.map((t) => prefix + t);
388
+
456
389
  const id = this.nextRequestId++;
457
390
  // Recall queries (single query-type texts) get high priority so they
458
391
  // jump ahead of any queued backfill batches in the worker.
@@ -464,7 +397,7 @@ class LocalProvider implements EmbeddingProvider {
464
397
  this.worker!.postMessage({
465
398
  type: "embed",
466
399
  id,
467
- texts,
400
+ texts: prefixed,
468
401
  inputType,
469
402
  priority,
470
403
  } satisfies WorkerInbound);
@@ -473,8 +406,6 @@ class LocalProvider implements EmbeddingProvider {
473
406
 
474
407
  /** Shut down the worker thread. Called by `resetProvider()` on config change.
475
408
  * Sends a shutdown message so the worker calls `process.exit(0)` internally.
476
- * We avoid `worker.terminate()` because Bun's forced termination triggers a
477
- * NAPI fatal error when tearing down onnxruntime's native bindings.
478
409
  *
479
410
  * Returns a promise that resolves once the worker has fully exited. Callers
480
411
  * that need a clean teardown (tests, config change) should await the result.
@@ -507,7 +438,7 @@ class LocalProvider implements EmbeddingProvider {
507
438
 
508
439
  /** Default models per provider — used when config doesn't override. */
509
440
  const PROVIDER_DEFAULTS: Record<string, { model: string; dimensions: number }> = {
510
- local: { model: "BGESmallENV15", dimensions: 384 },
441
+ local: { model: "nomic-ai/nomic-embed-text-v1.5", dimensions: 768 },
511
442
  voyage: { model: "voyage-code-3", dimensions: 1024 },
512
443
  openai: { model: "text-embedding-3-small", dimensions: 1536 },
513
444
  };
@@ -539,12 +470,11 @@ function getProvider(): EmbeddingProvider | null {
539
470
 
540
471
  switch (providerName) {
541
472
  case "local": {
542
- // `fastembed` is an optionalDependency. We construct the provider
543
- // optimistically here; the import + ONNX init happens lazily in
544
- // `LocalProvider.getModel()`, which throws `LocalProviderUnavailableError`
545
- // if the optional dep isn't installed. After that first failure
546
- // `isAvailable()` short-circuits to false and callers fall back to FTS.
547
- cachedProvider = new LocalProvider(model);
473
+ // Construct the provider optimistically — the ONNX model init
474
+ // happens lazily in the worker thread on first `embed()` call.
475
+ // If it fails, `LocalProviderUnavailableError` triggers the
476
+ // auto-fallback to a remote provider or FTS-only search.
477
+ cachedProvider = new LocalProvider(model, cfg.dimensions);
548
478
  break;
549
479
  }
550
480
  case "voyage": {
@@ -619,7 +549,7 @@ export function _saveAndClearProvider(): unknown {
619
549
  /** Restore a provider previously saved by `_saveAndClearProvider()`. Any
620
550
  * provider created between save and restore is discarded (callers must
621
551
  * ensure it's not a LocalProvider with a live worker — those suites only
622
- * use `_markFastembedUnavailable()` so no worker is spawned). */
552
+ * use `_markLocalProviderUnavailable()` so no worker is spawned). */
623
553
  export function _restoreProvider(token: unknown): void {
624
554
  const saved = token as { provider: EmbeddingProvider | null | undefined; remoteFallbackLogged: boolean };
625
555
  cachedProvider = saved.provider;
@@ -669,13 +599,13 @@ export function pickRemoteFallback(): {
669
599
  * Active when the configured provider's API key is set, unless explicitly
670
600
  * disabled via `search.embeddings.enabled: false` in .lore.json.
671
601
  *
672
- * For the `local` provider, also returns false once we've discovered the
673
- * optional `fastembed` peer is missing — callers (recall, ltm, distillation)
674
- * use this gate to skip embedding work and fall back to FTS-only search. */
602
+ * For the `local` provider, also returns false once the worker has reported
603
+ * an init failure — callers (recall, ltm, distillation) use this gate to
604
+ * skip embedding work and fall back to FTS-only search. */
675
605
  export function isAvailable(): boolean {
676
606
  const provider = getProvider();
677
607
  if (!provider) return false;
678
- if (provider instanceof LocalProvider && fastembedKnownUnavailable()) return false;
608
+ if (provider instanceof LocalProvider && localProviderKnownUnavailable()) return false;
679
609
  return true;
680
610
  }
681
611
 
@@ -686,7 +616,7 @@ export function isAvailable(): boolean {
686
616
  /**
687
617
  * Generate embeddings for the given texts using the configured provider.
688
618
  *
689
- * If the configured provider is `local` and `fastembed` turns out to be
619
+ * If the configured provider is `local` and the local provider turns out to be
690
620
  * unavailable at runtime (failed install, vendor extraction blocked, etc.),
691
621
  * automatically swap to a remote provider when `VOYAGE_API_KEY` or
692
622
  * `OPENAI_API_KEY` is set in env. The swap is permanent for the rest of
@@ -717,7 +647,7 @@ export async function embed(
717
647
  if (!remoteFallbackLogged) {
718
648
  remoteFallbackLogged = true;
719
649
  log.info(
720
- `fastembed unavailable; auto-switching to ${fallback.name} ` +
650
+ `local embedding provider unavailable; auto-switching to ${fallback.name} ` +
721
651
  `(set search.embeddings.provider in .lore.json to silence this)`,
722
652
  );
723
653
  }
@@ -826,6 +756,53 @@ export function vectorSearchDistillations(
826
756
  return scored.slice(0, limit);
827
757
  }
828
758
 
759
+ // ---------------------------------------------------------------------------
760
+ // Vector search — all distillations (including archived)
761
+ // ---------------------------------------------------------------------------
762
+
763
/** One scored row returned by `vectorSearchAllDistillations()`. */
export type DistillationVectorHit = {
  /** `id` column of the matching `distillations` row. */
  id: string;
  /** `session_id` column of that row; lets callers count hits per session. */
  session_id: string;
  /** Cosine similarity between the query embedding and the stored embedding. */
  similarity: number;
};

/**
 * Upper bound on the number of rows loaded and scored by a single
 * `vectorSearchAllDistillations()` call. Prevents excessive CPU on
 * long-running projects with a large distillation archive.
 */
const MAX_DISTILLATION_VECTOR_ROWS = 500;

/**
 * Search ALL distillations (including archived) with embeddings by cosine
 * similarity, scoped to a single project. Returns session_id alongside
 * similarity for cross-session counting.
 *
 * Unlike vectorSearchDistillations() which filters to non-archived only,
 * this searches the full distillation archive — necessary for detecting
 * repeated instructions across sessions where older distillations have
 * been archived after meta-distillation.
 *
 * Pure brute-force — fine for ~200 entries per project. Only the
 * `MAX_DISTILLATION_VECTOR_ROWS` most recently created rows (ordered by
 * `created_at DESC`) are loaded and scored; older rows are never considered.
 *
 * @param queryEmbedding Embedding of the query text to compare against.
 * @param projectId Only rows whose `project_id` equals this value are scanned.
 * @param limit Maximum number of hits to return (default 20).
 * @returns Hits sorted by similarity, highest first, truncated to `limit`.
 */
export function vectorSearchAllDistillations(
  queryEmbedding: Float32Array,
  projectId: string,
  limit = 20,
): DistillationVectorHit[] {
  // No archived filter here on purpose: the whole archive is in scope.
  const rows = db()
    .query(
      "SELECT id, session_id, embedding FROM distillations WHERE embedding IS NOT NULL AND project_id = ? ORDER BY created_at DESC LIMIT ?",
    )
    .all(projectId, MAX_DISTILLATION_VECTOR_ROWS) as Array<{ id: string; session_id: string; embedding: Buffer }>;

  const scored: DistillationVectorHit[] = [];
  for (const row of rows) {
    // Stored embeddings are blobs; decode before comparing to the query.
    const vec = fromBlob(row.embedding);
    const sim = cosineSimilarity(queryEmbedding, vec);
    scored.push({ id: row.id, session_id: row.session_id, similarity: sim });
  }

  // Best match first, then keep only the requested number of hits.
  scored.sort((a, b) => b.similarity - a.similarity);
  return scored.slice(0, limit);
}
805
+
829
806
  // ---------------------------------------------------------------------------
830
807
  // Fire-and-forget embedding
831
808
  // ---------------------------------------------------------------------------
@@ -1107,14 +1084,52 @@ export async function runStartupBackfill(): Promise<void> {
1107
1084
  // ---------------------------------------------------------------------------
1108
1085
 
1109
1086
  /**
1110
- * Chunk size for backfill embed requests. Each chunk becomes a separate
1111
- * message to the embedding worker. Keeping chunks small (32) gives the
1112
- * worker's priority queue natural gaps to interleave high-priority recall
1113
- * queries between backfill batches. The provider's `maxBatchSize` (256)
1114
- * is the upper limit for any single embed call; this is intentionally
1115
- * smaller for backfill-vs-live interleaving.
1087
+ * Maximum chunk size for backfill embed requests. Each chunk becomes a
1088
+ * separate message to the embedding worker. Keeping chunks small gives
1089
+ * the worker's priority queue natural gaps to interleave high-priority
1090
+ * recall queries between backfill batches.
1091
+ */
1092
+ const MAX_BACKFILL_CHUNK = 8;
1093
+
1094
+ /**
1095
+ * Maximum total "token area" (batch_size × max_sequence_length) per
1096
+ * backfill batch. ONNX runtime pads all texts to the longest sequence,
1097
+ * so the peak tensor size is proportional to this product. A budget of
1098
+ * 4096 tokens allows e.g. 8 × 512-token texts, or 2 × 2048-token texts.
1099
+ * Prevents OOM on batches with long distillation observations (~4000+
1100
+ * chars) that were blowing up at fixed batch sizes.
1101
+ */
1102
+ const MAX_BATCH_TOKEN_AREA = 4096;
1103
+
1104
+ /**
1105
+ * Rough chars-per-token ratio for budget estimation. Nomic v1.5 uses a
1106
+ * WordPiece tokenizer; English text averages ~4 chars/token.
1107
+ */
1108
+ const CHARS_PER_TOKEN = 4;
1109
+
1110
+ /**
1111
+ * Partition `rows` into batches that respect both MAX_BACKFILL_CHUNK and
1112
+ * MAX_BATCH_TOKEN_AREA. Each batch's estimated token area is
1113
+ * `batch.length × max_tokens_in_batch`. We greedily add rows until the
1114
+ * next row would push the area over budget.
1116
1115
  */
1117
- const BACKFILL_CHUNK_SIZE = 32;
1116
function nextBatch<T extends { text: string }>(rows: T[], start: number): T[] {
  const picked: T[] = [];
  // Longest estimated token count among the rows picked so far; padding
  // makes every row in the batch cost this much.
  let widest = 0;
  let cursor = start;

  while (cursor < rows.length && picked.length < MAX_BACKFILL_CHUNK) {
    const candidate = rows[cursor];
    const candidateTokens = Math.ceil(candidate.text.length / CHARS_PER_TOKEN);
    const widestIfAdded = Math.max(widest, candidateTokens);
    const areaIfAdded = (picked.length + 1) * widestIfAdded;

    // The first row is always accepted, even when it alone exceeds the
    // budget, so every call with a valid `start` makes progress.
    const overBudget = areaIfAdded > MAX_BATCH_TOKEN_AREA;
    if (overBudget && picked.length > 0) break;

    picked.push(candidate);
    widest = widestIfAdded;
    cursor += 1;
  }

  return picked;
}
1118
1133
 
1119
1134
  /**
1120
1135
  * Embed all knowledge entries that are missing embeddings.
@@ -1136,14 +1151,18 @@ export async function backfillEmbeddings(): Promise<number> {
1136
1151
 
1137
1152
  if (!rows.length) return 0;
1138
1153
 
1154
+ // Pre-compute text for token-budget batching
1155
+ const items = rows.map((r) => ({ ...r, text: `${r.title}\n${r.content}` }));
1156
+
1139
1157
  let embedded = 0;
1158
+ let i = 0;
1140
1159
 
1141
- for (let i = 0; i < rows.length; i += BACKFILL_CHUNK_SIZE) {
1142
- const batch = rows.slice(i, i + BACKFILL_CHUNK_SIZE);
1143
- const texts = batch.map((r) => `${r.title}\n${r.content}`);
1160
+ while (i < items.length) {
1161
+ const batch = nextBatch(items, i);
1162
+ i += batch.length;
1144
1163
 
1145
1164
  try {
1146
- const vectors = await embed(texts, "document");
1165
+ const vectors = await embed(batch.map((b) => b.text), "document");
1147
1166
  const update = db().prepare(
1148
1167
  "UPDATE knowledge SET embedding = ? WHERE id = ?",
1149
1168
  );
@@ -1153,7 +1172,8 @@ export async function backfillEmbeddings(): Promise<number> {
1153
1172
  embedded++;
1154
1173
  }
1155
1174
  } catch (err) {
1156
- log.info(`embedding backfill batch ${i}-${i + batch.length} failed:`, err);
1175
+ // log.error sends to Sentry via captureException
1176
+ log.error(`embedding backfill batch failed (${batch.length} items):`, err);
1157
1177
  }
1158
1178
  // No yieldToEventLoop() needed — embed() is truly async (worker thread).
1159
1179
  }
@@ -1188,17 +1208,21 @@ export async function backfillDistillationEmbeddings(): Promise<number> {
1188
1208
  let embedded = 0;
1189
1209
 
1190
1210
  // Progress logging: heartbeat every PROGRESS_INTERVAL embedded so a long
1191
- // backfill (e.g. 1000+ pending after a fastembed reinstall) doesn't look
1211
+ // backfill (e.g. 1000+ pending after a model change) doesn't look
1192
1212
  // like a silent hang. Without this, only the final tally was logged.
1193
1213
  const PROGRESS_INTERVAL = 256;
1194
1214
  let nextProgressAt = PROGRESS_INTERVAL;
1195
1215
 
1196
- for (let i = 0; i < rows.length; i += BACKFILL_CHUNK_SIZE) {
1197
- const batch = rows.slice(i, i + BACKFILL_CHUNK_SIZE);
1198
- const texts = batch.map((r) => r.observations);
1216
+ // Pre-compute text for token-budget batching
1217
+ const items = rows.map((r) => ({ ...r, text: r.observations }));
1218
+ let i = 0;
1219
+
1220
+ while (i < items.length) {
1221
+ const batch = nextBatch(items, i);
1222
+ i += batch.length;
1199
1223
 
1200
1224
  try {
1201
- const vectors = await embed(texts, "document");
1225
+ const vectors = await embed(batch.map((b) => b.text), "document");
1202
1226
  const update = db().prepare(
1203
1227
  "UPDATE distillations SET embedding = ? WHERE id = ?",
1204
1228
  );
@@ -1208,7 +1232,8 @@ export async function backfillDistillationEmbeddings(): Promise<number> {
1208
1232
  embedded++;
1209
1233
  }
1210
1234
  } catch (err) {
1211
- log.info(`distillation embedding backfill batch ${i}-${i + batch.length} failed:`, err);
1235
+ // log.error sends to Sentry via captureException
1236
+ log.error(`distillation embedding backfill batch failed (${batch.length} items):`, err);
1212
1237
  }
1213
1238
 
1214
1239
  if (embedded >= nextProgressAt) {
package/src/gradient.ts CHANGED
@@ -354,11 +354,19 @@ function getSessionState(sessionID: string): SessionState {
354
354
  *
355
355
  * Set `thresholdMs <= 0` to disable. Returns true if a reset fired so the
356
356
  * caller can log/observe.
357
+ *
358
+ * @param skipCompact When true, perform all idle-resume housekeeping
359
+ * (clear caches, set cameOutOfIdle) but do NOT set postIdleCompact.
360
+ * Used when the caller knows the upstream prompt cache is still warm
361
+ * (e.g. cache warmer recently refreshed it) — compacting would produce
362
+ * a different prompt body that doesn't match the warmed prefix, causing
363
+ * a cache bust and wasting the warming cost.
357
364
  */
358
365
  export function onIdleResume(
359
366
  sessionID: string,
360
367
  thresholdMs: number,
361
368
  now: number = Date.now(),
369
+ skipCompact: boolean = false,
362
370
  ): { triggered: false } | { triggered: true; idleMs: number } {
363
371
  if (thresholdMs <= 0) return { triggered: false };
364
372
  const state = getSessionState(sessionID);
@@ -369,7 +377,7 @@ export function onIdleResume(
369
377
  state.rawWindowCache = null;
370
378
  state.distillationSnapshot = null;
371
379
  state.cameOutOfIdle = true;
372
- state.postIdleCompact = true;
380
+ state.postIdleCompact = !skipCompact;
373
381
  return { triggered: true, idleMs };
374
382
  }
375
383
 
@@ -0,0 +1,37 @@
1
+ /**
2
+ * Detection orchestrator — scans all registered providers for conversation
3
+ * history matching a given project path.
4
+ */
5
+ import type { DetectionResult } from "./types";
6
+ import { getProviders } from "./providers";
7
+
8
/**
 * Ask every registered provider whether it has conversation history for
 * `projectPath` and collect one summary per provider that does.
 *
 * A provider whose `detect()` throws is skipped; its failure is neither
 * logged nor rethrown.
 *
 * @param projectPath Project path handed to each provider's `detect()`.
 * @returns One result per provider that reported at least one session,
 *   ordered by total message count, largest first.
 */
export function detectAll(projectPath: string): DetectionResult[] {
  const found: DetectionResult[] = [];

  for (const agent of getProviders()) {
    try {
      const sessions = agent.detect(projectPath);
      const hasSessions = sessions.length > 0;
      if (!hasSessions) continue;

      // Accumulate both totals in a single pass over the sessions.
      const totals = sessions.reduce(
        (acc, sess) => {
          acc.tokens += sess.estimatedTokens;
          acc.messages += sess.messageCount;
          return acc;
        },
        { tokens: 0, messages: 0 },
      );

      found.push({
        agentName: agent.name,
        agentDisplayName: agent.displayName,
        sessions,
        totalTokens: totals.tokens,
        totalMessages: totals.messages,
      });
    } catch {
      // Deliberately silent: a failing provider (e.g. corrupt DB, missing
      // directory) is skipped so users are not warned about agents they
      // do not use.
    }
  }

  // Richest source first.
  found.sort((left, right) => right.totalMessages - left.totalMessages);
  return found;
}