@loreai/core 0.16.0 → 0.17.0

Files changed (155)
  1. package/README.md +11 -0
  2. package/dist/bun/agents-file.d.ts +13 -1
  3. package/dist/bun/agents-file.d.ts.map +1 -1
  4. package/dist/bun/config.d.ts +20 -1
  5. package/dist/bun/config.d.ts.map +1 -1
  6. package/dist/bun/data.d.ts +174 -0
  7. package/dist/bun/data.d.ts.map +1 -0
  8. package/dist/bun/db.d.ts +65 -0
  9. package/dist/bun/db.d.ts.map +1 -1
  10. package/dist/bun/distillation.d.ts +49 -6
  11. package/dist/bun/distillation.d.ts.map +1 -1
  12. package/dist/bun/embedding-vendor.d.ts +66 -0
  13. package/dist/bun/embedding-vendor.d.ts.map +1 -0
  14. package/dist/bun/embedding-worker-types.d.ts +66 -0
  15. package/dist/bun/embedding-worker-types.d.ts.map +1 -0
  16. package/dist/bun/embedding-worker.d.ts +16 -0
  17. package/dist/bun/embedding-worker.d.ts.map +1 -0
  18. package/dist/bun/embedding-worker.js +100 -0
  19. package/dist/bun/embedding-worker.js.map +7 -0
  20. package/dist/bun/embedding.d.ts +91 -8
  21. package/dist/bun/embedding.d.ts.map +1 -1
  22. package/dist/bun/git.d.ts +47 -0
  23. package/dist/bun/git.d.ts.map +1 -0
  24. package/dist/bun/gradient.d.ts +19 -1
  25. package/dist/bun/gradient.d.ts.map +1 -1
  26. package/dist/bun/index.d.ts +9 -6
  27. package/dist/bun/index.d.ts.map +1 -1
  28. package/dist/bun/index.js +13029 -10885
  29. package/dist/bun/index.js.map +4 -4
  30. package/dist/bun/lat-reader.d.ts +1 -1
  31. package/dist/bun/lat-reader.d.ts.map +1 -1
  32. package/dist/bun/ltm.d.ts.map +1 -1
  33. package/dist/bun/markdown.d.ts +11 -0
  34. package/dist/bun/markdown.d.ts.map +1 -1
  35. package/dist/bun/prompt.d.ts +1 -1
  36. package/dist/bun/prompt.d.ts.map +1 -1
  37. package/dist/bun/recall.d.ts +53 -0
  38. package/dist/bun/recall.d.ts.map +1 -1
  39. package/dist/bun/search.d.ts +29 -0
  40. package/dist/bun/search.d.ts.map +1 -1
  41. package/dist/bun/temporal.d.ts +2 -0
  42. package/dist/bun/temporal.d.ts.map +1 -1
  43. package/dist/bun/types.d.ts +15 -0
  44. package/dist/bun/types.d.ts.map +1 -1
  45. package/dist/bun/worker-model.d.ts +12 -9
  46. package/dist/bun/worker-model.d.ts.map +1 -1
  47. package/dist/node/agents-file.d.ts +13 -1
  48. package/dist/node/agents-file.d.ts.map +1 -1
  49. package/dist/node/config.d.ts +20 -1
  50. package/dist/node/config.d.ts.map +1 -1
  51. package/dist/node/data.d.ts +174 -0
  52. package/dist/node/data.d.ts.map +1 -0
  53. package/dist/node/db.d.ts +65 -0
  54. package/dist/node/db.d.ts.map +1 -1
  55. package/dist/node/distillation.d.ts +49 -6
  56. package/dist/node/distillation.d.ts.map +1 -1
  57. package/dist/node/embedding-vendor.d.ts +66 -0
  58. package/dist/node/embedding-vendor.d.ts.map +1 -0
  59. package/dist/node/embedding-worker-types.d.ts +66 -0
  60. package/dist/node/embedding-worker-types.d.ts.map +1 -0
  61. package/dist/node/embedding-worker.d.ts +16 -0
  62. package/dist/node/embedding-worker.d.ts.map +1 -0
  63. package/dist/node/embedding-worker.js +100 -0
  64. package/dist/node/embedding-worker.js.map +7 -0
  65. package/dist/node/embedding.d.ts +91 -8
  66. package/dist/node/embedding.d.ts.map +1 -1
  67. package/dist/node/git.d.ts +47 -0
  68. package/dist/node/git.d.ts.map +1 -0
  69. package/dist/node/gradient.d.ts +19 -1
  70. package/dist/node/gradient.d.ts.map +1 -1
  71. package/dist/node/index.d.ts +9 -6
  72. package/dist/node/index.d.ts.map +1 -1
  73. package/dist/node/index.js +13029 -10885
  74. package/dist/node/index.js.map +4 -4
  75. package/dist/node/lat-reader.d.ts +1 -1
  76. package/dist/node/lat-reader.d.ts.map +1 -1
  77. package/dist/node/ltm.d.ts.map +1 -1
  78. package/dist/node/markdown.d.ts +11 -0
  79. package/dist/node/markdown.d.ts.map +1 -1
  80. package/dist/node/prompt.d.ts +1 -1
  81. package/dist/node/prompt.d.ts.map +1 -1
  82. package/dist/node/recall.d.ts +53 -0
  83. package/dist/node/recall.d.ts.map +1 -1
  84. package/dist/node/search.d.ts +29 -0
  85. package/dist/node/search.d.ts.map +1 -1
  86. package/dist/node/temporal.d.ts +2 -0
  87. package/dist/node/temporal.d.ts.map +1 -1
  88. package/dist/node/types.d.ts +15 -0
  89. package/dist/node/types.d.ts.map +1 -1
  90. package/dist/node/worker-model.d.ts +12 -9
  91. package/dist/node/worker-model.d.ts.map +1 -1
  92. package/dist/types/agents-file.d.ts +13 -1
  93. package/dist/types/agents-file.d.ts.map +1 -1
  94. package/dist/types/config.d.ts +20 -1
  95. package/dist/types/config.d.ts.map +1 -1
  96. package/dist/types/data.d.ts +174 -0
  97. package/dist/types/data.d.ts.map +1 -0
  98. package/dist/types/db.d.ts +65 -0
  99. package/dist/types/db.d.ts.map +1 -1
  100. package/dist/types/distillation.d.ts +49 -6
  101. package/dist/types/distillation.d.ts.map +1 -1
  102. package/dist/types/embedding-vendor.d.ts +66 -0
  103. package/dist/types/embedding-vendor.d.ts.map +1 -0
  104. package/dist/types/embedding-worker-types.d.ts +66 -0
  105. package/dist/types/embedding-worker-types.d.ts.map +1 -0
  106. package/dist/types/embedding-worker.d.ts +16 -0
  107. package/dist/types/embedding-worker.d.ts.map +1 -0
  108. package/dist/types/embedding.d.ts +91 -8
  109. package/dist/types/embedding.d.ts.map +1 -1
  110. package/dist/types/git.d.ts +47 -0
  111. package/dist/types/git.d.ts.map +1 -0
  112. package/dist/types/gradient.d.ts +19 -1
  113. package/dist/types/gradient.d.ts.map +1 -1
  114. package/dist/types/index.d.ts +9 -6
  115. package/dist/types/index.d.ts.map +1 -1
  116. package/dist/types/lat-reader.d.ts +1 -1
  117. package/dist/types/lat-reader.d.ts.map +1 -1
  118. package/dist/types/ltm.d.ts.map +1 -1
  119. package/dist/types/markdown.d.ts +11 -0
  120. package/dist/types/markdown.d.ts.map +1 -1
  121. package/dist/types/prompt.d.ts +1 -1
  122. package/dist/types/prompt.d.ts.map +1 -1
  123. package/dist/types/recall.d.ts +53 -0
  124. package/dist/types/recall.d.ts.map +1 -1
  125. package/dist/types/search.d.ts +29 -0
  126. package/dist/types/search.d.ts.map +1 -1
  127. package/dist/types/temporal.d.ts +2 -0
  128. package/dist/types/temporal.d.ts.map +1 -1
  129. package/dist/types/types.d.ts +15 -0
  130. package/dist/types/types.d.ts.map +1 -1
  131. package/dist/types/worker-model.d.ts +12 -9
  132. package/dist/types/worker-model.d.ts.map +1 -1
  133. package/package.json +5 -2
  134. package/src/agents-file.ts +87 -4
  135. package/src/config.ts +68 -5
  136. package/src/curator.ts +2 -2
  137. package/src/data.ts +768 -0
  138. package/src/db.ts +386 -7
  139. package/src/distillation.ts +178 -35
  140. package/src/embedding-vendor.ts +102 -0
  141. package/src/embedding-worker-types.ts +82 -0
  142. package/src/embedding-worker.ts +185 -0
  143. package/src/embedding.ts +607 -61
  144. package/src/git.ts +144 -0
  145. package/src/gradient.ts +174 -17
  146. package/src/index.ts +20 -0
  147. package/src/lat-reader.ts +5 -11
  148. package/src/ltm.ts +17 -44
  149. package/src/markdown.ts +15 -0
  150. package/src/prompt.ts +1 -2
  151. package/src/recall.ts +401 -70
  152. package/src/search.ts +71 -1
  153. package/src/temporal.ts +42 -35
  154. package/src/types.ts +15 -0
  155. package/src/worker-model.ts +14 -9
package/src/embedding.ts CHANGED
@@ -11,6 +11,12 @@
  import { db } from "./db";
  import { config } from "./config";
  import * as log from "./log";
+ import { isVendoredBinary, vendorModelInfo } from "./embedding-vendor";
+ import type {
+   WorkerInbound,
+   WorkerOutbound,
+   WorkerInitData,
+ } from "./embedding-worker-types";

  /** Timeout for embedding API fetch calls (ms). Prevents a hanging API from
  * blocking the recall tool indefinitely. 10s is generous for typical 100-500ms
@@ -136,6 +142,125 @@ class OpenAIProvider implements EmbeddingProvider {
  // Local provider (fastembed + ONNX Runtime)
  // ---------------------------------------------------------------------------

+ /**
+  * Thrown when `LocalProvider` is requested but `fastembed` cannot be loaded.
+  * `fastembed` is an optionalDependency of `@loreai/core`: if its postinstall
+  * fails (e.g. CUDA 13 hits the upstream `onnxruntime-node` bug — see #185),
+  * the package install still succeeds but local embeddings are disabled.
+  * Callers in `recall.ts` / `ltm.ts` / `distillation.ts` already gate on
+  * `isAvailable()`, which flips to `false` after this error fires once.
+  */
+ export class LocalProviderUnavailableError extends Error {
+   constructor(cause?: unknown) {
+     super(
+       "Local embedding provider unavailable: 'fastembed' is not installed. " +
+         "Configure search.embeddings.provider to 'voyage' or 'openai', or " +
+         "reinstall with ONNXRUNTIME_NODE_INSTALL_CUDA=skip to retry the optional fastembed install.",
+     );
+     this.name = "LocalProviderUnavailableError";
+     if (cause !== undefined) (this as Error & { cause?: unknown }).cause = cause;
+   }
+ }
+
+ /** Cache for the one-shot fastembed module-load probe. `fastembedProbed`
+  * flips true after the first import attempt; `fastembedModule` /
+  * `fastembedAvailable` record the outcome; `fastembedLogged` ensures the
+  * failure note is emitted only once. */
+ let fastembedModule: typeof import("fastembed") | null = null;
+ let fastembedProbed: boolean = false;
+ let fastembedAvailable: boolean = false;
+ let fastembedLogged: boolean = false;
+
+ /** For tests: reset the fastembed probe cache. */
+ export function _resetFastembedProbe(): void {
+   fastembedModule = null;
+   fastembedProbed = false;
+   fastembedAvailable = false;
+   fastembedLogged = false;
+ }
+
+ /** For tests: simulate fastembed being unresolvable, without mocking the
+  * dynamic import. After this call, `tryLoadFastembed()` short-circuits to
+  * `null` and `isAvailable()` returns false for the local provider. */
+ export function _markFastembedUnavailable(): void {
+   fastembedModule = null;
+   fastembedProbed = true;
+   fastembedAvailable = false;
+   fastembedLogged = true; // suppress the info log in tests
+ }
+
+ /**
+  * Probe `fastembed` once. Returns the module on success, `null` on failure.
+  * Logs an info-level note exactly once on the first failure so users know
+  * how to recover (switch provider, fix the install, or rely on the
+  * VOYAGE/OPENAI auto-fallback in `embed()`).
+  *
+  * In binary mode `import("fastembed")` resolves to the bundle Bun packed
+  * at compile time (the binary's wrapper has already preloaded the
+  * side-load `libonnxruntime` lib so the addon's dlopen succeeds). In
+  * npm mode it goes through standard module resolution and may fail if
+  * the optional postinstall didn't run.
+  */
+ async function tryLoadFastembed(): Promise<typeof import("fastembed") | null> {
+   if (fastembedProbed) return fastembedAvailable ? fastembedModule : null;
+   try {
+     const mod = await loadFastembedModule();
+     // Re-check after the async boundary: another caller (e.g. a test helper
+     // like _markFastembedUnavailable) may have set the probe while we were
+     // awaiting. Their decision takes priority — don't overwrite it.
+     if (fastembedProbed) return fastembedAvailable ? fastembedModule : null;
+     fastembedModule = mod;
+     fastembedAvailable = true;
+   } catch (err) {
+     if (fastembedProbed) return fastembedAvailable ? fastembedModule : null;
+     fastembedAvailable = false;
+     if (!fastembedLogged) {
+       fastembedLogged = true;
+       const msg = err instanceof Error ? err.message : String(err);
+       // Binary mode: a load failure here is a real bug (everything was
+       // bundled at build time). npm mode: the optional dep didn't
+       // install — point the user at the standard recovery options.
+       const remediation = isVendoredBinary()
+         ? "this is a bug in the lore binary; please file an issue. " +
+           "Set VOYAGE_API_KEY/OPENAI_API_KEY for automatic remote fallback in the meantime"
+         : "set search.embeddings.provider to 'voyage' or 'openai', " +
+           "set VOYAGE_API_KEY/OPENAI_API_KEY for automatic remote fallback, " +
+           "or reinstall fastembed with ONNXRUNTIME_NODE_INSTALL_CUDA=skip";
+       log.info(
+         `local embedding provider unavailable (fastembed not installed: ${msg}) — ${remediation}`,
+       );
+     }
+   } finally {
+     fastembedProbed = true;
+   }
+   return fastembedAvailable ? fastembedModule : null;
+ }
+
+ /**
+  * Resolve and import the fastembed module.
+  *
+  * One bare import covers both modes:
+  *
+  * - Binary mode: `bun build --compile` resolves "fastembed" against the
+  *   per-target staging `node_modules/` at build time and bundles it
+  *   (plus its transitive deps and `.node` addons) into the binary. The
+  *   side-load `libonnxruntime.so.1` / `.dylib` / `.dll` is preloaded
+  *   by the binary's wrapper before this import evaluates, so the
+  *   bundled `onnxruntime_binding.node`'s dlopen finds the cached
+  *   handle instead of failing with "shared object not found".
+  *
+  * - npm mode: standard Node/Bun resolution — works for `@loreai/core`
+  *   consumers whose `npm install` cleanly installed the optional dep.
+  *   If the postinstall failed (CUDA-13 hosts), the import throws here
+  *   and the caller logs + falls back to a remote provider.
+  */
+ async function loadFastembedModule(): Promise<typeof import("fastembed")> {
+   return (await import("fastembed")) as typeof import("fastembed");
+ }
+
+ /** True iff the fastembed probe has run and reported the module missing. */
+ function fastembedKnownUnavailable(): boolean {
+   return fastembedProbed && !fastembedAvailable;
+ }
+
  /**
  * Local embedding provider using fastembed (bge-small-en-v1.5 by default).
  *
@@ -143,61 +268,236 @@ class OpenAIProvider implements EmbeddingProvider {
  * Model files are downloaded on first use (~33MB) and cached in
  * `~/.cache/fastembed`. Subsequent inits load from disk in ~350ms.
  *
+ * ONNX inference runs in a dedicated `node:worker_threads` Worker so the
+ * main thread's event loop stays free. This class is a thin RPC client —
+ * it posts `{ texts, inputType }` to the worker and awaits a reply.
+ * The worker owns the `FlagEmbedding` model and processes requests
+ * sequentially from a priority queue (recall queries jump ahead of
+ * backfill batches).
+ *
  * Uses dynamic import so the module is only loaded when the "local"
  * provider is actually selected — avoids startup cost and allows
- * graceful fallback if fastembed is not installed.
+ * graceful fallback when the optional `fastembed` peer isn't installed
+ * (its native onnxruntime-node may fail to build, e.g. on CUDA 13).
  */
  class LocalProvider implements EmbeddingProvider {
+   // With inference off the main thread, large batches no longer block
+   // the event loop. 256 maximises throughput per round-trip to the
+   // worker. Backfill callers use a smaller BACKFILL_CHUNK_SIZE to give
+   // the worker's priority queue breathing room for recall queries.
    readonly maxBatchSize = 256;
-   private model: unknown | null = null;
-   private initPromise: Promise<unknown> | null = null;
+
+   private worker: import("node:worker_threads").Worker | null = null;
+   private workerReady = false;
+   private workerInitError: string | null = null;
+   private pendingRequests = new Map<
+     number,
+     { resolve: (vectors: Float32Array[]) => void; reject: (error: Error) => void }
+   >();
+   private nextRequestId = 0;
+   private initPromise: Promise<void> | null = null;
    private modelName: string;

    constructor(modelName: string) {
      this.modelName = modelName;
    }

-   private async getModel(): Promise<unknown> {
-     if (this.model) return this.model;
-     if (!this.initPromise) {
-       this.initPromise = (async () => {
-         const { EmbeddingModel, FlagEmbedding } = await import("fastembed");
-         // Map config model string to EmbeddingModel enum value.
-         // If the configured model matches an enum key, use it; otherwise try
-         // the raw string as a model name (CUSTOM model support in fastembed).
-         const enumValue = (EmbeddingModel as Record<string, string>)[this.modelName];
-         // fastembed's init() has overloaded signatures expecting specific enum
-         // members, but we resolve the model dynamically from config. The enum
-         // lookup guarantees a valid value at runtime; cast to satisfy the type.
-         const m = await FlagEmbedding.init({
-           model: enumValue ?? this.modelName,
-         } as { model: typeof EmbeddingModel.BGESmallENV15 });
-         this.model = m;
-         return m;
-       })();
-     }
+   /**
+    * Ensure the worker thread is running. Probes fastembed on the main
+    * thread first (fast, cached) as a fast-fail gate — the worker is only
+    * spawned if the module is known-loadable. Worker startup failure is
+    * surfaced as `LocalProviderUnavailableError` to trigger the existing
+    * auto-fallback to remote providers.
+    */
+   private async ensureWorker(): Promise<void> {
+     if (this.workerReady) return;
+     if (this.workerInitError) throw new LocalProviderUnavailableError(this.workerInitError);
+     if (this.initPromise) return this.initPromise;
+
+     this.initPromise = (async () => {
+       // Fast-fail: probe fastembed on the main thread. This is cached
+       // after the first call and preserves the existing error flow.
+       const fastembed = await tryLoadFastembed();
+       if (!fastembed) throw new LocalProviderUnavailableError();
+
+       const { Worker } = await import("node:worker_threads");
+
+       // Resolve the worker script path.
+       //
+       // In vendored binary mode: the compiled binary's wrapper.ts detects
+       // `!isMainThread` and runs the embedding worker code path. We spawn
+       // the Worker with the wrapper's own `import.meta.url` (registered as
+       // __LORE_VENDOR_WORKER_URL__). This avoids needing a separate worker
+       // entrypoint — Bun's --compile silently drops additional entrypoints
+       // on macOS and Windows.
+       //
+       // In dev (Bun running .ts directly): embedding-worker.ts
+       // In dist (esbuild bundle): embedding-worker.js
+       const vendorWorkerUrl = (globalThis as Record<string, unknown>).__LORE_VENDOR_WORKER_URL__ as string | undefined;
+       let workerUrl: string | URL;
+       if (vendorWorkerUrl) {
+         if (process.platform === "win32") {
+           // On Windows, new Worker() with a file:// URL pointing to $bunfs
+           // fails with ENOENT (Bun bug). Extract the raw path instead
+           // (B:\~BUN\root\...); on macOS/Linux the file:// URL works fine.
+           // URL.pathname keeps %7E encoded; decodeURIComponent restores ~.
+           workerUrl = decodeURIComponent(new URL(vendorWorkerUrl).pathname);
+           // URL.pathname on Windows: /B:/~BUN/root/wrapper.js → strip leading /
+           if (/^\/[A-Za-z]:/.test(workerUrl)) {
+             workerUrl = workerUrl.slice(1);
+           }
+         } else {
+           workerUrl = vendorWorkerUrl;
+         }
+       } else {
+         workerUrl = new URL(`./embedding-worker${import.meta.url.endsWith(".ts") ? ".ts" : ".js"}`, import.meta.url);
+       }
+
+       const vendor = vendorModelInfo();
+       const workerInitData: WorkerInitData = {
+         modelName: this.modelName,
+         vendorModel: vendor
+           ? { modelAbsoluteDirPath: vendor.modelAbsoluteDirPath, modelName: vendor.modelName }
+           : null,
+       };
+
+       this.worker = new Worker(workerUrl, { workerData: workerInitData });
+
+       // Don't let the worker prevent process exit.
+       this.worker.unref();
+
+       // Wire up response handler.
+       this.worker.on("message", (msg: WorkerOutbound) => {
+         switch (msg.type) {
+           case "result": {
+             const pending = this.pendingRequests.get(msg.id);
+             if (pending) {
+               this.pendingRequests.delete(msg.id);
+               this.updateWorkerRef();
+               pending.resolve(msg.vectors);
+             }
+             break;
+           }
+           case "error": {
+             const pending = this.pendingRequests.get(msg.id);
+             if (pending) {
+               this.pendingRequests.delete(msg.id);
+               this.updateWorkerRef();
+               pending.reject(new Error(`Worker embedding failed: ${msg.error}`));
+             }
+             break;
+           }
+           case "init-error": {
+             // Model init failed inside the worker — surface as
+             // LocalProviderUnavailableError on all pending + future requests.
+             this.workerInitError = msg.error;
+             this.workerReady = false;
+             for (const [, p] of this.pendingRequests) {
+               p.reject(new LocalProviderUnavailableError(msg.error));
+             }
+             this.pendingRequests.clear();
+             this.updateWorkerRef();
+             break;
+           }
+         }
+       });
+
+       // Worker crash / exit — reject all in-flight requests.
+       this.worker.on("error", (err: Error) => {
+         this.workerInitError = err.message;
+         this.workerReady = false;
+         for (const [, p] of this.pendingRequests) {
+           p.reject(new LocalProviderUnavailableError(err));
+         }
+         this.pendingRequests.clear();
+         this.updateWorkerRef();
+       });
+
+       this.worker.on("exit", (code) => {
+         if (code !== 0 && !this.workerInitError) {
+           this.workerInitError = `embedding worker exited with code ${code}`;
+         }
+         this.workerReady = false;
+         for (const [, p] of this.pendingRequests) {
+           p.reject(
+             new LocalProviderUnavailableError(this.workerInitError ?? "embedding worker exited"),
+           );
+         }
+         this.pendingRequests.clear();
+         this.updateWorkerRef();
+       });
+
+       this.workerReady = true;
+     })().catch((err) => {
+       this.initPromise = null; // allow retry
+       throw err;
+     });
+
      return this.initPromise;
    }

+   /** Keep the worker ref'd while requests are in flight so the event loop
+    * doesn't exit before responses arrive. When the pending map drains,
+    * unref again so the worker doesn't prevent graceful process exit. */
+   private updateWorkerRef(): void {
+     if (!this.worker) return;
+     if (this.pendingRequests.size > 0) {
+       this.worker.ref();
+     } else {
+       this.worker.unref();
+     }
+   }
+
    async embed(texts: string[], inputType: "document" | "query"): Promise<Float32Array[]> {
-     const model = (await this.getModel()) as {
-       queryEmbed(text: string): Promise<number[]>;
-       passageEmbed(texts: string[], batchSize?: number): AsyncGenerator<number[][]>;
-     };
+     await this.ensureWorker();
+
+     const id = this.nextRequestId++;
+     // Recall queries (single query-type texts) get high priority so they
+     // jump ahead of any queued backfill batches in the worker.
+     const priority = inputType === "query" && texts.length === 1 ? "high" : "normal";
+
+     return new Promise<Float32Array[]>((resolve, reject) => {
+       this.pendingRequests.set(id, { resolve, reject });
+       this.updateWorkerRef();
+       this.worker!.postMessage({
+         type: "embed",
+         id,
+         texts,
+         inputType,
+         priority,
+       } satisfies WorkerInbound);
+     });
+   }

-     if (inputType === "query" && texts.length === 1) {
-       const vec = await model.queryEmbed(texts[0]);
-       return [new Float32Array(vec)];
+   /** Shut down the worker thread. Called by `resetProvider()` on config change.
+    * Sends a shutdown message so the worker calls `process.exit(0)` internally.
+    * We avoid `worker.terminate()` because Bun's forced termination triggers a
+    * NAPI fatal error when tearing down onnxruntime's native bindings.
+    *
+    * Returns a promise that resolves once the worker has fully exited. Callers
+    * that need a clean teardown (tests, config change) should await the result.
+    * Fire-and-forget callers (process exit) can ignore it. */
+   shutdown(): Promise<void> {
+     if (!this.worker) return Promise.resolve();
+
+     const worker = this.worker;
+     this.worker = null;
+     this.workerReady = false;
+     this.workerInitError = null;
+     this.initPromise = null;
+
+     // Reject any in-flight requests.
+     for (const [, p] of this.pendingRequests) {
+       p.reject(new Error("embedding worker shut down"));
      }
+     this.pendingRequests.clear();

-     // passageEmbed returns an async generator of batches
-     const results: Float32Array[] = [];
-     for await (const batch of model.passageEmbed(texts)) {
-       for (const vec of batch) {
-         results.push(new Float32Array(vec));
-       }
-     }
-     return results;
+     return new Promise<void>((resolve) => {
+       worker.on("exit", () => resolve());
+       worker.postMessage({ type: "shutdown" } satisfies WorkerInbound);
+     });
    }
  }

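The worker half of this RPC lives in the new `embedding-worker.ts` (listed in the file table above but not shown in this hunk). A rough sketch of the message loop the client protocol implies, assuming `WorkerInbound` is the embed/shutdown union used above; the queue and model internals are illustrative, not the package's actual implementation:

    import { parentPort } from "node:worker_threads";
    import type { WorkerInbound, WorkerOutbound } from "./embedding-worker-types";

    type EmbedRequest = Extract<WorkerInbound, { type: "embed" }>;

    const queue: EmbedRequest[] = [];
    let draining = false;

    parentPort!.on("message", (msg: WorkerInbound) => {
      if (msg.type === "shutdown") process.exit(0);
      // "high" priority (single recall queries) jumps the backfill backlog.
      if (msg.priority === "high") queue.unshift(msg);
      else queue.push(msg);
      void drain();
    });

    // Drain sequentially: one in-flight model call at a time.
    async function drain(): Promise<void> {
      if (draining) return;
      draining = true;
      while (queue.length > 0) {
        const req = queue.shift()!;
        try {
          const vectors = await runModel(req.texts, req.inputType);
          parentPort!.postMessage({ type: "result", id: req.id, vectors } satisfies WorkerOutbound);
        } catch (err) {
          parentPort!.postMessage({ type: "error", id: req.id, error: String(err) } satisfies WorkerOutbound);
        }
      }
      draining = false;
    }

    // Placeholder: the real worker wraps FlagEmbedding's queryEmbed/passageEmbed.
    async function runModel(texts: string[], inputType: string): Promise<Float32Array[]> {
      return texts.map(() => new Float32Array(384)); // bge-small-en-v1.5 is 384-dim
    }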
@@ -239,12 +539,12 @@ function getProvider(): EmbeddingProvider | null {

  switch (providerName) {
    case "local": {
-     try {
-       cachedProvider = new LocalProvider(model);
-     } catch {
-       log.info("local embedding provider unavailable (fastembed not installed)");
-       cachedProvider = null;
-     }
+     // `fastembed` is an optionalDependency. We construct the provider
+     // optimistically here; the import + ONNX init happens lazily in
+     // `LocalProvider.ensureWorker()`, which throws `LocalProviderUnavailableError`
+     // if the optional dep isn't installed. After that first failure
+     // `isAvailable()` short-circuits to false and callers fall back to FTS.
+     cachedProvider = new LocalProvider(model);
      break;
    }
    case "voyage": {
@@ -273,9 +573,92 @@ function getProvider(): EmbeddingProvider | null {
    return cachedProvider;
  }

- /** Reset cached provider — called when config changes. */
- export function resetProvider(): void {
+ /** Reset cached provider — called when config changes.
+  * Shuts down the worker thread if the current provider is a LocalProvider.
+  * Returns a promise that resolves once any worker has fully exited.
+  * Callers that need clean teardown (tests) should await the result. */
+ export function resetProvider(): Promise<void> {
+   let shutdownPromise: Promise<void> = Promise.resolve();
+   if (cachedProvider instanceof LocalProvider) {
+     shutdownPromise = cachedProvider.shutdown();
+   }
    cachedProvider = undefined;
+   remoteFallbackLogged = false;
+   return shutdownPromise;
+ }
+
+ /** Shut down the current provider and prevent any new provider from being
+  * created. After this call, `embed()` throws and `isAvailable()` returns
+  * false. Test-only: prevents fire-and-forget embeds (queued by other test
+  * files) from spawning a new worker after cleanup. */
+ export function _shutdownAndDisable(): Promise<void> {
+   let shutdownPromise: Promise<void> = Promise.resolve();
+   if (cachedProvider instanceof LocalProvider) {
+     shutdownPromise = cachedProvider.shutdown();
+   }
+   cachedProvider = null; // null (not undefined) → getProvider() returns null, won't create new
+   remoteFallbackLogged = false;
+   return shutdownPromise;
+ }
+
+ /** Save the current cached provider reference (including the live worker)
+  * and clear the cache so the next `getProvider()` call creates a fresh one.
+  * Returns an opaque token that must be passed to `_restoreProvider()` to
+  * put the original provider back — without this, the worker is orphaned and
+  * a second ONNX load in the same Bun process will crash.
+  *
+  * Test-only helper: lets suites temporarily swap in a mock/unavailable
+  * provider without killing the real worker. */
+ export function _saveAndClearProvider(): unknown {
+   const saved = { provider: cachedProvider, remoteFallbackLogged };
+   cachedProvider = undefined;
+   remoteFallbackLogged = false;
+   return saved;
+ }
+
+ /** Restore a provider previously saved by `_saveAndClearProvider()`. Any
+  * provider created between save and restore is discarded (callers must
+  * ensure it's not a LocalProvider with a live worker — those suites only
+  * use `_markFastembedUnavailable()` so no worker is spawned). */
+ export function _restoreProvider(token: unknown): void {
+   const saved = token as { provider: EmbeddingProvider | null | undefined; remoteFallbackLogged: boolean };
+   cachedProvider = saved.provider;
+   remoteFallbackLogged = saved.remoteFallbackLogged;
+ }
+
+ /** True once we've logged an auto-fallback notice this process — keeps the
+  * one-line warning from spamming on every fire-and-forget embed call. */
+ let remoteFallbackLogged = false;
+
+ /**
+  * Build a remote `EmbeddingProvider` from whichever API key is in env.
+  * Returns `null` when neither `VOYAGE_API_KEY` nor `OPENAI_API_KEY` is set,
+  * which is the signal for callers to fall through to FTS-only behaviour.
+  *
+  * Voyage wins ties because it's the higher-quality option for code search;
+  * users who want OpenAI specifically can pin `search.embeddings.provider`
+  * in `.lore.json` and skip the fallback path entirely.
+  */
+ export function pickRemoteFallback(): {
+   name: "voyage" | "openai";
+   provider: EmbeddingProvider;
+ } | null {
+   if (process.env.VOYAGE_API_KEY) {
+     const d = PROVIDER_DEFAULTS.voyage;
+     return {
+       name: "voyage",
+       provider: new VoyageProvider(process.env.VOYAGE_API_KEY, d.model, d.dimensions),
+     };
+   }
+   if (process.env.OPENAI_API_KEY) {
+     const d = PROVIDER_DEFAULTS.openai;
+     return {
+       name: "openai",
+       provider: new OpenAIProvider(process.env.OPENAI_API_KEY, d.model, d.dimensions),
+     };
+   }
+   return null;
  }

  // ---------------------------------------------------------------------------
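Because `resetProvider()` now returns the worker's shutdown promise, test suites that cycle providers should await it; a sketch of the intended teardown (the `bun:test` hook placement is illustrative):

    import { afterAll } from "bun:test";
    import { resetProvider } from "./embedding";

    afterAll(async () => {
      // Wait for the embedding worker to fully exit; per the notes above,
      // a second ONNX load in the same Bun process can crash if the old
      // worker is still alive.
      await resetProvider();
    });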
@@ -284,9 +667,16 @@ export function resetProvider(): void {

  /** Returns true if embedding is available.
  * Active when the configured provider's API key is set, unless explicitly
- * disabled via `search.embeddings.enabled: false` in .lore.json. */
+ * disabled via `search.embeddings.enabled: false` in .lore.json.
+ *
+ * For the `local` provider, also returns false once we've discovered the
+ * optional `fastembed` peer is missing — callers (recall, ltm, distillation)
+ * use this gate to skip embedding work and fall back to FTS-only search. */
  export function isAvailable(): boolean {
-   return getProvider() !== null;
+   const provider = getProvider();
+   if (!provider) return false;
+   if (provider instanceof LocalProvider && fastembedKnownUnavailable()) return false;
+   return true;
  }

  // ---------------------------------------------------------------------------
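The availability gate above pairs with the error class from earlier in this diff. A minimal sketch of how code inside the package can consume these exports, treating a missing local provider as a soft failure; `tryEmbed` is a hypothetical caller, not part of the package:

    import { embed, isAvailable, LocalProviderUnavailableError } from "./embedding";

    // Hypothetical caller, mirroring the recall.ts gating described above:
    // fall back to FTS-only search (return null) instead of throwing when
    // local embeddings are unavailable.
    async function tryEmbed(texts: string[]): Promise<Float32Array[] | null> {
      if (!isAvailable()) return null;
      try {
        return await embed(texts, "document");
      } catch (err) {
        if (err instanceof LocalProviderUnavailableError) return null;
        throw err; // a real API error, surface it to the caller
      }
    }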
@@ -296,10 +686,18 @@ export function isAvailable(): boolean {
  /**
  * Generate embeddings for the given texts using the configured provider.
  *
+ * If the configured provider is `local` and `fastembed` turns out to be
+ * unavailable at runtime (failed install, vendor extraction blocked, etc.),
+ * automatically swap to a remote provider when `VOYAGE_API_KEY` or
+ * `OPENAI_API_KEY` is set in env. The swap is permanent for the rest of
+ * the process — `cachedProvider` is replaced so subsequent calls skip the
+ * local-then-fail path.
+ *
  * @param texts Array of texts to embed
  * @param inputType "document" for storage, "query" for search
  * @returns Float32Array per input text
- * @throws On API errors or missing provider
+ * @throws On API errors or when no provider (local or remote) is
+ *         available
  */
  export async function embed(
    texts: string[],
@@ -307,7 +705,26 @@
  ): Promise<Float32Array[]> {
    const provider = getProvider();
    if (!provider) throw new Error("No embedding provider available");
-   return provider.embed(texts, inputType);
+
+   try {
+     return await provider.embed(texts, inputType);
+   } catch (err) {
+     if (!(err instanceof LocalProviderUnavailableError)) throw err;
+
+     const fallback = pickRemoteFallback();
+     if (!fallback) throw err;
+
+     if (!remoteFallbackLogged) {
+       remoteFallbackLogged = true;
+       log.info(
+         `fastembed unavailable; auto-switching to ${fallback.name} ` +
+           `(set search.embeddings.provider in .lore.json to silence this)`,
+       );
+     }
+
+     cachedProvider = fallback.provider;
+     return fallback.provider.embed(texts, inputType);
+   }
  }

  // ---------------------------------------------------------------------------
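The fallback log above tells users to pin a provider in `.lore.json`. From the dotted keys used throughout this file (`search.embeddings.provider`, `search.embeddings.enabled`), the stanza would plausibly look like this; the authoritative schema is in `config.ts`, which also changed in this release:

    {
      "search": {
        "embeddings": {
          "provider": "voyage",
          "enabled": true
        }
      }
    }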
@@ -455,6 +872,71 @@ export function embedDistillation(
    });
  }

+ /**
+  * Embed a temporal message and store the result in the DB.
+  * Fire-and-forget — errors are logged, never thrown.
+  * Only called for undistilled messages; once distilled, the embedding
+  * is NULLed (semantic content captured by the distillation embedding).
+  */
+ export function embedTemporalMessage(
+   id: string,
+   content: string,
+ ): void {
+   // Skip very short messages — they don't carry enough semantic signal
+   // to be useful in vector search and would waste embedding capacity.
+   if (content.length < 50) return;
+
+   embed([content], "document")
+     .then(([vec]) => {
+       db()
+         .query("UPDATE temporal_messages SET embedding = ? WHERE id = ?")
+         .run(toBlob(vec), id);
+     })
+     .catch((err) => {
+       log.info("embedding failed for temporal message", id, ":", err);
+     });
+ }
+
+ // ---------------------------------------------------------------------------
+ // Vector search — temporal messages (undistilled only)
+ // ---------------------------------------------------------------------------
+
+ /**
+  * Search undistilled temporal messages with embeddings by cosine similarity.
+  * Returns top-k entries sorted by similarity descending.
+  *
+  * Only scans undistilled messages (distilled=0) — once a message is
+  * distilled, its semantic content is captured by the distillation
+  * embedding and the temporal embedding is cleared.
+  *
+  * Scoped to a single project. Optionally scoped to a single session.
+  */
+ export function vectorSearchTemporal(
+   queryEmbedding: Float32Array,
+   projectId: string,
+   limit = 10,
+   sessionId?: string,
+ ): VectorHit[] {
+   const sql = sessionId
+     ? "SELECT id, embedding FROM temporal_messages WHERE embedding IS NOT NULL AND distilled = 0 AND project_id = ? AND session_id = ?"
+     : "SELECT id, embedding FROM temporal_messages WHERE embedding IS NOT NULL AND distilled = 0 AND project_id = ?";
+   const params = sessionId ? [projectId, sessionId] : [projectId];
+
+   const rows = db()
+     .query(sql)
+     .all(...params) as Array<{ id: string; embedding: Buffer }>;
+
+   const scored: VectorHit[] = [];
+   for (const row of rows) {
+     const vec = fromBlob(row.embedding);
+     const sim = cosineSimilarity(queryEmbedding, vec);
+     scored.push({ id: row.id, similarity: sim });
+   }
+
+   scored.sort((a, b) => b.similarity - a.similarity);
+   return scored.slice(0, limit);
+ }
+
  // ---------------------------------------------------------------------------
  // Config change detection
  // ---------------------------------------------------------------------------
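`vectorSearchTemporal` leans on `toBlob`, `fromBlob`, and `cosineSimilarity`, helpers defined earlier in `embedding.ts` outside the hunks shown here. For readers of the diff, a conventional sketch of such helpers; the package's actual implementations may differ:

    // Float32Array ↔ SQLite BLOB round-trip. A robust version may need to
    // copy when the incoming Buffer is not 4-byte aligned.
    function toBlob(vec: Float32Array): Buffer {
      return Buffer.from(vec.buffer, vec.byteOffset, vec.byteLength);
    }

    function fromBlob(blob: Buffer): Float32Array {
      return new Float32Array(blob.buffer, blob.byteOffset, blob.byteLength / 4);
    }

    // Standard cosine similarity: dot(a, b) / (|a|·|b|).
    function cosineSimilarity(a: Float32Array, b: Float32Array): number {
      let dot = 0;
      let normA = 0;
      let normB = 0;
      for (let i = 0; i < a.length; i++) {
        dot += a[i] * b[i];
        normA += a[i] * a[i];
        normB += b[i] * b[i];
      }
      const denom = Math.sqrt(normA) * Math.sqrt(normB);
      return denom === 0 ? 0 : dot / denom;
    }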
@@ -488,7 +970,7 @@ export function checkConfigChange(): boolean {

  if (stored && stored.value === current) return false;

- // Config changed (or first run) — clear all embeddings in both tables
+ // Config changed (or first run) — clear all embeddings in all tables
  if (stored) {
    const knowledgeCount = db()
      .query("SELECT COUNT(*) as n FROM knowledge WHERE embedding IS NOT NULL")
@@ -496,10 +978,14 @@
    const distillCount = db()
      .query("SELECT COUNT(*) as n FROM distillations WHERE embedding IS NOT NULL")
      .get() as { n: number };
-   const total = knowledgeCount.n + distillCount.n;
+   const temporalCount = db()
+     .query("SELECT COUNT(*) as n FROM temporal_messages WHERE embedding IS NOT NULL")
+     .get() as { n: number };
+   const total = knowledgeCount.n + distillCount.n + temporalCount.n;
    if (total > 0) {
      db().query("UPDATE knowledge SET embedding = NULL").run();
      db().query("UPDATE distillations SET embedding = NULL").run();
+     db().query("UPDATE temporal_messages SET embedding = NULL").run();
      log.info(
        `embedding config changed (${stored.value} → ${current}), cleared ${total} stale embeddings`,
      );
@@ -520,15 +1006,26 @@
  // Startup backfill — single entry point for all hosts
  // ---------------------------------------------------------------------------

+ /**
+  * Delay before the startup backfill begins, so the host's HTTP server has
+  * a clear window to answer the first wave of requests (web UI shell load,
+  * terminal session-connect handshake) before the embedding worker starts
+  * competing for CPU. With inference off the main thread the event loop
+  * isn't blocked, but the worker still consumes a CPU core — a short delay
+  * avoids contention during the first-connect burst.
+  */
+ const STARTUP_BACKFILL_DELAY_MS = 2_000;
+
  /**
  * Run all embedding backfills and log coverage stats.
  *
  * This is the canonical entry point that every host adapter (OpenCode, Pi,
  * future ACP) should call once during init. It:
- * 1. Detects config changes (provider swap) and clears stale embeddings
- * 2. Backfills knowledge entries missing embeddings
- * 3. Backfills non-archived distillations missing embeddings
- * 4. Logs a one-line coverage summary to stderr (always visible, not gated)
+ * 1. Waits a short grace period so first-connect HTTP requests can finish
+ * 2. Detects config changes (provider swap) and clears stale embeddings
+ * 3. Backfills knowledge entries missing embeddings
+ * 4. Backfills non-archived distillations missing embeddings
+ * 5. Logs a one-line coverage summary to stderr (always visible, not gated)
  *
  * Fire-and-forget: callers should `.catch()` — embedding failures must not
  * block plugin initialization.
@@ -536,6 +1033,34 @@ export function checkConfigChange(): boolean {
  export async function runStartupBackfill(): Promise<void> {
    if (!isAvailable()) return;

+   // Surface backlog up-front so a slow startup is self-explanatory in logs.
+   // Counts use the same predicates the backfill loops use, so the two
+   // numbers always match what we're about to do.
+   const pendingKnowledge = (
+     db()
+       .query(
+         "SELECT COUNT(*) as n FROM knowledge WHERE embedding IS NULL AND confidence > 0.2",
+       )
+       .get() as { n: number }
+   ).n;
+   const pendingDistillations = (
+     db()
+       .query(
+         "SELECT COUNT(*) as n FROM distillations WHERE embedding IS NULL AND archived = 0 AND observations != ''",
+       )
+       .get() as { n: number }
+   ).n;
+
+   if (pendingKnowledge + pendingDistillations > 0) {
+     log.info(
+       `embedding backfill scheduled: ${pendingKnowledge} knowledge + ` +
+         `${pendingDistillations} distillations pending — starting in ` +
+         `${STARTUP_BACKFILL_DELAY_MS / 1000}s, batches yield between calls ` +
+         `(host stays responsive)`,
+     );
+     await new Promise<void>((r) => setTimeout(r, STARTUP_BACKFILL_DELAY_MS));
+   }
+
    const knowledgeEmbedded = await backfillEmbeddings();
    const distillationEmbedded = await backfillDistillationEmbeddings();

@@ -581,6 +1106,16 @@ export async function runStartupBackfill(): Promise<void> {
  // Backfill — knowledge
  // ---------------------------------------------------------------------------

+ /**
+  * Chunk size for backfill embed requests. Each chunk becomes a separate
+  * message to the embedding worker. Keeping chunks small (32) gives the
+  * worker's priority queue natural gaps to interleave high-priority recall
+  * queries between backfill batches. The provider's `maxBatchSize` (256)
+  * is the upper limit for any single embed call; this is intentionally
+  * smaller for backfill-vs-live interleaving.
+  */
+ const BACKFILL_CHUNK_SIZE = 32;
+
  /**
  * Embed all knowledge entries that are missing embeddings.
  * Called by `runStartupBackfill()`.
@@ -601,11 +1136,10 @@ export async function backfillEmbeddings(): Promise<number> {

    if (!rows.length) return 0;

-   const batchSize = provider.maxBatchSize;
    let embedded = 0;

-   for (let i = 0; i < rows.length; i += batchSize) {
-     const batch = rows.slice(i, i + batchSize);
+   for (let i = 0; i < rows.length; i += BACKFILL_CHUNK_SIZE) {
+     const batch = rows.slice(i, i + BACKFILL_CHUNK_SIZE);
      const texts = batch.map((r) => `${r.title}\n${r.content}`);

      try {
@@ -621,6 +1155,7 @@ export async function backfillEmbeddings(): Promise<number> {
      } catch (err) {
        log.info(`embedding backfill batch ${i}-${i + batch.length} failed:`, err);
      }
+     // No yieldToEventLoop() needed — embed() is truly async (worker thread).
    }

    if (embedded > 0) {
@@ -650,11 +1185,16 @@ export async function backfillDistillationEmbeddings(): Promise<number> {

    if (!rows.length) return 0;

-   const batchSize = provider.maxBatchSize;
    let embedded = 0;

-   for (let i = 0; i < rows.length; i += batchSize) {
-     const batch = rows.slice(i, i + batchSize);
+   // Progress logging: heartbeat every PROGRESS_INTERVAL embedded so a long
+   // backfill (e.g. 1000+ pending after a fastembed reinstall) doesn't look
+   // like a silent hang. Without this, only the final tally was logged.
+   const PROGRESS_INTERVAL = 256;
+   let nextProgressAt = PROGRESS_INTERVAL;
+
+   for (let i = 0; i < rows.length; i += BACKFILL_CHUNK_SIZE) {
+     const batch = rows.slice(i, i + BACKFILL_CHUNK_SIZE);
      const texts = batch.map((r) => r.observations);

      try {
@@ -670,6 +1210,12 @@ export async function backfillDistillationEmbeddings(): Promise<number> {
      } catch (err) {
        log.info(`distillation embedding backfill batch ${i}-${i + batch.length} failed:`, err);
      }
+
+     if (embedded >= nextProgressAt) {
+       log.info(`embedding distillations: ${embedded}/${rows.length}…`);
+       nextProgressAt = embedded + PROGRESS_INTERVAL;
+     }
+     // No yieldToEventLoop() needed — embed() is truly async (worker thread).
    }

    if (embedded > 0) {