@loreai/core 0.16.0 → 0.17.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +11 -0
- package/dist/bun/agents-file.d.ts +13 -1
- package/dist/bun/agents-file.d.ts.map +1 -1
- package/dist/bun/config.d.ts +20 -1
- package/dist/bun/config.d.ts.map +1 -1
- package/dist/bun/data.d.ts +174 -0
- package/dist/bun/data.d.ts.map +1 -0
- package/dist/bun/db.d.ts +65 -0
- package/dist/bun/db.d.ts.map +1 -1
- package/dist/bun/distillation.d.ts +49 -6
- package/dist/bun/distillation.d.ts.map +1 -1
- package/dist/bun/embedding-vendor.d.ts +66 -0
- package/dist/bun/embedding-vendor.d.ts.map +1 -0
- package/dist/bun/embedding-worker-types.d.ts +66 -0
- package/dist/bun/embedding-worker-types.d.ts.map +1 -0
- package/dist/bun/embedding-worker.d.ts +16 -0
- package/dist/bun/embedding-worker.d.ts.map +1 -0
- package/dist/bun/embedding-worker.js +100 -0
- package/dist/bun/embedding-worker.js.map +7 -0
- package/dist/bun/embedding.d.ts +91 -8
- package/dist/bun/embedding.d.ts.map +1 -1
- package/dist/bun/git.d.ts +47 -0
- package/dist/bun/git.d.ts.map +1 -0
- package/dist/bun/gradient.d.ts +19 -1
- package/dist/bun/gradient.d.ts.map +1 -1
- package/dist/bun/index.d.ts +9 -6
- package/dist/bun/index.d.ts.map +1 -1
- package/dist/bun/index.js +13029 -10885
- package/dist/bun/index.js.map +4 -4
- package/dist/bun/lat-reader.d.ts +1 -1
- package/dist/bun/lat-reader.d.ts.map +1 -1
- package/dist/bun/ltm.d.ts.map +1 -1
- package/dist/bun/markdown.d.ts +11 -0
- package/dist/bun/markdown.d.ts.map +1 -1
- package/dist/bun/prompt.d.ts +1 -1
- package/dist/bun/prompt.d.ts.map +1 -1
- package/dist/bun/recall.d.ts +53 -0
- package/dist/bun/recall.d.ts.map +1 -1
- package/dist/bun/search.d.ts +29 -0
- package/dist/bun/search.d.ts.map +1 -1
- package/dist/bun/temporal.d.ts +2 -0
- package/dist/bun/temporal.d.ts.map +1 -1
- package/dist/bun/types.d.ts +15 -0
- package/dist/bun/types.d.ts.map +1 -1
- package/dist/bun/worker-model.d.ts +12 -9
- package/dist/bun/worker-model.d.ts.map +1 -1
- package/dist/node/agents-file.d.ts +13 -1
- package/dist/node/agents-file.d.ts.map +1 -1
- package/dist/node/config.d.ts +20 -1
- package/dist/node/config.d.ts.map +1 -1
- package/dist/node/data.d.ts +174 -0
- package/dist/node/data.d.ts.map +1 -0
- package/dist/node/db.d.ts +65 -0
- package/dist/node/db.d.ts.map +1 -1
- package/dist/node/distillation.d.ts +49 -6
- package/dist/node/distillation.d.ts.map +1 -1
- package/dist/node/embedding-vendor.d.ts +66 -0
- package/dist/node/embedding-vendor.d.ts.map +1 -0
- package/dist/node/embedding-worker-types.d.ts +66 -0
- package/dist/node/embedding-worker-types.d.ts.map +1 -0
- package/dist/node/embedding-worker.d.ts +16 -0
- package/dist/node/embedding-worker.d.ts.map +1 -0
- package/dist/node/embedding-worker.js +100 -0
- package/dist/node/embedding-worker.js.map +7 -0
- package/dist/node/embedding.d.ts +91 -8
- package/dist/node/embedding.d.ts.map +1 -1
- package/dist/node/git.d.ts +47 -0
- package/dist/node/git.d.ts.map +1 -0
- package/dist/node/gradient.d.ts +19 -1
- package/dist/node/gradient.d.ts.map +1 -1
- package/dist/node/index.d.ts +9 -6
- package/dist/node/index.d.ts.map +1 -1
- package/dist/node/index.js +13029 -10885
- package/dist/node/index.js.map +4 -4
- package/dist/node/lat-reader.d.ts +1 -1
- package/dist/node/lat-reader.d.ts.map +1 -1
- package/dist/node/ltm.d.ts.map +1 -1
- package/dist/node/markdown.d.ts +11 -0
- package/dist/node/markdown.d.ts.map +1 -1
- package/dist/node/prompt.d.ts +1 -1
- package/dist/node/prompt.d.ts.map +1 -1
- package/dist/node/recall.d.ts +53 -0
- package/dist/node/recall.d.ts.map +1 -1
- package/dist/node/search.d.ts +29 -0
- package/dist/node/search.d.ts.map +1 -1
- package/dist/node/temporal.d.ts +2 -0
- package/dist/node/temporal.d.ts.map +1 -1
- package/dist/node/types.d.ts +15 -0
- package/dist/node/types.d.ts.map +1 -1
- package/dist/node/worker-model.d.ts +12 -9
- package/dist/node/worker-model.d.ts.map +1 -1
- package/dist/types/agents-file.d.ts +13 -1
- package/dist/types/agents-file.d.ts.map +1 -1
- package/dist/types/config.d.ts +20 -1
- package/dist/types/config.d.ts.map +1 -1
- package/dist/types/data.d.ts +174 -0
- package/dist/types/data.d.ts.map +1 -0
- package/dist/types/db.d.ts +65 -0
- package/dist/types/db.d.ts.map +1 -1
- package/dist/types/distillation.d.ts +49 -6
- package/dist/types/distillation.d.ts.map +1 -1
- package/dist/types/embedding-vendor.d.ts +66 -0
- package/dist/types/embedding-vendor.d.ts.map +1 -0
- package/dist/types/embedding-worker-types.d.ts +66 -0
- package/dist/types/embedding-worker-types.d.ts.map +1 -0
- package/dist/types/embedding-worker.d.ts +16 -0
- package/dist/types/embedding-worker.d.ts.map +1 -0
- package/dist/types/embedding.d.ts +91 -8
- package/dist/types/embedding.d.ts.map +1 -1
- package/dist/types/git.d.ts +47 -0
- package/dist/types/git.d.ts.map +1 -0
- package/dist/types/gradient.d.ts +19 -1
- package/dist/types/gradient.d.ts.map +1 -1
- package/dist/types/index.d.ts +9 -6
- package/dist/types/index.d.ts.map +1 -1
- package/dist/types/lat-reader.d.ts +1 -1
- package/dist/types/lat-reader.d.ts.map +1 -1
- package/dist/types/ltm.d.ts.map +1 -1
- package/dist/types/markdown.d.ts +11 -0
- package/dist/types/markdown.d.ts.map +1 -1
- package/dist/types/prompt.d.ts +1 -1
- package/dist/types/prompt.d.ts.map +1 -1
- package/dist/types/recall.d.ts +53 -0
- package/dist/types/recall.d.ts.map +1 -1
- package/dist/types/search.d.ts +29 -0
- package/dist/types/search.d.ts.map +1 -1
- package/dist/types/temporal.d.ts +2 -0
- package/dist/types/temporal.d.ts.map +1 -1
- package/dist/types/types.d.ts +15 -0
- package/dist/types/types.d.ts.map +1 -1
- package/dist/types/worker-model.d.ts +12 -9
- package/dist/types/worker-model.d.ts.map +1 -1
- package/package.json +5 -2
- package/src/agents-file.ts +87 -4
- package/src/config.ts +68 -5
- package/src/curator.ts +2 -2
- package/src/data.ts +768 -0
- package/src/db.ts +386 -7
- package/src/distillation.ts +178 -35
- package/src/embedding-vendor.ts +102 -0
- package/src/embedding-worker-types.ts +82 -0
- package/src/embedding-worker.ts +185 -0
- package/src/embedding.ts +607 -61
- package/src/git.ts +144 -0
- package/src/gradient.ts +174 -17
- package/src/index.ts +20 -0
- package/src/lat-reader.ts +5 -11
- package/src/ltm.ts +17 -44
- package/src/markdown.ts +15 -0
- package/src/prompt.ts +1 -2
- package/src/recall.ts +401 -70
- package/src/search.ts +71 -1
- package/src/temporal.ts +42 -35
- package/src/types.ts +15 -0
- package/src/worker-model.ts +14 -9
package/src/embedding.ts
CHANGED
|
@@ -11,6 +11,12 @@
|
|
|
11
11
|
import { db } from "./db";
|
|
12
12
|
import { config } from "./config";
|
|
13
13
|
import * as log from "./log";
|
|
14
|
+
import { isVendoredBinary, vendorModelInfo } from "./embedding-vendor";
|
|
15
|
+
import type {
|
|
16
|
+
WorkerInbound,
|
|
17
|
+
WorkerOutbound,
|
|
18
|
+
WorkerInitData,
|
|
19
|
+
} from "./embedding-worker-types";
|
|
14
20
|
|
|
15
21
|
/** Timeout for embedding API fetch calls (ms). Prevents a hanging API from
|
|
16
22
|
* blocking the recall tool indefinitely. 10s is generous for typical 100-500ms
|
|
@@ -136,6 +142,125 @@ class OpenAIProvider implements EmbeddingProvider {
|
|
|
136
142
|
// Local provider (fastembed + ONNX Runtime)
|
|
137
143
|
// ---------------------------------------------------------------------------
|
|
138
144
|
|
|
145
|
+
/**
 * Error raised when the local (`fastembed`-backed) embedding provider cannot
 * be used. `fastembed` is an optionalDependency of `@loreai/core`, so a failed
 * postinstall (e.g. the upstream `onnxruntime-node` bug on CUDA 13 — see #185)
 * leaves the package installed but local embeddings disabled.
 *
 * The message is always the same fixed remediation text. When a `cause` is
 * supplied it is attached as the `cause` property; when it is `undefined`
 * no such property is set.
 */
export class LocalProviderUnavailableError extends Error {
  constructor(cause?: unknown) {
    const message = [
      "Local embedding provider unavailable: 'fastembed' is not installed. ",
      "Configure search.embeddings.provider to 'voyage' or 'openai', or ",
      "reinstall with ONNXRUNTIME_NODE_INSTALL_CUDA=skip to retry the optional fastembed install.",
    ].join("");
    super(message);
    this.name = "LocalProviderUnavailableError";
    if (cause === undefined) return;
    (this as Error & { cause?: unknown }).cause = cause;
  }
}
|
|
164
|
+
|
|
165
|
+
// Cached outcome of the one-shot `fastembed` import probe.
//   fastembedModule    — the imported module, or null while none is cached
//   fastembedProbed    — true once a probe result has been recorded
//   fastembedAvailable — true only when the recorded result is a success
//   fastembedLogged    — true once the "unavailable" notice has been logged
let fastembedModule: typeof import("fastembed") | null = null;
let fastembedProbed: boolean = false;
let fastembedAvailable: boolean = false;
let fastembedLogged: boolean = false;
|
|
171
|
+
|
|
172
|
+
/**
 * Test helper: forget any recorded probe result so the next
 * `tryLoadFastembed()` call performs the import again and may log again.
 */
export function _resetFastembedProbe(): void {
  fastembedProbed = false;
  fastembedAvailable = false;
  fastembedLogged = false;
  fastembedModule = null;
}
|
|
179
|
+
|
|
180
|
+
/**
 * Test helper: record a failed probe without touching the dynamic import.
 * Afterwards `tryLoadFastembed()` returns `null` straight from the cache and
 * `fastembedKnownUnavailable()` reports true.
 */
export function _markFastembedUnavailable(): void {
  fastembedProbed = true;
  fastembedAvailable = false;
  fastembedModule = null;
  // Mark the notice as already emitted so tests don't produce the info log.
  fastembedLogged = true;
}
|
|
189
|
+
|
|
190
|
+
/**
 * Import `fastembed` at most once and cache the outcome.
 *
 * Resolves to the module when the import succeeds and to `null` when it
 * fails. The first failure emits a single info-level log line describing
 * the recovery options; the wording depends on `isVendoredBinary()`.
 *
 * If a result is recorded by someone else while the import is in flight
 * (for example `_markFastembedUnavailable()` in a test), that recorded
 * result wins and is returned unchanged.
 */
async function tryLoadFastembed(): Promise<typeof import("fastembed") | null> {
  const recorded = () => (fastembedAvailable ? fastembedModule : null);

  if (fastembedProbed) return recorded();

  let loaded: typeof import("fastembed");
  try {
    loaded = await loadFastembedModule();
  } catch (err) {
    // A result recorded during the await takes priority over this failure.
    if (fastembedProbed) return recorded();

    fastembedAvailable = false;
    fastembedProbed = true;

    if (!fastembedLogged) {
      fastembedLogged = true;
      const msg = err instanceof Error ? err.message : String(err);
      // Binary mode bundles everything at build time, so a load failure is a
      // bug there; in npm mode it means the optional dependency is missing.
      let remediation: string;
      if (isVendoredBinary()) {
        remediation =
          "this is a bug in the lore binary; please file an issue. " +
          "Set VOYAGE_API_KEY/OPENAI_API_KEY for automatic remote fallback in the meantime";
      } else {
        remediation =
          "set search.embeddings.provider to 'voyage' or 'openai', " +
          "set VOYAGE_API_KEY/OPENAI_API_KEY for automatic remote fallback, " +
          "or reinstall fastembed with ONNXRUNTIME_NODE_INSTALL_CUDA=skip";
      }
      log.info(
        `local embedding provider unavailable (fastembed not installed: ${msg}) — ${remediation}`,
      );
    }
    return recorded();
  }

  // A result recorded during the await takes priority over this success.
  if (fastembedProbed) return recorded();

  fastembedModule = loaded;
  fastembedAvailable = true;
  fastembedProbed = true;
  return recorded();
}
|
|
236
|
+
|
|
237
|
+
/**
 * Dynamically import `fastembed` through a single bare specifier.
 *
 * The same specifier serves both distribution modes:
 *
 *  - Binary mode: `bun build --compile` resolves "fastembed" from the
 *    per-target staging `node_modules/` at build time and bundles it with
 *    its transitive deps and `.node` addons. The binary's wrapper preloads
 *    the side-load `libonnxruntime` library before this import evaluates.
 *
 *  - npm mode: ordinary Node/Bun module resolution. When the optional
 *    dependency's postinstall failed, the import rejects and the caller
 *    handles logging and fallback.
 */
async function loadFastembedModule(): Promise<typeof import("fastembed")> {
  const imported = await import("fastembed");
  return imported as typeof import("fastembed");
}
|
|
258
|
+
|
|
259
|
+
/** Reports whether a probe result has been recorded and that result is a failure. */
function fastembedKnownUnavailable(): boolean {
  if (!fastembedProbed) return false;
  return !fastembedAvailable;
}
|
|
263
|
+
|
|
139
264
|
/**
|
|
140
265
|
* Local embedding provider using fastembed (bge-small-en-v1.5 by default).
|
|
141
266
|
*
|
|
@@ -143,61 +268,236 @@ class OpenAIProvider implements EmbeddingProvider {
|
|
|
143
268
|
* Model files are downloaded on first use (~33MB) and cached in
|
|
144
269
|
* `~/.cache/fastembed`. Subsequent inits load from disk in ~350ms.
|
|
145
270
|
*
|
|
271
|
+
* ONNX inference runs in a dedicated `node:worker_threads` Worker so the
|
|
272
|
+
* main thread's event loop stays free. This class is a thin RPC client —
|
|
273
|
+
* it posts `{ texts, inputType }` to the worker and awaits a reply.
|
|
274
|
+
* The worker owns the `FlagEmbedding` model and processes requests
|
|
275
|
+
* sequentially from a priority queue (recall queries jump ahead of
|
|
276
|
+
* backfill batches).
|
|
277
|
+
*
|
|
146
278
|
* Uses dynamic import so the module is only loaded when the "local"
|
|
147
279
|
* provider is actually selected — avoids startup cost and allows
|
|
148
|
-
* graceful fallback
|
|
280
|
+
* graceful fallback when the optional `fastembed` peer isn't installed
|
|
281
|
+
* (its native onnxruntime-node may fail to build, e.g. on CUDA 13).
|
|
149
282
|
*/
|
|
150
283
|
class LocalProvider implements EmbeddingProvider {
  // With inference off the main thread, large batches no longer block
  // the event loop. 256 maximises throughput per round-trip to the
  // worker. Backfill callers use a smaller BACKFILL_CHUNK_SIZE to give
  // the worker's priority queue breathing room for recall queries.
  readonly maxBatchSize = 256;

  private worker: import("node:worker_threads").Worker | null = null;
  private workerReady = false;
  private workerInitError: string | null = null;
  private pendingRequests = new Map<
    number,
    { resolve: (vectors: Float32Array[]) => void; reject: (error: Error) => void }
  >();
  private nextRequestId = 0;
  private initPromise: Promise<void> | null = null;
  private modelName: string;

  constructor(modelName: string) {
    this.modelName = modelName;
  }

  /**
   * Work out what to pass to `new Worker()`.
   *
   * When `globalThis.__LORE_VENDOR_WORKER_URL__` is set (vendored binary), the
   * worker is spawned from that URL. On Windows the URL is converted to a raw
   * path (`B:\~BUN\root\...`) because `new Worker()` with a `file://` URL that
   * points into `$bunfs` fails with ENOENT there.
   *
   * Otherwise the sibling `embedding-worker` script is used, with a `.ts`
   * extension when this module itself is running from a `.ts` file and
   * `.js` otherwise.
   */
  private resolveWorkerUrl(): string | URL {
    const vendorWorkerUrl = (globalThis as Record<string, unknown>).__LORE_VENDOR_WORKER_URL__ as string | undefined;

    if (!vendorWorkerUrl) {
      return new URL(`./embedding-worker${import.meta.url.endsWith(".ts") ? ".ts" : ".js"}`, import.meta.url);
    }
    if (process.platform !== "win32") {
      return vendorWorkerUrl;
    }

    // URL.pathname keeps %7E encoded; decodeURIComponent restores ~.
    const rawPath = decodeURIComponent(new URL(vendorWorkerUrl).pathname);
    // URL.pathname on Windows: /B:/~BUN/root/wrapper.js → strip leading /
    return /^\/[A-Za-z]:/.test(rawPath) ? rawPath.slice(1) : rawPath;
  }

  /** Reject every in-flight request with a fresh error and empty the map. */
  private failAllPending(makeError: () => Error): void {
    for (const [, p] of this.pendingRequests) {
      p.reject(makeError());
    }
    this.pendingRequests.clear();
    this.updateWorkerRef();
  }

  /**
   * Ensure the worker thread is running. Probes fastembed on the main
   * thread first (cached after the first call) as a fast-fail gate — the
   * worker is only spawned if the module loaded. A recorded worker failure
   * is surfaced as `LocalProviderUnavailableError`.
   *
   * @throws LocalProviderUnavailableError when fastembed cannot be loaded or
   *   the worker has previously failed or exited.
   */
  private async ensureWorker(): Promise<void> {
    if (this.workerReady) return;
    if (this.workerInitError) throw new LocalProviderUnavailableError(this.workerInitError);
    if (this.initPromise) return this.initPromise;

    this.initPromise = (async () => {
      const fastembed = await tryLoadFastembed();
      if (!fastembed) throw new LocalProviderUnavailableError();

      const { Worker } = await import("node:worker_threads");

      const vendor = vendorModelInfo();
      const workerInitData: WorkerInitData = {
        modelName: this.modelName,
        vendorModel: vendor
          ? { modelAbsoluteDirPath: vendor.modelAbsoluteDirPath, modelName: vendor.modelName }
          : null,
      };

      const worker = new Worker(this.resolveWorkerUrl(), { workerData: workerInitData });
      this.worker = worker;

      // Don't let the worker prevent process exit.
      worker.unref();

      // Events from a worker that has since been shut down or replaced must
      // not touch this provider's state.
      const isCurrent = () => this.worker === worker;

      // Remove and return the pending entry for a reply, if it is still waiting.
      const takePending = (id: number) => {
        const pending = this.pendingRequests.get(id);
        if (!pending) return undefined;
        this.pendingRequests.delete(id);
        this.updateWorkerRef();
        return pending;
      };

      worker.on("message", (msg: WorkerOutbound) => {
        if (!isCurrent()) return;
        switch (msg.type) {
          case "result": {
            takePending(msg.id)?.resolve(msg.vectors);
            break;
          }
          case "error": {
            takePending(msg.id)?.reject(new Error(`Worker embedding failed: ${msg.error}`));
            break;
          }
          case "init-error": {
            // Model init failed inside the worker — surface as
            // LocalProviderUnavailableError on all pending + future requests.
            this.workerInitError = msg.error;
            this.workerReady = false;
            this.failAllPending(() => new LocalProviderUnavailableError(msg.error));
            break;
          }
        }
      });

      // Worker crash — reject all in-flight requests.
      worker.on("error", (err: Error) => {
        if (!isCurrent()) return;
        this.workerInitError = err.message;
        this.workerReady = false;
        this.failAllPending(() => new LocalProviderUnavailableError(err));
      });

      worker.on("exit", (code) => {
        if (!isCurrent()) return;
        // Always record a reason, including for a clean (code 0) exit and for
        // an error whose message was empty. Without it, later embed() calls
        // would post to a dead worker and wait forever for a reply.
        if (!this.workerInitError) {
          this.workerInitError =
            code !== 0 ? `embedding worker exited with code ${code}` : "embedding worker exited";
        }
        const reason = this.workerInitError;
        this.workerReady = false;
        // Drop the dead handle so shutdown() does not wait for a second
        // "exit" event that will never fire.
        this.worker = null;
        this.failAllPending(() => new LocalProviderUnavailableError(reason));
      });

      this.workerReady = true;
    })().catch((err) => {
      this.initPromise = null; // allow retry
      throw err;
    });

    return this.initPromise;
  }

  /** Keep the worker ref'd while requests are in flight so the event loop
   *  doesn't exit before responses arrive. When the pending map drains,
   *  unref again so the worker doesn't prevent graceful process exit. */
  private updateWorkerRef(): void {
    if (!this.worker) return;
    if (this.pendingRequests.size > 0) {
      this.worker.ref();
    } else {
      this.worker.unref();
    }
  }

  /**
   * Embed `texts` by posting a request to the worker and awaiting its reply.
   *
   * @param texts      Texts to embed.
   * @param inputType  "document" or "query"; forwarded to the worker.
   * @returns One `Float32Array` per input text, as sent back by the worker.
   * @throws LocalProviderUnavailableError when the worker cannot be started
   *   or has failed; a plain `Error` when the worker reports an embedding
   *   failure or the provider has been shut down.
   */
  async embed(texts: string[], inputType: "document" | "query"): Promise<Float32Array[]> {
    await this.ensureWorker();

    const worker = this.worker;
    if (!worker) {
      // ensureWorker() succeeded but no worker is attached: either it exited
      // (reason recorded) or shutdown() ran in the meantime.
      if (this.workerInitError) throw new LocalProviderUnavailableError(this.workerInitError);
      throw new Error("embedding worker shut down");
    }

    const id = this.nextRequestId++;
    // Recall queries (single query-type texts) get high priority so they
    // jump ahead of any queued backfill batches in the worker.
    const priority = inputType === "query" && texts.length === 1 ? "high" : "normal";

    return new Promise<Float32Array[]>((resolve, reject) => {
      this.pendingRequests.set(id, { resolve, reject });
      this.updateWorkerRef();
      worker.postMessage({
        type: "embed",
        id,
        texts,
        inputType,
        priority,
      } satisfies WorkerInbound);
    });
  }

  /** Shut down the worker thread. Called by `resetProvider()` on config change.
   *  Sends a shutdown message so the worker calls `process.exit(0)` internally.
   *  We avoid `worker.terminate()` because Bun's forced termination triggers a
   *  NAPI fatal error when tearing down onnxruntime's native bindings.
   *
   *  Returns a promise that resolves once the worker has fully exited, or
   *  immediately when no live worker is attached (never started, or already
   *  exited). Callers that need a clean teardown (tests, config change) should
   *  await the result. Fire-and-forget callers (process exit) can ignore it. */
  shutdown(): Promise<void> {
    const worker = this.worker;
    this.worker = null;
    this.workerReady = false;
    this.workerInitError = null;
    this.initPromise = null;

    // Reject any in-flight requests.
    for (const [, p] of this.pendingRequests) {
      p.reject(new Error("embedding worker shut down"));
    }
    this.pendingRequests.clear();

    if (!worker) return Promise.resolve();

    return new Promise<void>((resolve) => {
      worker.once("exit", () => resolve());
      worker.postMessage({ type: "shutdown" } satisfies WorkerInbound);
    });
  }
}
|
|
203
503
|
|
|
@@ -239,12 +539,12 @@ function getProvider(): EmbeddingProvider | null {
|
|
|
239
539
|
|
|
240
540
|
switch (providerName) {
|
|
241
541
|
case "local": {
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
542
|
+
// `fastembed` is an optionalDependency. We construct the provider
|
|
543
|
+
// optimistically here; the import + ONNX init happens lazily in
|
|
544
|
+
// `LocalProvider.ensureWorker()`, which throws `LocalProviderUnavailableError`
|
|
545
|
+
// if the optional dep isn't installed. After that first failure
|
|
546
|
+
// `isAvailable()` short-circuits to false and callers fall back to FTS.
|
|
547
|
+
cachedProvider = new LocalProvider(model);
|
|
248
548
|
break;
|
|
249
549
|
}
|
|
250
550
|
case "voyage": {
|
|
@@ -273,9 +573,92 @@ function getProvider(): EmbeddingProvider | null {
|
|
|
273
573
|
return cachedProvider;
|
|
274
574
|
}
|
|
275
575
|
|
|
276
|
-
/** Reset cached provider — called when config changes.
|
|
277
|
-
|
|
576
|
+
/**
 * Drop the cached provider so the next `getProvider()` call builds a new one,
 * and clear the "remote fallback logged" flag.
 *
 * When the cached provider is a `LocalProvider`, its worker is asked to shut
 * down first.
 *
 * @returns A promise that settles once that shutdown has completed; already
 *   resolved when there was no `LocalProvider` cached. Callers that need a
 *   clean teardown (tests) should await it.
 */
export function resetProvider(): Promise<void> {
  const workerStopped =
    cachedProvider instanceof LocalProvider ? cachedProvider.shutdown() : Promise.resolve();
  cachedProvider = undefined;
  remoteFallbackLogged = false;
  return workerStopped;
}
|
|
589
|
+
|
|
590
|
+
/**
 * Test-only: shut down the cached provider (if it is a `LocalProvider`) and
 * pin the cache to `null` so that no replacement provider is created. This
 * stops fire-and-forget embeds queued by other test files from spawning a
 * new worker after cleanup.
 *
 * @returns A promise that settles once any worker shutdown has completed.
 */
export function _shutdownAndDisable(): Promise<void> {
  const workerStopped =
    cachedProvider instanceof LocalProvider ? cachedProvider.shutdown() : Promise.resolve();
  // null, not undefined: getProvider() returns null rather than creating a provider.
  cachedProvider = null;
  remoteFallbackLogged = false;
  return workerStopped;
}
|
|
603
|
+
|
|
604
|
+
/**
 * Test-only: snapshot the cached provider (together with the
 * `remoteFallbackLogged` flag) and clear both, so the next `getProvider()`
 * call creates a fresh provider. The saved provider is not shut down.
 *
 * @returns An opaque token to hand back to `_restoreProvider()`. Restoring is
 *   required: otherwise the saved provider's worker is orphaned, and a second
 *   ONNX load in the same Bun process will crash.
 */
export function _saveAndClearProvider(): unknown {
  const token = { provider: cachedProvider, remoteFallbackLogged };
  remoteFallbackLogged = false;
  cachedProvider = undefined;
  return token;
}
|
|
618
|
+
|
|
619
|
+
/**
 * Test-only: reinstate the provider and `remoteFallbackLogged` flag captured
 * by `_saveAndClearProvider()`. Whatever provider was cached in between is
 * overwritten without being shut down, so callers must make sure it is not a
 * `LocalProvider` with a live worker.
 *
 * @param token The value returned by `_saveAndClearProvider()`.
 */
export function _restoreProvider(token: unknown): void {
  const { provider, remoteFallbackLogged: wasLogged } = token as {
    provider: EmbeddingProvider | null | undefined;
    remoteFallbackLogged: boolean;
  };
  cachedProvider = provider;
  remoteFallbackLogged = wasLogged;
}
|
|
628
|
+
|
|
629
|
+
/** True once we've logged an auto-fallback notice this process — keeps the
|
|
630
|
+
* one-line warning from spamming on every fire-and-forget embed call. */
|
|
631
|
+
let remoteFallbackLogged = false;
|
|
632
|
+
|
|
633
|
+
|
|
634
|
+
/**
 * Construct a remote `EmbeddingProvider` from an API key found in the
 * environment, using that provider's entry in `PROVIDER_DEFAULTS` for the
 * model and dimensions.
 *
 * `VOYAGE_API_KEY` is checked first, so Voyage is chosen when both keys are
 * present; `OPENAI_API_KEY` is used otherwise.
 *
 * @returns The chosen provider and its name, or `null` when neither variable
 *   is set (or both are empty).
 */
export function pickRemoteFallback(): {
  name: "voyage" | "openai";
  provider: EmbeddingProvider;
} | null {
  const voyageKey = process.env.VOYAGE_API_KEY;
  if (voyageKey) {
    const { model, dimensions } = PROVIDER_DEFAULTS.voyage;
    return { name: "voyage", provider: new VoyageProvider(voyageKey, model, dimensions) };
  }

  const openaiKey = process.env.OPENAI_API_KEY;
  if (openaiKey) {
    const { model, dimensions } = PROVIDER_DEFAULTS.openai;
    return { name: "openai", provider: new OpenAIProvider(openaiKey, model, dimensions) };
  }

  return null;
}
|
|
280
663
|
|
|
281
664
|
// ---------------------------------------------------------------------------
|
|
@@ -284,9 +667,16 @@ export function resetProvider(): void {
|
|
|
284
667
|
|
|
285
668
|
/**
 * Reports whether embedding can currently be used.
 *
 * Returns false when `getProvider()` yields no provider, and also when the
 * provider is a `LocalProvider` whose `fastembed` probe has already failed.
 * Callers use this gate to skip embedding work and fall back to FTS-only
 * search.
 */
export function isAvailable(): boolean {
  const active = getProvider();
  const localButBroken = () => active instanceof LocalProvider && fastembedKnownUnavailable();
  return Boolean(active) && !localButBroken();
}
|
|
291
681
|
|
|
292
682
|
// ---------------------------------------------------------------------------
|
|
@@ -296,10 +686,18 @@ export function isAvailable(): boolean {
|
|
|
296
686
|
/**
|
|
297
687
|
* Generate embeddings for the given texts using the configured provider.
|
|
298
688
|
*
|
|
689
|
+
* If the configured provider is `local` and `fastembed` turns out to be
|
|
690
|
+
* unavailable at runtime (failed install, vendor extraction blocked, etc.),
|
|
691
|
+
* automatically swap to a remote provider when `VOYAGE_API_KEY` or
|
|
692
|
+
* `OPENAI_API_KEY` is set in env. The swap is permanent for the rest of
|
|
693
|
+
* the process — `cachedProvider` is replaced so subsequent calls skip the
|
|
694
|
+
* local-then-fail path.
|
|
695
|
+
*
|
|
299
696
|
* @param texts Array of texts to embed
|
|
300
697
|
* @param inputType "document" for storage, "query" for search
|
|
301
698
|
* @returns Float32Array per input text
|
|
302
|
-
* @throws On API errors or
|
|
699
|
+
* @throws On API errors or when no provider (local or remote) is
|
|
700
|
+
* available
|
|
303
701
|
*/
|
|
304
702
|
export async function embed(
|
|
305
703
|
texts: string[],
|
|
@@ -307,7 +705,26 @@ export async function embed(
|
|
|
307
705
|
): Promise<Float32Array[]> {
|
|
308
706
|
const provider = getProvider();
|
|
309
707
|
if (!provider) throw new Error("No embedding provider available");
|
|
310
|
-
|
|
708
|
+
|
|
709
|
+
try {
|
|
710
|
+
return await provider.embed(texts, inputType);
|
|
711
|
+
} catch (err) {
|
|
712
|
+
if (!(err instanceof LocalProviderUnavailableError)) throw err;
|
|
713
|
+
|
|
714
|
+
const fallback = pickRemoteFallback();
|
|
715
|
+
if (!fallback) throw err;
|
|
716
|
+
|
|
717
|
+
if (!remoteFallbackLogged) {
|
|
718
|
+
remoteFallbackLogged = true;
|
|
719
|
+
log.info(
|
|
720
|
+
`fastembed unavailable; auto-switching to ${fallback.name} ` +
|
|
721
|
+
`(set search.embeddings.provider in .lore.json to silence this)`,
|
|
722
|
+
);
|
|
723
|
+
}
|
|
724
|
+
|
|
725
|
+
cachedProvider = fallback.provider;
|
|
726
|
+
return fallback.provider.embed(texts, inputType);
|
|
727
|
+
}
|
|
311
728
|
}
|
|
312
729
|
|
|
313
730
|
// ---------------------------------------------------------------------------
|
|
@@ -455,6 +872,71 @@ export function embedDistillation(
|
|
|
455
872
|
});
|
|
456
873
|
}
|
|
457
874
|
|
|
875
|
+
/**
 * Compute an embedding for one temporal message and persist it.
 *
 * Fire-and-forget: the function returns immediately and any failure is
 * logged rather than thrown.
 *
 * Intended only for undistilled messages; once a message is distilled its
 * embedding is NULLed (the distillation embedding carries the semantic
 * content from then on).
 *
 * @param id      Row id in `temporal_messages` to update
 * @param content Message text to embed; texts shorter than 50 characters are
 *                ignored
 */
export function embedTemporalMessage(
  id: string,
  content: string,
): void {
  // Very short messages carry too little semantic signal to help vector
  // search and would only waste embedding capacity.
  const MIN_EMBEDDABLE_LENGTH = 50;
  if (content.length < MIN_EMBEDDABLE_LENGTH) return;

  // Store the single returned vector on the message row.
  const persist = (vectors: Float32Array[]): void => {
    const statement = db().query("UPDATE temporal_messages SET embedding = ? WHERE id = ?");
    statement.run(toBlob(vectors[0]), id);
  };

  // Covers both embedding failures and database write failures.
  const report = (err: unknown): void => {
    log.info("embedding failed for temporal message", id, ":", err);
  };

  embed([content], "document").then(persist).catch(report);
}
|
|
899
|
+
|
|
900
|
+
// ---------------------------------------------------------------------------
|
|
901
|
+
// Vector search — temporal messages (undistilled only)
|
|
902
|
+
// ---------------------------------------------------------------------------
|
|
903
|
+
|
|
904
|
+
/**
 * Rank a project's undistilled temporal messages against a query vector.
 *
 * Loads every `temporal_messages` row that has a stored embedding, is not
 * yet distilled (`distilled = 0`) and belongs to `projectId` (and to
 * `sessionId` when one is given), scores each row with `cosineSimilarity`,
 * and returns the best matches first.
 *
 * Only undistilled messages are scanned: once a message is distilled, its
 * semantic content is captured by the distillation embedding and the
 * temporal embedding is cleared.
 *
 * @param queryEmbedding Query vector compared against each stored embedding
 * @param projectId      Project whose messages are scanned
 * @param limit          Maximum number of hits to return (default 10). A
 *                       zero, negative or NaN limit yields an empty result
 *                       without touching the database.
 * @param sessionId      Optional session to restrict the scan to
 * @returns Up to `limit` hits sorted by similarity, highest first
 */
export function vectorSearchTemporal(
  queryEmbedding: Float32Array,
  projectId: string,
  limit = 10,
  sessionId?: string,
): VectorHit[] {
  // `slice(0, limit)` interprets a negative end as an offset from the back,
  // which would return "all but the last N" hits instead of none. Reject
  // non-positive (and NaN) limits up front; this also avoids a pointless
  // table scan when the caller asked for nothing.
  if (!(limit > 0)) return [];

  // Single base query; the session filter is appended only when requested.
  const baseSql =
    "SELECT id, embedding FROM temporal_messages WHERE embedding IS NOT NULL AND distilled = 0 AND project_id = ?";
  const sql = sessionId ? `${baseSql} AND session_id = ?` : baseSql;
  const params = sessionId ? [projectId, sessionId] : [projectId];

  const rows = db()
    .query(sql)
    .all(...params) as Array<{ id: string; embedding: Buffer }>;

  const scored: VectorHit[] = rows.map((row) => ({
    id: row.id,
    similarity: cosineSimilarity(queryEmbedding, fromBlob(row.embedding)),
  }));

  // Highest similarity first, then keep the top `limit`.
  scored.sort((a, b) => b.similarity - a.similarity);
  return scored.slice(0, limit);
}
|
|
939
|
+
|
|
458
940
|
// ---------------------------------------------------------------------------
|
|
459
941
|
// Config change detection
|
|
460
942
|
// ---------------------------------------------------------------------------
|
|
@@ -488,7 +970,7 @@ export function checkConfigChange(): boolean {
|
|
|
488
970
|
|
|
489
971
|
if (stored && stored.value === current) return false;
|
|
490
972
|
|
|
491
|
-
// Config changed (or first run) — clear all embeddings in
|
|
973
|
+
// Config changed (or first run) — clear all embeddings in all tables
|
|
492
974
|
if (stored) {
|
|
493
975
|
const knowledgeCount = db()
|
|
494
976
|
.query("SELECT COUNT(*) as n FROM knowledge WHERE embedding IS NOT NULL")
|
|
@@ -496,10 +978,14 @@ export function checkConfigChange(): boolean {
|
|
|
496
978
|
const distillCount = db()
|
|
497
979
|
.query("SELECT COUNT(*) as n FROM distillations WHERE embedding IS NOT NULL")
|
|
498
980
|
.get() as { n: number };
|
|
499
|
-
const
|
|
981
|
+
const temporalCount = db()
|
|
982
|
+
.query("SELECT COUNT(*) as n FROM temporal_messages WHERE embedding IS NOT NULL")
|
|
983
|
+
.get() as { n: number };
|
|
984
|
+
const total = knowledgeCount.n + distillCount.n + temporalCount.n;
|
|
500
985
|
if (total > 0) {
|
|
501
986
|
db().query("UPDATE knowledge SET embedding = NULL").run();
|
|
502
987
|
db().query("UPDATE distillations SET embedding = NULL").run();
|
|
988
|
+
db().query("UPDATE temporal_messages SET embedding = NULL").run();
|
|
503
989
|
log.info(
|
|
504
990
|
`embedding config changed (${stored.value} → ${current}), cleared ${total} stale embeddings`,
|
|
505
991
|
);
|
|
@@ -520,15 +1006,26 @@ export function checkConfigChange(): boolean {
|
|
|
520
1006
|
// Startup backfill — single entry point for all hosts
|
|
521
1007
|
// ---------------------------------------------------------------------------
|
|
522
1008
|
|
|
1009
|
+
/**
|
|
1010
|
+
* Delay before the startup backfill begins, so the host's HTTP server has
|
|
1011
|
+
* a clear window to answer the first wave of requests (web UI shell load,
|
|
1012
|
+
* terminal session-connect handshake) before the embedding worker starts
|
|
1013
|
+
* competing for CPU. With inference off the main thread the event loop
|
|
1014
|
+
* isn't blocked, but the worker still consumes a CPU core — a short delay
|
|
1015
|
+
* avoids contention during the first-connect burst.
|
|
1016
|
+
*/
|
|
1017
|
+
const STARTUP_BACKFILL_DELAY_MS = 2_000;
|
|
1018
|
+
|
|
523
1019
|
/**
|
|
524
1020
|
* Run all embedding backfills and log coverage stats.
|
|
525
1021
|
*
|
|
526
1022
|
* This is the canonical entry point that every host adapter (OpenCode, Pi,
|
|
527
1023
|
* future ACP) should call once during init. It:
|
|
528
|
-
* 1.
|
|
529
|
-
* 2.
|
|
530
|
-
* 3. Backfills
|
|
531
|
-
* 4.
|
|
1024
|
+
* 1. Waits a short grace period so first-connect HTTP requests can finish
|
|
1025
|
+
* 2. Detects config changes (provider swap) and clears stale embeddings
|
|
1026
|
+
* 3. Backfills knowledge entries missing embeddings
|
|
1027
|
+
* 4. Backfills non-archived distillations missing embeddings
|
|
1028
|
+
* 5. Logs a one-line coverage summary to stderr (always visible, not gated)
|
|
532
1029
|
*
|
|
533
1030
|
* Fire-and-forget: callers should `.catch()` — embedding failures must not
|
|
534
1031
|
* block plugin initialization.
|
|
@@ -536,6 +1033,34 @@ export function checkConfigChange(): boolean {
|
|
|
536
1033
|
export async function runStartupBackfill(): Promise<void> {
|
|
537
1034
|
if (!isAvailable()) return;
|
|
538
1035
|
|
|
1036
|
+
// Surface backlog up-front so a slow startup is self-explanatory in logs.
|
|
1037
|
+
// Counts use the same predicates the backfill loops use, so the two
|
|
1038
|
+
// numbers always match what we're about to do.
|
|
1039
|
+
const pendingKnowledge = (
|
|
1040
|
+
db()
|
|
1041
|
+
.query(
|
|
1042
|
+
"SELECT COUNT(*) as n FROM knowledge WHERE embedding IS NULL AND confidence > 0.2",
|
|
1043
|
+
)
|
|
1044
|
+
.get() as { n: number }
|
|
1045
|
+
).n;
|
|
1046
|
+
const pendingDistillations = (
|
|
1047
|
+
db()
|
|
1048
|
+
.query(
|
|
1049
|
+
"SELECT COUNT(*) as n FROM distillations WHERE embedding IS NULL AND archived = 0 AND observations != ''",
|
|
1050
|
+
)
|
|
1051
|
+
.get() as { n: number }
|
|
1052
|
+
).n;
|
|
1053
|
+
|
|
1054
|
+
if (pendingKnowledge + pendingDistillations > 0) {
|
|
1055
|
+
log.info(
|
|
1056
|
+
`embedding backfill scheduled: ${pendingKnowledge} knowledge + ` +
|
|
1057
|
+
`${pendingDistillations} distillations pending — starting in ` +
|
|
1058
|
+
`${STARTUP_BACKFILL_DELAY_MS / 1000}s, batches yield between calls ` +
|
|
1059
|
+
`(host stays responsive)`,
|
|
1060
|
+
);
|
|
1061
|
+
await new Promise<void>((r) => setTimeout(r, STARTUP_BACKFILL_DELAY_MS));
|
|
1062
|
+
}
|
|
1063
|
+
|
|
539
1064
|
const knowledgeEmbedded = await backfillEmbeddings();
|
|
540
1065
|
const distillationEmbedded = await backfillDistillationEmbeddings();
|
|
541
1066
|
|
|
@@ -581,6 +1106,16 @@ export async function runStartupBackfill(): Promise<void> {
|
|
|
581
1106
|
// Backfill — knowledge
|
|
582
1107
|
// ---------------------------------------------------------------------------
|
|
583
1108
|
|
|
1109
|
+
/**
|
|
1110
|
+
* Chunk size for backfill embed requests. Each chunk becomes a separate
|
|
1111
|
+
* message to the embedding worker. Keeping chunks small (32) gives the
|
|
1112
|
+
* worker's priority queue natural gaps to interleave high-priority recall
|
|
1113
|
+
* queries between backfill batches. The provider's `maxBatchSize` (256)
|
|
1114
|
+
* is the upper limit for any single embed call; this is intentionally
|
|
1115
|
+
* smaller for backfill-vs-live interleaving.
|
|
1116
|
+
*/
|
|
1117
|
+
const BACKFILL_CHUNK_SIZE = 32;
|
|
1118
|
+
|
|
584
1119
|
/**
|
|
585
1120
|
* Embed all knowledge entries that are missing embeddings.
|
|
586
1121
|
* Called by `runStartupBackfill()`.
|
|
@@ -601,11 +1136,10 @@ export async function backfillEmbeddings(): Promise<number> {
|
|
|
601
1136
|
|
|
602
1137
|
if (!rows.length) return 0;
|
|
603
1138
|
|
|
604
|
-
const batchSize = provider.maxBatchSize;
|
|
605
1139
|
let embedded = 0;
|
|
606
1140
|
|
|
607
|
-
for (let i = 0; i < rows.length; i +=
|
|
608
|
-
const batch = rows.slice(i, i +
|
|
1141
|
+
for (let i = 0; i < rows.length; i += BACKFILL_CHUNK_SIZE) {
|
|
1142
|
+
const batch = rows.slice(i, i + BACKFILL_CHUNK_SIZE);
|
|
609
1143
|
const texts = batch.map((r) => `${r.title}\n${r.content}`);
|
|
610
1144
|
|
|
611
1145
|
try {
|
|
@@ -621,6 +1155,7 @@ export async function backfillEmbeddings(): Promise<number> {
|
|
|
621
1155
|
} catch (err) {
|
|
622
1156
|
log.info(`embedding backfill batch ${i}-${i + batch.length} failed:`, err);
|
|
623
1157
|
}
|
|
1158
|
+
// No yieldToEventLoop() needed — embed() is truly async (worker thread).
|
|
624
1159
|
}
|
|
625
1160
|
|
|
626
1161
|
if (embedded > 0) {
|
|
@@ -650,11 +1185,16 @@ export async function backfillDistillationEmbeddings(): Promise<number> {
|
|
|
650
1185
|
|
|
651
1186
|
if (!rows.length) return 0;
|
|
652
1187
|
|
|
653
|
-
const batchSize = provider.maxBatchSize;
|
|
654
1188
|
let embedded = 0;
|
|
655
1189
|
|
|
656
|
-
|
|
657
|
-
|
|
1190
|
+
// Progress logging: heartbeat every PROGRESS_INTERVAL embedded so a long
|
|
1191
|
+
// backfill (e.g. 1000+ pending after a fastembed reinstall) doesn't look
|
|
1192
|
+
// like a silent hang. Without this, only the final tally was logged.
|
|
1193
|
+
const PROGRESS_INTERVAL = 256;
|
|
1194
|
+
let nextProgressAt = PROGRESS_INTERVAL;
|
|
1195
|
+
|
|
1196
|
+
for (let i = 0; i < rows.length; i += BACKFILL_CHUNK_SIZE) {
|
|
1197
|
+
const batch = rows.slice(i, i + BACKFILL_CHUNK_SIZE);
|
|
658
1198
|
const texts = batch.map((r) => r.observations);
|
|
659
1199
|
|
|
660
1200
|
try {
|
|
@@ -670,6 +1210,12 @@ export async function backfillDistillationEmbeddings(): Promise<number> {
|
|
|
670
1210
|
} catch (err) {
|
|
671
1211
|
log.info(`distillation embedding backfill batch ${i}-${i + batch.length} failed:`, err);
|
|
672
1212
|
}
|
|
1213
|
+
|
|
1214
|
+
if (embedded >= nextProgressAt) {
|
|
1215
|
+
log.info(`embedding distillations: ${embedded}/${rows.length}…`);
|
|
1216
|
+
nextProgressAt = embedded + PROGRESS_INTERVAL;
|
|
1217
|
+
}
|
|
1218
|
+
// No yieldToEventLoop() needed — embed() is truly async (worker thread).
|
|
673
1219
|
}
|
|
674
1220
|
|
|
675
1221
|
if (embedded > 0) {
|