@loreai/core 0.17.1 → 0.19.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/bun/agents-file.d.ts +4 -0
- package/dist/bun/agents-file.d.ts.map +1 -1
- package/dist/bun/config.d.ts +2 -0
- package/dist/bun/config.d.ts.map +1 -1
- package/dist/bun/curator.d.ts +45 -0
- package/dist/bun/curator.d.ts.map +1 -1
- package/dist/bun/data-dir.d.ts +18 -0
- package/dist/bun/data-dir.d.ts.map +1 -0
- package/dist/bun/db.d.ts +85 -0
- package/dist/bun/db.d.ts.map +1 -1
- package/dist/bun/distillation.d.ts +2 -13
- package/dist/bun/distillation.d.ts.map +1 -1
- package/dist/bun/embedding-vendor.d.ts +22 -38
- package/dist/bun/embedding-vendor.d.ts.map +1 -1
- package/dist/bun/embedding-worker-types.d.ts +17 -12
- package/dist/bun/embedding-worker-types.d.ts.map +1 -1
- package/dist/bun/embedding-worker.d.ts +9 -2
- package/dist/bun/embedding-worker.d.ts.map +1 -1
- package/dist/bun/embedding-worker.js +38864 -33
- package/dist/bun/embedding-worker.js.map +4 -4
- package/dist/bun/embedding.d.ts +35 -23
- package/dist/bun/embedding.d.ts.map +1 -1
- package/dist/bun/gradient.d.ts +17 -1
- package/dist/bun/gradient.d.ts.map +1 -1
- package/dist/bun/import/detect.d.ts +14 -0
- package/dist/bun/import/detect.d.ts.map +1 -0
- package/dist/bun/import/extract.d.ts +43 -0
- package/dist/bun/import/extract.d.ts.map +1 -0
- package/dist/bun/import/history.d.ts +40 -0
- package/dist/bun/import/history.d.ts.map +1 -0
- package/dist/bun/import/index.d.ts +17 -0
- package/dist/bun/import/index.d.ts.map +1 -0
- package/dist/bun/import/providers/aider.d.ts +2 -0
- package/dist/bun/import/providers/aider.d.ts.map +1 -0
- package/dist/bun/import/providers/claude-code.d.ts +2 -0
- package/dist/bun/import/providers/claude-code.d.ts.map +1 -0
- package/dist/bun/import/providers/cline.d.ts +2 -0
- package/dist/bun/import/providers/cline.d.ts.map +1 -0
- package/dist/bun/import/providers/codex.d.ts +2 -0
- package/dist/bun/import/providers/codex.d.ts.map +1 -0
- package/dist/bun/import/providers/continue.d.ts +2 -0
- package/dist/bun/import/providers/continue.d.ts.map +1 -0
- package/dist/bun/import/providers/index.d.ts +19 -0
- package/dist/bun/import/providers/index.d.ts.map +1 -0
- package/dist/bun/import/providers/opencode.d.ts +2 -0
- package/dist/bun/import/providers/opencode.d.ts.map +1 -0
- package/dist/bun/import/providers/pi.d.ts +2 -0
- package/dist/bun/import/providers/pi.d.ts.map +1 -0
- package/dist/bun/import/types.d.ts +82 -0
- package/dist/bun/import/types.d.ts.map +1 -0
- package/dist/bun/index.d.ts +5 -2
- package/dist/bun/index.d.ts.map +1 -1
- package/dist/bun/index.js +3150 -439
- package/dist/bun/index.js.map +4 -4
- package/dist/bun/instruction-detect.d.ts +66 -0
- package/dist/bun/instruction-detect.d.ts.map +1 -0
- package/dist/bun/log.d.ts +9 -0
- package/dist/bun/log.d.ts.map +1 -1
- package/dist/bun/ltm.d.ts +139 -5
- package/dist/bun/ltm.d.ts.map +1 -1
- package/dist/bun/pattern-extract.d.ts +7 -0
- package/dist/bun/pattern-extract.d.ts.map +1 -1
- package/dist/bun/prompt.d.ts +1 -1
- package/dist/bun/prompt.d.ts.map +1 -1
- package/dist/bun/recall.d.ts.map +1 -1
- package/dist/bun/search.d.ts +5 -3
- package/dist/bun/search.d.ts.map +1 -1
- package/dist/bun/session-limiter.d.ts +26 -0
- package/dist/bun/session-limiter.d.ts.map +1 -0
- package/dist/bun/temporal.d.ts +2 -0
- package/dist/bun/temporal.d.ts.map +1 -1
- package/dist/bun/types.d.ts +1 -1
- package/dist/node/agents-file.d.ts +4 -0
- package/dist/node/agents-file.d.ts.map +1 -1
- package/dist/node/config.d.ts +2 -0
- package/dist/node/config.d.ts.map +1 -1
- package/dist/node/curator.d.ts +45 -0
- package/dist/node/curator.d.ts.map +1 -1
- package/dist/node/data-dir.d.ts +18 -0
- package/dist/node/data-dir.d.ts.map +1 -0
- package/dist/node/db.d.ts +85 -0
- package/dist/node/db.d.ts.map +1 -1
- package/dist/node/distillation.d.ts +2 -13
- package/dist/node/distillation.d.ts.map +1 -1
- package/dist/node/embedding-vendor.d.ts +22 -38
- package/dist/node/embedding-vendor.d.ts.map +1 -1
- package/dist/node/embedding-worker-types.d.ts +17 -12
- package/dist/node/embedding-worker-types.d.ts.map +1 -1
- package/dist/node/embedding-worker.d.ts +9 -2
- package/dist/node/embedding-worker.d.ts.map +1 -1
- package/dist/node/embedding-worker.js +38864 -33
- package/dist/node/embedding-worker.js.map +4 -4
- package/dist/node/embedding.d.ts +35 -23
- package/dist/node/embedding.d.ts.map +1 -1
- package/dist/node/gradient.d.ts +17 -1
- package/dist/node/gradient.d.ts.map +1 -1
- package/dist/node/import/detect.d.ts +14 -0
- package/dist/node/import/detect.d.ts.map +1 -0
- package/dist/node/import/extract.d.ts +43 -0
- package/dist/node/import/extract.d.ts.map +1 -0
- package/dist/node/import/history.d.ts +40 -0
- package/dist/node/import/history.d.ts.map +1 -0
- package/dist/node/import/index.d.ts +17 -0
- package/dist/node/import/index.d.ts.map +1 -0
- package/dist/node/import/providers/aider.d.ts +2 -0
- package/dist/node/import/providers/aider.d.ts.map +1 -0
- package/dist/node/import/providers/claude-code.d.ts +2 -0
- package/dist/node/import/providers/claude-code.d.ts.map +1 -0
- package/dist/node/import/providers/cline.d.ts +2 -0
- package/dist/node/import/providers/cline.d.ts.map +1 -0
- package/dist/node/import/providers/codex.d.ts +2 -0
- package/dist/node/import/providers/codex.d.ts.map +1 -0
- package/dist/node/import/providers/continue.d.ts +2 -0
- package/dist/node/import/providers/continue.d.ts.map +1 -0
- package/dist/node/import/providers/index.d.ts +19 -0
- package/dist/node/import/providers/index.d.ts.map +1 -0
- package/dist/node/import/providers/opencode.d.ts +2 -0
- package/dist/node/import/providers/opencode.d.ts.map +1 -0
- package/dist/node/import/providers/pi.d.ts +2 -0
- package/dist/node/import/providers/pi.d.ts.map +1 -0
- package/dist/node/import/types.d.ts +82 -0
- package/dist/node/import/types.d.ts.map +1 -0
- package/dist/node/index.d.ts +5 -2
- package/dist/node/index.d.ts.map +1 -1
- package/dist/node/index.js +3150 -439
- package/dist/node/index.js.map +4 -4
- package/dist/node/instruction-detect.d.ts +66 -0
- package/dist/node/instruction-detect.d.ts.map +1 -0
- package/dist/node/log.d.ts +9 -0
- package/dist/node/log.d.ts.map +1 -1
- package/dist/node/ltm.d.ts +139 -5
- package/dist/node/ltm.d.ts.map +1 -1
- package/dist/node/pattern-extract.d.ts +7 -0
- package/dist/node/pattern-extract.d.ts.map +1 -1
- package/dist/node/prompt.d.ts +1 -1
- package/dist/node/prompt.d.ts.map +1 -1
- package/dist/node/recall.d.ts.map +1 -1
- package/dist/node/search.d.ts +5 -3
- package/dist/node/search.d.ts.map +1 -1
- package/dist/node/session-limiter.d.ts +26 -0
- package/dist/node/session-limiter.d.ts.map +1 -0
- package/dist/node/temporal.d.ts +2 -0
- package/dist/node/temporal.d.ts.map +1 -1
- package/dist/node/types.d.ts +1 -1
- package/dist/types/agents-file.d.ts +4 -0
- package/dist/types/agents-file.d.ts.map +1 -1
- package/dist/types/config.d.ts +2 -0
- package/dist/types/config.d.ts.map +1 -1
- package/dist/types/curator.d.ts +45 -0
- package/dist/types/curator.d.ts.map +1 -1
- package/dist/types/data-dir.d.ts +18 -0
- package/dist/types/data-dir.d.ts.map +1 -0
- package/dist/types/db.d.ts +85 -0
- package/dist/types/db.d.ts.map +1 -1
- package/dist/types/distillation.d.ts +2 -13
- package/dist/types/distillation.d.ts.map +1 -1
- package/dist/types/embedding-vendor.d.ts +22 -38
- package/dist/types/embedding-vendor.d.ts.map +1 -1
- package/dist/types/embedding-worker-types.d.ts +17 -12
- package/dist/types/embedding-worker-types.d.ts.map +1 -1
- package/dist/types/embedding-worker.d.ts +9 -2
- package/dist/types/embedding-worker.d.ts.map +1 -1
- package/dist/types/embedding.d.ts +35 -23
- package/dist/types/embedding.d.ts.map +1 -1
- package/dist/types/gradient.d.ts +17 -1
- package/dist/types/gradient.d.ts.map +1 -1
- package/dist/types/import/detect.d.ts +14 -0
- package/dist/types/import/detect.d.ts.map +1 -0
- package/dist/types/import/extract.d.ts +43 -0
- package/dist/types/import/extract.d.ts.map +1 -0
- package/dist/types/import/history.d.ts +40 -0
- package/dist/types/import/history.d.ts.map +1 -0
- package/dist/types/import/index.d.ts +17 -0
- package/dist/types/import/index.d.ts.map +1 -0
- package/dist/types/import/providers/aider.d.ts +2 -0
- package/dist/types/import/providers/aider.d.ts.map +1 -0
- package/dist/types/import/providers/claude-code.d.ts +2 -0
- package/dist/types/import/providers/claude-code.d.ts.map +1 -0
- package/dist/types/import/providers/cline.d.ts +2 -0
- package/dist/types/import/providers/cline.d.ts.map +1 -0
- package/dist/types/import/providers/codex.d.ts +2 -0
- package/dist/types/import/providers/codex.d.ts.map +1 -0
- package/dist/types/import/providers/continue.d.ts +2 -0
- package/dist/types/import/providers/continue.d.ts.map +1 -0
- package/dist/types/import/providers/index.d.ts +19 -0
- package/dist/types/import/providers/index.d.ts.map +1 -0
- package/dist/types/import/providers/opencode.d.ts +2 -0
- package/dist/types/import/providers/opencode.d.ts.map +1 -0
- package/dist/types/import/providers/pi.d.ts +2 -0
- package/dist/types/import/providers/pi.d.ts.map +1 -0
- package/dist/types/import/types.d.ts +82 -0
- package/dist/types/import/types.d.ts.map +1 -0
- package/dist/types/index.d.ts +5 -2
- package/dist/types/index.d.ts.map +1 -1
- package/dist/types/instruction-detect.d.ts +66 -0
- package/dist/types/instruction-detect.d.ts.map +1 -0
- package/dist/types/log.d.ts +9 -0
- package/dist/types/log.d.ts.map +1 -1
- package/dist/types/ltm.d.ts +139 -5
- package/dist/types/ltm.d.ts.map +1 -1
- package/dist/types/pattern-extract.d.ts +7 -0
- package/dist/types/pattern-extract.d.ts.map +1 -1
- package/dist/types/prompt.d.ts +1 -1
- package/dist/types/prompt.d.ts.map +1 -1
- package/dist/types/recall.d.ts.map +1 -1
- package/dist/types/search.d.ts +5 -3
- package/dist/types/search.d.ts.map +1 -1
- package/dist/types/session-limiter.d.ts +26 -0
- package/dist/types/session-limiter.d.ts.map +1 -0
- package/dist/types/temporal.d.ts +2 -0
- package/dist/types/temporal.d.ts.map +1 -1
- package/dist/types/types.d.ts +1 -1
- package/package.json +3 -4
- package/src/agents-file.ts +41 -13
- package/src/config.ts +31 -18
- package/src/curator.ts +163 -75
- package/src/data-dir.ts +76 -0
- package/src/db.ts +457 -11
- package/src/distillation.ts +65 -16
- package/src/embedding-vendor.ts +23 -40
- package/src/embedding-worker-types.ts +19 -11
- package/src/embedding-worker.ts +111 -47
- package/src/embedding.ts +224 -174
- package/src/gradient.ts +192 -75
- package/src/import/detect.ts +37 -0
- package/src/import/extract.ts +137 -0
- package/src/import/history.ts +99 -0
- package/src/import/index.ts +45 -0
- package/src/import/providers/aider.ts +207 -0
- package/src/import/providers/claude-code.ts +339 -0
- package/src/import/providers/cline.ts +324 -0
- package/src/import/providers/codex.ts +369 -0
- package/src/import/providers/continue.ts +304 -0
- package/src/import/providers/index.ts +32 -0
- package/src/import/providers/opencode.ts +272 -0
- package/src/import/providers/pi.ts +332 -0
- package/src/import/types.ts +91 -0
- package/src/index.ts +13 -0
- package/src/instruction-detect.ts +275 -0
- package/src/log.ts +91 -3
- package/src/ltm.ts +789 -41
- package/src/pattern-extract.ts +41 -0
- package/src/prompt.ts +7 -1
- package/src/recall.ts +43 -5
- package/src/search.ts +7 -5
- package/src/session-limiter.ts +47 -0
- package/src/temporal.ts +18 -6
- package/src/types.ts +1 -1
package/src/embedding.ts
CHANGED
|
@@ -1,17 +1,22 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Embedding integration for vector search.
|
|
3
3
|
*
|
|
4
|
-
* Supports multiple embedding providers
|
|
5
|
-
*
|
|
6
|
-
*
|
|
7
|
-
*
|
|
8
|
-
*
|
|
4
|
+
* Supports multiple embedding providers behind a common interface:
|
|
5
|
+
* - "local" (default): @huggingface/transformers + nomic-embed-text-v1.5
|
|
6
|
+
* (768 dims, Matryoshka-capable). Runs ONNX inference in a worker thread.
|
|
7
|
+
* - "voyage": Voyage AI API (voyage-code-3, 1024 dims)
|
|
8
|
+
* - "openai": OpenAI API (text-embedding-3-small, 1536 dims)
|
|
9
|
+
*
|
|
10
|
+
* Provides embedding generation, pure-JS cosine similarity, and vector search
|
|
11
|
+
* over the knowledge and distillation tables. All operations are gated behind
|
|
12
|
+
* `search.embeddings.enabled` config + the provider's API key env var — falls
|
|
13
|
+
* back silently to FTS-only when unavailable.
|
|
9
14
|
*/
|
|
10
15
|
|
|
11
16
|
import { db } from "./db";
|
|
12
17
|
import { config } from "./config";
|
|
13
18
|
import * as log from "./log";
|
|
14
|
-
import {
|
|
19
|
+
import { vendorModelInfo } from "./embedding-vendor";
|
|
15
20
|
import type {
|
|
16
21
|
WorkerInbound,
|
|
17
22
|
WorkerOutbound,
|
|
@@ -139,152 +144,76 @@ class OpenAIProvider implements EmbeddingProvider {
|
|
|
139
144
|
}
|
|
140
145
|
|
|
141
146
|
// ---------------------------------------------------------------------------
|
|
142
|
-
// Local provider (
|
|
147
|
+
// Local provider (@huggingface/transformers + nomic-embed-text-v1.5)
|
|
143
148
|
// ---------------------------------------------------------------------------
|
|
144
149
|
|
|
145
150
|
/**
|
|
146
|
-
* Thrown when `LocalProvider`
|
|
147
|
-
*
|
|
148
|
-
*
|
|
149
|
-
* the package install still succeeds but local embeddings are disabled.
|
|
150
|
-
* Callers in `recall.ts` / `ltm.ts` / `distillation.ts` already gate on
|
|
151
|
-
* `isAvailable()`, which flips to `false` after this error fires once.
|
|
151
|
+
* Thrown when `LocalProvider` cannot initialize (e.g. ONNX runtime fails
|
|
152
|
+
* to load). Callers in `recall.ts` / `ltm.ts` / `distillation.ts` gate
|
|
153
|
+
* on `isAvailable()`, which flips to `false` after this error fires once.
|
|
152
154
|
*/
|
|
153
155
|
export class LocalProviderUnavailableError extends Error {
|
|
154
156
|
constructor(cause?: unknown) {
|
|
155
157
|
super(
|
|
156
|
-
"Local embedding provider unavailable: '
|
|
158
|
+
"Local embedding provider unavailable: '@huggingface/transformers' failed to initialize. " +
|
|
157
159
|
"Configure search.embeddings.provider to 'voyage' or 'openai', or " +
|
|
158
|
-
"
|
|
160
|
+
"set VOYAGE_API_KEY/OPENAI_API_KEY for automatic remote fallback.",
|
|
159
161
|
);
|
|
160
162
|
this.name = "LocalProviderUnavailableError";
|
|
161
163
|
if (cause !== undefined) (this as Error & { cause?: unknown }).cause = cause;
|
|
162
164
|
}
|
|
163
165
|
}
|
|
164
166
|
|
|
165
|
-
/**
|
|
166
|
-
*
|
|
167
|
-
|
|
168
|
-
let
|
|
169
|
-
let
|
|
170
|
-
let fastembedLogged: boolean = false;
|
|
171
|
-
|
|
172
|
-
/** For tests: reset the fastembed probe cache. */
|
|
173
|
-
export function _resetFastembedProbe(): void {
|
|
174
|
-
fastembedModule = null;
|
|
175
|
-
fastembedProbed = false;
|
|
176
|
-
fastembedAvailable = false;
|
|
177
|
-
fastembedLogged = false;
|
|
178
|
-
}
|
|
167
|
+
/** Tracks whether the local provider has been probed and found unavailable.
|
|
168
|
+
* Set to true after the first worker init failure so subsequent calls
|
|
169
|
+
* to `isAvailable()` short-circuit. */
|
|
170
|
+
let localProviderKnownBroken = false;
|
|
171
|
+
let localProviderErrorLogged = false;
|
|
179
172
|
|
|
180
|
-
/** For tests:
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
fastembedModule = null;
|
|
185
|
-
fastembedProbed = true;
|
|
186
|
-
fastembedAvailable = false;
|
|
187
|
-
fastembedLogged = true; // suppress the info log in tests
|
|
188
|
-
}
|
|
189
|
-
|
|
190
|
-
/**
|
|
191
|
-
* Probe `fastembed` once. Returns the module on success, `null` on failure.
|
|
192
|
-
* Logs an info-level note exactly once on the first failure so users know
|
|
193
|
-
* how to recover (switch provider, fix the install, or rely on the
|
|
194
|
-
* VOYAGE/OPENAI auto-fallback in `embed()`).
|
|
195
|
-
*
|
|
196
|
-
* In binary mode `import("fastembed")` resolves to the bundle Bun packed
|
|
197
|
-
* at compile time (the binary's wrapper has already preloaded the
|
|
198
|
-
* side-load `libonnxruntime` lib so the addon's dlopen succeeds). In
|
|
199
|
-
* npm mode it goes through standard module resolution and may fail if
|
|
200
|
-
* the optional postinstall didn't run.
|
|
201
|
-
*/
|
|
202
|
-
async function tryLoadFastembed(): Promise<typeof import("fastembed") | null> {
|
|
203
|
-
if (fastembedProbed) return fastembedAvailable ? fastembedModule : null;
|
|
204
|
-
try {
|
|
205
|
-
const mod = await loadFastembedModule();
|
|
206
|
-
// Re-check after the async boundary: another caller (e.g. a test helper
|
|
207
|
-
// like _markFastembedUnavailable) may have set the probe while we were
|
|
208
|
-
// awaiting. Their decision takes priority — don't overwrite it.
|
|
209
|
-
if (fastembedProbed) return fastembedAvailable ? fastembedModule : null;
|
|
210
|
-
fastembedModule = mod;
|
|
211
|
-
fastembedAvailable = true;
|
|
212
|
-
} catch (err) {
|
|
213
|
-
if (fastembedProbed) return fastembedAvailable ? fastembedModule : null;
|
|
214
|
-
fastembedAvailable = false;
|
|
215
|
-
if (!fastembedLogged) {
|
|
216
|
-
fastembedLogged = true;
|
|
217
|
-
const msg = err instanceof Error ? err.message : String(err);
|
|
218
|
-
// Binary mode: a load failure here is a real bug (everything was
|
|
219
|
-
// bundled at build time). npm mode: the optional dep didn't
|
|
220
|
-
// install — point the user at the standard recovery options.
|
|
221
|
-
const remediation = isVendoredBinary()
|
|
222
|
-
? "this is a bug in the lore binary; please file an issue. " +
|
|
223
|
-
"Set VOYAGE_API_KEY/OPENAI_API_KEY for automatic remote fallback in the meantime"
|
|
224
|
-
: "set search.embeddings.provider to 'voyage' or 'openai', " +
|
|
225
|
-
"set VOYAGE_API_KEY/OPENAI_API_KEY for automatic remote fallback, " +
|
|
226
|
-
"or reinstall fastembed with ONNXRUNTIME_NODE_INSTALL_CUDA=skip";
|
|
227
|
-
log.info(
|
|
228
|
-
`local embedding provider unavailable (fastembed not installed: ${msg}) — ${remediation}`,
|
|
229
|
-
);
|
|
230
|
-
}
|
|
231
|
-
} finally {
|
|
232
|
-
fastembedProbed = true;
|
|
233
|
-
}
|
|
234
|
-
return fastembedAvailable ? fastembedModule : null;
|
|
173
|
+
/** For tests: reset the local provider probe state. */
|
|
174
|
+
export function _resetLocalProviderProbe(): void {
|
|
175
|
+
localProviderKnownBroken = false;
|
|
176
|
+
localProviderErrorLogged = false;
|
|
235
177
|
}
|
|
236
178
|
|
|
237
|
-
/**
|
|
238
|
-
*
|
|
239
|
-
*
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
* per-target staging `node_modules/` at build time and bundles it
|
|
244
|
-
* (plus its transitive deps and `.node` addons) into the binary. The
|
|
245
|
-
* side-load `libonnxruntime.so.1` / `.dylib` / `.dll` is preloaded
|
|
246
|
-
* by the binary's wrapper before this import evaluates, so the
|
|
247
|
-
* bundled `onnxruntime_binding.node`'s dlopen finds the cached
|
|
248
|
-
* handle instead of failing with "shared object not found".
|
|
249
|
-
*
|
|
250
|
-
* - npm mode: standard Node/Bun resolution — works for `@loreai/core`
|
|
251
|
-
* consumers whose `npm install` cleanly installed the optional dep.
|
|
252
|
-
* If the postinstall failed (CUDA-13 hosts), the import throws here
|
|
253
|
-
* and the caller logs + falls back to a remote provider.
|
|
254
|
-
*/
|
|
255
|
-
async function loadFastembedModule(): Promise<typeof import("fastembed")> {
|
|
256
|
-
return (await import("fastembed")) as typeof import("fastembed");
|
|
179
|
+
/** For tests: simulate the local provider being unavailable, without
|
|
180
|
+
* actually spawning a worker. After this call, `isAvailable()` returns
|
|
181
|
+
* false for the local provider. */
|
|
182
|
+
export function _markLocalProviderUnavailable(): void {
|
|
183
|
+
localProviderKnownBroken = true;
|
|
184
|
+
localProviderErrorLogged = true; // suppress the info log in tests
|
|
257
185
|
}
|
|
258
186
|
|
|
259
|
-
/** True iff the
|
|
260
|
-
function
|
|
261
|
-
return
|
|
187
|
+
/** True iff the local provider has been probed and found broken. */
|
|
188
|
+
function localProviderKnownUnavailable(): boolean {
|
|
189
|
+
return localProviderKnownBroken;
|
|
262
190
|
}
|
|
263
191
|
|
|
264
192
|
/**
|
|
265
|
-
* Local embedding provider using
|
|
193
|
+
* Local embedding provider using @huggingface/transformers with
|
|
194
|
+
* nomic-embed-text-v1.5 by default.
|
|
266
195
|
*
|
|
267
196
|
* No API key required — runs entirely on-device via ONNX Runtime.
|
|
268
|
-
* Model files are downloaded on first use (~
|
|
269
|
-
*
|
|
197
|
+
* Model files are downloaded on first use (~137MB for INT8 quantized)
|
|
198
|
+
* and cached locally. Subsequent inits load from cache.
|
|
270
199
|
*
|
|
271
200
|
* ONNX inference runs in a dedicated `node:worker_threads` Worker so the
|
|
272
201
|
* main thread's event loop stays free. This class is a thin RPC client —
|
|
273
202
|
* it posts `{ texts, inputType }` to the worker and awaits a reply.
|
|
274
|
-
* The worker owns the
|
|
203
|
+
* The worker owns the transformers.js pipeline and processes requests
|
|
275
204
|
* sequentially from a priority queue (recall queries jump ahead of
|
|
276
205
|
* backfill batches).
|
|
277
206
|
*
|
|
278
|
-
*
|
|
279
|
-
*
|
|
280
|
-
*
|
|
281
|
-
* (its native onnxruntime-node may fail to build, e.g. on CUDA 13).
|
|
207
|
+
* Task instruction prefixes are prepended automatically:
|
|
208
|
+
* - "document" → "search_document: <text>"
|
|
209
|
+
* - "query" → "search_query: <text>"
|
|
282
210
|
*/
|
|
283
211
|
class LocalProvider implements EmbeddingProvider {
|
|
284
212
|
// With inference off the main thread, large batches no longer block
|
|
285
213
|
// the event loop. 256 maximises throughput per round-trip to the
|
|
286
|
-
// worker. Backfill callers use
|
|
287
|
-
// the worker's priority queue breathing room
|
|
214
|
+
// worker. Backfill callers use token-budget-based batching (see
|
|
215
|
+
// nextBatch) to give the worker's priority queue breathing room
|
|
216
|
+
// for recall queries and prevent OOM on long texts.
|
|
288
217
|
readonly maxBatchSize = 256;
|
|
289
218
|
|
|
290
219
|
private worker: import("node:worker_threads").Worker | null = null;
|
|
@@ -296,16 +225,16 @@ class LocalProvider implements EmbeddingProvider {
|
|
|
296
225
|
>();
|
|
297
226
|
private nextRequestId = 0;
|
|
298
227
|
private initPromise: Promise<void> | null = null;
|
|
299
|
-
private
|
|
228
|
+
private modelId: string;
|
|
229
|
+
private dimensions: number;
|
|
300
230
|
|
|
301
|
-
constructor(
|
|
302
|
-
this.
|
|
231
|
+
constructor(modelId: string, dimensions: number) {
|
|
232
|
+
this.modelId = modelId;
|
|
233
|
+
this.dimensions = dimensions;
|
|
303
234
|
}
|
|
304
235
|
|
|
305
236
|
/**
|
|
306
|
-
* Ensure the worker thread is running.
|
|
307
|
-
* thread first (fast, cached) as a fast-fail gate — the worker is only
|
|
308
|
-
* spawned if the module is known-loadable. Worker startup failure is
|
|
237
|
+
* Ensure the worker thread is running. Worker startup failure is
|
|
309
238
|
* surfaced as `LocalProviderUnavailableError` to trigger the existing
|
|
310
239
|
* auto-fallback to remote providers.
|
|
311
240
|
*/
|
|
@@ -315,10 +244,8 @@ class LocalProvider implements EmbeddingProvider {
|
|
|
315
244
|
if (this.initPromise) return this.initPromise;
|
|
316
245
|
|
|
317
246
|
this.initPromise = (async () => {
|
|
318
|
-
// Fast-fail
|
|
319
|
-
|
|
320
|
-
const fastembed = await tryLoadFastembed();
|
|
321
|
-
if (!fastembed) throw new LocalProviderUnavailableError();
|
|
247
|
+
// Fast-fail if a previous attempt already marked local broken.
|
|
248
|
+
if (localProviderKnownBroken) throw new LocalProviderUnavailableError();
|
|
322
249
|
|
|
323
250
|
const { Worker } = await import("node:worker_threads");
|
|
324
251
|
|
|
@@ -334,17 +261,10 @@ class LocalProvider implements EmbeddingProvider {
|
|
|
334
261
|
// In dev (Bun running .ts directly): embedding-worker.ts
|
|
335
262
|
// In dist (esbuild bundle): embedding-worker.js
|
|
336
263
|
const vendorWorkerUrl = (globalThis as Record<string, unknown>).__LORE_VENDOR_WORKER_URL__ as string | undefined;
|
|
337
|
-
// On Windows, new Worker() with a file:// URL pointing to $bunfs
|
|
338
|
-
// fails with ENOENT. Pass the raw path instead (B:\~BUN\root\...).
|
|
339
|
-
// On macOS/Linux the file:// URL works fine with $bunfs paths.
|
|
340
264
|
let workerUrl: string | URL;
|
|
341
265
|
if (vendorWorkerUrl) {
|
|
342
266
|
if (process.platform === "win32") {
|
|
343
|
-
// On Windows, new Worker() with a file:// URL pointing to $bunfs
|
|
344
|
-
// fails with ENOENT (Bun bug). Extract the raw path instead.
|
|
345
|
-
// URL.pathname keeps %7E encoded; decodeURIComponent restores ~.
|
|
346
267
|
workerUrl = decodeURIComponent(new URL(vendorWorkerUrl).pathname);
|
|
347
|
-
// URL.pathname on Windows: /B:/~BUN/root/wrapper.js → strip leading /
|
|
348
268
|
if (/^\/[A-Za-z]:/.test(workerUrl)) {
|
|
349
269
|
workerUrl = workerUrl.slice(1);
|
|
350
270
|
}
|
|
@@ -352,14 +272,29 @@ class LocalProvider implements EmbeddingProvider {
|
|
|
352
272
|
workerUrl = vendorWorkerUrl;
|
|
353
273
|
}
|
|
354
274
|
} else {
|
|
355
|
-
|
|
275
|
+
// In CJS bundles (gateway npm package), esbuild shims import.meta as
|
|
276
|
+
// an empty object {}, so import.meta.url is undefined. Fall back to
|
|
277
|
+
// __filename, which the CommonJS module wrapper provides in CJS output.
|
|
278
|
+
const selfUrl = typeof import.meta.url === "string" ? import.meta.url : undefined;
|
|
279
|
+
if (selfUrl) {
|
|
280
|
+
workerUrl = new URL(
|
|
281
|
+
`./embedding-worker${selfUrl.endsWith(".ts") ? ".ts" : ".js"}`,
|
|
282
|
+
selfUrl,
|
|
283
|
+
);
|
|
284
|
+
} else {
|
|
285
|
+
// CJS fallback: __filename is provided by the CommonJS module wrapper in CJS output.
|
|
286
|
+
// The embedding-worker.cjs is built alongside the main bundle.
|
|
287
|
+
const { pathToFileURL } = await import("node:url");
|
|
288
|
+
workerUrl = new URL("./embedding-worker.cjs", pathToFileURL(__filename));
|
|
289
|
+
}
|
|
356
290
|
}
|
|
357
291
|
|
|
358
292
|
const vendor = vendorModelInfo();
|
|
359
293
|
const workerInitData: WorkerInitData = {
|
|
360
|
-
|
|
294
|
+
modelId: this.modelId,
|
|
295
|
+
dimensions: this.dimensions,
|
|
361
296
|
vendorModel: vendor
|
|
362
|
-
? {
|
|
297
|
+
? { localModelPath: vendor.localModelPath }
|
|
363
298
|
: null,
|
|
364
299
|
};
|
|
365
300
|
|
|
@@ -394,6 +329,14 @@ class LocalProvider implements EmbeddingProvider {
|
|
|
394
329
|
// LocalProviderUnavailableError on all pending + future requests.
|
|
395
330
|
this.workerInitError = msg.error;
|
|
396
331
|
this.workerReady = false;
|
|
332
|
+
localProviderKnownBroken = true;
|
|
333
|
+
if (!localProviderErrorLogged) {
|
|
334
|
+
localProviderErrorLogged = true;
|
|
335
|
+
log.info(
|
|
336
|
+
`local embedding provider failed to init: ${msg.error}. ` +
|
|
337
|
+
`Set VOYAGE_API_KEY/OPENAI_API_KEY for automatic remote fallback.`,
|
|
338
|
+
);
|
|
339
|
+
}
|
|
397
340
|
for (const [, p] of this.pendingRequests) {
|
|
398
341
|
p.reject(new LocalProviderUnavailableError(msg.error));
|
|
399
342
|
}
|
|
@@ -453,6 +396,10 @@ class LocalProvider implements EmbeddingProvider {
|
|
|
453
396
|
async embed(texts: string[], inputType: "document" | "query"): Promise<Float32Array[]> {
|
|
454
397
|
await this.ensureWorker();
|
|
455
398
|
|
|
399
|
+
// Prepend Nomic task instruction prefix.
|
|
400
|
+
const prefix = inputType === "document" ? "search_document: " : "search_query: ";
|
|
401
|
+
const prefixed = texts.map((t) => prefix + t);
|
|
402
|
+
|
|
456
403
|
const id = this.nextRequestId++;
|
|
457
404
|
// Recall queries (single query-type texts) get high priority so they
|
|
458
405
|
// jump ahead of any queued backfill batches in the worker.
|
|
@@ -464,7 +411,7 @@ class LocalProvider implements EmbeddingProvider {
|
|
|
464
411
|
this.worker!.postMessage({
|
|
465
412
|
type: "embed",
|
|
466
413
|
id,
|
|
467
|
-
texts,
|
|
414
|
+
texts: prefixed,
|
|
468
415
|
inputType,
|
|
469
416
|
priority,
|
|
470
417
|
} satisfies WorkerInbound);
|
|
@@ -473,8 +420,6 @@ class LocalProvider implements EmbeddingProvider {
|
|
|
473
420
|
|
|
474
421
|
/** Shut down the worker thread. Called by `resetProvider()` on config change.
|
|
475
422
|
* Sends a shutdown message so the worker calls `process.exit(0)` internally.
|
|
476
|
-
* We avoid `worker.terminate()` because Bun's forced termination triggers a
|
|
477
|
-
* NAPI fatal error when tearing down onnxruntime's native bindings.
|
|
478
423
|
*
|
|
479
424
|
* Returns a promise that resolves once the worker has fully exited. Callers
|
|
480
425
|
* that need a clean teardown (tests, config change) should await the result.
|
|
@@ -507,7 +452,7 @@ class LocalProvider implements EmbeddingProvider {
|
|
|
507
452
|
|
|
508
453
|
/** Default models per provider — used when config doesn't override. */
|
|
509
454
|
const PROVIDER_DEFAULTS: Record<string, { model: string; dimensions: number }> = {
|
|
510
|
-
local: { model: "
|
|
455
|
+
local: { model: "nomic-ai/nomic-embed-text-v1.5", dimensions: 768 },
|
|
511
456
|
voyage: { model: "voyage-code-3", dimensions: 1024 },
|
|
512
457
|
openai: { model: "text-embedding-3-small", dimensions: 1536 },
|
|
513
458
|
};
|
|
@@ -539,12 +484,11 @@ function getProvider(): EmbeddingProvider | null {
|
|
|
539
484
|
|
|
540
485
|
switch (providerName) {
|
|
541
486
|
case "local": {
|
|
542
|
-
//
|
|
543
|
-
//
|
|
544
|
-
//
|
|
545
|
-
//
|
|
546
|
-
|
|
547
|
-
cachedProvider = new LocalProvider(model);
|
|
487
|
+
// Construct the provider optimistically — the ONNX model init
|
|
488
|
+
// happens lazily in the worker thread on first `embed()` call.
|
|
489
|
+
// If it fails, `LocalProviderUnavailableError` triggers the
|
|
490
|
+
// auto-fallback to a remote provider or FTS-only search.
|
|
491
|
+
cachedProvider = new LocalProvider(model, cfg.dimensions);
|
|
548
492
|
break;
|
|
549
493
|
}
|
|
550
494
|
case "voyage": {
|
|
@@ -619,7 +563,7 @@ export function _saveAndClearProvider(): unknown {
|
|
|
619
563
|
/** Restore a provider previously saved by `_saveAndClearProvider()`. Any
|
|
620
564
|
* provider created between save and restore is discarded (callers must
|
|
621
565
|
* ensure it's not a LocalProvider with a live worker — those suites only
|
|
622
|
-
* use `
|
|
566
|
+
* use `_markLocalProviderUnavailable()` so no worker is spawned). */
|
|
623
567
|
export function _restoreProvider(token: unknown): void {
|
|
624
568
|
const saved = token as { provider: EmbeddingProvider | null | undefined; remoteFallbackLogged: boolean };
|
|
625
569
|
cachedProvider = saved.provider;
|
|
@@ -669,13 +613,13 @@ export function pickRemoteFallback(): {
|
|
|
669
613
|
* Active when the configured provider's API key is set, unless explicitly
|
|
670
614
|
* disabled via `search.embeddings.enabled: false` in .lore.json.
|
|
671
615
|
*
|
|
672
|
-
* For the `local` provider, also returns false once
|
|
673
|
-
*
|
|
674
|
-
*
|
|
616
|
+
* For the `local` provider, also returns false once the worker has reported
|
|
617
|
+
* an init failure — callers (recall, ltm, distillation) use this gate to
|
|
618
|
+
* skip embedding work and fall back to FTS-only search. */
|
|
675
619
|
export function isAvailable(): boolean {
|
|
676
620
|
const provider = getProvider();
|
|
677
621
|
if (!provider) return false;
|
|
678
|
-
if (provider instanceof LocalProvider &&
|
|
622
|
+
if (provider instanceof LocalProvider && localProviderKnownUnavailable()) return false;
|
|
679
623
|
return true;
|
|
680
624
|
}
|
|
681
625
|
|
|
@@ -686,7 +630,7 @@ export function isAvailable(): boolean {
|
|
|
686
630
|
/**
|
|
687
631
|
* Generate embeddings for the given texts using the configured provider.
|
|
688
632
|
*
|
|
689
|
-
* If the configured provider is `local` and
|
|
633
|
+
* If the configured provider is `local` and the local provider turns out to be
|
|
690
634
|
* unavailable at runtime (failed install, vendor extraction blocked, etc.),
|
|
691
635
|
* automatically swap to a remote provider when `VOYAGE_API_KEY` or
|
|
692
636
|
* `OPENAI_API_KEY` is set in env. The swap is permanent for the rest of
|
|
@@ -717,7 +661,7 @@ export async function embed(
|
|
|
717
661
|
if (!remoteFallbackLogged) {
|
|
718
662
|
remoteFallbackLogged = true;
|
|
719
663
|
log.info(
|
|
720
|
-
`
|
|
664
|
+
`local embedding provider unavailable; auto-switching to ${fallback.name} ` +
|
|
721
665
|
`(set search.embeddings.provider in .lore.json to silence this)`,
|
|
722
666
|
);
|
|
723
667
|
}
|
|
@@ -776,14 +720,25 @@ type VectorHit = { id: string; similarity: number };
|
|
|
776
720
|
* Search all knowledge entries with embeddings by cosine similarity.
|
|
777
721
|
* Returns top-k entries sorted by similarity descending.
|
|
778
722
|
* Pure brute-force — fine for <100 entries (microseconds).
|
|
723
|
+
*
|
|
724
|
+
* @param excludeCategories Optional category names to exclude from results.
|
|
725
|
+
* Useful when preferences are injected in a separate system block and
|
|
726
|
+
* shouldn't compete for vector search slots with context-bound entries.
|
|
779
727
|
*/
|
|
780
728
|
export function vectorSearch(
|
|
781
729
|
queryEmbedding: Float32Array,
|
|
782
730
|
limit = 10,
|
|
731
|
+
excludeCategories?: string[],
|
|
783
732
|
): VectorHit[] {
|
|
733
|
+
let sql = "SELECT id, embedding FROM knowledge WHERE embedding IS NOT NULL AND confidence > 0.2";
|
|
734
|
+
const params: string[] = [];
|
|
735
|
+
if (excludeCategories?.length) {
|
|
736
|
+
sql += ` AND category NOT IN (${excludeCategories.map(() => "?").join(",")})`;
|
|
737
|
+
params.push(...excludeCategories);
|
|
738
|
+
}
|
|
784
739
|
const rows = db()
|
|
785
|
-
.query(
|
|
786
|
-
.all() as Array<{ id: string; embedding: Buffer }>;
|
|
740
|
+
.query(sql)
|
|
741
|
+
.all(...params) as Array<{ id: string; embedding: Buffer }>;
|
|
787
742
|
|
|
788
743
|
const scored: VectorHit[] = [];
|
|
789
744
|
for (const row of rows) {
|
|
@@ -826,6 +781,53 @@ export function vectorSearchDistillations(
|
|
|
826
781
|
return scored.slice(0, limit);
|
|
827
782
|
}
|
|
828
783
|
|
|
784
|
+
// ---------------------------------------------------------------------------
|
|
785
|
+
// Vector search — all distillations (including archived)
|
|
786
|
+
// ---------------------------------------------------------------------------
|
|
787
|
+
|
|
788
|
+
export type DistillationVectorHit = {
|
|
789
|
+
id: string;
|
|
790
|
+
session_id: string;
|
|
791
|
+
similarity: number;
|
|
792
|
+
};
|
|
793
|
+
|
|
794
|
+
/**
|
|
795
|
+
* Search ALL distillations (including archived) with embeddings by cosine
|
|
796
|
+
* similarity, scoped to a single project. Returns session_id alongside
|
|
797
|
+
* similarity for cross-session counting.
|
|
798
|
+
*
|
|
799
|
+
* Unlike vectorSearchDistillations() which filters to non-archived only,
|
|
800
|
+
* this searches the full distillation archive — necessary for detecting
|
|
801
|
+
* repeated instructions across sessions where older distillations have
|
|
802
|
+
* been archived after meta-distillation.
|
|
803
|
+
*
|
|
804
|
+
* Pure brute-force — fine for ~200 entries per project. Safety-capped
|
|
805
|
+
* at 500 rows to prevent excessive CPU on long-running projects.
|
|
806
|
+
*/
|
|
807
|
+
const MAX_DISTILLATION_VECTOR_ROWS = 500;
|
|
808
|
+
|
|
809
|
+
export function vectorSearchAllDistillations(
|
|
810
|
+
queryEmbedding: Float32Array,
|
|
811
|
+
projectId: string,
|
|
812
|
+
limit = 20,
|
|
813
|
+
): DistillationVectorHit[] {
|
|
814
|
+
const rows = db()
|
|
815
|
+
.query(
|
|
816
|
+
"SELECT id, session_id, embedding FROM distillations WHERE embedding IS NOT NULL AND project_id = ? ORDER BY created_at DESC LIMIT ?",
|
|
817
|
+
)
|
|
818
|
+
.all(projectId, MAX_DISTILLATION_VECTOR_ROWS) as Array<{ id: string; session_id: string; embedding: Buffer }>;
|
|
819
|
+
|
|
820
|
+
const scored: DistillationVectorHit[] = [];
|
|
821
|
+
for (const row of rows) {
|
|
822
|
+
const vec = fromBlob(row.embedding);
|
|
823
|
+
const sim = cosineSimilarity(queryEmbedding, vec);
|
|
824
|
+
scored.push({ id: row.id, session_id: row.session_id, similarity: sim });
|
|
825
|
+
}
|
|
826
|
+
|
|
827
|
+
scored.sort((a, b) => b.similarity - a.similarity);
|
|
828
|
+
return scored.slice(0, limit);
|
|
829
|
+
}
|
|
830
|
+
|
|
829
831
|
// ---------------------------------------------------------------------------
|
|
830
832
|
// Fire-and-forget embedding
|
|
831
833
|
// ---------------------------------------------------------------------------
|
|
@@ -1107,14 +1109,52 @@ export async function runStartupBackfill(): Promise<void> {
|
|
|
1107
1109
|
// ---------------------------------------------------------------------------
|
|
1108
1110
|
|
|
1109
1111
|
/**
|
|
1110
|
-
*
|
|
1111
|
-
* message to the embedding worker. Keeping chunks small
|
|
1112
|
-
* worker's priority queue natural gaps to interleave high-priority
|
|
1113
|
-
* queries between backfill batches.
|
|
1114
|
-
|
|
1115
|
-
|
|
1112
|
+
* Maximum chunk size for backfill embed requests. Each chunk becomes a
|
|
1113
|
+
* separate message to the embedding worker. Keeping chunks small gives
|
|
1114
|
+
* the worker's priority queue natural gaps to interleave high-priority
|
|
1115
|
+
* recall queries between backfill batches.
|
|
1116
|
+
*/
|
|
1117
|
+
const MAX_BACKFILL_CHUNK = 8;
|
|
1118
|
+
|
|
1119
|
+
/**
|
|
1120
|
+
* Maximum total "token area" (batch_size × max_sequence_length) per
|
|
1121
|
+
* backfill batch. ONNX runtime pads all texts to the longest sequence,
|
|
1122
|
+
* so the peak tensor size is proportional to this product. A budget of
|
|
1123
|
+
* 4096 tokens allows e.g. 8 × 512-token texts, or 2 × 2048-token texts.
|
|
1124
|
+
* Prevents OOM on batches with long distillation observations (~4000+
|
|
1125
|
+
* chars) that were blowing up at fixed batch sizes.
|
|
1126
|
+
*/
|
|
1127
|
+
const MAX_BATCH_TOKEN_AREA = 4096;
|
|
1128
|
+
|
|
1129
|
+
/**
|
|
1130
|
+
* Rough chars-per-token ratio for budget estimation. Nomic v1.5 uses a
|
|
1131
|
+
* WordPiece tokenizer; English text averages ~4 chars/token.
|
|
1132
|
+
*/
|
|
1133
|
+
const CHARS_PER_TOKEN = 4;
|
|
1134
|
+
|
|
1135
|
+
/**
|
|
1136
|
+
* Partition `rows` into batches that respect both MAX_BACKFILL_CHUNK and
|
|
1137
|
+
* MAX_BATCH_TOKEN_AREA. Each batch's estimated token area is
|
|
1138
|
+
* `batch.length × max_tokens_in_batch`. We greedily add rows until the
|
|
1139
|
+
* next row would push the area over budget.
|
|
1116
1140
|
*/
|
|
1117
|
-
|
|
1141
|
+
function nextBatch<T extends { text: string }>(rows: T[], start: number): T[] {
|
|
1142
|
+
const batch: T[] = [];
|
|
1143
|
+
let maxTokens = 0;
|
|
1144
|
+
|
|
1145
|
+
for (let i = start; i < rows.length && batch.length < MAX_BACKFILL_CHUNK; i++) {
|
|
1146
|
+
const estTokens = Math.ceil(rows[i].text.length / CHARS_PER_TOKEN);
|
|
1147
|
+
const newMax = Math.max(maxTokens, estTokens);
|
|
1148
|
+
const newArea = (batch.length + 1) * newMax;
|
|
1149
|
+
|
|
1150
|
+
if (batch.length > 0 && newArea > MAX_BATCH_TOKEN_AREA) break;
|
|
1151
|
+
|
|
1152
|
+
batch.push(rows[i]);
|
|
1153
|
+
maxTokens = newMax;
|
|
1154
|
+
}
|
|
1155
|
+
|
|
1156
|
+
return batch;
|
|
1157
|
+
}
|
|
1118
1158
|
|
|
1119
1159
|
/**
|
|
1120
1160
|
* Embed all knowledge entries that are missing embeddings.
|
|
@@ -1136,14 +1176,18 @@ export async function backfillEmbeddings(): Promise<number> {
|
|
|
1136
1176
|
|
|
1137
1177
|
if (!rows.length) return 0;
|
|
1138
1178
|
|
|
1179
|
+
// Pre-compute text for token-budget batching
|
|
1180
|
+
const items = rows.map((r) => ({ ...r, text: `${r.title}\n${r.content}` }));
|
|
1181
|
+
|
|
1139
1182
|
let embedded = 0;
|
|
1183
|
+
let i = 0;
|
|
1140
1184
|
|
|
1141
|
-
|
|
1142
|
-
const batch =
|
|
1143
|
-
|
|
1185
|
+
while (i < items.length) {
|
|
1186
|
+
const batch = nextBatch(items, i);
|
|
1187
|
+
i += batch.length;
|
|
1144
1188
|
|
|
1145
1189
|
try {
|
|
1146
|
-
const vectors = await embed(
|
|
1190
|
+
const vectors = await embed(batch.map((b) => b.text), "document");
|
|
1147
1191
|
const update = db().prepare(
|
|
1148
1192
|
"UPDATE knowledge SET embedding = ? WHERE id = ?",
|
|
1149
1193
|
);
|
|
@@ -1153,7 +1197,8 @@ export async function backfillEmbeddings(): Promise<number> {
|
|
|
1153
1197
|
embedded++;
|
|
1154
1198
|
}
|
|
1155
1199
|
} catch (err) {
|
|
1156
|
-
log.
|
|
1200
|
+
// log.error sends to Sentry via captureException
|
|
1201
|
+
log.error(`embedding backfill batch failed (${batch.length} items):`, err);
|
|
1157
1202
|
}
|
|
1158
1203
|
// No yieldToEventLoop() needed — embed() is truly async (worker thread).
|
|
1159
1204
|
}
|
|
@@ -1188,17 +1233,21 @@ export async function backfillDistillationEmbeddings(): Promise<number> {
|
|
|
1188
1233
|
let embedded = 0;
|
|
1189
1234
|
|
|
1190
1235
|
// Progress logging: heartbeat every PROGRESS_INTERVAL embedded so a long
|
|
1191
|
-
// backfill (e.g. 1000+ pending after a
|
|
1236
|
+
// backfill (e.g. 1000+ pending after a model change) doesn't look
|
|
1192
1237
|
// like a silent hang. Without this, only the final tally was logged.
|
|
1193
1238
|
const PROGRESS_INTERVAL = 256;
|
|
1194
1239
|
let nextProgressAt = PROGRESS_INTERVAL;
|
|
1195
1240
|
|
|
1196
|
-
|
|
1197
|
-
|
|
1198
|
-
|
|
1241
|
+
// Pre-compute text for token-budget batching
|
|
1242
|
+
const items = rows.map((r) => ({ ...r, text: r.observations }));
|
|
1243
|
+
let i = 0;
|
|
1244
|
+
|
|
1245
|
+
while (i < items.length) {
|
|
1246
|
+
const batch = nextBatch(items, i);
|
|
1247
|
+
i += batch.length;
|
|
1199
1248
|
|
|
1200
1249
|
try {
|
|
1201
|
-
const vectors = await embed(
|
|
1250
|
+
const vectors = await embed(batch.map((b) => b.text), "document");
|
|
1202
1251
|
const update = db().prepare(
|
|
1203
1252
|
"UPDATE distillations SET embedding = ? WHERE id = ?",
|
|
1204
1253
|
);
|
|
@@ -1208,7 +1257,8 @@ export async function backfillDistillationEmbeddings(): Promise<number> {
|
|
|
1208
1257
|
embedded++;
|
|
1209
1258
|
}
|
|
1210
1259
|
} catch (err) {
|
|
1211
|
-
log.
|
|
1260
|
+
// log.error sends to Sentry via captureException
|
|
1261
|
+
log.error(`distillation embedding backfill batch failed (${batch.length} items):`, err);
|
|
1212
1262
|
}
|
|
1213
1263
|
|
|
1214
1264
|
if (embedded >= nextProgressAt) {
|