npm - @loreai/core - Versions diffs - 0.17.1 → 0.18.0 - Mend

@loreai/core 0.17.1 → 0.18.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (235)

package/dist/bun/agents-file.d.ts +4 -0
package/dist/bun/agents-file.d.ts.map +1 -1
package/dist/bun/config.d.ts +2 -0
package/dist/bun/config.d.ts.map +1 -1
package/dist/bun/curator.d.ts +45 -0
package/dist/bun/curator.d.ts.map +1 -1
package/dist/bun/data-dir.d.ts +18 -0
package/dist/bun/data-dir.d.ts.map +1 -0
package/dist/bun/db.d.ts +12 -0
package/dist/bun/db.d.ts.map +1 -1
package/dist/bun/distillation.d.ts.map +1 -1
package/dist/bun/embedding-vendor.d.ts +22 -38
package/dist/bun/embedding-vendor.d.ts.map +1 -1
package/dist/bun/embedding-worker-types.d.ts +17 -12
package/dist/bun/embedding-worker-types.d.ts.map +1 -1
package/dist/bun/embedding-worker.d.ts +9 -2
package/dist/bun/embedding-worker.d.ts.map +1 -1
package/dist/bun/embedding-worker.js +38864 -33
package/dist/bun/embedding-worker.js.map +4 -4
package/dist/bun/embedding.d.ts +30 -22
package/dist/bun/embedding.d.ts.map +1 -1
package/dist/bun/gradient.d.ts +8 -1
package/dist/bun/gradient.d.ts.map +1 -1
package/dist/bun/import/detect.d.ts +14 -0
package/dist/bun/import/detect.d.ts.map +1 -0
package/dist/bun/import/extract.d.ts +43 -0
package/dist/bun/import/extract.d.ts.map +1 -0
package/dist/bun/import/history.d.ts +40 -0
package/dist/bun/import/history.d.ts.map +1 -0
package/dist/bun/import/index.d.ts +17 -0
package/dist/bun/import/index.d.ts.map +1 -0
package/dist/bun/import/providers/aider.d.ts +2 -0
package/dist/bun/import/providers/aider.d.ts.map +1 -0
package/dist/bun/import/providers/claude-code.d.ts +2 -0
package/dist/bun/import/providers/claude-code.d.ts.map +1 -0
package/dist/bun/import/providers/cline.d.ts +2 -0
package/dist/bun/import/providers/cline.d.ts.map +1 -0
package/dist/bun/import/providers/codex.d.ts +2 -0
package/dist/bun/import/providers/codex.d.ts.map +1 -0
package/dist/bun/import/providers/continue.d.ts +2 -0
package/dist/bun/import/providers/continue.d.ts.map +1 -0
package/dist/bun/import/providers/index.d.ts +19 -0
package/dist/bun/import/providers/index.d.ts.map +1 -0
package/dist/bun/import/providers/opencode.d.ts +2 -0
package/dist/bun/import/providers/opencode.d.ts.map +1 -0
package/dist/bun/import/providers/pi.d.ts +2 -0
package/dist/bun/import/providers/pi.d.ts.map +1 -0
package/dist/bun/import/types.d.ts +82 -0
package/dist/bun/import/types.d.ts.map +1 -0
package/dist/bun/index.d.ts +4 -1
package/dist/bun/index.d.ts.map +1 -1
package/dist/bun/index.js +2217 -224
package/dist/bun/index.js.map +4 -4
package/dist/bun/instruction-detect.d.ts +66 -0
package/dist/bun/instruction-detect.d.ts.map +1 -0
package/dist/bun/log.d.ts +9 -0
package/dist/bun/log.d.ts.map +1 -1
package/dist/bun/ltm.d.ts +40 -0
package/dist/bun/ltm.d.ts.map +1 -1
package/dist/bun/pattern-extract.d.ts +7 -0
package/dist/bun/pattern-extract.d.ts.map +1 -1
package/dist/bun/prompt.d.ts +1 -1
package/dist/bun/prompt.d.ts.map +1 -1
package/dist/bun/recall.d.ts.map +1 -1
package/dist/bun/search.d.ts +5 -3
package/dist/bun/search.d.ts.map +1 -1
package/dist/bun/temporal.d.ts.map +1 -1
package/dist/bun/types.d.ts +1 -1
package/dist/node/agents-file.d.ts +4 -0
package/dist/node/agents-file.d.ts.map +1 -1
package/dist/node/config.d.ts +2 -0
package/dist/node/config.d.ts.map +1 -1
package/dist/node/curator.d.ts +45 -0
package/dist/node/curator.d.ts.map +1 -1
package/dist/node/data-dir.d.ts +18 -0
package/dist/node/data-dir.d.ts.map +1 -0
package/dist/node/db.d.ts +12 -0
package/dist/node/db.d.ts.map +1 -1
package/dist/node/distillation.d.ts.map +1 -1
package/dist/node/embedding-vendor.d.ts +22 -38
package/dist/node/embedding-vendor.d.ts.map +1 -1
package/dist/node/embedding-worker-types.d.ts +17 -12
package/dist/node/embedding-worker-types.d.ts.map +1 -1
package/dist/node/embedding-worker.d.ts +9 -2
package/dist/node/embedding-worker.d.ts.map +1 -1
package/dist/node/embedding-worker.js +38864 -33
package/dist/node/embedding-worker.js.map +4 -4
package/dist/node/embedding.d.ts +30 -22
package/dist/node/embedding.d.ts.map +1 -1
package/dist/node/gradient.d.ts +8 -1
package/dist/node/gradient.d.ts.map +1 -1
package/dist/node/import/detect.d.ts +14 -0
package/dist/node/import/detect.d.ts.map +1 -0
package/dist/node/import/extract.d.ts +43 -0
package/dist/node/import/extract.d.ts.map +1 -0
package/dist/node/import/history.d.ts +40 -0
package/dist/node/import/history.d.ts.map +1 -0
package/dist/node/import/index.d.ts +17 -0
package/dist/node/import/index.d.ts.map +1 -0
package/dist/node/import/providers/aider.d.ts +2 -0
package/dist/node/import/providers/aider.d.ts.map +1 -0
package/dist/node/import/providers/claude-code.d.ts +2 -0
package/dist/node/import/providers/claude-code.d.ts.map +1 -0
package/dist/node/import/providers/cline.d.ts +2 -0
package/dist/node/import/providers/cline.d.ts.map +1 -0
package/dist/node/import/providers/codex.d.ts +2 -0
package/dist/node/import/providers/codex.d.ts.map +1 -0
package/dist/node/import/providers/continue.d.ts +2 -0
package/dist/node/import/providers/continue.d.ts.map +1 -0
package/dist/node/import/providers/index.d.ts +19 -0
package/dist/node/import/providers/index.d.ts.map +1 -0
package/dist/node/import/providers/opencode.d.ts +2 -0
package/dist/node/import/providers/opencode.d.ts.map +1 -0
package/dist/node/import/providers/pi.d.ts +2 -0
package/dist/node/import/providers/pi.d.ts.map +1 -0
package/dist/node/import/types.d.ts +82 -0
package/dist/node/import/types.d.ts.map +1 -0
package/dist/node/index.d.ts +4 -1
package/dist/node/index.d.ts.map +1 -1
package/dist/node/index.js +2217 -224
package/dist/node/index.js.map +4 -4
package/dist/node/instruction-detect.d.ts +66 -0
package/dist/node/instruction-detect.d.ts.map +1 -0
package/dist/node/log.d.ts +9 -0
package/dist/node/log.d.ts.map +1 -1
package/dist/node/ltm.d.ts +40 -0
package/dist/node/ltm.d.ts.map +1 -1
package/dist/node/pattern-extract.d.ts +7 -0
package/dist/node/pattern-extract.d.ts.map +1 -1
package/dist/node/prompt.d.ts +1 -1
package/dist/node/prompt.d.ts.map +1 -1
package/dist/node/recall.d.ts.map +1 -1
package/dist/node/search.d.ts +5 -3
package/dist/node/search.d.ts.map +1 -1
package/dist/node/temporal.d.ts.map +1 -1
package/dist/node/types.d.ts +1 -1
package/dist/types/agents-file.d.ts +4 -0
package/dist/types/agents-file.d.ts.map +1 -1
package/dist/types/config.d.ts +2 -0
package/dist/types/config.d.ts.map +1 -1
package/dist/types/curator.d.ts +45 -0
package/dist/types/curator.d.ts.map +1 -1
package/dist/types/data-dir.d.ts +18 -0
package/dist/types/data-dir.d.ts.map +1 -0
package/dist/types/db.d.ts +12 -0
package/dist/types/db.d.ts.map +1 -1
package/dist/types/distillation.d.ts.map +1 -1
package/dist/types/embedding-vendor.d.ts +22 -38
package/dist/types/embedding-vendor.d.ts.map +1 -1
package/dist/types/embedding-worker-types.d.ts +17 -12
package/dist/types/embedding-worker-types.d.ts.map +1 -1
package/dist/types/embedding-worker.d.ts +9 -2
package/dist/types/embedding-worker.d.ts.map +1 -1
package/dist/types/embedding.d.ts +30 -22
package/dist/types/embedding.d.ts.map +1 -1
package/dist/types/gradient.d.ts +8 -1
package/dist/types/gradient.d.ts.map +1 -1
package/dist/types/import/detect.d.ts +14 -0
package/dist/types/import/detect.d.ts.map +1 -0
package/dist/types/import/extract.d.ts +43 -0
package/dist/types/import/extract.d.ts.map +1 -0
package/dist/types/import/history.d.ts +40 -0
package/dist/types/import/history.d.ts.map +1 -0
package/dist/types/import/index.d.ts +17 -0
package/dist/types/import/index.d.ts.map +1 -0
package/dist/types/import/providers/aider.d.ts +2 -0
package/dist/types/import/providers/aider.d.ts.map +1 -0
package/dist/types/import/providers/claude-code.d.ts +2 -0
package/dist/types/import/providers/claude-code.d.ts.map +1 -0
package/dist/types/import/providers/cline.d.ts +2 -0
package/dist/types/import/providers/cline.d.ts.map +1 -0
package/dist/types/import/providers/codex.d.ts +2 -0
package/dist/types/import/providers/codex.d.ts.map +1 -0
package/dist/types/import/providers/continue.d.ts +2 -0
package/dist/types/import/providers/continue.d.ts.map +1 -0
package/dist/types/import/providers/index.d.ts +19 -0
package/dist/types/import/providers/index.d.ts.map +1 -0
package/dist/types/import/providers/opencode.d.ts +2 -0
package/dist/types/import/providers/opencode.d.ts.map +1 -0
package/dist/types/import/providers/pi.d.ts +2 -0
package/dist/types/import/providers/pi.d.ts.map +1 -0
package/dist/types/import/types.d.ts +82 -0
package/dist/types/import/types.d.ts.map +1 -0
package/dist/types/index.d.ts +4 -1
package/dist/types/index.d.ts.map +1 -1
package/dist/types/instruction-detect.d.ts +66 -0
package/dist/types/instruction-detect.d.ts.map +1 -0
package/dist/types/log.d.ts +9 -0
package/dist/types/log.d.ts.map +1 -1
package/dist/types/ltm.d.ts +40 -0
package/dist/types/ltm.d.ts.map +1 -1
package/dist/types/pattern-extract.d.ts +7 -0
package/dist/types/pattern-extract.d.ts.map +1 -1
package/dist/types/prompt.d.ts +1 -1
package/dist/types/prompt.d.ts.map +1 -1
package/dist/types/recall.d.ts.map +1 -1
package/dist/types/search.d.ts +5 -3
package/dist/types/search.d.ts.map +1 -1
package/dist/types/temporal.d.ts.map +1 -1
package/dist/types/types.d.ts +1 -1
package/package.json +2 -4
package/src/agents-file.ts +41 -13
package/src/config.ts +31 -18
package/src/curator.ts +111 -75
package/src/data-dir.ts +76 -0
package/src/db.ts +110 -11
package/src/distillation.ts +10 -2
package/src/embedding-vendor.ts +23 -40
package/src/embedding-worker-types.ts +19 -11
package/src/embedding-worker.ts +111 -47
package/src/embedding.ts +196 -171
package/src/gradient.ts +9 -1
package/src/import/detect.ts +37 -0
package/src/import/extract.ts +137 -0
package/src/import/history.ts +99 -0
package/src/import/index.ts +45 -0
package/src/import/providers/aider.ts +207 -0
package/src/import/providers/claude-code.ts +339 -0
package/src/import/providers/cline.ts +324 -0
package/src/import/providers/codex.ts +369 -0
package/src/import/providers/continue.ts +304 -0
package/src/import/providers/index.ts +32 -0
package/src/import/providers/opencode.ts +272 -0
package/src/import/providers/pi.ts +332 -0
package/src/import/types.ts +91 -0
package/src/index.ts +5 -0
package/src/instruction-detect.ts +275 -0
package/src/log.ts +91 -3
package/src/ltm.ts +316 -3
package/src/pattern-extract.ts +41 -0
package/src/prompt.ts +7 -1
package/src/recall.ts +43 -5
package/src/search.ts +7 -5
package/src/temporal.ts +8 -6
package/src/types.ts +1 -1

package/src/embedding.ts CHANGED

@@ -1,17 +1,22 @@
 /**
  * Embedding integration for vector search.
  *
- * Supports multiple embedding providers (Voyage AI, OpenAI) behind a common
- * interface. Provides embedding generation, pure-JS cosine similarity, and
- * vector search over the knowledge and distillation tables. All operations
- * are gated behind `search.embeddings.enabled` config + the provider's API
- * key env var — falls back silently to FTS-only when unavailable.
+ * Supports multiple embedding providers behind a common interface:
+ *   - "local" (default): @huggingface/transformers + nomic-embed-text-v1.5
+ *     (768 dims, Matryoshka-capable). Runs ONNX inference in a worker thread.
+ *   - "voyage": Voyage AI API (voyage-code-3, 1024 dims)
+ *   - "openai": OpenAI API (text-embedding-3-small, 1536 dims)
+ *
+ * Provides embedding generation, pure-JS cosine similarity, and vector search
+ * over the knowledge and distillation tables. All operations are gated behind
+ * the `search.embeddings.enabled` config and, for the remote providers, the
+ * provider's API key env var (the local provider needs no key) — falls back
+ * silently to FTS-only when unavailable.
  */
 import { db } from "./db";
 import { config } from "./config";
 import * as log from "./log";
-import { isVendoredBinary, vendorModelInfo } from "./embedding-vendor";
+import { vendorModelInfo } from "./embedding-vendor";
 import type {
   WorkerInbound,
   WorkerOutbound,
@@ -139,152 +144,76 @@ class OpenAIProvider implements EmbeddingProvider {
 }
 // ---------------------------------------------------------------------------
-// Local provider (fastembed + ONNX Runtime)
+// Local provider (@huggingface/transformers + nomic-embed-text-v1.5)
 // ---------------------------------------------------------------------------
 /**
- * Thrown when `LocalProvider` is requested but `fastembed` cannot be loaded.
- * `fastembed` is an optionalDependency of `@loreai/core`: if its postinstall
- * fails (e.g. CUDA 13 hits the upstream `onnxruntime-node` bug — see #185),
- * the package install still succeeds but local embeddings are disabled.
- * Callers in `recall.ts` / `ltm.ts` / `distillation.ts` already gate on
- * `isAvailable()`, which flips to `false` after this error fires once.
+ * Thrown when `LocalProvider` cannot initialize (e.g. ONNX runtime fails
+ * to load). Callers in `recall.ts` / `ltm.ts` / `distillation.ts` gate
+ * on `isAvailable()`, which flips to `false` after this error fires once.
  */
 export class LocalProviderUnavailableError extends Error {
   constructor(cause?: unknown) {
     super(
-      "Local embedding provider unavailable: 'fastembed' is not installed. " +
+      "Local embedding provider unavailable: '@huggingface/transformers' failed to initialize. " +
         "Configure search.embeddings.provider to 'voyage' or 'openai', or " +
-        "reinstall with ONNXRUNTIME_NODE_INSTALL_CUDA=skip to retry the optional fastembed install.",
+        "set VOYAGE_API_KEY/OPENAI_API_KEY for automatic remote fallback.",
     );
     this.name = "LocalProviderUnavailableError";
     if (cause !== undefined) (this as Error & { cause?: unknown }).cause = cause;
   }
 }
-/** Cache of the fastembed module-load probe.
- *  null = not yet probed; module = imported successfully; false = import failed. */
-let fastembedModule: typeof import("fastembed") | null = null;
-let fastembedProbed: boolean = false;
-let fastembedAvailable: boolean = false;
-let fastembedLogged: boolean = false;
-/** For tests: reset the fastembed probe cache. */
-export function _resetFastembedProbe(): void {
-  fastembedModule = null;
-  fastembedProbed = false;
-  fastembedAvailable = false;
-  fastembedLogged = false;
-}
-/** For tests: simulate fastembed being unresolvable, without mocking the
- *  dynamic import. After this call, `tryLoadFastembed()` short-circuits to
- *  `null` and `isAvailable()` returns false for the local provider. */
-export function _markFastembedUnavailable(): void {
-  fastembedModule = null;
-  fastembedProbed = true;
-  fastembedAvailable = false;
-  fastembedLogged = true; // suppress the info log in tests
-}
+/** Tracks whether the local provider has been probed and found unavailable.
+ *  Set to true after the first worker init failure so subsequent calls
+ *  to `isAvailable()` short-circuit. */
+let localProviderKnownBroken = false;
+let localProviderErrorLogged = false;
-/**
- * Probe `fastembed` once. Returns the module on success, `null` on failure.
- * Logs an info-level note exactly once on the first failure so users know
- * how to recover (switch provider, fix the install, or rely on the
- * VOYAGE/OPENAI auto-fallback in `embed()`).
- *
- * In binary mode `import("fastembed")` resolves to the bundle Bun packed
- * at compile time (the binary's wrapper has already preloaded the
- * side-load `libonnxruntime` lib so the addon's dlopen succeeds). In
- * npm mode it goes through standard module resolution and may fail if
- * the optional postinstall didn't run.
- */
-async function tryLoadFastembed(): Promise<typeof import("fastembed") | null> {
-  if (fastembedProbed) return fastembedAvailable ? fastembedModule : null;
-  try {
-    const mod = await loadFastembedModule();
-    // Re-check after the async boundary: another caller (e.g. a test helper
-    // like _markFastembedUnavailable) may have set the probe while we were
-    // awaiting. Their decision takes priority — don't overwrite it.
-    if (fastembedProbed) return fastembedAvailable ? fastembedModule : null;
-    fastembedModule = mod;
-    fastembedAvailable = true;
-  } catch (err) {
-    if (fastembedProbed) return fastembedAvailable ? fastembedModule : null;
-    fastembedAvailable = false;
-    if (!fastembedLogged) {
-      fastembedLogged = true;
-      const msg = err instanceof Error ? err.message : String(err);
-      // Binary mode: a load failure here is a real bug (everything was
-      // bundled at build time). npm mode: the optional dep didn't
-      // install — point the user at the standard recovery options.
-      const remediation = isVendoredBinary()
-        ? "this is a bug in the lore binary; please file an issue. " +
-          "Set VOYAGE_API_KEY/OPENAI_API_KEY for automatic remote fallback in the meantime"
-        : "set search.embeddings.provider to 'voyage' or 'openai', " +
-          "set VOYAGE_API_KEY/OPENAI_API_KEY for automatic remote fallback, " +
-          "or reinstall fastembed with ONNXRUNTIME_NODE_INSTALL_CUDA=skip";
-      log.info(
-        `local embedding provider unavailable (fastembed not installed: ${msg}) — ${remediation}`,
-      );
-    }
-  } finally {
-    fastembedProbed = true;
-  }
-  return fastembedAvailable ? fastembedModule : null;
+/** For tests: reset the local provider probe state. */
+export function _resetLocalProviderProbe(): void {
+  localProviderKnownBroken = false;
+  localProviderErrorLogged = false;
 }
-/**
- * Resolve and import the fastembed module.
- *
- * One bare import covers both modes:
- *
- *   - Binary mode: `bun build --compile` resolves "fastembed" against the
- *     per-target staging `node_modules/` at build time and bundles it
- *     (plus its transitive deps and `.node` addons) into the binary. The
- *     side-load `libonnxruntime.so.1` / `.dylib` / `.dll` is preloaded
- *     by the binary's wrapper before this import evaluates, so the
- *     bundled `onnxruntime_binding.node`'s dlopen finds the cached
- *     handle instead of failing with "shared object not found".
- *
- *   - npm mode: standard Node/Bun resolution — works for `@loreai/core`
- *     consumers whose `npm install` cleanly installed the optional dep.
- *     If the postinstall failed (CUDA-13 hosts), the import throws here
- *     and the caller logs + falls back to a remote provider.
- */
-async function loadFastembedModule(): Promise<typeof import("fastembed")> {
-  return (await import("fastembed")) as typeof import("fastembed");
+/** For tests: simulate the local provider being unavailable, without
+ *  actually spawning a worker. After this call, `isAvailable()` returns
+ *  false for the local provider. */
+export function _markLocalProviderUnavailable(): void {
+  localProviderKnownBroken = true;
+  localProviderErrorLogged = true; // suppress the info log in tests
 }
-/** True iff the fastembed probe has run and reported the module missing. */
-function fastembedKnownUnavailable(): boolean {
-  return fastembedProbed && !fastembedAvailable;
+/** True iff the local provider has been probed and found broken. */
+function localProviderKnownUnavailable(): boolean {
+  return localProviderKnownBroken;
 }
 /**
- * Local embedding provider using fastembed (bge-small-en-v1.5 by default).
+ * Local embedding provider using @huggingface/transformers with
+ * nomic-embed-text-v1.5 by default.
  *
  * No API key required — runs entirely on-device via ONNX Runtime.
- * Model files are downloaded on first use (~33MB) and cached in
- * `~/.cache/fastembed`. Subsequent inits load from disk in ~350ms.
+ * Model files are downloaded on first use (~137MB for INT8 quantized)
+ * and cached locally. Subsequent inits load from cache.
  *
  * ONNX inference runs in a dedicated `node:worker_threads` Worker so the
  * main thread's event loop stays free. This class is a thin RPC client —
  * it posts `{ texts, inputType }` to the worker and awaits a reply.
- * The worker owns the `FlagEmbedding` model and processes requests
+ * The worker owns the transformers.js pipeline and processes requests
  * sequentially from a priority queue (recall queries jump ahead of
  * backfill batches).
  *
- * Uses dynamic import so the module is only loaded when the "local"
- * provider is actually selected — avoids startup cost and allows
- * graceful fallback when the optional `fastembed` peer isn't installed
- * (its native onnxruntime-node may fail to build, e.g. on CUDA 13).
+ * Task instruction prefixes are prepended automatically:
+ *   - "document" → "search_document: <text>"
+ *   - "query"    → "search_query: <text>"
  */
 class LocalProvider implements EmbeddingProvider {
   // With inference off the main thread, large batches no longer block
   // the event loop. 256 maximises throughput per round-trip to the
-  // worker. Backfill callers use a smaller BACKFILL_CHUNK_SIZE to give
-  // the worker's priority queue breathing room for recall queries.
+  // worker. Backfill callers use token-budget-based batching (see
+  // nextBatch) to give the worker's priority queue breathing room
+  // for recall queries and prevent OOM on long texts.
   readonly maxBatchSize = 256;
   private worker: import("node:worker_threads").Worker | null = null;
@@ -296,16 +225,16 @@ class LocalProvider implements EmbeddingProvider {
   >();
   private nextRequestId = 0;
   private initPromise: Promise<void> | null = null;
-  private modelName: string;
+  private modelId: string;
+  private dimensions: number;
-  constructor(modelName: string) {
-    this.modelName = modelName;
+  constructor(modelId: string, dimensions: number) {
+    this.modelId = modelId;
+    this.dimensions = dimensions;
   }
   /**
-   * Ensure the worker thread is running. Probes fastembed on the main
-   * thread first (fast, cached) as a fast-fail gate — the worker is only
-   * spawned if the module is known-loadable. Worker startup failure is
+   * Ensure the worker thread is running. Worker startup failure is
    * surfaced as `LocalProviderUnavailableError` to trigger the existing
    * auto-fallback to remote providers.
    */
@@ -315,10 +244,8 @@ class LocalProvider implements EmbeddingProvider {
     if (this.initPromise) return this.initPromise;
     this.initPromise = (async () => {
-      // Fast-fail: probe fastembed on the main thread. This is cached
-      // after the first call and preserves the existing error flow.
-      const fastembed = await tryLoadFastembed();
-      if (!fastembed) throw new LocalProviderUnavailableError();
+      // Fast-fail if a previous attempt already marked local broken.
+      if (localProviderKnownBroken) throw new LocalProviderUnavailableError();
       const { Worker } = await import("node:worker_threads");
@@ -334,17 +261,10 @@ class LocalProvider implements EmbeddingProvider {
       // In dev (Bun running .ts directly): embedding-worker.ts
       // In dist (esbuild bundle): embedding-worker.js
       const vendorWorkerUrl = (globalThis as Record<string, unknown>).__LORE_VENDOR_WORKER_URL__ as string | undefined;
-      // On Windows, new Worker() with a file:// URL pointing to $bunfs
-      // fails with ENOENT. Pass the raw path instead (B:\~BUN\root\...).
-      // On macOS/Linux the file:// URL works fine with $bunfs paths.
       let workerUrl: string | URL;
       if (vendorWorkerUrl) {
         if (process.platform === "win32") {
-          // On Windows, new Worker() with a file:// URL pointing to $bunfs
-          // fails with ENOENT (Bun bug). Extract the raw path instead.
-          // URL.pathname keeps %7E encoded; decodeURIComponent restores ~.
           workerUrl = decodeURIComponent(new URL(vendorWorkerUrl).pathname);
-          // URL.pathname on Windows: /B:/~BUN/root/wrapper.js → strip leading /
           if (/^\/[A-Za-z]:/.test(workerUrl)) {
             workerUrl = workerUrl.slice(1);
           }
@@ -357,9 +277,10 @@ class LocalProvider implements EmbeddingProvider {
       const vendor = vendorModelInfo();
       const workerInitData: WorkerInitData = {
-        modelName: this.modelName,
+        modelId: this.modelId,
+        dimensions: this.dimensions,
         vendorModel: vendor
-          ? { modelAbsoluteDirPath: vendor.modelAbsoluteDirPath, modelName: vendor.modelName }
+          ? { localModelPath: vendor.localModelPath }
           : null,
       };
@@ -394,6 +315,14 @@ class LocalProvider implements EmbeddingProvider {
             // LocalProviderUnavailableError on all pending + future requests.
             this.workerInitError = msg.error;
             this.workerReady = false;
+            localProviderKnownBroken = true;
+            if (!localProviderErrorLogged) {
+              localProviderErrorLogged = true;
+              log.info(
+                `local embedding provider failed to init: ${msg.error}. ` +
+                  `Set VOYAGE_API_KEY/OPENAI_API_KEY for automatic remote fallback.`,
+              );
+            }
             for (const [, p] of this.pendingRequests) {
               p.reject(new LocalProviderUnavailableError(msg.error));
             }
@@ -453,6 +382,10 @@ class LocalProvider implements EmbeddingProvider {
   async embed(texts: string[], inputType: "document" | "query"): Promise<Float32Array[]> {
     await this.ensureWorker();
+    // Prepend Nomic task instruction prefix.
+    const prefix = inputType === "document" ? "search_document: " : "search_query: ";
+    const prefixed = texts.map((t) => prefix + t);
     const id = this.nextRequestId++;
     // Recall queries (single query-type texts) get high priority so they
     // jump ahead of any queued backfill batches in the worker.
@@ -464,7 +397,7 @@ class LocalProvider implements EmbeddingProvider {
       this.worker!.postMessage({
         type: "embed",
         id,
-        texts,
+        texts: prefixed,
         inputType,
         priority,
       } satisfies WorkerInbound);
@@ -473,8 +406,6 @@ class LocalProvider implements EmbeddingProvider {
   /** Shut down the worker thread. Called by `resetProvider()` on config change.
    *  Sends a shutdown message so the worker calls `process.exit(0)` internally.
-   *  We avoid `worker.terminate()` because Bun's forced termination triggers a
-   *  NAPI fatal error when tearing down onnxruntime's native bindings.
    *
    *  Returns a promise that resolves once the worker has fully exited. Callers
    *  that need a clean teardown (tests, config change) should await the result.
@@ -507,7 +438,7 @@ class LocalProvider implements EmbeddingProvider {
 /** Default models per provider — used when config doesn't override. */
 const PROVIDER_DEFAULTS: Record<string, { model: string; dimensions: number }> = {
-  local: { model: "BGESmallENV15", dimensions: 384 },
+  local: { model: "nomic-ai/nomic-embed-text-v1.5", dimensions: 768 },
   voyage: { model: "voyage-code-3", dimensions: 1024 },
   openai: { model: "text-embedding-3-small", dimensions: 1536 },
 };
@@ -539,12 +470,11 @@ function getProvider(): EmbeddingProvider | null {
   switch (providerName) {
     case "local": {
-      // `fastembed` is an optionalDependency. We construct the provider
-      // optimistically here; the import + ONNX init happens lazily in
-      // `LocalProvider.getModel()`, which throws `LocalProviderUnavailableError`
-      // if the optional dep isn't installed. After that first failure
-      // `isAvailable()` short-circuits to false and callers fall back to FTS.
-      cachedProvider = new LocalProvider(model);
+      // Construct the provider optimistically — the ONNX model init
+      // happens lazily in the worker thread on first `embed()` call.
+      // If it fails, `LocalProviderUnavailableError` triggers the
+      // auto-fallback to a remote provider or FTS-only search.
+      cachedProvider = new LocalProvider(model, cfg.dimensions);
       break;
     }
     case "voyage": {
@@ -619,7 +549,7 @@ export function _saveAndClearProvider(): unknown {
 /** Restore a provider previously saved by `_saveAndClearProvider()`. Any
  *  provider created between save and restore is discarded (callers must
  *  ensure it's not a LocalProvider with a live worker — those suites only
- *  use `_markFastembedUnavailable()` so no worker is spawned). */
+ *  use `_markLocalProviderUnavailable()` so no worker is spawned). */
 export function _restoreProvider(token: unknown): void {
   const saved = token as { provider: EmbeddingProvider | null | undefined; remoteFallbackLogged: boolean };
   cachedProvider = saved.provider;
@@ -669,13 +599,13 @@ export function pickRemoteFallback(): {
  *  Active when the configured provider's API key is set, unless explicitly
  *  disabled via `search.embeddings.enabled: false` in .lore.json.
  *
- *  For the `local` provider, also returns false once we've discovered the
- *  optional `fastembed` peer is missing — callers (recall, ltm, distillation)
- *  use this gate to skip embedding work and fall back to FTS-only search. */
+ *  For the `local` provider, also returns false once the worker has reported
+ *  an init failure — callers (recall, ltm, distillation) use this gate to
+ *  skip embedding work and fall back to FTS-only search. */
 export function isAvailable(): boolean {
   const provider = getProvider();
   if (!provider) return false;
-  if (provider instanceof LocalProvider && fastembedKnownUnavailable()) return false;
+  if (provider instanceof LocalProvider && localProviderKnownUnavailable()) return false;
   return true;
 }
@@ -686,7 +616,7 @@ export function isAvailable(): boolean {
 /**
  * Generate embeddings for the given texts using the configured provider.
  *
- * If the configured provider is `local` and `fastembed` turns out to be
+ * If the configured provider is `local` and it turns out to be
  * unavailable at runtime (failed install, vendor extraction blocked, etc.),
  * automatically swap to a remote provider when `VOYAGE_API_KEY` or
  * `OPENAI_API_KEY` is set in env. The swap is permanent for the rest of
@@ -717,7 +647,7 @@ export async function embed(
     if (!remoteFallbackLogged) {
       remoteFallbackLogged = true;
       log.info(
-        `fastembed unavailable; auto-switching to ${fallback.name} ` +
+        `local embedding provider unavailable; auto-switching to ${fallback.name} ` +
           `(set search.embeddings.provider in .lore.json to silence this)`,
       );
     }
@@ -826,6 +756,53 @@ export function vectorSearchDistillations(
   return scored.slice(0, limit);
 }
+// ---------------------------------------------------------------------------
+// Vector search — all distillations (including archived)
+// ---------------------------------------------------------------------------
+export type DistillationVectorHit = {
+  id: string;
+  session_id: string;
+  similarity: number;
+};
+/**
+ * Search ALL distillations (including archived) with embeddings by cosine
+ * similarity, scoped to a single project. Returns session_id alongside
+ * similarity for cross-session counting.
+ *
+ * Unlike vectorSearchDistillations() which filters to non-archived only,
+ * this searches the full distillation archive — necessary for detecting
+ * repeated instructions across sessions where older distillations have
+ * been archived after meta-distillation.
+ *
+ * Pure brute-force — fine for ~200 entries per project. Safety-capped
+ * at the 500 most recently created rows (MAX_DISTILLATION_VECTOR_ROWS)
+ * to prevent excessive CPU on long-running projects.
+ */
+const MAX_DISTILLATION_VECTOR_ROWS = 500;
+export function vectorSearchAllDistillations(
+  queryEmbedding: Float32Array,
+  projectId: string,
+  limit = 20,
+): DistillationVectorHit[] {
+  const rows = db()
+    .query(
+      "SELECT id, session_id, embedding FROM distillations WHERE embedding IS NOT NULL AND project_id = ? ORDER BY created_at DESC LIMIT ?",
+    )
+    .all(projectId, MAX_DISTILLATION_VECTOR_ROWS) as Array<{ id: string; session_id: string; embedding: Buffer }>;
+  const scored: DistillationVectorHit[] = [];
+  for (const row of rows) {
+    const vec = fromBlob(row.embedding);
+    const sim = cosineSimilarity(queryEmbedding, vec);
+    scored.push({ id: row.id, session_id: row.session_id, similarity: sim });
+  }
+  scored.sort((a, b) => b.similarity - a.similarity);
+  return scored.slice(0, limit);
+}
 // ---------------------------------------------------------------------------
 // Fire-and-forget embedding
 // ---------------------------------------------------------------------------
@@ -1107,14 +1084,52 @@ export async function runStartupBackfill(): Promise<void> {
 // ---------------------------------------------------------------------------
 /**
- * Chunk size for backfill embed requests. Each chunk becomes a separate
- * message to the embedding worker. Keeping chunks small (32) gives the
- * worker's priority queue natural gaps to interleave high-priority recall
- * queries between backfill batches. The provider's `maxBatchSize` (256)
- * is the upper limit for any single embed call; this is intentionally
- * smaller for backfill-vs-live interleaving.
+ * Maximum chunk size for backfill embed requests. Each chunk becomes a
+ * separate message to the embedding worker. Keeping chunks small gives
+ * the worker's priority queue natural gaps to interleave high-priority
+ * recall queries between backfill batches.
+ */
+const MAX_BACKFILL_CHUNK = 8;
+/**
+ * Maximum total "token area" (batch_size × max_sequence_length) per
+ * backfill batch. ONNX runtime pads all texts to the longest sequence,
+ * so the peak tensor size is proportional to this product. A budget of
+ * 4096 tokens allows e.g. 8 × 512-token texts, or 2 × 2048-token texts.
+ * Prevents OOM on batches with long distillation observations (~4000+
+ * chars) that were blowing up at fixed batch sizes.
+ */
+const MAX_BATCH_TOKEN_AREA = 4096;
+/**
+ * Rough chars-per-token ratio for budget estimation. Nomic v1.5 uses a
+ * WordPiece tokenizer; English text averages ~4 chars/token.
+ */
+const CHARS_PER_TOKEN = 4;
+/**
+ * Return the next batch of `rows`, beginning at index `start`, that respects
+ * both MAX_BACKFILL_CHUNK and MAX_BATCH_TOKEN_AREA. A batch's estimated token
+ * area is `batch.length × max_tokens_in_batch`. Rows are added greedily until
+ * the next row would push the area over budget; the first row is always
+ * accepted, so a single over-budget text still forms a batch of one.
  */
-const BACKFILL_CHUNK_SIZE = 32;
+function nextBatch<T extends { text: string }>(rows: T[], start: number): T[] {
+  const picked: T[] = [];
+  // Longest estimated token count among the rows picked so far.
+  let widest = 0;
+  let cursor = start;
+  while (cursor < rows.length && picked.length < MAX_BACKFILL_CHUNK) {
+    const candidate = rows[cursor];
+    const candidateTokens = Math.ceil(candidate.text.length / CHARS_PER_TOKEN);
+    const widestWithCandidate = Math.max(widest, candidateTokens);
+    // Area the batch would occupy if the candidate were added.
+    const overBudget =
+      (picked.length + 1) * widestWithCandidate > MAX_BATCH_TOKEN_AREA;
+    // The first row is taken regardless of size so the batch is never empty.
+    if (overBudget && picked.length > 0) break;
+    picked.push(candidate);
+    widest = widestWithCandidate;
+    cursor += 1;
+  }
+  return picked;
+}
 /**
  * Embed all knowledge entries that are missing embeddings.
@@ -1136,14 +1151,18 @@ export async function backfillEmbeddings(): Promise<number> {
   if (!rows.length) return 0;
+  // Pre-compute text for token-budget batching
+  const items = rows.map((r) => ({ ...r, text: `${r.title}\n${r.content}` }));
   let embedded = 0;
+  let i = 0;
-  for (let i = 0; i < rows.length; i += BACKFILL_CHUNK_SIZE) {
-    const batch = rows.slice(i, i + BACKFILL_CHUNK_SIZE);
-    const texts = batch.map((r) => `${r.title}\n${r.content}`);
+  while (i < items.length) {
+    const batch = nextBatch(items, i);
+    i += batch.length;
     try {
-      const vectors = await embed(texts, "document");
+      const vectors = await embed(batch.map((b) => b.text), "document");
       const update = db().prepare(
         "UPDATE knowledge SET embedding = ? WHERE id = ?",
       );
@@ -1153,7 +1172,8 @@ export async function backfillEmbeddings(): Promise<number> {
         embedded++;
       }
     } catch (err) {
-      log.info(`embedding backfill batch ${i}-${i + batch.length} failed:`, err);
+      // log.error sends to Sentry via captureException
+      log.error(`embedding backfill batch failed (${batch.length} items):`, err);
     }
     // No yieldToEventLoop() needed — embed() is truly async (worker thread).
   }
@@ -1188,17 +1208,21 @@ export async function backfillDistillationEmbeddings(): Promise<number> {
   let embedded = 0;
   // Progress logging: heartbeat every PROGRESS_INTERVAL embedded so a long
-  // backfill (e.g. 1000+ pending after a fastembed reinstall) doesn't look
+  // backfill (e.g. 1000+ pending after a model change) doesn't look
   // like a silent hang. Without this, only the final tally was logged.
   const PROGRESS_INTERVAL = 256;
   let nextProgressAt = PROGRESS_INTERVAL;
-  for (let i = 0; i < rows.length; i += BACKFILL_CHUNK_SIZE) {
-    const batch = rows.slice(i, i + BACKFILL_CHUNK_SIZE);
-    const texts = batch.map((r) => r.observations);
+  // Pre-compute text for token-budget batching
+  const items = rows.map((r) => ({ ...r, text: r.observations }));
+  let i = 0;
+  while (i < items.length) {
+    const batch = nextBatch(items, i);
+    i += batch.length;
     try {
-      const vectors = await embed(texts, "document");
+      const vectors = await embed(batch.map((b) => b.text), "document");
       const update = db().prepare(
         "UPDATE distillations SET embedding = ? WHERE id = ?",
       );
@@ -1208,7 +1232,8 @@ export async function backfillDistillationEmbeddings(): Promise<number> {
         embedded++;
       }
     } catch (err) {
-      log.info(`distillation embedding backfill batch ${i}-${i + batch.length} failed:`, err);
+      // log.error sends to Sentry via captureException
+      log.error(`distillation embedding backfill batch failed (${batch.length} items):`, err);
     }
     if (embedded >= nextProgressAt) {

package/src/gradient.ts CHANGED Viewed

@@ -354,11 +354,19 @@ function getSessionState(sessionID: string): SessionState {
  *
  * Set `thresholdMs <= 0` to disable. Returns true if a reset fired so the
  * caller can log/observe.
+ *
+ * @param skipCompact  When true, perform all idle-resume housekeeping
+ *   (clear caches, set cameOutOfIdle) but do NOT set postIdleCompact.
+ *   Used when the caller knows the upstream prompt cache is still warm
+ *   (e.g. cache warmer recently refreshed it) — compacting would produce
+ *   a different prompt body that doesn't match the warmed prefix, causing
+ *   a cache bust and wasting the warming cost.
  */
 export function onIdleResume(
   sessionID: string,
   thresholdMs: number,
   now: number = Date.now(),
+  skipCompact: boolean = false,
 ): { triggered: false } | { triggered: true; idleMs: number } {
   if (thresholdMs <= 0) return { triggered: false };
   const state = getSessionState(sessionID);
@@ -369,7 +377,7 @@ export function onIdleResume(
   state.rawWindowCache = null;
   state.distillationSnapshot = null;
   state.cameOutOfIdle = true;
-  state.postIdleCompact = true;
+  state.postIdleCompact = !skipCompact;
   return { triggered: true, idleMs };
 }

package/src/import/detect.ts ADDED Viewed

@@ -0,0 +1,37 @@
+/**
+ * Detection orchestrator — scans all registered providers for conversation
+ * history matching a given project path.
+ */
+import type { DetectionResult } from "./types";
+import { getProviders } from "./providers";
+/**
+ * Ask every registered provider whether it holds conversation history for
+ * `projectPath`, and summarise what each one found.
+ *
+ * Providers that report no sessions, or whose `detect()` throws, are left
+ * out of the result.
+ *
+ * @param projectPath  Project path passed unchanged to each provider's `detect()`.
+ * @returns One entry per provider with at least one session, ordered by
+ *          total messages descending (richest source first).
+ */
+export function detectAll(projectPath: string): DetectionResult[] {
+  const found: DetectionResult[] = [];
+  for (const provider of getProviders()) {
+    try {
+      const sessions = provider.detect(projectPath);
+      if (sessions.length === 0) continue;
+      // Accumulate both totals in one pass over the provider's sessions.
+      let totalTokens = 0;
+      let totalMessages = 0;
+      for (const session of sessions) {
+        totalTokens += session.estimatedTokens;
+        totalMessages += session.messageCount;
+      }
+      found.push({
+        agentName: provider.name,
+        agentDisplayName: provider.displayName,
+        sessions,
+        totalTokens,
+        totalMessages,
+      });
+    } catch {
+      // Provider failed (e.g. corrupt DB, missing directory) — skip it
+      // silently. Deliberately not logged as a warning, so users are not
+      // alarmed about agents they don't use.
+    }
+  }
+  found.sort((x, y) => y.totalMessages - x.totalMessages);
+  return found;
+}