npm - @loreai/core - Versions diffs - 0.12.0 → 0.13.1 - Mend

@loreai/core 0.12.0 → 0.13.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (87) hide show

package/dist/bun/agents-file.d.ts +29 -8
package/dist/bun/agents-file.d.ts.map +1 -1
package/dist/bun/config.d.ts +1 -0
package/dist/bun/config.d.ts.map +1 -1
package/dist/bun/db.d.ts.map +1 -1
package/dist/bun/distillation.d.ts +29 -0
package/dist/bun/distillation.d.ts.map +1 -1
package/dist/bun/embedding.d.ts +15 -1
package/dist/bun/embedding.d.ts.map +1 -1
package/dist/bun/gradient.d.ts +53 -5
package/dist/bun/gradient.d.ts.map +1 -1
package/dist/bun/index.d.ts +4 -4
package/dist/bun/index.d.ts.map +1 -1
package/dist/bun/index.js +696 -243
package/dist/bun/index.js.map +4 -4
package/dist/bun/pattern-extract.d.ts +36 -0
package/dist/bun/pattern-extract.d.ts.map +1 -0
package/dist/bun/recall.d.ts +1 -0
package/dist/bun/recall.d.ts.map +1 -1
package/dist/bun/search.d.ts +13 -1
package/dist/bun/search.d.ts.map +1 -1
package/dist/bun/types.d.ts +41 -1
package/dist/bun/types.d.ts.map +1 -1
package/dist/bun/worker-model.d.ts +22 -0
package/dist/bun/worker-model.d.ts.map +1 -1
package/dist/node/agents-file.d.ts +29 -8
package/dist/node/agents-file.d.ts.map +1 -1
package/dist/node/config.d.ts +1 -0
package/dist/node/config.d.ts.map +1 -1
package/dist/node/db.d.ts.map +1 -1
package/dist/node/distillation.d.ts +29 -0
package/dist/node/distillation.d.ts.map +1 -1
package/dist/node/embedding.d.ts +15 -1
package/dist/node/embedding.d.ts.map +1 -1
package/dist/node/gradient.d.ts +53 -5
package/dist/node/gradient.d.ts.map +1 -1
package/dist/node/index.d.ts +4 -4
package/dist/node/index.d.ts.map +1 -1
package/dist/node/index.js +696 -243
package/dist/node/index.js.map +4 -4
package/dist/node/pattern-extract.d.ts +36 -0
package/dist/node/pattern-extract.d.ts.map +1 -0
package/dist/node/recall.d.ts +1 -0
package/dist/node/recall.d.ts.map +1 -1
package/dist/node/search.d.ts +13 -1
package/dist/node/search.d.ts.map +1 -1
package/dist/node/types.d.ts +41 -1
package/dist/node/types.d.ts.map +1 -1
package/dist/node/worker-model.d.ts +22 -0
package/dist/node/worker-model.d.ts.map +1 -1
package/dist/types/agents-file.d.ts +29 -8
package/dist/types/agents-file.d.ts.map +1 -1
package/dist/types/config.d.ts +1 -0
package/dist/types/config.d.ts.map +1 -1
package/dist/types/db.d.ts.map +1 -1
package/dist/types/distillation.d.ts +29 -0
package/dist/types/distillation.d.ts.map +1 -1
package/dist/types/embedding.d.ts +15 -1
package/dist/types/embedding.d.ts.map +1 -1
package/dist/types/gradient.d.ts +53 -5
package/dist/types/gradient.d.ts.map +1 -1
package/dist/types/index.d.ts +4 -4
package/dist/types/index.d.ts.map +1 -1
package/dist/types/pattern-extract.d.ts +36 -0
package/dist/types/pattern-extract.d.ts.map +1 -0
package/dist/types/recall.d.ts +1 -0
package/dist/types/recall.d.ts.map +1 -1
package/dist/types/search.d.ts +13 -1
package/dist/types/search.d.ts.map +1 -1
package/dist/types/types.d.ts +41 -1
package/dist/types/types.d.ts.map +1 -1
package/dist/types/worker-model.d.ts +22 -0
package/dist/types/worker-model.d.ts.map +1 -1
package/package.json +3 -2
package/src/agents-file.ts +111 -28
package/src/config.ts +25 -18
package/src/curator.ts +2 -2
package/src/db.ts +19 -2
package/src/distillation.ts +152 -15
package/src/embedding.ts +158 -14
package/src/gradient.ts +398 -227
package/src/index.ts +13 -5
package/src/pattern-extract.ts +108 -0
package/src/recall.ts +124 -6
package/src/search.ts +37 -1
package/src/types.ts +41 -1
package/src/worker-model.ts +142 -5

package/src/distillation.ts CHANGED Viewed

@@ -3,7 +3,9 @@ import { config } from "./config";
 import * as temporal from "./temporal";
 import { CHUNK_TERMINATOR } from "./temporal";
 import * as embedding from "./embedding";
+import * as ltm from "./ltm";
 import * as log from "./log";
+import { extractPatterns } from "./pattern-extract";
 import {
   DISTILLATION_SYSTEM,
   distillationUser,
@@ -328,6 +330,10 @@ export type Distillation = {
   generation: number;
   token_count: number;
   created_at: number;
+  /** k/√N compression ratio. NULL for pre-v12 rows or meta-distillations. */
+  r_compression: number | null;
+  /** Temporal clustering [0,1]. NULL for pre-v12 rows or meta-distillations. */
+  c_norm: number | null;
 };
 /**
@@ -351,8 +357,8 @@ export function loadForSession(
 ): Distillation[] {
   const pid = ensureProject(projectPath);
   const sql = includeArchived
-    ? "SELECT id, project_id, session_id, observations, source_ids, generation, token_count, created_at FROM distillations WHERE project_id = ? AND session_id = ? ORDER BY created_at ASC"
-    : "SELECT id, project_id, session_id, observations, source_ids, generation, token_count, created_at FROM distillations WHERE project_id = ? AND session_id = ? AND archived = 0 ORDER BY created_at ASC";
+    ? "SELECT id, project_id, session_id, observations, source_ids, generation, token_count, created_at, r_compression, c_norm FROM distillations WHERE project_id = ? AND session_id = ? ORDER BY created_at ASC"
+    : "SELECT id, project_id, session_id, observations, source_ids, generation, token_count, created_at, r_compression, c_norm FROM distillations WHERE project_id = ? AND session_id = ? AND archived = 0 ORDER BY created_at ASC";
   const rows = db()
     .query(sql)
     .all(pid, sessionID) as Array<{
@@ -364,6 +370,8 @@ export function loadForSession(
     generation: number;
     token_count: number;
     created_at: number;
+    r_compression: number | null;
+    c_norm: number | null;
   }>;
   return rows.map((r) => ({
     ...r,
@@ -377,6 +385,8 @@ function storeDistillation(input: {
   observations: string;
   sourceIDs: string[];
   generation: number;
+  rCompression?: number;
+  cNorm?: number;
 }): string {
   const pid = ensureProject(input.projectPath);
   const id = crypto.randomUUID();
@@ -384,8 +394,8 @@ function storeDistillation(input: {
   const tokens = Math.ceil(input.observations.length / 3);
   db()
     .query(
-      `INSERT INTO distillations (id, project_id, session_id, narrative, facts, observations, source_ids, generation, token_count, created_at)
-       VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
+      `INSERT INTO distillations (id, project_id, session_id, narrative, facts, observations, source_ids, generation, token_count, created_at, r_compression, c_norm)
+       VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
     )
     .run(
       id,
@@ -398,6 +408,8 @@ function storeDistillation(input: {
       input.generation,
       tokens,
       Date.now(),
+      input.rCompression ?? null,
+      input.cNorm ?? null,
     );
   return id;
 }
@@ -420,7 +432,7 @@ function loadGen0(projectPath: string, sessionID: string): Distillation[] {
   const pid = ensureProject(projectPath);
   const rows = db()
     .query(
-      "SELECT id, project_id, session_id, observations, source_ids, generation, token_count, created_at FROM distillations WHERE project_id = ? AND session_id = ? AND generation = 0 AND archived = 0 ORDER BY created_at ASC",
+      "SELECT id, project_id, session_id, observations, source_ids, generation, token_count, created_at, r_compression, c_norm FROM distillations WHERE project_id = ? AND session_id = ? AND generation = 0 AND archived = 0 ORDER BY created_at ASC",
     )
     .all(pid, sessionID) as Array<{
     id: string;
@@ -431,6 +443,8 @@ function loadGen0(projectPath: string, sessionID: string): Distillation[] {
     generation: number;
     token_count: number;
     created_at: number;
+    r_compression: number | null;
+    c_norm: number | null;
   }>;
   return rows.map((r) => ({
     ...r,
@@ -514,6 +528,17 @@ export async function run(input: {
   model?: { providerID: string; modelID: string };
   /** Skip minMessages threshold check — distill whatever is pending */
   force?: boolean;
+  /** Skip meta-distillation even when gen-0 count exceeds the threshold.
+   *  Used when the upstream prompt cache is likely still warm — meta-distillation
+   *  rewrites distillation row IDs, which invalidates the distilled prefix cache
+   *  and causes a cache bust on the next turn. Callers should set this to true
+   *  when `Date.now() - getLastTurnAt(sessionID) < cacheTTL`. */
+  skipMeta?: boolean;
+  /** When true, all LLM calls in this run are marked urgent and bypass the
+   *  batch queue (if one is active). Use for compaction and overflow recovery
+   *  where the caller is blocking on the result. Background/idle distillation
+   *  should leave this false to benefit from batch API 50% cost savings. */
+  urgent?: boolean;
 }): Promise<{ rounds: number; distilled: number }> {
   // Reset orphaned messages (marked distilled by a deleted/migrated distillation)
   const orphans = resetOrphans(input.projectPath, input.sessionID);
@@ -547,6 +572,7 @@ export async function run(input: {
           sessionID: input.sessionID,
           messages: segment,
           model: input.model,
+          urgent: input.urgent,
         });
         if (result) {
           distilled += segment.length;
@@ -555,8 +581,11 @@ export async function run(input: {
       }
     }
-    // Check if meta-distillation is needed
+    // Check if meta-distillation is needed (skip when cache is warm to avoid
+    // prefix cache invalidation — row IDs change after meta-distill, busting
+    // the prompt cache on the next turn).
     if (
+      !input.skipMeta &&
       gen0Count(input.projectPath, input.sessionID) >=
       cfg.distillation.metaThreshold
     ) {
@@ -565,6 +594,7 @@ export async function run(input: {
         projectPath: input.projectPath,
         sessionID: input.sessionID,
         model: input.model,
+        urgent: input.urgent,
       });
       rounds++;
     }
@@ -582,6 +612,7 @@ async function distillSegment(input: {
   sessionID: string;
   messages: TemporalMessage[];
   model?: { providerID: string; modelID: string };
+  urgent?: boolean;
 }): Promise<DistillationResult | null> {
   const prior = latestObservations(input.projectPath, input.sessionID);
   const text = messagesToText(input.messages);
@@ -604,29 +635,30 @@ async function distillSegment(input: {
   const responseText = await input.llm.prompt(
     DISTILLATION_SYSTEM,
     userContent,
-    { model, workerID: "lore-distill" },
+    { model, workerID: "lore-distill", thinking: false, urgent: input.urgent, sessionID: input.sessionID },
   );
   if (!responseText) return null;
   const result = parseDistillationResult(responseText);
   if (!result) return null;
+  // Compute context health metrics before storing.
+  const distilledTokens = Math.ceil(result.observations.length / 3);
+  const sourceTokens = input.messages.reduce((sum, m) => sum + m.tokens, 0);
+  const rComp = compressionRatio(distilledTokens, sourceTokens);
+  const cNorm = temporal.temporalCnorm(input.messages.map((m) => m.created_at));
   const distillId = storeDistillation({
     projectPath: input.projectPath,
     sessionID: input.sessionID,
     observations: result.observations,
     sourceIDs: input.messages.map((m) => m.id),
     generation: 0,
+    rCompression: rComp,
+    cNorm,
   });
   temporal.markDistilled(input.messages.map((m) => m.id));
-  // Diagnostic: log compression health and temporal clustering metrics.
-  // R_compression (k/√N): < 1.0 signals likely lossy distillation.
-  // C_norm: 0 = uniform timestamps, 1 = dominated by distant past.
-  const distilledTokens = Math.ceil(result.observations.length / 3);
-  const sourceTokens = input.messages.reduce((sum, m) => sum + m.tokens, 0);
-  const rComp = compressionRatio(distilledTokens, sourceTokens);
-  const cNorm = temporal.temporalCnorm(input.messages.map((m) => m.created_at));
   log.info(
     `distill segment: ${input.messages.length} msgs, ` +
       `${sourceTokens}→${distilledTokens} tokens, ` +
@@ -638,6 +670,24 @@ async function distillSegment(input: {
     embedding.embedDistillation(distillId, result.observations);
   }
+  // Fire-and-forget: extract decision/preference patterns → knowledge entries
+  if (config().knowledge.enabled) {
+    for (const pat of extractPatterns(result.observations)) {
+      try {
+        ltm.create({
+          projectPath: input.projectPath,
+          category: pat.category,
+          title: pat.title,
+          content: pat.content,
+          session: input.sessionID,
+          scope: "project",
+        });
+      } catch {
+        // Dedup guard in ltm.create() handles duplicates — swallow errors
+      }
+    }
+  }
   return result;
 }
@@ -654,6 +704,7 @@ export async function metaDistill(input: {
   projectPath: string;
   sessionID: string;
   model?: { providerID: string; modelID: string };
+  urgent?: boolean;
 }): Promise<DistillationResult | null> {
   const existing = loadGen0(input.projectPath, input.sessionID);
@@ -681,7 +732,7 @@ export async function metaDistill(input: {
   const responseText = await input.llm.prompt(
     RECURSIVE_SYSTEM,
     userContent,
-    { model, workerID: "lore-distill" },
+    { model, workerID: "lore-distill", thinking: false, urgent: input.urgent, sessionID: input.sessionID },
   );
   if (!responseText) return null;
@@ -732,5 +783,91 @@ export async function metaDistill(input: {
     embedding.embedDistillation(metaId, result.observations);
   }
+  // Fire-and-forget: extract decision/preference patterns → knowledge entries
+  if (config().knowledge.enabled) {
+    for (const pat of extractPatterns(result.observations)) {
+      try {
+        ltm.create({
+          projectPath: input.projectPath,
+          category: pat.category,
+          title: pat.title,
+          content: pat.content,
+          session: input.sessionID,
+          scope: "project",
+        });
+      } catch {
+        // Dedup guard in ltm.create() handles duplicates — swallow errors
+      }
+    }
+  }
   return result;
 }
+// ---------------------------------------------------------------------------
+// Retroactive metric backfill
+// ---------------------------------------------------------------------------
+/**
+ * Backfill `r_compression` and `c_norm` for distillations that were created
+ * before schema v12 (or before PR #113 added the computation).
+ *
+ * For each distillation with NULL `r_compression`, loads the source temporal
+ * messages named by `source_ids`, computes `compressionRatio()` and
+ * `temporal.temporalCnorm()`, and writes the values back.
+ *
+ * A row is skipped (and left NULL) when `source_ids` is empty or when ANY of
+ * its source messages can no longer be found. Metrics computed from a partial
+ * source set would understate the source token count and therefore misreport
+ * the compression ratio, so an incomplete row is treated like a fully pruned
+ * one.
+ *
+ * Designed to run once at startup — idempotent (only touches NULL rows).
+ *
+ * @returns The number of rows updated.
+ */
+export function backfillMetrics(): number {
+  const rows = db()
+    .query(
+      "SELECT id, source_ids, token_count FROM distillations WHERE r_compression IS NULL",
+    )
+    .all() as Array<{
+    id: string;
+    source_ids: string;
+    token_count: number;
+  }>;
+  if (!rows.length) return 0;
+  const update = db().prepare(
+    "UPDATE distillations SET r_compression = ?, c_norm = ? WHERE id = ?",
+  );
+  let updated = 0;
+  for (const row of rows) {
+    // De-duplicate so the completeness check below compares like with like:
+    // an IN (...) lookup yields each matching message row once, however many
+    // times its id is listed.
+    const sourceIds = [...new Set(parseSourceIds(row.source_ids))];
+    if (!sourceIds.length) continue;
+    // Load source temporal messages — they may have been pruned.
+    const placeholders = sourceIds.map(() => "?").join(",");
+    const sources = db()
+      .query(
+        `SELECT tokens, created_at FROM temporal_messages WHERE id IN (${placeholders})`,
+      )
+      .all(...sourceIds) as Array<{ tokens: number; created_at: number }>;
+    // Skip unless every source message is still present (this also covers the
+    // "all pruned" case, since sourceIds is non-empty here).
+    if (sources.length < sourceIds.length) continue;
+    const sourceTokens = sources.reduce((sum, s) => sum + s.tokens, 0);
+    const timestamps = sources.map((s) => s.created_at);
+    const rComp = compressionRatio(row.token_count, sourceTokens);
+    const cNorm = temporal.temporalCnorm(timestamps);
+    update.run(rComp, cNorm, row.id);
+    updated++;
+  }
+  if (updated > 0) {
+    log.info(
+      `backfilled metrics for ${updated} distillations (${rows.length - updated} skipped — missing sources)`,
+    );
+  }
+  return updated;
+}

package/src/embedding.ts CHANGED Viewed

@@ -132,12 +132,82 @@ class OpenAIProvider implements EmbeddingProvider {
   }
 }
+// ---------------------------------------------------------------------------
+// Local provider (fastembed + ONNX Runtime)
+// ---------------------------------------------------------------------------
+/**
+ * Local embedding provider using fastembed (bge-small-en-v1.5 by default).
+ *
+ * No API key required — runs entirely on-device via ONNX Runtime.
+ * Model files are downloaded on first use (~33MB) and cached in
+ * `~/.cache/fastembed`. Subsequent inits load from disk in ~350ms.
+ *
+ * Uses dynamic import so the module is only loaded when the "local"
+ * provider is actually used. Constructing the provider never touches
+ * fastembed: a missing install surfaces as a rejected promise from
+ * `embed()`, not as an exception from the constructor.
+ */
+class LocalProvider implements EmbeddingProvider {
+  readonly maxBatchSize = 256;
+  private model: unknown | null = null;
+  private initPromise: Promise<unknown> | null = null;
+  private modelName: string;
+  /** @param modelName Key of fastembed's `EmbeddingModel` enum, or a raw model name passed through unchanged. */
+  constructor(modelName: string) {
+    this.modelName = modelName;
+  }
+  /** Import fastembed and initialise the configured model. */
+  private async loadModel(): Promise<unknown> {
+    const { EmbeddingModel, FlagEmbedding } = await import("fastembed");
+    // Map config model string to EmbeddingModel enum value.
+    // If the configured model matches an enum key, use it; otherwise try
+    // the raw string as a model name (CUSTOM model support in fastembed).
+    const enumValue = (EmbeddingModel as Record<string, string>)[this.modelName];
+    // fastembed's init() has overloaded signatures expecting specific enum
+    // members, but we resolve the model dynamically from config; cast to
+    // satisfy the type.
+    const m = await FlagEmbedding.init({
+      model: enumValue ?? this.modelName,
+    } as { model: typeof EmbeddingModel.BGESmallENV15 });
+    this.model = m;
+    return m;
+  }
+  /**
+   * Return the initialised model, starting initialisation on first use.
+   * Concurrent callers share one in-flight init. A failed init is not
+   * cached: the stored promise is cleared so a later call can retry instead
+   * of receiving the same rejection forever.
+   */
+  private async getModel(): Promise<unknown> {
+    if (this.model) return this.model;
+    if (!this.initPromise) {
+      this.initPromise = this.loadModel().catch((err: unknown) => {
+        this.initPromise = null;
+        throw err;
+      });
+    }
+    return this.initPromise;
+  }
+  /**
+   * Embed `texts`, returning one vector per input in input order.
+   *
+   * @param texts Inputs to embed; an empty array returns `[]` without loading the model.
+   * @param inputType `"query"` sends every text through `queryEmbed`;
+   *   `"document"` sends the whole list through `passageEmbed`.
+   * @returns One `Float32Array` per input text.
+   */
+  async embed(texts: string[], inputType: "document" | "query"): Promise<Float32Array[]> {
+    if (texts.length === 0) return [];
+    const model = (await this.getModel()) as {
+      queryEmbed(text: string): Promise<number[]>;
+      passageEmbed(texts: string[], batchSize?: number): AsyncGenerator<number[][]>;
+    };
+    const results: Float32Array[] = [];
+    if (inputType === "query") {
+      // queryEmbed takes a single string, so embed each query on its own
+      // rather than letting multi-text queries fall through to passageEmbed.
+      for (const text of texts) {
+        results.push(new Float32Array(await model.queryEmbed(text)));
+      }
+      return results;
+    }
+    // passageEmbed returns an async generator of batches
+    for await (const batch of model.passageEmbed(texts, this.maxBatchSize)) {
+      for (const vec of batch) {
+        results.push(new Float32Array(vec));
+      }
+    }
+    return results;
+  }
+}
 // ---------------------------------------------------------------------------
 // Provider resolution
 // ---------------------------------------------------------------------------
 /** Default models per provider — used when config doesn't override. */
 const PROVIDER_DEFAULTS: Record<string, { model: string; dimensions: number }> = {
+  local: { model: "BGESmallENV15", dimensions: 384 }, // LocalProvider looks the model name up as a key of fastembed's EmbeddingModel enum
   voyage: { model: "voyage-code-3", dimensions: 1024 },
   openai: { model: "text-embedding-3-small", dimensions: 1536 },
 };
@@ -165,23 +235,36 @@ function getProvider(): EmbeddingProvider | null {
   }
   const providerName = cfg.provider;
-  const apiKey = getProviderApiKey(providerName);
-  if (!apiKey) {
-    cachedProvider = null;
-    return null;
-  }
-  const defaults = PROVIDER_DEFAULTS[providerName];
-  const model = cfg.model === defaults?.model ? cfg.model : cfg.model;
-  const dimensions = cfg.dimensions;
+  const model = cfg.model;
   switch (providerName) {
-    case "voyage":
-      cachedProvider = new VoyageProvider(apiKey, model, dimensions);
+    case "local": {
+      try {
+        cachedProvider = new LocalProvider(model);
+      } catch {
+        log.info("local embedding provider unavailable (fastembed not installed)");
+        cachedProvider = null;
+      }
       break;
-    case "openai":
-      cachedProvider = new OpenAIProvider(apiKey, model, dimensions);
+    }
+    case "voyage": {
+      const apiKey = getProviderApiKey(providerName);
+      if (!apiKey) {
+        cachedProvider = null;
+        return null;
+      }
+      cachedProvider = new VoyageProvider(apiKey, model, cfg.dimensions);
       break;
+    }
+    case "openai": {
+      const apiKey = getProviderApiKey(providerName);
+      if (!apiKey) {
+        cachedProvider = null;
+        return null;
+      }
+      cachedProvider = new OpenAIProvider(apiKey, model, cfg.dimensions);
+      break;
+    }
     default:
       log.info(`unknown embedding provider: ${providerName}`);
       cachedProvider = null;
@@ -433,13 +516,74 @@ export function checkConfigChange(): boolean {
   return true;
 }
+// ---------------------------------------------------------------------------
+// Startup backfill — single entry point for all hosts
+// ---------------------------------------------------------------------------
+/**
+ * Run all embedding backfills and log coverage stats.
+ *
+ * This is the canonical entry point that every host adapter (OpenCode, Pi,
+ * future ACP) should call once during init. It:
+ *   1. Returns immediately when embeddings are not available
+ *   2. Backfills knowledge entries missing embeddings (via
+ *      `backfillEmbeddings()`)
+ *   3. Backfills distillations missing embeddings (via
+ *      `backfillDistillationEmbeddings()`)
+ *   4. Logs a one-line coverage summary through `log.info`
+ *
+ * Coverage is reported over the same population for numerator and
+ * denominator: knowledge rows with `confidence > 0.2`, and distillations
+ * that are non-archived with non-empty observations.
+ *
+ * Fire-and-forget: callers should `.catch()` — embedding failures must not
+ * block plugin initialization.
+ */
+export async function runStartupBackfill(): Promise<void> {
+  if (!isAvailable()) return;
+  const knowledgeEmbedded = await backfillEmbeddings();
+  const distillationEmbedded = await backfillDistillationEmbeddings();
+  // Run a `SELECT COUNT(*) as n ...` query and return n.
+  const count = (sql: string): number =>
+    (db().query(sql).get() as { n: number }).n;
+  const kTotal = count(
+    "SELECT COUNT(*) as n FROM knowledge WHERE confidence > 0.2",
+  );
+  const kWithEmb = count(
+    "SELECT COUNT(*) as n FROM knowledge WHERE embedding IS NOT NULL AND confidence > 0.2",
+  );
+  const dTotal = count(
+    "SELECT COUNT(*) as n FROM distillations WHERE archived = 0 AND observations != ''",
+  );
+  // Same filters as dTotal so the reported ratio can never exceed 1.
+  const dWithEmb = count(
+    "SELECT COUNT(*) as n FROM distillations WHERE embedding IS NOT NULL AND archived = 0 AND observations != ''",
+  );
+  const parts: string[] = [];
+  if (knowledgeEmbedded > 0 || distillationEmbedded > 0) {
+    parts.push(`backfilled ${knowledgeEmbedded} knowledge + ${distillationEmbedded} distillations`);
+  }
+  parts.push(
+    `coverage: knowledge ${kWithEmb}/${kTotal}, distillations ${dWithEmb}/${dTotal}`,
+  );
+  log.info(`embedding startup: ${parts.join("; ")}`);
+}
 // ---------------------------------------------------------------------------
 // Backfill — knowledge
 // ---------------------------------------------------------------------------
 /**
  * Embed all knowledge entries that are missing embeddings.
- * Called on startup when embeddings are first enabled.
+ * Called by `runStartupBackfill()`.
  * Also handles config changes: if provider/model/dimensions changed, clears
  * stale embeddings first, then re-embeds all entries.
  * Returns the number of entries embedded.