npm - @loreai/core - Versions diffs - 0.11.1 → 0.13.0 - Mend

@loreai/core 0.11.1 → 0.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (94) hide show

package/dist/bun/agents-file.d.ts +29 -8
package/dist/bun/agents-file.d.ts.map +1 -1
package/dist/bun/config.d.ts +1 -0
package/dist/bun/config.d.ts.map +1 -1
package/dist/bun/db.d.ts.map +1 -1
package/dist/bun/distillation.d.ts +55 -0
package/dist/bun/distillation.d.ts.map +1 -1
package/dist/bun/embedding.d.ts +15 -1
package/dist/bun/embedding.d.ts.map +1 -1
package/dist/bun/gradient.d.ts +53 -5
package/dist/bun/gradient.d.ts.map +1 -1
package/dist/bun/index.d.ts +4 -4
package/dist/bun/index.d.ts.map +1 -1
package/dist/bun/index.js +799 -256
package/dist/bun/index.js.map +4 -4
package/dist/bun/pattern-extract.d.ts +36 -0
package/dist/bun/pattern-extract.d.ts.map +1 -0
package/dist/bun/recall.d.ts +1 -0
package/dist/bun/recall.d.ts.map +1 -1
package/dist/bun/search.d.ts +13 -1
package/dist/bun/search.d.ts.map +1 -1
package/dist/bun/temporal.d.ts +15 -0
package/dist/bun/temporal.d.ts.map +1 -1
package/dist/bun/types.d.ts +41 -1
package/dist/bun/types.d.ts.map +1 -1
package/dist/bun/worker-model.d.ts +22 -0
package/dist/bun/worker-model.d.ts.map +1 -1
package/dist/node/agents-file.d.ts +29 -8
package/dist/node/agents-file.d.ts.map +1 -1
package/dist/node/config.d.ts +1 -0
package/dist/node/config.d.ts.map +1 -1
package/dist/node/db.d.ts.map +1 -1
package/dist/node/distillation.d.ts +55 -0
package/dist/node/distillation.d.ts.map +1 -1
package/dist/node/embedding.d.ts +15 -1
package/dist/node/embedding.d.ts.map +1 -1
package/dist/node/gradient.d.ts +53 -5
package/dist/node/gradient.d.ts.map +1 -1
package/dist/node/index.d.ts +4 -4
package/dist/node/index.d.ts.map +1 -1
package/dist/node/index.js +799 -256
package/dist/node/index.js.map +4 -4
package/dist/node/pattern-extract.d.ts +36 -0
package/dist/node/pattern-extract.d.ts.map +1 -0
package/dist/node/recall.d.ts +1 -0
package/dist/node/recall.d.ts.map +1 -1
package/dist/node/search.d.ts +13 -1
package/dist/node/search.d.ts.map +1 -1
package/dist/node/temporal.d.ts +15 -0
package/dist/node/temporal.d.ts.map +1 -1
package/dist/node/types.d.ts +41 -1
package/dist/node/types.d.ts.map +1 -1
package/dist/node/worker-model.d.ts +22 -0
package/dist/node/worker-model.d.ts.map +1 -1
package/dist/types/agents-file.d.ts +29 -8
package/dist/types/agents-file.d.ts.map +1 -1
package/dist/types/config.d.ts +1 -0
package/dist/types/config.d.ts.map +1 -1
package/dist/types/db.d.ts.map +1 -1
package/dist/types/distillation.d.ts +55 -0
package/dist/types/distillation.d.ts.map +1 -1
package/dist/types/embedding.d.ts +15 -1
package/dist/types/embedding.d.ts.map +1 -1
package/dist/types/gradient.d.ts +53 -5
package/dist/types/gradient.d.ts.map +1 -1
package/dist/types/index.d.ts +4 -4
package/dist/types/index.d.ts.map +1 -1
package/dist/types/pattern-extract.d.ts +36 -0
package/dist/types/pattern-extract.d.ts.map +1 -0
package/dist/types/recall.d.ts +1 -0
package/dist/types/recall.d.ts.map +1 -1
package/dist/types/search.d.ts +13 -1
package/dist/types/search.d.ts.map +1 -1
package/dist/types/temporal.d.ts +15 -0
package/dist/types/temporal.d.ts.map +1 -1
package/dist/types/types.d.ts +41 -1
package/dist/types/types.d.ts.map +1 -1
package/dist/types/worker-model.d.ts +22 -0
package/dist/types/worker-model.d.ts.map +1 -1
package/package.json +3 -2
package/src/agents-file.ts +111 -28
package/src/config.ts +25 -18
package/src/curator.ts +2 -2
package/src/db.ts +83 -4
package/src/distillation.ts +270 -27
package/src/embedding.ts +158 -14
package/src/gradient.ts +398 -227
package/src/index.ts +13 -5
package/src/pattern-extract.ts +108 -0
package/src/recall.ts +142 -6
package/src/search.ts +37 -1
package/src/temporal.ts +39 -0
package/src/types.ts +41 -1
package/src/worker-model.ts +142 -5

package/src/db.ts CHANGED Viewed

@@ -1,8 +1,9 @@
 import { Database } from "#db/driver";
 import { join, dirname } from "path";
 import { mkdirSync } from "fs";
+import { homedir } from "os";
-const SCHEMA_VERSION = 11;
+const SCHEMA_VERSION = 12;
 const MIGRATIONS: string[] = [
   `
@@ -333,11 +334,27 @@ const MIGRATIONS: string[] = [
   WHERE content LIKE '%' || char(10) || '[tool:%'
      OR content LIKE '%' || char(10) || '[reasoning] %';
   `,
+  `
+  -- Version 12: Context health diagnostic columns on distillations.
+  --
+  -- r_compression: k/√N where k = distilled token count, N = source token
+  -- count. Values < 1.0 signal likely lossy compression. NULL for rows
+  -- created before this migration or for meta-distillations (gen > 0)
+  -- where the metric is not computed.
+  --
+  -- c_norm: normalized variance of relative-existence weights over source
+  -- message timestamps. Range [0, 1]; 0 = uniform distribution, 1 = attention
+  -- dominated by distant past. NULL for pre-migration rows or meta-distillations.
+  --
+  -- Both columns are nullable REALs — cheap to add, no backfill needed.
+  ALTER TABLE distillations ADD COLUMN r_compression REAL;
+  ALTER TABLE distillations ADD COLUMN c_norm REAL;
+  `,
 ];
 function dataDir() {
   const xdg = process.env.XDG_DATA_HOME;
-  const base = xdg || join(process.env.HOME || "~", ".local", "share");
+  const base = xdg || join(homedir(), ".local", "share"); // base data dir: XDG_DATA_HOME when set and non-empty (`||`), else <home>/.local/share
   return join(base, "opencode-lore");
 }
@@ -396,7 +413,13 @@ function migrate(database: Database) {
         }
       )?.version ?? 0)
     : 0;
-  if (current >= MIGRATIONS.length) return;
+  if (current >= MIGRATIONS.length) {
+    // Schema is at the expected version but a prior partial run may have left
+    // holes (e.g. ALTER TABLE succeeded but CREATE TABLE in the same migration
+    // string was skipped). Run idempotent recovery for known fragile objects.
+    recoverMissingObjects(database);
+    return;
+  }
   for (let i = current; i < MIGRATIONS.length; i++) {
     if (i === VACUUM_MIGRATION_INDEX) {
       // VACUUM cannot run inside a transaction. Run it directly.
@@ -406,12 +429,68 @@ function migrate(database: Database) {
       database.exec("PRAGMA auto_vacuum = INCREMENTAL");
       database.exec("VACUUM");
     } else {
-      database.exec(MIGRATIONS[i]);
+      try {
+        database.exec(MIGRATIONS[i]);
+      } catch (e: unknown) {
+        // Multi-statement migrations can partially fail when an early
+        // statement (e.g. ALTER TABLE ADD COLUMN) hits a duplicate-column
+        // error from a prior partial run. Swallow duplicate-column errors
+        // so the rest of the migration loop and the version bump proceed.
+        // Any genuinely new error is re-thrown.
+        if (
+          e instanceof Error &&
+          /duplicate column name/i.test(e.message)
+        ) {
+          // The ALTER TABLE already applied — run remaining statements in
+          // this migration by stripping the offending ALTER and re-exec'ing.
+          // (Important: database.exec() stops at the first error in a
+          // multi-statement string, so statements after the failing ALTER have not run yet.)
+          const stripped = stripAppliedAlters(MIGRATIONS[i], database);
+          if (stripped.trim()) database.exec(stripped);
+        } else {
+          throw e;
+        }
+      }
     }
   }
   // Update version to latest. Migration 0 inserts version=1 via its own INSERT,
   // but subsequent migrations don't update it, so always normalize to MIGRATIONS.length.
   database.exec(`UPDATE schema_version SET version = ${MIGRATIONS.length}`);
+  // Also run recovery after a migration pass: a DB that was just upgraded may
+  // still have holes from past partial runs. (DBs already at the latest
+  // version are covered by the early-return branch above.)
+  recoverMissingObjects(database);
+}
+/**
+ * Strip ALTER TABLE ADD COLUMN statements for columns that already exist.
+ * Returns the migration string with those statements removed.
+ *
+ * Each `ALTER TABLE <table> ADD COLUMN <column> ...;` statement (matched
+ * case-insensitively, up to and including its terminating semicolon) is
+ * checked against `PRAGMA table_info(<table>)`. It is replaced by an empty
+ * string when a column with that exact name is already present and kept
+ * verbatim otherwise. All other text in the migration is left untouched.
+ *
+ * Only bare `\w+` identifiers are recognised, so statements that use quoted
+ * table or column names are never stripped. The table name is interpolated
+ * directly into the PRAGMA text; the only visible caller passes an entry of
+ * MIGRATIONS, i.e. trusted SQL rather than user input.
+ *
+ * NOTE(review): the column comparison is case-sensitive (`===`); confirm
+ * that migrations always spell column names exactly as they were created.
+ *
+ * @param migration - Multi-statement SQL migration text.
+ * @param database - Open database handle used to inspect existing columns.
+ * @returns The migration text with already-applied ADD COLUMN statements removed.
+ */
+function stripAppliedAlters(migration: string, database: Database): string {
+  return migration.replace(
+    /ALTER\s+TABLE\s+(\w+)\s+ADD\s+COLUMN\s+(\w+)\b[^;]*;/gi,
+    (match, table, column) => {
+      const cols = database
+        .query(`PRAGMA table_info(${table})`)
+        .all() as Array<{ name: string }>;
+      if (cols.some((c) => c.name === column)) return ""; // already exists
+      return match; // keep — this ALTER hasn't been applied
+    },
+  );
+}
+/**
+ * Idempotent recovery for objects that may be missing due to multi-statement
+ * migration partial failures (e.g. ALTER TABLE throws duplicate-column,
+ * aborting the exec before a subsequent CREATE TABLE in the same string).
+ *
+ * Currently the only object restored is the `kv_meta` table (TEXT primary
+ * key `key`, non-null TEXT `value`). It is created with CREATE TABLE IF NOT
+ * EXISTS, so the call changes nothing when the table is already present.
+ *
+ * @param database - Open database handle to run the recovery DDL against.
+ */
+function recoverMissingObjects(database: Database) {
+  database.exec(`
+    CREATE TABLE IF NOT EXISTS kv_meta (
+      key TEXT PRIMARY KEY,
+      value TEXT NOT NULL
+    );
+  `);
 }
 export function close() {

package/src/distillation.ts CHANGED Viewed

@@ -3,7 +3,9 @@ import { config } from "./config";
 import * as temporal from "./temporal";
 import { CHUNK_TERMINATOR } from "./temporal";
 import * as embedding from "./embedding";
+import * as ltm from "./ltm";
 import * as log from "./log";
+import { extractPatterns } from "./pattern-extract";
 import {
   DISTILLATION_SYSTEM,
   distillationUser,
@@ -19,32 +21,125 @@ export { workerSessionIDs };
 type TemporalMessage = temporal.TemporalMessage;
-// Segment detection: group related messages together
-function detectSegments(
+/**
+ * Compression health ratio: k / √N.
+ *
+ * k = distilled token count, N = source token count.
+ * Values < 1.0 signal likely lossy compression (below the square-root
+ * boundary). Values > 1.0 signal relatively faithful compression.
+ *
+ * Based on the "LLM Context Square Root Theory" heuristic from
+ * D7x7z49/llm-context-idea. The specific threshold is unvalidated —
+ * use as a diagnostic signal, not a hard gate.
+ *
+ * @param distilledTokens - k, the token count of the distilled output.
+ * @param sourceTokens - N, the total token count of the source messages.
+ * @returns k / √N, or 0 when `sourceTokens` is zero or negative (the guard
+ *   avoids dividing by zero and taking the root of a negative number). Note
+ *   that this sentinel 0 cannot be told apart from a genuinely tiny ratio and
+ *   falls on the "lossy" side of the 1.0 boundary.
+ */
+export function compressionRatio(
+  distilledTokens: number,
+  sourceTokens: number,
+): number {
+  if (sourceTokens <= 0) return 0;
+  return distilledTokens / Math.sqrt(sourceTokens);
+}
+/**
+ * Segment detection: group related messages into distillation-sized chunks.
+ *
+ * When the message count exceeds `maxSegment`, prefers splitting at the
+ * largest inter-message time gap (if it's ≥ 3× the median gap) to respect
+ * natural conversation boundaries. Falls back to count-based splitting at
+ * `maxSegment` when timestamps are uniform.
+ *
+ * Trailing segments with < 3 messages are merged into the previous segment
+ * to avoid tiny distillation inputs with too little context. A merged
+ * segment can therefore exceed `maxSegment` by up to two messages, and a
+ * time-gap split can yield segments shorter than `maxSegment`.
+ *
+ * Exported for testing; `run()` is the production caller.
+ *
+ * @param messages - Messages to partition. When no split is needed the same
+ *   array instance is returned inside the result (not a copy).
+ * @param maxSegment - Maximum number of messages per segment before a split
+ *   is attempted. NOTE(review): zero or negative values are not guarded; the
+ *   count-based fallback in `splitSegments` would then make no progress —
+ *   confirm callers always pass a positive value.
+ * @returns The messages partitioned into consecutive segments, in order.
+ */
+export function detectSegments(
   messages: TemporalMessage[],
   maxSegment: number,
 ): TemporalMessage[][] {
   if (messages.length <= maxSegment) return [messages];
-  const segments: TemporalMessage[][] = [];
-  let current: TemporalMessage[] = [];
-  for (const msg of messages) {
-    current.push(msg);
-    // Split on segment size limit
-    if (current.length >= maxSegment) {
-      segments.push(current);
-      current = [];
-    }
+  return splitSegments(messages, maxSegment);
+}
+/**
+ * Minimum segment size, in messages. A trailing remainder smaller than this
+ * is merged into the preceding segment, and a time-gap split point is only
+ * considered when it leaves at least this many messages on each side.
+ */
+const MIN_SEGMENT = 3;
+/**
+ * Multiplier for the median gap threshold: a time gap must be at least
+ * this many times the median gap to be used as a split point. The comparison
+ * is inclusive, so a gap exactly equal to the threshold qualifies.
+ */
+const GAP_THRESHOLD_MULTIPLIER = 3;
+function splitSegments(
+  messages: TemporalMessage[],
+  maxSegment: number,
+): TemporalMessage[][] {
+  if (messages.length <= maxSegment) return [messages]; // base case: small enough to be a single segment
+  // Find the split point: prefer the largest time gap if it's significant, else the count-based boundary at maxSegment
+  const splitIdx = findSplitIndex(messages, maxSegment);
+  const left = messages.slice(0, splitIdx);
+  const right = messages.slice(splitIdx);
+  // Recurse on the left part; the right part is handled below depending on its size
+  const result = splitSegments(left, maxSegment);
+  if (right.length < MIN_SEGMENT) {
+    // Merge tiny trailing segment into the last segment (pushed in place, so that segment may exceed maxSegment)
+    result[result.length - 1].push(...right);
+  } else {
+    result.push(...splitSegments(right, maxSegment)); // right part is large enough to stand alone or be split further
   }
-  if (current.length > 0) {
-    // Merge small trailing segment with previous if too small
-    if (current.length < 3 && segments.length > 0) {
-      segments[segments.length - 1].push(...current);
-    } else {
-      segments.push(current);
+  return result;
+}
+/**
+ * Choose where to split an oversized message array.
+ *
+ * If there's a time gap ≥ 3× the median gap AND it falls within a range
+ * that would produce segments of at least MIN_SEGMENT size, use it.
+ * Otherwise fall back to the count-based boundary at `maxSegment`.
+ *
+ * Details worth knowing when reading the code:
+ * - Only the single largest viable gap is considered; on ties the earliest
+ *   one wins because the comparison is strict.
+ * - The "median" is the element at index floor(n / 2) of the sorted gaps,
+ *   i.e. the upper of the two middle values when the gap count is even.
+ * - When the median gap is 0 (e.g. most messages share a timestamp) the
+ *   threshold is also 0, so any positive viable gap is accepted.
+ * - The chosen gap may lie beyond `maxSegment`, leaving an oversized left
+ *   part; `splitSegments` recurses on it.
+ * - NOTE(review): gaps are consecutive `created_at` differences, which
+ *   presumably assumes `messages` is sorted by ascending timestamp —
+ *   confirm against the caller.
+ *
+ * @param messages - Messages to split; the caller only invokes this when
+ *   there are more than `maxSegment` of them.
+ * @param maxSegment - Count-based fallback split position.
+ * @returns Index of the first message that belongs to the right-hand part.
+ */
+function findSplitIndex(
+  messages: TemporalMessage[],
+  maxSegment: number,
+): number {
+  // Compute consecutive time gaps
+  const gaps: Array<{ index: number; gap: number }> = [];
+  for (let i = 1; i < messages.length; i++) {
+    gaps.push({
+      index: i,
+      gap: messages[i].created_at - messages[i - 1].created_at,
+    });
+  }
+  if (gaps.length === 0) return maxSegment;
+  // Find median gap
+  const sortedGaps = gaps.map((g) => g.gap).sort((a, b) => a - b);
+  const medianGap = sortedGaps[Math.floor(sortedGaps.length / 2)];
+  // Find the largest gap that would produce viable segments (≥ MIN_SEGMENT on each side)
+  let bestGap = { index: -1, gap: 0 };
+  for (const g of gaps) {
+    if (
+      g.gap > bestGap.gap &&
+      g.index >= MIN_SEGMENT &&
+      messages.length - g.index >= MIN_SEGMENT
+    ) {
+      bestGap = g;
     }
   }
-  return segments;
+  // Use the time gap if it's significantly larger than median
+  if (bestGap.index > 0 && bestGap.gap >= medianGap * GAP_THRESHOLD_MULTIPLIER) {
+    return bestGap.index;
+  }
+  // Fall back to count-based splitting
+  return maxSegment;
 }
 function formatTime(ms: number): string {
@@ -235,6 +330,10 @@ export type Distillation = {
   generation: number;
   token_count: number;
   created_at: number;
+  /** k/√N compression ratio. NULL for pre-v12 rows or meta-distillations. */
+  r_compression: number | null;
+  /** Temporal clustering [0,1]. NULL for pre-v12 rows or meta-distillations. */
+  c_norm: number | null;
 };
 /**
@@ -258,8 +357,8 @@ export function loadForSession(
 ): Distillation[] {
   const pid = ensureProject(projectPath);
   const sql = includeArchived
-    ? "SELECT id, project_id, session_id, observations, source_ids, generation, token_count, created_at FROM distillations WHERE project_id = ? AND session_id = ? ORDER BY created_at ASC"
-    : "SELECT id, project_id, session_id, observations, source_ids, generation, token_count, created_at FROM distillations WHERE project_id = ? AND session_id = ? AND archived = 0 ORDER BY created_at ASC";
+    ? "SELECT id, project_id, session_id, observations, source_ids, generation, token_count, created_at, r_compression, c_norm FROM distillations WHERE project_id = ? AND session_id = ? ORDER BY created_at ASC"
+    : "SELECT id, project_id, session_id, observations, source_ids, generation, token_count, created_at, r_compression, c_norm FROM distillations WHERE project_id = ? AND session_id = ? AND archived = 0 ORDER BY created_at ASC";
   const rows = db()
     .query(sql)
     .all(pid, sessionID) as Array<{
@@ -271,6 +370,8 @@ export function loadForSession(
     generation: number;
     token_count: number;
     created_at: number;
+    r_compression: number | null;
+    c_norm: number | null;
   }>;
   return rows.map((r) => ({
     ...r,
@@ -284,6 +385,8 @@ function storeDistillation(input: {
   observations: string;
   sourceIDs: string[];
   generation: number;
+  rCompression?: number;
+  cNorm?: number;
 }): string {
   const pid = ensureProject(input.projectPath);
   const id = crypto.randomUUID();
@@ -291,8 +394,8 @@ function storeDistillation(input: {
   const tokens = Math.ceil(input.observations.length / 3);
   db()
     .query(
-      `INSERT INTO distillations (id, project_id, session_id, narrative, facts, observations, source_ids, generation, token_count, created_at)
-       VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
+      `INSERT INTO distillations (id, project_id, session_id, narrative, facts, observations, source_ids, generation, token_count, created_at, r_compression, c_norm)
+       VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
     )
     .run(
       id,
@@ -305,6 +408,8 @@ function storeDistillation(input: {
       input.generation,
       tokens,
       Date.now(),
+      input.rCompression ?? null,
+      input.cNorm ?? null,
     );
   return id;
 }
@@ -327,7 +432,7 @@ function loadGen0(projectPath: string, sessionID: string): Distillation[] {
   const pid = ensureProject(projectPath);
   const rows = db()
     .query(
-      "SELECT id, project_id, session_id, observations, source_ids, generation, token_count, created_at FROM distillations WHERE project_id = ? AND session_id = ? AND generation = 0 AND archived = 0 ORDER BY created_at ASC",
+      "SELECT id, project_id, session_id, observations, source_ids, generation, token_count, created_at, r_compression, c_norm FROM distillations WHERE project_id = ? AND session_id = ? AND generation = 0 AND archived = 0 ORDER BY created_at ASC",
     )
     .all(pid, sessionID) as Array<{
     id: string;
@@ -338,6 +443,8 @@ function loadGen0(projectPath: string, sessionID: string): Distillation[] {
     generation: number;
     token_count: number;
     created_at: number;
+    r_compression: number | null;
+    c_norm: number | null;
   }>;
   return rows.map((r) => ({
     ...r,
@@ -421,6 +528,17 @@ export async function run(input: {
   model?: { providerID: string; modelID: string };
   /** Skip minMessages threshold check — distill whatever is pending */
   force?: boolean;
+  /** Skip meta-distillation even when gen-0 count exceeds the threshold.
+   *  Used when the upstream prompt cache is likely still warm — meta-distillation
+   *  rewrites distillation row IDs, which invalidates the distilled prefix cache
+   *  and causes a cache bust on the next turn. Callers should set this to true
+   *  when `Date.now() - getLastTurnAt(sessionID) < cacheTTL`. */
+  skipMeta?: boolean;
+  /** When true, all LLM calls in this run are marked urgent and bypass the
+   *  batch queue (if one is active). Use for compaction and overflow recovery
+   *  where the caller is blocking on the result. Background/idle distillation
+   *  should leave this false to benefit from batch API 50% cost savings. */
+  urgent?: boolean;
 }): Promise<{ rounds: number; distilled: number }> {
   // Reset orphaned messages (marked distilled by a deleted/migrated distillation)
   const orphans = resetOrphans(input.projectPath, input.sessionID);
@@ -454,6 +572,7 @@ export async function run(input: {
           sessionID: input.sessionID,
           messages: segment,
           model: input.model,
+          urgent: input.urgent,
         });
         if (result) {
           distilled += segment.length;
@@ -462,8 +581,11 @@ export async function run(input: {
       }
     }
-    // Check if meta-distillation is needed
+    // Check if meta-distillation is needed (skip when cache is warm to avoid
+    // prefix cache invalidation — row IDs change after meta-distill, busting
+    // the prompt cache on the next turn).
     if (
+      !input.skipMeta &&
       gen0Count(input.projectPath, input.sessionID) >=
       cfg.distillation.metaThreshold
     ) {
@@ -472,6 +594,7 @@ export async function run(input: {
         projectPath: input.projectPath,
         sessionID: input.sessionID,
         model: input.model,
+        urgent: input.urgent,
       });
       rounds++;
     }
@@ -489,6 +612,7 @@ async function distillSegment(input: {
   sessionID: string;
   messages: TemporalMessage[];
   model?: { providerID: string; modelID: string };
+  urgent?: boolean;
 }): Promise<DistillationResult | null> {
   const prior = latestObservations(input.projectPath, input.sessionID);
   const text = messagesToText(input.messages);
@@ -511,27 +635,59 @@ async function distillSegment(input: {
   const responseText = await input.llm.prompt(
     DISTILLATION_SYSTEM,
     userContent,
-    { model, workerID: "lore-distill" },
+    { model, workerID: "lore-distill", thinking: false, urgent: input.urgent, sessionID: input.sessionID },
   );
   if (!responseText) return null;
   const result = parseDistillationResult(responseText);
   if (!result) return null;
+  // Compute context health metrics before storing.
+  const distilledTokens = Math.ceil(result.observations.length / 3);
+  const sourceTokens = input.messages.reduce((sum, m) => sum + m.tokens, 0);
+  const rComp = compressionRatio(distilledTokens, sourceTokens);
+  const cNorm = temporal.temporalCnorm(input.messages.map((m) => m.created_at));
   const distillId = storeDistillation({
     projectPath: input.projectPath,
     sessionID: input.sessionID,
     observations: result.observations,
     sourceIDs: input.messages.map((m) => m.id),
     generation: 0,
+    rCompression: rComp,
+    cNorm,
   });
   temporal.markDistilled(input.messages.map((m) => m.id));
+  log.info(
+    `distill segment: ${input.messages.length} msgs, ` +
+      `${sourceTokens}→${distilledTokens} tokens, ` +
+      `R=${rComp.toFixed(2)}, C_norm=${cNorm.toFixed(3)}`,
+  );
   // Fire-and-forget: embed the distillation for vector search
   if (embedding.isAvailable()) {
     embedding.embedDistillation(distillId, result.observations);
   }
+  // Fire-and-forget: extract decision/preference patterns → knowledge entries
+  if (config().knowledge.enabled) {
+    for (const pat of extractPatterns(result.observations)) {
+      try {
+        ltm.create({
+          projectPath: input.projectPath,
+          category: pat.category,
+          title: pat.title,
+          content: pat.content,
+          session: input.sessionID,
+          scope: "project",
+        });
+      } catch {
+        // Dedup guard in ltm.create() handles duplicates — swallow errors
+      }
+    }
+  }
   return result;
 }
@@ -548,6 +704,7 @@ export async function metaDistill(input: {
   projectPath: string;
   sessionID: string;
   model?: { providerID: string; modelID: string };
+  urgent?: boolean;
 }): Promise<DistillationResult | null> {
   const existing = loadGen0(input.projectPath, input.sessionID);
@@ -575,7 +732,7 @@ export async function metaDistill(input: {
   const responseText = await input.llm.prompt(
     RECURSIVE_SYSTEM,
     userContent,
-    { model, workerID: "lore-distill" },
+    { model, workerID: "lore-distill", thinking: false, urgent: input.urgent, sessionID: input.sessionID },
   );
   if (!responseText) return null;
@@ -626,5 +783,91 @@ export async function metaDistill(input: {
     embedding.embedDistillation(metaId, result.observations);
   }
+  // Fire-and-forget: extract decision/preference patterns → knowledge entries
+  if (config().knowledge.enabled) {
+    for (const pat of extractPatterns(result.observations)) {
+      try {
+        ltm.create({
+          projectPath: input.projectPath,
+          category: pat.category,
+          title: pat.title,
+          content: pat.content,
+          session: input.sessionID,
+          scope: "project",
+        });
+      } catch {
+        // Dedup guard in ltm.create() handles duplicates — swallow errors
+      }
+    }
+  }
   return result;
 }
+// ---------------------------------------------------------------------------
+// Retroactive metric backfill
+// ---------------------------------------------------------------------------
+/**
+ * Backfill `r_compression` and `c_norm` for distillations that were created
+ * before schema v12 (or before PR #113 added the computation).
+ *
+ * For each distillation with NULL `r_compression`, loads source temporal
+ * messages via `source_ids`, computes `compressionRatio()` and
+ * `temporalCnorm()`, and writes both values back. Skips rows whose
+ * source_ids is empty or whose source messages can no longer be found at all.
+ *
+ * Behaviour to be aware of:
+ * - A row is skipped only when none of its sources are found. If just some
+ *   were pruned, the metrics are computed from the remaining messages, so
+ *   the source token total is understated and the ratio overstated.
+ * - Skipped rows keep NULL metrics and are selected again on every call.
+ * - The SELECT does not filter on generation, so every row with a NULL
+ *   `r_compression` is a candidate.
+ * - `c_norm` is overwritten together with `r_compression`, whatever its
+ *   previous value.
+ * - The summary log line is emitted only when at least one row was updated.
+ * - NOTE(review): the source lookup has no ORDER BY, so timestamps reach
+ *   `temporalCnorm()` in unspecified order — confirm it is order-independent.
+ * - NOTE(review): one bound parameter is used per source id; SQLite caps the
+ *   number of parameters per statement — confirm source lists stay below it.
+ *
+ * Designed to run once at startup — idempotent (only touches NULL rows).
+ * Returns the number of rows updated.
+ */
+export function backfillMetrics(): number {
+  const rows = db()
+    .query(
+      "SELECT id, source_ids, token_count FROM distillations WHERE r_compression IS NULL",
+    )
+    .all() as Array<{
+    id: string;
+    source_ids: string;
+    token_count: number;
+  }>;
+  if (!rows.length) return 0;
+  const update = db().prepare(
+    "UPDATE distillations SET r_compression = ?, c_norm = ? WHERE id = ?",
+  );
+  let updated = 0;
+  for (const row of rows) {
+    const sourceIds = parseSourceIds(row.source_ids);
+    if (!sourceIds.length) continue;
+    // Load source temporal messages — they may have been pruned.
+    const placeholders = sourceIds.map(() => "?").join(",");
+    const sources = db()
+      .query(
+        `SELECT tokens, created_at FROM temporal_messages WHERE id IN (${placeholders})`,
+      )
+      .all(...sourceIds) as Array<{ tokens: number; created_at: number }>;
+    if (!sources.length) continue;
+    const sourceTokens = sources.reduce((sum, s) => sum + s.tokens, 0);
+    const timestamps = sources.map((s) => s.created_at);
+    const rComp = compressionRatio(row.token_count, sourceTokens);
+    const cNorm = temporal.temporalCnorm(timestamps);
+    update.run(rComp, cNorm, row.id);
+    updated++;
+  }
+  if (updated > 0) {
+    log.info(
+      `backfilled metrics for ${updated} distillations (${rows.length - updated} skipped — missing sources)`,
+    );
+  }
+  return updated;
+}