npm - @loreai/core - Versions diffs - 0.18.0 → 0.20.0 - Mend

@loreai/core 0.18.0 → 0.20.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (88) hide show

package/dist/bun/agents-file.d.ts.map +1 -1
package/dist/bun/config.d.ts.map +1 -1
package/dist/bun/curator.d.ts.map +1 -1
package/dist/bun/db.d.ts +86 -1
package/dist/bun/db.d.ts.map +1 -1
package/dist/bun/distillation.d.ts +2 -13
package/dist/bun/distillation.d.ts.map +1 -1
package/dist/bun/embedding.d.ts +5 -1
package/dist/bun/embedding.d.ts.map +1 -1
package/dist/bun/git.d.ts.map +1 -1
package/dist/bun/gradient.d.ts +13 -1
package/dist/bun/gradient.d.ts.map +1 -1
package/dist/bun/hosted.d.ts +36 -0
package/dist/bun/hosted.d.ts.map +1 -0
package/dist/bun/index.d.ts +3 -2
package/dist/bun/index.d.ts.map +1 -1
package/dist/bun/index.js +1049 -247
package/dist/bun/index.js.map +4 -4
package/dist/bun/lat-reader.d.ts.map +1 -1
package/dist/bun/ltm.d.ts +99 -5
package/dist/bun/ltm.d.ts.map +1 -1
package/dist/bun/session-limiter.d.ts +26 -0
package/dist/bun/session-limiter.d.ts.map +1 -0
package/dist/bun/temporal.d.ts +2 -0
package/dist/bun/temporal.d.ts.map +1 -1
package/dist/node/agents-file.d.ts.map +1 -1
package/dist/node/config.d.ts.map +1 -1
package/dist/node/curator.d.ts.map +1 -1
package/dist/node/db.d.ts +86 -1
package/dist/node/db.d.ts.map +1 -1
package/dist/node/distillation.d.ts +2 -13
package/dist/node/distillation.d.ts.map +1 -1
package/dist/node/embedding.d.ts +5 -1
package/dist/node/embedding.d.ts.map +1 -1
package/dist/node/git.d.ts.map +1 -1
package/dist/node/gradient.d.ts +13 -1
package/dist/node/gradient.d.ts.map +1 -1
package/dist/node/hosted.d.ts +36 -0
package/dist/node/hosted.d.ts.map +1 -0
package/dist/node/index.d.ts +3 -2
package/dist/node/index.d.ts.map +1 -1
package/dist/node/index.js +1049 -247
package/dist/node/index.js.map +4 -4
package/dist/node/lat-reader.d.ts.map +1 -1
package/dist/node/ltm.d.ts +99 -5
package/dist/node/ltm.d.ts.map +1 -1
package/dist/node/session-limiter.d.ts +26 -0
package/dist/node/session-limiter.d.ts.map +1 -0
package/dist/node/temporal.d.ts +2 -0
package/dist/node/temporal.d.ts.map +1 -1
package/dist/types/agents-file.d.ts.map +1 -1
package/dist/types/config.d.ts.map +1 -1
package/dist/types/curator.d.ts.map +1 -1
package/dist/types/db.d.ts +86 -1
package/dist/types/db.d.ts.map +1 -1
package/dist/types/distillation.d.ts +2 -13
package/dist/types/distillation.d.ts.map +1 -1
package/dist/types/embedding.d.ts +5 -1
package/dist/types/embedding.d.ts.map +1 -1
package/dist/types/git.d.ts.map +1 -1
package/dist/types/gradient.d.ts +13 -1
package/dist/types/gradient.d.ts.map +1 -1
package/dist/types/hosted.d.ts +36 -0
package/dist/types/hosted.d.ts.map +1 -0
package/dist/types/index.d.ts +3 -2
package/dist/types/index.d.ts.map +1 -1
package/dist/types/lat-reader.d.ts.map +1 -1
package/dist/types/ltm.d.ts +99 -5
package/dist/types/ltm.d.ts.map +1 -1
package/dist/types/session-limiter.d.ts +26 -0
package/dist/types/session-limiter.d.ts.map +1 -0
package/dist/types/temporal.d.ts +2 -0
package/dist/types/temporal.d.ts.map +1 -1
package/package.json +3 -1
package/src/agents-file.ts +12 -0
package/src/config.ts +10 -5
package/src/curator.ts +54 -2
package/src/db.ts +386 -6
package/src/distillation.ts +55 -14
package/src/embedding.ts +71 -8
package/src/git.ts +4 -0
package/src/gradient.ts +227 -74
package/src/hosted.ts +46 -0
package/src/index.ts +12 -0
package/src/lat-reader.ts +4 -0
package/src/ltm.ts +480 -45
package/src/session-limiter.ts +47 -0
package/src/temporal.ts +10 -0

package/src/embedding.ts CHANGED Viewed

@@ -28,6 +28,27 @@ import type {
  *  embedding calls but bounded enough to avoid minutes-long hangs. */
 const EMBED_TIMEOUT_MS = 10_000;
+/**
+ * Safe per-text character limit for local ONNX inference. The Nomic v1.5 model
+ * supports up to 8192 tokens, but ONNX runtime OOMs on inputs near that ceiling
+ * (error codes 284432024, 287180544, 144786472). Pre-truncating to ~4096 tokens
+ * worth of characters keeps the tensor well within safe allocation bounds.
+ * The worker's `truncation: true` remains as a safety net.
+ */
+const LOCAL_MAX_CHARS = 4096 * 4; // ~4096 tokens × ~4 chars/token
+/**
+ * Truncate a string to at most LOCAL_MAX_CHARS UTF-16 code units without splitting a
+ * surrogate pair. If the last retained code unit is a high surrogate (0xD800-0xDBFF),
+ * it is dropped too, so the cut never separates it from its low surrogate.
+ */
+function safeLocalTruncate(text: string): string {
+  if (text.length <= LOCAL_MAX_CHARS) return text;
+  let end = LOCAL_MAX_CHARS;
+  const code = text.charCodeAt(end - 1);
+  if (code >= 0xD800 && code <= 0xDBFF) end--; // don't split surrogate pair
+  return text.slice(0, end);
+}
 // ---------------------------------------------------------------------------
 // Provider interface
 // ---------------------------------------------------------------------------
@@ -272,7 +293,21 @@ class LocalProvider implements EmbeddingProvider {
           workerUrl = vendorWorkerUrl;
         }
       } else {
-        workerUrl = new URL(`./embedding-worker${import.meta.url.endsWith(".ts") ? ".ts" : ".js"}`, import.meta.url);
+        // In CJS bundles (gateway npm package), esbuild shims import.meta as
+        // an empty object {}, so import.meta.url is undefined. Fall back to
+        // __filename which esbuild defines in CJS output.
+        const selfUrl = typeof import.meta.url === "string" ? import.meta.url : undefined;
+        if (selfUrl) {
+          workerUrl = new URL(
+            `./embedding-worker${selfUrl.endsWith(".ts") ? ".ts" : ".js"}`,
+            selfUrl,
+          );
+        } else {
+          // CJS fallback: __filename is defined by esbuild's CJS output.
+          // The embedding-worker.cjs is built alongside the main bundle.
+          const { pathToFileURL } = await import("node:url");
+          workerUrl = new URL("./embedding-worker.cjs", pathToFileURL(__filename));
+        }
       }
       const vendor = vendorModelInfo();
@@ -318,9 +353,10 @@ class LocalProvider implements EmbeddingProvider {
             localProviderKnownBroken = true;
             if (!localProviderErrorLogged) {
               localProviderErrorLogged = true;
-              log.info(
+              log.error(
                 `local embedding provider failed to init: ${msg.error}. ` +
                   `Set VOYAGE_API_KEY/OPENAI_API_KEY for automatic remote fallback.`,
+                new Error(`embedding worker init failed: ${msg.error}`),
               );
             }
             for (const [, p] of this.pendingRequests) {
@@ -337,6 +373,7 @@ class LocalProvider implements EmbeddingProvider {
       this.worker.on("error", (err: Error) => {
         this.workerInitError = err.message;
         this.workerReady = false;
+        log.error("embedding worker crashed:", err);
         for (const [, p] of this.pendingRequests) {
           p.reject(new LocalProviderUnavailableError(err));
         }
@@ -347,6 +384,10 @@ class LocalProvider implements EmbeddingProvider {
       this.worker.on("exit", (code) => {
         if (code !== 0 && !this.workerInitError) {
           this.workerInitError = `embedding worker exited with code ${code}`;
+          log.error(
+            this.workerInitError,
+            new Error(this.workerInitError),
+          );
         }
         this.workerReady = false;
         for (const [, p] of this.pendingRequests) {
@@ -382,9 +423,13 @@ class LocalProvider implements EmbeddingProvider {
   async embed(texts: string[], inputType: "document" | "query"): Promise<Float32Array[]> {
     await this.ensureWorker();
+    // Pre-truncate texts that exceed the safe ONNX inference limit.
+    // This prevents OOM on single inputs near the model's 8192-token max.
+    const truncated = texts.map(safeLocalTruncate);
     // Prepend Nomic task instruction prefix.
     const prefix = inputType === "document" ? "search_document: " : "search_query: ";
-    const prefixed = texts.map((t) => prefix + t);
+    const prefixed = truncated.map((t) => prefix + t);
     const id = this.nextRequestId++;
     // Recall queries (single query-type texts) get high priority so they
@@ -706,14 +751,25 @@ type VectorHit = { id: string; similarity: number };
  * Search all knowledge entries with embeddings by cosine similarity.
  * Returns top-k entries sorted by similarity descending.
  * Pure brute-force — fine for <100 entries (microseconds).
+ *
+ * @param excludeCategories  Optional category names to exclude from results.
+ *   Useful when preferences are injected in a separate system block and
+ *   shouldn't compete for vector search slots with context-bound entries.
  */
 export function vectorSearch(
   queryEmbedding: Float32Array,
   limit = 10,
+  excludeCategories?: string[],
 ): VectorHit[] {
+  let sql = "SELECT id, embedding FROM knowledge WHERE embedding IS NOT NULL AND confidence > 0.2";
+  const params: string[] = [];
+  if (excludeCategories?.length) {
+    sql += ` AND category NOT IN (${excludeCategories.map(() => "?").join(",")})`;
+    params.push(...excludeCategories);
+  }
   const rows = db()
-    .query("SELECT id, embedding FROM knowledge WHERE embedding IS NOT NULL AND confidence > 0.2")
-    .all() as Array<{ id: string; embedding: Buffer }>;
+    .query(sql)
+    .all(...params) as Array<{ id: string; embedding: Buffer }>;
   const scored: VectorHit[] = [];
   for (const row of rows) {
@@ -817,6 +873,7 @@ export function embedKnowledgeEntry(
   title: string,
   content: string,
 ): void {
+  if (!isAvailable()) return;
   const text = `${title}\n${content}`;
   embed([text], "document")
     .then(([vec]) => {
@@ -825,7 +882,7 @@ export function embedKnowledgeEntry(
         .run(toBlob(vec), id);
     })
     .catch((err) => {
-      log.info("embedding failed for knowledge entry", id, ":", err);
+      log.error("embedding failed for knowledge entry", id, ":", err);
     });
 }
@@ -838,6 +895,7 @@ export function embedDistillation(
   id: string,
   observations: string,
 ): void {
+  if (!isAvailable()) return;
   embed([observations], "document")
     .then(([vec]) => {
       db()
@@ -845,7 +903,7 @@ export function embedDistillation(
         .run(toBlob(vec), id);
     })
     .catch((err) => {
-      log.info("embedding failed for distillation", id, ":", err);
+      log.error("embedding failed for distillation", id, ":", err);
     });
 }
@@ -859,6 +917,7 @@ export function embedTemporalMessage(
   id: string,
   content: string,
 ): void {
+  if (!isAvailable()) return;
   // Skip very short messages — they don't carry enough semantic signal
   // to be useful in vector search and would waste embedding capacity.
   if (content.length < 50) return;
@@ -870,7 +929,7 @@ export function embedTemporalMessage(
         .run(toBlob(vec), id);
     })
     .catch((err) => {
-      log.info("embedding failed for temporal message", id, ":", err);
+      log.error("embedding failed for temporal message", id, ":", err);
     });
 }
@@ -1174,6 +1233,8 @@ export async function backfillEmbeddings(): Promise<number> {
     } catch (err) {
       // log.error sends to Sentry via captureException
       log.error(`embedding backfill batch failed (${batch.length} items):`, err);
+      // Provider is dead — no point retrying remaining batches.
+      if (err instanceof LocalProviderUnavailableError) break;
     }
     // No yieldToEventLoop() needed — embed() is truly async (worker thread).
   }
@@ -1234,6 +1295,8 @@ export async function backfillDistillationEmbeddings(): Promise<number> {
     } catch (err) {
       // log.error sends to Sentry via captureException
       log.error(`distillation embedding backfill batch failed (${batch.length} items):`, err);
+      // Provider is dead — no point retrying remaining batches.
+      if (err instanceof LocalProviderUnavailableError) break;
     }
     if (embedded >= nextProgressAt) {

package/src/git.ts CHANGED Viewed

@@ -13,6 +13,7 @@
  */
 import { execSync } from "child_process";
+import { isHostedMode } from "./hosted";
 // ---------------------------------------------------------------------------
 // URL normalization
@@ -95,6 +96,9 @@ export function clearGitRemoteCache(): void {
  * subprocess calls — `git remote -v` only runs once per unique path.
  */
 export function getGitRemote(path: string): string | null {
+  // In hosted mode, never run git subprocesses with client-controlled cwd.
+  if (isHostedMode()) return null;
   const cached = gitRemoteCache.get(path);
   if (cached !== undefined) return cached;

package/src/gradient.ts CHANGED Viewed

@@ -1,6 +1,6 @@
 import type { LoreMessage, LorePart, LoreMessageWithParts, LoreToolPart, LoreTextPart, LoreToolState, LoreToolStateCompleted } from "./types";
 import { isTextPart, isReasoningPart, isToolPart } from "./types";
-import { db, ensureProject, loadForceMinLayer, saveForceMinLayer } from "./db";
+import { db, ensureProject, loadForceMinLayer, saveForceMinLayer, saveSessionTracking, loadSessionTracking } from "./db";
 import { config } from "./config";
 import { formatDistillations } from "./prompt";
 import { normalize } from "./markdown";
@@ -98,9 +98,42 @@ export function updateBustRate(
   cacheWrite: number,
   cacheRead: number,
   sessionID?: string,
+  lastLayer?: number,
 ): void {
   if (!sessionID) return;
   const state = getSessionState(sessionID);
+  // Layer 4 (emergency) is structurally a full cache write — feeding its
+  // bust stats into the EMA and cap adaptation creates a death spiral where
+  // the cap ratchets down to MIN_CONTEXT_FLOOR and prevents the session from
+  // ever fitting in layers 1-3 again. Skip EMA updates entirely.
+  // This check is BEFORE the total===0 guard so that the consecutiveLayer4
+  // counter is always updated regardless of whether usage was reported.
+  if (lastLayer === 4) {
+    state.consecutiveLayer4++;
+    // Recovery hatch: after 5+ consecutive Layer 4 turns, the shrunken cap
+    // may be what's trapping us. Relax it by 10% per turn to give layers
+    // 1-3 a chance to fit. From 130K floor: turns 5-9 → 143K→157K→173K→190K→209K.
+    if (
+      state.consecutiveLayer4 >= 5 &&
+      state.dynamicContextCap > 0 &&
+      maxContextTokensCeiling > 0
+    ) {
+      state.dynamicContextCap = Math.min(
+        maxContextTokensCeiling,
+        Math.floor(state.dynamicContextCap * 1.10),
+      );
+    }
+    return;
+  }
+  // Non-Layer-4 turn: reset the consecutive counter (also before total===0
+  // guard — a zero-usage non-L4 turn must not leave a stale count).
+  if (lastLayer !== undefined) {
+    state.consecutiveLayer4 = 0;
+  }
   const total = cacheWrite + cacheRead;
   if (total === 0) return;
@@ -253,6 +286,10 @@ type SessionState = {
   postIdleCompact: boolean;
   /** Consecutive turns at layer >= 2. When >= 3, log a compaction hint. */
   consecutiveHighLayer: number;
+  /** Consecutive Layer 4 turns — used to skip bust-rate EMA updates
+   *  (Layer 4 busts are structural, not a caching signal) and to trigger
+   *  a recovery hatch that relaxes dynamicContextCap after prolonged trapping. */
+  consecutiveLayer4: number;
   // --- Cost-aware context cap dynamic state ---
@@ -298,6 +335,7 @@ function makeSessionState(): SessionState {
     cameOutOfIdle: false,
     postIdleCompact: false,
     consecutiveHighLayer: 0,
+    consecutiveLayer4: 0,
     bustRateEMA: -1,
     interBustIntervalEMA: -1,
@@ -319,6 +357,27 @@ function getSessionState(sessionID: string): SessionState {
     // forceMinLayer=2, but if OpenCode restarts before the next turn,
     // the in-memory escalation would be lost without this.
     state.forceMinLayer = loadForceMinLayer(sessionID) as SafetyLayer;
+    // Restore gradient calibration state from DB (v24) — avoids uncalibrated
+    // first turns after restart. Without this, dynamicContextCap reverts to
+    // the static ceiling, bustRateEMA is uninitialized, and lastTurnAt=0
+    // prevents onIdleResume() from detecting idle gaps.
+    //
+    // Atomic restore: lastTurnAt > 0 is the proxy for "gradient state was
+    // ever flushed to DB". Restore all fields together or none — avoids
+    // per-field sentinel fragility where a valid value (e.g. lastLayer=0)
+    // could be mistaken for "never persisted".
+    const persisted = loadSessionTracking(sessionID);
+    if (persisted && persisted.lastTurnAt > 0) {
+      state.dynamicContextCap = persisted.dynamicContextCap;
+      state.bustRateEMA = persisted.bustRateEMA;
+      state.interBustIntervalEMA = persisted.interBustIntervalEMA;
+      state.lastLayer = persisted.lastLayer as SafetyLayer;
+      state.lastKnownInput = persisted.lastKnownInput;
+      state.lastTurnAt = persisted.lastTurnAt;
+      state.lastBustAt = persisted.lastBustAt;
+    }
     sessionStates.set(sessionID, state);
   }
   return state;
@@ -584,6 +643,9 @@ export function inspectSessionState(sessionID: string): {
   postIdleCompact: boolean;
   lastTurnAt: number;
   distillationSnapshot: DistillationSnapshot | null;
+  bustRateEMA: number;
+  dynamicContextCap: number;
+  consecutiveLayer4: number;
 } | null {
   const state = sessionStates.get(sessionID);
   if (!state) return null;
@@ -594,6 +656,9 @@ export function inspectSessionState(sessionID: string): {
     postIdleCompact: state.postIdleCompact,
     lastTurnAt: state.lastTurnAt,
     distillationSnapshot: state.distillationSnapshot,
+    bustRateEMA: state.bustRateEMA,
+    dynamicContextCap: state.dynamicContextCap,
+    consecutiveLayer4: state.consecutiveLayer4,
   };
 }
@@ -606,6 +671,28 @@ export function setLastTurnAtForTest(sessionID: string, ms: number): void {
   getSessionState(sessionID).lastTurnAt = ms;
 }
+/**
+ * Persist gradient calibration state to the session_state table.
+ *
+ * Designed to be called periodically (e.g. every 30s from the idle scheduler
+ * tick) rather than on every mutation, to avoid write amplification on the
+ * hot path. Max data loss on crash is one tick interval (~30s).
+ */
+export function saveGradientState(sessionID: string): void {
+  const state = sessionStates.get(sessionID);
+  if (!state) return;
+  saveSessionTracking(sessionID, {
+    dynamicContextCap: state.dynamicContextCap,
+    bustRateEMA: state.bustRateEMA,
+    interBustIntervalEMA: state.interBustIntervalEMA,
+    lastLayer: state.lastLayer,
+    lastKnownInput: state.lastKnownInput,
+    lastTurnAt: state.lastTurnAt,
+    lastBustAt: state.lastBustAt,
+  });
+}
 type Distillation = {
   id: string;
   observations: string;
@@ -1132,8 +1219,54 @@ function buildPrefixMessages(formatted: string): MessageWithParts[] {
   ];
 }
+// --- Importance-aware distillation selection ---
+//
+// When a compression stage limits distillation count (distLimit < Infinity),
+// selects the most valuable distillations rather than blindly taking the last N.
+// Scoring: 70% recency (position in chronological order) + 30% content signal.
+// Results are re-sorted chronologically after selection so the prefix cache
+// (Approach C) remains byte-stable when the same distillations are selected.
+//
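+// Content signals (lightweight keyword detection, no LLM call; case-insensitive,
+// whole-word matches):
+//   - Decisions: "decision"/"decided"/"chose"/"chosen"/"agreed" → +0.3
+//   - Gotchas: "gotcha", "critical|known|subtle bug", "broken",
+//     "crash"/"crashed"/"crashes", "regression" → +0.2
+//   - Architecture: "architecture", "design decision"/"design pattern",
+//     "system design" → +0.1
+//   - Meta-distilled (generation >= 1): +0.2 (consolidation = higher value density)
+// The summed bonus is capped at 1.0 and then weighted by 0.3 in the final score.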
+const DECISION_RE = /\b(?:decision|decided|chose|chosen|agreed)\b/i;
+const GOTCHA_RE = /\b(?:gotcha|(?:critical|known|subtle)\s+bug|broken|crash(?:ed|es)?|regression)\b/i;
+const ARCH_RE = /\b(?:architecture|design.(?:decision|pattern)|system.design)\b/i;
+function importanceBonus(d: Distillation): number {
+  let bonus = 0;
+  if (DECISION_RE.test(d.observations)) bonus += 0.3;
+  if (GOTCHA_RE.test(d.observations)) bonus += 0.2;
+  if (ARCH_RE.test(d.observations)) bonus += 0.1;
+  if (d.generation >= 1) bonus += 0.2;
+  return Math.min(bonus, 1.0);
+}
+function selectDistillations(all: Distillation[], limit: number): Distillation[] {
+  if (all.length <= limit) return all;
+  // Recency: normalize to [0, 0.7] where oldest = 0.0, newest = 0.7.
+  // Use (length - 1) as divisor so the last entry gets full recency weight.
+  const maxIdx = all.length - 1;
+  const scored = all.map((d, i) => ({
+    d,
+    score: (maxIdx > 0 ? (i / maxIdx) : 1) * 0.7 + importanceBonus(d) * 0.3,
+  }));
+  // Keep top N by score, then re-sort chronologically (cache-safe).
+  return scored
+    .sort((a, b) => b.score - a.score)
+    .slice(0, limit)
+    .map((s) => s.d)
+    .sort((a, b) => a.created_at - b.created_at);
+}
 // Build a synthetic message pair containing the distilled history.
-// Non-cached path — used by layers 2-4 which already cause full cache invalidation.
+// Non-cached path — used by layers 2+ which already cause full cache invalidation.
 function distilledPrefix(distillations: Distillation[]): MessageWithParts[] {
   if (!distillations.length) return [];
   const formatted = formatDistillations(distillations);
@@ -1324,7 +1457,7 @@ function tryFitStable(input: {
   rawBudget: number;
   sessionID: string;
   sessState: SessionState;
-}): Omit<TransformResult, "layer" | "usable" | "distilledBudget" | "rawBudget"> | null {
+}): Omit<TransformResult, "layer" | "usable" | "distilledBudget" | "rawBudget" | "refreshLtm"> | null {
   // If the prefix already overflows its budget there's no point trying.
   if (input.prefixTokens > input.distilledBudget && input.prefix.length > 0)
     return null;
@@ -1425,6 +1558,25 @@ function tryFitStable(input: {
 export type SafetyLayer = 0 | 1 | 2 | 3 | 4;
+// --- Compression stage table ---
+// Defines the escalation path for layers 1-3. Each stage tries increasingly
+// aggressive compression: tool stripping, tighter budgets, distillation trimming.
+// Adding a new intermediate stage = one table entry.
+type CompressionStage = {
+  strip: "none" | "old-tools" | "all-tools";
+  rawFrac: number | null;     // fraction of usable; null = use default rawBudget
+  distFrac: number | null;    // fraction of usable; null = use default distilledBudget
+  distLimit: number;          // Infinity = all, 5 = last 5, etc.
+  protectedTurns: number;     // turns exempt from tool stripping
+  useStableWindow: boolean;   // use tryFitStable (Approach B pin cache)
+};
+const COMPRESSION_STAGES: CompressionStage[] = [
+  { strip: "none",      rawFrac: null, distFrac: null, distLimit: Infinity, protectedTurns: 0, useStableWindow: true },
+  { strip: "old-tools", rawFrac: 0.50, distFrac: null, distLimit: Infinity, protectedTurns: 2, useStableWindow: false },
+  { strip: "all-tools", rawFrac: 0.55, distFrac: 0.15, distLimit: 5,        protectedTurns: 0, useStableWindow: false },
+];
 export type TransformResult = {
   messages: MessageWithParts[];
   layer: SafetyLayer;
@@ -1435,6 +1587,10 @@ export type TransformResult = {
   usable: number;
   distilledBudget: number;
   rawBudget: number;
+  // Signals that the pipeline should re-run forSession() to refresh LTM
+  // relevance scoring. Set on Layer 4 (emergency) where the context is
+  // fully reset and mid-session knowledge may have changed relevance.
+  refreshLtm: boolean;
 };
 // Per-session urgent distillation tracking.
@@ -1530,7 +1686,10 @@ function transformInner(input: {
   // Pinning to the *actual* last layer prevents all downward oscillation.
   // Only applied when calibrated (same session, per-session state) to avoid
   // affecting other sessions including worker sessions.
-  if (calibrated && sessState.lastLayer >= 1 && input.messages.length >= sessState.lastKnownMessageCount) {
+  // Layer 4 (emergency) already blows the cache — stickiness there just traps
+  // the session at emergency permanently. Only apply stickiness for layers 1-3
+  // where dropping back would bust a warm cache.
+  if (calibrated && sessState.lastLayer >= 1 && sessState.lastLayer <= 3 && input.messages.length >= sessState.lastKnownMessageCount) {
     effectiveMinLayer = Math.max(effectiveMinLayer, sessState.lastLayer) as SafetyLayer;
   }
@@ -1608,6 +1767,7 @@ function transformInner(input: {
       usable,
       distilledBudget,
       rawBudget,
+      refreshLtm: false,
     };
   }
@@ -1627,7 +1787,7 @@ function transformInner(input: {
   // Layer 1 uses the append-only cached prefix (Approach C) to keep the
   // distilled content byte-identical between distillation runs, preserving
-  // the prompt cache. Layers 2-4 already cause full cache invalidation via
+  // the prompt cache. Layers 2+ already cause full cache invalidation via
   // tool stripping / message restructuring, so they use the non-cached path.
   const cached = sid
     ? distilledPrefixCached(distillations, sid, sessState)
@@ -1636,79 +1796,71 @@ function transformInner(input: {
         return { messages: msgs, tokens: msgs.reduce((sum, m) => sum + estimateMessage(m), 0) };
       })();
-  // Layer 1: Normal budget allocation with lazy raw window eviction (Approach B).
-  // tryFitStable reuses the previous cutoff when it still fits, keeping the raw
-  // window byte-identical across turns for prompt caching. Only advances the
-  // cutoff when a genuinely oversized message forces eviction.
-  // Skipped when force-escalated to layer 2+ (previous attempt already failed at this level).
-  if (effectiveMinLayer <= 1) {
-    const layer1 = sid
-      ? tryFitStable({
-          messages: dedupMessages,
-          prefix: cached.messages,
-          prefixTokens: cached.tokens,
-          distilledBudget,
-          rawBudget,
-          sessionID: sid,
-          sessState,
-        })
-      : tryFit({
-          messages: dedupMessages,
-          prefix: cached.messages,
-          prefixTokens: cached.tokens,
-          distilledBudget,
-          rawBudget,
-          strip: "none",
-        });
-    if (fitsWithSafetyMargin(layer1)) {
-      if (cached.tokens === 0 && sid) {
-        urgentDistillationMap.set(sid, true);
-      }
-      return { ...layer1!, layer: 1, usable, distilledBudget, rawBudget };
+  // --- Compression stages (layers 1-3) ---
+  // Data-driven table replaces three hardcoded layer blocks. Each stage
+  // escalates tool stripping and/or tightens distillation budgets.
+  // Stage 0 (layer 1): stable window (Approach B), no stripping
+  // Stage 1 (layer 2): strip old tool outputs, protect last 2 turns
+  // Stage 2 (layer 3): strip ALL tool outputs, keep only 5 distillations
+  for (let s = 0; s < COMPRESSION_STAGES.length; s++) {
+    const stageLayer = (s + 1) as SafetyLayer;
+    if (effectiveMinLayer > stageLayer) continue;
+    const stage = COMPRESSION_STAGES[s];
+    const stageRawBudget = stage.rawFrac !== null ? Math.floor(usable * stage.rawFrac) : rawBudget;
+    const stageDistBudget = stage.distFrac !== null ? Math.floor(usable * stage.distFrac) : distilledBudget;
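+    // Determine prefix: when distLimit is finite AND there are more distillations
+    // than the limit, re-render the prefix from the selectDistillations() subset.
+    // Otherwise (no limit, or already within it) reuse the cached prefix
+    // (Approach C, byte-identical for cache).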
+    let stagePrefix = cached.messages;
+    let stagePrefixTokens = cached.tokens;
+    if (stage.distLimit !== Infinity && distillations.length > stage.distLimit) {
+      const trimmed = selectDistillations(distillations, stage.distLimit);
+      stagePrefix = distilledPrefix(trimmed);
+      stagePrefixTokens = stagePrefix.reduce((sum, m) => sum + estimateMessage(m), 0);
     }
-  }
-  // Layer 1 didn't fit (or was force-skipped) — reset the raw window cache.
-  // Layers 2-4 use full scans and already break the prompt cache.
-  sessState.rawWindowCache = null;
+    // Stage 0 (layer 1) uses tryFitStable for Approach B pin cache.
+    // Higher stages reset the raw window cache and use plain tryFit.
+    let result: Omit<TransformResult, "layer" | "usable" | "distilledBudget" | "rawBudget" | "refreshLtm"> | null;
+    if (stage.useStableWindow && sid) {
+      result = tryFitStable({
+        messages: dedupMessages,
+        prefix: stagePrefix,
+        prefixTokens: stagePrefixTokens,
+        distilledBudget: stageDistBudget,
+        rawBudget: stageRawBudget,
+        sessionID: sid,
+        sessState,
+      });
+    } else {
+      // Reset raw window cache when leaving stage 0 — higher stages use full
+      // scans and already break the prompt cache. Must fire even when stage 1
+      // is skipped via effectiveMinLayer (e.g. forceMinLayer = 3).
+      sessState.rawWindowCache = null;
+      result = tryFit({
+        messages: dedupMessages,
+        prefix: stagePrefix,
+        prefixTokens: stagePrefixTokens,
+        distilledBudget: stageDistBudget,
+        rawBudget: stageRawBudget,
+        strip: stage.strip,
+        protectedTurns: stage.protectedTurns,
+      });
+    }
-  // Layer 2: Strip tool outputs from older messages, keep last 2 turns
-  // Skipped when force-escalated to layer 3+.
-  if (effectiveMinLayer <= 2) {
-    const layer2 = tryFit({
-      messages: dedupMessages,
-      prefix: cached.messages,
-      prefixTokens: cached.tokens,
-      distilledBudget,
-      rawBudget: Math.floor(usable * 0.5), // give raw more room
-      strip: "old-tools",
-      protectedTurns: 2,
-    });
-    if (fitsWithSafetyMargin(layer2)) {
-      if (sid) urgentDistillationMap.set(sid, true);
-      return { ...layer2!, layer: 2, usable, distilledBudget, rawBudget };
+    if (fitsWithSafetyMargin(result)) {
+      // Trigger urgent distillation when: (a) higher stages always need it, or
+      // (b) stage 0 with no distillations = first time in gradient mode.
+      if (sid && (s > 0 || cached.tokens === 0)) {
+        urgentDistillationMap.set(sid, true);
+      }
+      return { ...result!, layer: stageLayer, usable, distilledBudget, rawBudget, refreshLtm: false };
     }
   }
-  // Layer 3: Strip ALL tool outputs, drop oldest distillations
-  const trimmedDistillations = distillations.slice(-5);
-  const trimmedPrefix = distilledPrefix(trimmedDistillations);
-  const trimmedPrefixTokens = trimmedPrefix.reduce(
-    (sum, m) => sum + estimateMessage(m),
-    0,
-  );
-  const layer3 = tryFit({
-    messages: dedupMessages,
-    prefix: trimmedPrefix,
-    prefixTokens: trimmedPrefixTokens,
-    distilledBudget: Math.floor(usable * 0.15),
-    rawBudget: Math.floor(usable * 0.55),
-    strip: "all-tools",
-  });
-  if (fitsWithSafetyMargin(layer3)) {
-    if (sid) urgentDistillationMap.set(sid, true);
-    return { ...layer3!, layer: 3, usable, distilledBudget, rawBudget };
-  }
+  // All compression stages exhausted — reset raw window cache before emergency.
+  sessState.rawWindowCache = null;
   // Layer 4: Emergency — last 2 distillations + token-budget raw tail.
   // We do NOT strip tool parts here: doing so would cause an infinite tool-call loop because
@@ -1724,7 +1876,7 @@ function transformInner(input: {
   // and must always return. Remaining budget is filled backward with older
   // messages.
   if (sid) urgentDistillationMap.set(sid, true);
-  const nuclearDistillations = distillations.slice(-2);
+  const nuclearDistillations = selectDistillations(distillations, 2);
   const nuclearPrefix = distilledPrefix(nuclearDistillations);
   const nuclearPrefixTokens = nuclearPrefix.reduce(
     (sum, m) => sum + estimateMessage(m),
@@ -1773,6 +1925,7 @@ function transformInner(input: {
     usable,
     distilledBudget,
     rawBudget,
+    refreshLtm: true,
   };
 }
@@ -1893,7 +2046,7 @@ function tryFit(input: {
   rawBudget: number;
   strip: "none" | "old-tools" | "all-tools";
   protectedTurns?: number;
-}): Omit<TransformResult, "layer" | "usable" | "distilledBudget" | "rawBudget"> | null {
+}): Omit<TransformResult, "layer" | "usable" | "distilledBudget" | "rawBudget" | "refreshLtm"> | null {
   // If distilled prefix exceeds its budget, fail this layer
   if (input.prefixTokens > input.distilledBudget && input.prefix.length > 0)
     return null;