npm - @loreai/core - Versions diffs - 0.12.0 → 0.13.0 - Mend

@loreai/core 0.12.0 → 0.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (87) hide show

package/dist/bun/agents-file.d.ts +29 -8
package/dist/bun/agents-file.d.ts.map +1 -1
package/dist/bun/config.d.ts +1 -0
package/dist/bun/config.d.ts.map +1 -1
package/dist/bun/db.d.ts.map +1 -1
package/dist/bun/distillation.d.ts +29 -0
package/dist/bun/distillation.d.ts.map +1 -1
package/dist/bun/embedding.d.ts +15 -1
package/dist/bun/embedding.d.ts.map +1 -1
package/dist/bun/gradient.d.ts +53 -5
package/dist/bun/gradient.d.ts.map +1 -1
package/dist/bun/index.d.ts +4 -4
package/dist/bun/index.d.ts.map +1 -1
package/dist/bun/index.js +696 -243
package/dist/bun/index.js.map +4 -4
package/dist/bun/pattern-extract.d.ts +36 -0
package/dist/bun/pattern-extract.d.ts.map +1 -0
package/dist/bun/recall.d.ts +1 -0
package/dist/bun/recall.d.ts.map +1 -1
package/dist/bun/search.d.ts +13 -1
package/dist/bun/search.d.ts.map +1 -1
package/dist/bun/types.d.ts +41 -1
package/dist/bun/types.d.ts.map +1 -1
package/dist/bun/worker-model.d.ts +22 -0
package/dist/bun/worker-model.d.ts.map +1 -1
package/dist/node/agents-file.d.ts +29 -8
package/dist/node/agents-file.d.ts.map +1 -1
package/dist/node/config.d.ts +1 -0
package/dist/node/config.d.ts.map +1 -1
package/dist/node/db.d.ts.map +1 -1
package/dist/node/distillation.d.ts +29 -0
package/dist/node/distillation.d.ts.map +1 -1
package/dist/node/embedding.d.ts +15 -1
package/dist/node/embedding.d.ts.map +1 -1
package/dist/node/gradient.d.ts +53 -5
package/dist/node/gradient.d.ts.map +1 -1
package/dist/node/index.d.ts +4 -4
package/dist/node/index.d.ts.map +1 -1
package/dist/node/index.js +696 -243
package/dist/node/index.js.map +4 -4
package/dist/node/pattern-extract.d.ts +36 -0
package/dist/node/pattern-extract.d.ts.map +1 -0
package/dist/node/recall.d.ts +1 -0
package/dist/node/recall.d.ts.map +1 -1
package/dist/node/search.d.ts +13 -1
package/dist/node/search.d.ts.map +1 -1
package/dist/node/types.d.ts +41 -1
package/dist/node/types.d.ts.map +1 -1
package/dist/node/worker-model.d.ts +22 -0
package/dist/node/worker-model.d.ts.map +1 -1
package/dist/types/agents-file.d.ts +29 -8
package/dist/types/agents-file.d.ts.map +1 -1
package/dist/types/config.d.ts +1 -0
package/dist/types/config.d.ts.map +1 -1
package/dist/types/db.d.ts.map +1 -1
package/dist/types/distillation.d.ts +29 -0
package/dist/types/distillation.d.ts.map +1 -1
package/dist/types/embedding.d.ts +15 -1
package/dist/types/embedding.d.ts.map +1 -1
package/dist/types/gradient.d.ts +53 -5
package/dist/types/gradient.d.ts.map +1 -1
package/dist/types/index.d.ts +4 -4
package/dist/types/index.d.ts.map +1 -1
package/dist/types/pattern-extract.d.ts +36 -0
package/dist/types/pattern-extract.d.ts.map +1 -0
package/dist/types/recall.d.ts +1 -0
package/dist/types/recall.d.ts.map +1 -1
package/dist/types/search.d.ts +13 -1
package/dist/types/search.d.ts.map +1 -1
package/dist/types/types.d.ts +41 -1
package/dist/types/types.d.ts.map +1 -1
package/dist/types/worker-model.d.ts +22 -0
package/dist/types/worker-model.d.ts.map +1 -1
package/package.json +3 -2
package/src/agents-file.ts +111 -28
package/src/config.ts +25 -18
package/src/curator.ts +2 -2
package/src/db.ts +19 -2
package/src/distillation.ts +152 -15
package/src/embedding.ts +158 -14
package/src/gradient.ts +398 -227
package/src/index.ts +13 -5
package/src/pattern-extract.ts +108 -0
package/src/recall.ts +124 -6
package/src/search.ts +37 -1
package/src/types.ts +41 -1
package/src/worker-model.ts +142 -5

package/src/gradient.ts CHANGED Viewed

@@ -72,6 +72,13 @@ let calibratedOverhead: number | null = null;
 // response via UNCALIBRATED_SAFETY.
 // ---------------------------------------------------------------------------
+type DistillationSnapshot = {
+  /** Cached distillation rows from the most recent DB read */
+  rows: Distillation[];
+  /** ID of the last user message when this snapshot was taken
+   *  (null when the message list contained no user message) */
+  lastUserMsgId: string | null;
+};
 type SessionState = {
   /** Exact input token count from the last successful API response */
   lastKnownInput: number;
@@ -89,6 +96,8 @@ type SessionState = {
   forceMinLayer: SafetyLayer;
   /** Token estimate from the most recent transform() output (compressed window) */
   lastTransformEstimate: number;
+  /** LTM tokens injected for this session's current turn (per-session isolation) */
+  ltmTokens: number;
   /** Distilled prefix cache (Approach C) */
   prefixCache: PrefixCache | null;
   /** Raw window pin cache (Approach B) */
@@ -112,10 +121,36 @@ type SessionState = {
    * the post-idle turn regardless of conversation size.
    */
   cameOutOfIdle: boolean;
+  /**
+   * Set true by onIdleResume() alongside cameOutOfIdle; consumed (and cleared)
+   * by transformInner() to activate the post-idle compact layer. When true AND
+   * distillations exist, transform skips layer 0 (full-raw passthrough) and
+   * uses a tighter raw budget for layer 1. Rationale: on a cold cache the
+   * entire context is a cache WRITE — a smaller total means lower write cost,
+   * and aggressive idle distillation already captured the older history.
+   */
+  postIdleCompact: boolean;
   /** Consecutive turns at layer >= 2. When >= 3, log a compaction hint. */
   consecutiveHighLayer: number;
   /** Hash of the first message IDs in the last transform output — for cache-bust diagnostics. */
   lastPrefixHash: string;
+  /** Cumulative cache-bust count for this session (prefix hash changed between turns). */
+  bustCount: number;
+  /** Total transform() calls for this session — used with bustCount for rate calculation. */
+  transformCount: number;
+  /**
+   * Distillation row snapshot — cached to avoid hitting the DB on every
+   * transform() call. Refreshed only at turn boundaries (when a new user
+   * message appears) or on first call / idle resume. During autonomous
+   * tool-call chains this stays frozen, keeping the distilled prefix
+   * byte-identical across consecutive API calls and preserving the prompt
+   * cache.
+   *
+   * Cost context: each prefix refresh costs context_size × cache_write_price
+   * (~$1.88 per bust at 500K Sonnet). New distillations have near-zero
+   * marginal value mid-chain since the model already has raw messages.
+   */
+  distillationSnapshot: DistillationSnapshot | null;
 };
 function makeSessionState(): SessionState {
@@ -128,12 +163,17 @@ function makeSessionState(): SessionState {
     lastWindowMessageIDs: new Set(),
     forceMinLayer: 0,
     lastTransformEstimate: 0,
+    ltmTokens: 0,
     prefixCache: null,
     rawWindowCache: null,
     lastTurnAt: 0,
     cameOutOfIdle: false,
+    postIdleCompact: false,
     consecutiveHighLayer: 0,
     lastPrefixHash: "",
+    bustCount: 0,
+    transformCount: 0,
+    distillationSnapshot: null,
   };
 }
@@ -196,10 +236,22 @@ export function onIdleResume(
   if (idleMs < thresholdMs) return { triggered: false };
   state.prefixCache = null;
   state.rawWindowCache = null;
+  state.distillationSnapshot = null;
   state.cameOutOfIdle = true;
+  state.postIdleCompact = true;
   return { triggered: true, idleMs };
 }
+/**
+ * Look up when the given session last went through transform().
+ *
+ * Callers (e.g. meta-distillation gating) use the value to judge whether the
+ * upstream prompt cache is likely still warm.
+ *
+ * @param sessionID - Session to query.
+ * @returns The session's `lastTurnAt` wall-clock timestamp (epoch ms), or 0
+ *   when the session has no tracked state or has never been transformed.
+ */
+export function getLastTurnAt(sessionID: string): number {
+  const state = sessionStates.get(sessionID);
+  if (!state) return 0;
+  return state.lastTurnAt ?? 0;
+}
 /**
  * Read-and-clear the cameOutOfIdle flag. The OpenCode host's LTM degraded-
  * recovery branch consumes this to decide whether to bypass the
@@ -213,8 +265,9 @@ export function consumeCameOutOfIdle(sessionID: string): boolean {
 }
 // LTM tokens injected via system transform hook this turn.
-// Set by setLtmTokens() after the system hook runs; consumed by transform().
-let ltmTokens = 0;
+// Per-session when a sessionID is provided (preferred), with a module-level
+// fallback for callers that don't have a session ID.
+let ltmTokensFallback = 0;
 export function setModelLimits(limits: { context: number; output: number }) {
   contextLimit = limits.context || 200_000;
@@ -248,14 +301,25 @@ export function computeLayer0Cap(
   return Math.max(rawCap, MIN_LAYER0_FLOOR);
 }
-/** Called by the system transform hook after formatting LTM knowledge. */
-export function setLtmTokens(tokens: number) {
-  ltmTokens = tokens;
+/** Called by the system transform hook after formatting LTM knowledge.
+ *  When sessionID is provided, the count is stored on that session's state
+ *  to prevent cross-session budget contamination.
+ *  The module-level fallback is overwritten on every call, with or without
+ *  a sessionID, so it always holds the most recently set value regardless
+ *  of which session set it. */
+export function setLtmTokens(tokens: number, sessionID?: string) {
+  if (sessionID) {
+    getSessionState(sessionID).ltmTokens = tokens;
+  }
+  ltmTokensFallback = tokens;
 }
-/** Returns the current LTM token count (for tests and diagnostics). */
-export function getLtmTokens(): number {
-  return ltmTokens;
+/** Returns the LTM token count stored for the given session.
+ *  Falls back to the module-level global when no session ID is provided,
+ *  or when no state is tracked yet for that session ID. */
+export function getLtmTokens(sessionID?: string): number {
+  if (sessionID) {
+    const state = sessionStates.get(sessionID);
+    if (state) return state.ltmTokens;
+  }
+  return ltmTokensFallback;
 }
 /**
@@ -306,7 +370,7 @@ export function calibrate(
   if (sessionID !== undefined) {
     const state = getSessionState(sessionID);
     state.lastKnownInput = actualInput;
-    state.lastKnownLtm = ltmTokens;
+    state.lastKnownLtm = state.ltmTokens;
     if (messageCount !== undefined) state.lastKnownMessageCount = messageCount;
   }
 }
@@ -378,7 +442,9 @@ export function inspectSessionState(sessionID: string): {
   hasPrefixCache: boolean;
   hasRawWindowCache: boolean;
   cameOutOfIdle: boolean;
+  postIdleCompact: boolean;
   lastTurnAt: number;
+  distillationSnapshot: DistillationSnapshot | null;
 } | null {
   const state = sessionStates.get(sessionID);
   if (!state) return null;
@@ -386,7 +452,9 @@ export function inspectSessionState(sessionID: string): {
     hasPrefixCache: state.prefixCache !== null,
     hasRawWindowCache: state.rawWindowCache !== null,
     cameOutOfIdle: state.cameOutOfIdle,
+    postIdleCompact: state.postIdleCompact,
     lastTurnAt: state.lastTurnAt,
+    distillationSnapshot: state.distillationSnapshot,
   };
 }
@@ -425,6 +493,46 @@ function loadDistillations(
     .all(...params) as Distillation[];
 }
+// Snapshot-backed distillation loader, so transform() does not query the DB on
+// every call. The snapshot is keyed by the ID of the newest user message in
+// the input:
+//   - same ID as the stored snapshot  -> return the stored rows untouched
+//     (still inside one tool-call chain, so the distilled prefix stays
+//     byte-identical and the prompt cache is preserved);
+//   - different ID, or no snapshot (first call, or cleared by onIdleResume)
+//     -> re-read from the DB, store a new snapshot, and log the refresh.
+function loadDistillationsCached(
+  projectPath: string,
+  sessionID: string,
+  messages: MessageWithParts[],
+  sessState: SessionState,
+): Distillation[] {
+  // Walk backwards to locate the newest user message, if there is one.
+  let lastUserMsgId: string | null = null;
+  let idx = messages.length;
+  while (idx-- > 0) {
+    const { info } = messages[idx];
+    if (info.role !== "user") continue;
+    lastUserMsgId = info.id;
+    break;
+  }
+  const cached = sessState.distillationSnapshot;
+  // Miss: turn boundary (new user message), first call, or post-idle reset.
+  if (!cached || cached.lastUserMsgId !== lastUserMsgId) {
+    const fresh = loadDistillations(projectPath, sessionID);
+    sessState.distillationSnapshot = { rows: fresh, lastUserMsgId };
+    const shortId = lastUserMsgId?.substring(0, 16) ?? "none";
+    log.info(`distillation refresh: ${fresh.length} rows (user msg ${shortId})`);
+    return fresh;
+  }
+  // Hit: same user message as when the snapshot was taken.
+  return cached.rows;
+}
 // Strip all <system-reminder>...</system-reminder> blocks from message text.
 // For the user-message wrapper pattern, extracts the actual user text.
 // For all other reminders (build-switch, plan reminders, etc.), drops them entirely.
@@ -534,6 +642,15 @@ function simpleHash(str: string): number {
   return hash;
 }
+/** Parsed read-tool input: file path plus optional line range.
+ *  A value with neither offset nor limit represents a full-file read. */
+type ReadRange = {
+  path: string;
+  /** 1-based start line. undefined = from beginning. */
+  offset: number | undefined;
+  /** Number of lines to read. undefined = to end. */
+  limit: number | undefined;
+};
 /** Extract file path from a tool's input JSON.
  *  Handles common formats: {"path": "/foo.ts"}, {"filePath": "/foo.ts"},
  *  and plain text fallback. */
@@ -548,10 +665,72 @@ function extractFilePath(input: string): string | undefined {
   }
 }
+/**
+ * Extract file path + optional line range from a read tool's input.
+ *
+ * JSON input: the path is the first non-empty string among `path`,
+ * `filePath`, and `file`; `offset` / `limit` are kept only when they are
+ * numbers. Non-string path values are ignored rather than copied into
+ * `ReadRange.path`, so the result always honours the declared `string` type
+ * (the path is later used as a map key and interpolated into annotations).
+ *
+ * Non-JSON input: falls back to the first path-looking token in the text,
+ * with no range information.
+ *
+ * @param input - Raw tool input, either a JSON object string or plain text.
+ * @returns The parsed range, or undefined when no usable path is found.
+ */
+function extractReadRange(input: string): ReadRange | undefined {
+  let parsed: unknown;
+  try {
+    parsed = JSON.parse(input);
+  } catch {
+    // Not JSON — look for something shaped like "dir/file.ext" in the text.
+    const match = input.match(/(?:[\w.-]+\/)+[\w.-]+\.\w{1,5}/);
+    if (!match) return undefined;
+    return { path: match[0], offset: undefined, limit: undefined };
+  }
+  // Valid JSON that is not an object (null, string, number, …) has no fields.
+  if (typeof parsed !== "object" || parsed === null) return undefined;
+  const fields = parsed as Record<string, unknown>;
+  const path = [fields.path, fields.filePath, fields.file].find(
+    (candidate): candidate is string =>
+      typeof candidate === "string" && candidate !== "",
+  );
+  if (path === undefined) return undefined;
+  const offset = typeof fields.offset === "number" ? fields.offset : undefined;
+  const limit = typeof fields.limit === "number" ? fields.limit : undefined;
+  return { path, offset, limit };
+}
+/**
+ * Decide whether the read described by `later` makes `earlier` redundant,
+ * i.e. whether `later` spans at least the same lines of the same file.
+ *
+ * Rules, in order:
+ * - Different paths never cover each other.
+ * - A full-file `later` (no offset, no limit) covers any read of that path.
+ * - A ranged `later` never covers a full-file `earlier`.
+ * - A missing offset counts as line 1.
+ * - An open-ended `later` (no limit) covers when it starts at or before
+ *   `earlier`'s start.
+ * - A bounded `later` never covers an open-ended `earlier`.
+ * - Otherwise both are bounded, and `later` covers when
+ *   [start, start + limit) is a superset of (or equal to) `earlier`'s interval.
+ *
+ * @param later - The more recent read.
+ * @param earlier - The older read being considered for dedup.
+ * @returns true when `later` covers `earlier`.
+ */
+export function laterReadCovers(later: ReadRange, earlier: ReadRange): boolean {
+  if (later.path !== earlier.path) return false;
+  const laterOffset = later.offset;
+  const laterLimit = later.limit;
+  const earlierOffset = earlier.offset;
+  const earlierLimit = earlier.limit;
+  // Whole-file reads: the later one wins outright, the earlier one can't be covered.
+  if (laterOffset === undefined && laterLimit === undefined) return true;
+  if (earlierOffset === undefined && earlierLimit === undefined) return false;
+  const laterStart = laterOffset ?? 1;
+  const earlierStart = earlierOffset ?? 1;
+  const startsEarlyEnough = laterStart <= earlierStart;
+  // Later runs to end-of-file: only the start position matters.
+  if (laterLimit === undefined) return startsEarlyEnough;
+  // Earlier runs to end-of-file but later stops somewhere: cannot cover.
+  if (earlierLimit === undefined) return false;
+  // Both bounded: compare exclusive end positions.
+  return startsEarlyEnough && laterStart + laterLimit >= earlierStart + earlierLimit;
+}
+/**
+ * Format a range label for dedup annotations.
+ *
+ * - limit set (with or without offset): ` lines A-B`; a missing offset is
+ *   treated as line 1, matching how laterReadCovers() interprets it. Without
+ *   this, a "first N lines" read was labelled like a full-file read.
+ * - offset only: ` from line A`
+ * - neither (full-file read): empty string
+ *
+ * @param range - Parsed read range.
+ * @returns The label with a leading space, or "" for a full-file read.
+ */
+function rangeLabel(range: ReadRange): string {
+  const { offset, limit } = range;
+  if (limit !== undefined) {
+    const start = offset ?? 1;
+    return ` lines ${start}-${start + limit - 1}`;
+  }
+  if (offset !== undefined) {
+    return ` from line ${offset}`;
+  }
+  return "";
+}
 /** Annotation for deduplicated tool output — follows the toolStripAnnotation() pattern. */
-function dedupAnnotation(toolName: string, filePath?: string): string {
+function dedupAnnotation(toolName: string, filePath?: string, range?: ReadRange): string {
   if (filePath) {
-    return `[earlier version of ${filePath} — see latest read below for current content]`;
+    const rl = range ? rangeLabel(range) : "";
+    return `[earlier read of ${filePath}${rl} — see latest read below for current content]`;
   }
   return `[duplicate output — same content as later ${toolName} in this session — use recall for details]`;
 }
@@ -563,7 +742,9 @@ function dedupAnnotation(toolName: string, filePath?: string): string {
  *
  * Deduplicates by:
  * 1. Exact content hash: identical tool outputs (same file read twice, same command output)
- * 2. Same-file reads: read_file outputs for the same path (content may differ due to edits)
+ * 2. Range-aware file reads: read_file/read outputs for the same path where a later
+ *    read covers the same or wider line range (full-file covers everything; a ranged
+ *    read only covers another ranged read when its interval is a superset).
  *
  * The current turn (from currentTurnIdx onward) is never touched — the model
  * needs full context for its active work. Tool parts are never removed entirely;
@@ -577,11 +758,13 @@ export function deduplicateToolOutputs(
 ): MessageWithParts[] {
   // Track latest occurrence: contentKey → latest message index
   const contentLatest = new Map<string, number>();
-  // Track latest read by file path: "read:path" → latest message index
-  const fileLatest = new Map<string, number>();
-  // Also include current-turn reads in the "latest" tracking so we properly
-  // recognize earlier reads as duplicates of current-turn content.
+  // Track all read ranges per file path, ordered by message index (ascending).
+  // Each entry records the range and the message index so the second pass can
+  // check whether any later read covers the current read's range.
+  const fileReads = new Map<string, Array<{ range: ReadRange; msgIdx: number }>>();
+  // First pass: scan all messages (including current turn) to build tracking maps.
   for (let i = 0; i < messages.length; i++) {
     for (const part of messages[i].parts) {
       if (!isToolPart(part) || part.state.status !== "completed") continue;
@@ -591,13 +774,20 @@ export function deduplicateToolOutputs(
       const key = `${part.tool}:${simpleHash(output)}`;
       contentLatest.set(key, i);
-      // For read-type tools, also track by file path
+      // For read-type tools, record the full range info
       if (part.tool === "read_file" || part.tool === "read") {
         const inputStr = typeof part.state.input === "string"
           ? part.state.input
           : JSON.stringify(part.state.input);
-        const fp = extractFilePath(inputStr);
-        if (fp) fileLatest.set(`read:${fp}`, i);
+        const range = extractReadRange(inputStr);
+        if (range) {
+          let entries = fileReads.get(range.path);
+          if (!entries) {
+            entries = [];
+            fileReads.set(range.path, entries);
+          }
+          entries.push({ range, msgIdx: i });
+        }
       }
     }
   }
@@ -617,19 +807,31 @@ export function deduplicateToolOutputs(
       const contentKey = `${part.tool}:${simpleHash(output)}`;
       const isLatestContent = contentLatest.get(contentKey) === msgIdx;
-      // Check file-path dedup for read tools: is this the latest read of this file?
-      let filePath: string | undefined;
-      let isLatestFile = true;
+      // Check range-aware file dedup for read tools: does any later read
+      // of the same file cover this read's range?
+      let readRange: ReadRange | undefined;
+      let coveredByLater = false;
       if (part.tool === "read_file" || part.tool === "read") {
         const inputStr = typeof part.state.input === "string"
           ? part.state.input
           : JSON.stringify(part.state.input);
-        filePath = extractFilePath(inputStr);
-        if (filePath) isLatestFile = fileLatest.get(`read:${filePath}`) === msgIdx;
+        readRange = extractReadRange(inputStr);
+        if (readRange) {
+          const entries = fileReads.get(readRange.path);
+          if (entries) {
+            // Check if any entry with a higher message index covers this range
+            for (const entry of entries) {
+              if (entry.msgIdx > msgIdx && laterReadCovers(entry.range, readRange)) {
+                coveredByLater = true;
+                break;
+              }
+            }
+          }
+        }
       }
-      // Keep if this is both the latest content AND latest file read (or not a read tool)
-      if (isLatestContent && isLatestFile) return part;
+      // Keep if this is both the latest content AND not covered by a later read
+      if (isLatestContent && !coveredByLater) return part;
       // This is a duplicate — replace with compact annotation
       partsChanged = true;
@@ -637,7 +839,7 @@ export function deduplicateToolOutputs(
         ...part,
         state: {
           ...part.state,
-          output: dedupAnnotation(part.tool, filePath),
+          output: dedupAnnotation(part.tool, readRange?.path, readRange),
         },
       } as LorePart;
     });
@@ -671,8 +873,14 @@ function sanitizeToolParts(
       if (status === "completed" || status === "error") return part;
       // pending or running → convert to error so SDK emits tool_result
+      // Use a deterministic timestamp (0) instead of Date.now() so that
+      // repeated transform() calls on the same stale pending part produce
+      // identical bytes.  OpenCode's prompt-loop cache fix (e148f00aa)
+      // preserves old pending parts across iterations; Date.now() here
+      // would re-stamp them each call → different bytes → cache bust.
       partsChanged = true;
-      const now = Date.now();
+      const existingStart =
+        "time" in part.state ? part.state.time.start : 0;
       return {
         ...part,
         state: {
@@ -682,8 +890,8 @@ function sanitizeToolParts(
           metadata:
             "metadata" in part.state ? part.state.metadata : undefined,
           time: {
-            start: "time" in part.state ? part.state.time.start : now,
-            end: now,
+            start: existingStart,
+            end: existingStart,
           },
         },
       } as LorePart;
@@ -728,134 +936,6 @@ function stripToTextOnly(parts: LorePart[]): LorePart[] {
   return stripped;
 }
-// --- Phase 2: Temporal anchoring at read time ---
-function formatRelativeTime(date: Date, now: Date): string {
-  const diffMs = now.getTime() - date.getTime();
-  const diffDays = Math.floor(diffMs / (1000 * 60 * 60 * 24));
-  if (diffDays === 0) return "today";
-  if (diffDays === 1) return "yesterday";
-  if (diffDays < 7) return `${diffDays} days ago`;
-  if (diffDays < 14) return "1 week ago";
-  if (diffDays < 30) return `${Math.floor(diffDays / 7)} weeks ago`;
-  if (diffDays < 60) return "1 month ago";
-  if (diffDays < 365) return `${Math.floor(diffDays / 30)} months ago`;
-  return `${Math.floor(diffDays / 365)} year${Math.floor(diffDays / 365) > 1 ? "s" : ""} ago`;
-}
-function parseDateFromContent(s: string): Date | null {
-  // "Month Day, Year" e.g. "January 15, 2026"
-  const simple = s.match(/([A-Z][a-z]+)\s+(\d{1,2}),?\s+(\d{4})/);
-  if (simple) {
-    const d = new Date(`${simple[1]} ${simple[2]}, ${simple[3]}`);
-    if (!isNaN(d.getTime())) return d;
-  }
-  // "Month D-D, Year" range — use start
-  const range = s.match(/([A-Z][a-z]+)\s+(\d{1,2})-\d{1,2},?\s+(\d{4})/);
-  if (range) {
-    const d = new Date(`${range[1]} ${range[2]}, ${range[3]}`);
-    if (!isNaN(d.getTime())) return d;
-  }
-  // "late/early/mid Month Year"
-  const vague = s.match(/(late|early|mid)[- ]?([A-Z][a-z]+)\s+(\d{4})/i);
-  if (vague) {
-    const day =
-      vague[1].toLowerCase() === "early"
-        ? 7
-        : vague[1].toLowerCase() === "late"
-          ? 23
-          : 15;
-    const d = new Date(`${vague[2]} ${day}, ${vague[3]}`);
-    if (!isNaN(d.getTime())) return d;
-  }
-  return null;
-}
-// Expand "(meaning DATE)" and "(estimated DATE)" annotations with a relative offset.
-// Past future-intent lines get "(likely already happened)" appended.
-function expandInlineEstimatedDates(text: string, now: Date): string {
-  return text.replace(
-    /\(((?:meaning|estimated)\s+)([^)]+\d{4})\)/gi,
-    (match, prefix: string, dateContent: string) => {
-      const d = parseDateFromContent(dateContent);
-      if (!d) return match;
-      const rel = formatRelativeTime(d, now);
-      // Detect future-intent by looking backwards on the same line
-      const matchIdx = text.indexOf(match);
-      const lineStart = text.lastIndexOf("\n", matchIdx) + 1;
-      const linePrefix = text.slice(lineStart, matchIdx);
-      const isFutureIntent =
-        /\b(?:will|plans?\s+to|planning\s+to|going\s+to|intends?\s+to)\b/i.test(
-          linePrefix,
-        );
-      if (d < now && isFutureIntent)
-        return `(${prefix}${dateContent} — ${rel}, likely already happened)`;
-      return `(${prefix}${dateContent} — ${rel})`;
-    },
-  );
-}
-// Add relative time annotations to "Date: Month D, Year" section headers
-// and gap markers between non-consecutive dates.
-function addRelativeTimeToObservations(text: string, now: Date): string {
-  // First pass: expand inline "(meaning DATE)" annotations
-  const withInline = expandInlineEstimatedDates(text, now);
-  // Second pass: annotate date headers and add gap markers
-  const dateHeaderRe = /^(Date:\s*)([A-Z][a-z]+ \d{1,2}, \d{4})$/gm;
-  const found: Array<{
-    index: number;
-    date: Date;
-    full: string;
-    prefix: string;
-    ds: string;
-  }> = [];
-  let m: RegExpExecArray | null;
-  while ((m = dateHeaderRe.exec(withInline)) !== null) {
-    const d = new Date(m[2]);
-    if (!isNaN(d.getTime()))
-      found.push({
-        index: m.index,
-        date: d,
-        full: m[0],
-        prefix: m[1],
-        ds: m[2],
-      });
-  }
-  if (!found.length) return withInline;
-  let result = "";
-  let last = 0;
-  for (let i = 0; i < found.length; i++) {
-    const curr = found[i];
-    const prev = found[i - 1];
-    result += withInline.slice(last, curr.index);
-    // Gap marker between non-consecutive dates
-    if (prev) {
-      const gapDays = Math.floor(
-        (curr.date.getTime() - prev.date.getTime()) / 86400000,
-      );
-      if (gapDays > 1) {
-        const gap =
-          gapDays < 7
-            ? `[${gapDays} days later]`
-            : gapDays < 14
-              ? "[1 week later]"
-              : gapDays < 30
-                ? `[${Math.floor(gapDays / 7)} weeks later]`
-                : gapDays < 60
-                  ? "[1 month later]"
-                  : `[${Math.floor(gapDays / 30)} months later]`;
-        result += `\n${gap}\n\n`;
-      }
-    }
-    result += `${curr.prefix}${curr.ds} (${formatRelativeTime(curr.date, now)})`;
-    last = curr.index + curr.full.length;
-  }
-  result += withInline.slice(last);
-  return result;
-}
 // Build synthetic user/assistant message pair wrapping formatted distillation text.
 // Shared by the cached and non-cached prefix paths.
 function buildPrefixMessages(formatted: string): MessageWithParts[] {
@@ -917,12 +997,7 @@ function buildPrefixMessages(formatted: string): MessageWithParts[] {
 // Non-cached path — used by layers 2-4 which already cause full cache invalidation.
 function distilledPrefix(distillations: Distillation[]): MessageWithParts[] {
   if (!distillations.length) return [];
-  const now = new Date();
-  const annotated = distillations.map((d) => ({
-    ...d,
-    observations: addRelativeTimeToObservations(d.observations, now),
-  }));
-  const formatted = formatDistillations(annotated);
+  const formatted = formatDistillations(distillations);
   if (!formatted) return [];
   return buildPrefixMessages(formatted);
 }
@@ -995,12 +1070,7 @@ function distilledPrefixCached(
     // New rows appended — render only the delta and append to cached text
     const newRows = distillations.slice(prefixCache!.rowCount);
-    const now = new Date();
-    const annotated = newRows.map((d) => ({
-      ...d,
-      observations: addRelativeTimeToObservations(d.observations, now),
-    }));
-    const deltaText = formatDistillations(annotated);
+    const deltaText = formatDistillations(newRows);
     if (deltaText) {
       const fullText = prefixCache!.cachedText + "\n\n" + deltaText;
@@ -1019,12 +1089,7 @@ function distilledPrefixCached(
   }
   // Full re-render: first call or meta-distillation rewrote rows
-  const now = new Date();
-  const annotated = distillations.map((d) => ({
-    ...d,
-    observations: addRelativeTimeToObservations(d.observations, now),
-  }));
-  const fullText = formatDistillations(annotated);
+  const fullText = formatDistillations(distillations);
   if (!fullText) {
     sessState.prefixCache = null;
     return { messages: [], tokens: 0 };
@@ -1053,6 +1118,16 @@ export function resetPrefixCache(sessionID?: string) {
   }
 }
+// Test helper — drops the cached distillation snapshot. With a session ID only
+// that session is affected (no-op when it has no state); without one, every
+// tracked session is cleared.
+export function resetDistillationSnapshot(sessionID?: string) {
+  if (!sessionID) {
+    for (const tracked of sessionStates.values()) {
+      tracked.distillationSnapshot = null;
+    }
+    return;
+  }
+  const target = sessionStates.get(sessionID);
+  if (target) target.distillationSnapshot = null;
+}
 // --- Approach B: Lazy raw window eviction ---
 //
 // Tracks the ID of the first (oldest) message in the previous raw window.
@@ -1072,8 +1147,14 @@ export function resetPrefixCache(sessionID?: string) {
 type RawWindowCache = {
   sessionID: string;
-  /** ID of the first message in the pinned raw window */
-  firstMessageID: string;
+  /** Number of raw messages (excluding prefix) in the pinned window at creation. */
+  pinnedRawCount: number;
+  /** Total number of messages in the input array when the pin was created.
+   *  Used to compute how many new messages were appended since. */
+  pinnedTotalCount: number;
+  /** rawBudget that was in effect when the pin was created — used for the
+   *  pin-validity check so that global budget fluctuations don't evict the pin. */
+  pinnedBudget: number;
 };
 // For testing only — reset raw window cache state for a specific session (or all)
@@ -1114,36 +1195,63 @@ function tryFitStable(input: {
     rawWindowCache !== null && rawWindowCache.sessionID === input.sessionID;
   if (cacheValid) {
-    const pinnedIdx = input.messages.findIndex(
-      (m) => m.info.id === rawWindowCache!.firstMessageID,
+    // Compute the pinned index from the stored raw count + new message growth.
+    // newMessages = messages appended since pin creation (typically 2 per turn).
+    // The pinned window grows to include them: pinnedRawCount + newMessages.
+    // This is resilient to front-trimming by the host (e.g. OpenCode evicting
+    // old messages) because the offset is relative to the tail.
+    const newMessages = Math.max(0, input.messages.length - rawWindowCache!.pinnedTotalCount);
+    const windowSize = rawWindowCache!.pinnedRawCount + newMessages;
+    const pinnedIdx = Math.max(0, input.messages.length - windowSize);
+    // Measure the token cost of the pinned window.
+    const pinnedWindow = input.messages.slice(pinnedIdx);
+    const pinnedTokens = pinnedWindow.reduce(
+      (sum, m) => sum + estimateMessage(m),
+      0,
     );
-    if (pinnedIdx !== -1) {
-      // Measure the token cost of the pinned window.
-      const pinnedWindow = input.messages.slice(pinnedIdx);
-      const pinnedTokens = pinnedWindow.reduce(
-        (sum, m) => sum + estimateMessage(m),
-        0,
-      );
-      if (pinnedTokens <= input.rawBudget) {
-        // Pinned window still fits — keep it. Apply system-reminder cleanup
-        // only (strip:"none" is the layer-1 mode), returning the same message
-        // object references wherever nothing changed.
-        const processed = pinnedWindow.map((msg) => {
-          const parts = cleanParts(msg.parts);
-          return parts !== msg.parts ? { info: msg.info, parts } : msg;
-        });
-        const total = input.prefixTokens + pinnedTokens;
-        return {
-          messages: [...input.prefix, ...processed],
-          distilledTokens: input.prefixTokens,
-          rawTokens: pinnedTokens,
-          totalTokens: total,
+    // Use the budget that was in effect when the pin was created with a 15%
+    // hysteresis margin so that small budget fluctuations from overhead drift
+    // and deduplicateToolOutputs() token-estimate changes don't evict the pin.
+    // The high-water mark (max of pinned and current budgets) prevents overhead
+    // EMA drift from shrinking the effective budget below what was valid when
+    // the pin was created — the budget shrank due to overhead drift, not because
+    // the context limit changed.
+    const highWaterBudget = Math.max(rawWindowCache!.pinnedBudget, input.rawBudget);
+    const effectiveBudget = highWaterBudget * 1.15;
+    if (pinnedTokens <= effectiveBudget) {
+      // Pinned window still fits within the hysteresis margin of the high-water
+      // budget. Re-pin at the current budget when the old hysteresis is exceeded
+      // so that next turn's check uses a fresh baseline.
+      if (pinnedTokens > rawWindowCache!.pinnedBudget * 1.15) {
+        input.sessState.rawWindowCache = {
+          ...rawWindowCache!,
+          pinnedRawCount: pinnedWindow.length,
+          pinnedTotalCount: input.messages.length,
+          pinnedBudget: input.rawBudget,
         };
       }
-      // Pinned window is too large — fall through to the normal scan below.
+      // Apply system-reminder cleanup only (strip:"none" is the layer-1 mode),
+      // returning the same message object references wherever nothing changed.
+      const processed = pinnedWindow.map((msg) => {
+        const parts = cleanParts(msg.parts);
+        return parts !== msg.parts ? { info: msg.info, parts } : msg;
+      });
+      const total = input.prefixTokens + pinnedTokens;
+      return {
+        messages: [...input.prefix, ...processed],
+        distilledTokens: input.prefixTokens,
+        rawTokens: pinnedTokens,
+        totalTokens: total,
+      };
     }
+    // Pinned window exceeds the hysteresis-adjusted high-water budget (max of pinned and current rawBudget, × 1.15) — fall through to rescan.
+    log.info(
+      `pin-overflow: session=${input.sessionID} pinnedTokens=${pinnedTokens} ` +
+      `pinnedBudget=${rawWindowCache!.pinnedBudget} effectiveBudget=${Math.round(effectiveBudget)} ` +
+      `currentRawBudget=${input.rawBudget} windowSize=${pinnedWindow.length}`,
+    );
   }
   // Normal backward scan to find the tightest fitting cutoff.
@@ -1157,13 +1265,18 @@ function tryFitStable(input: {
   });
   if (result) {
-    // Update the raw window cache: the first non-prefix message is the oldest
-    // raw message in the new window. Pin to its ID for the next turn.
-    const rawStart = result.messages[input.prefix.length];
-    if (rawStart) {
+    // Update the raw window cache: store the raw message count and total message
+    // count so we can reconstruct the window position on the next turn even after
+    // front-trimming by the host (e.g. OpenCode evicting old messages).
+    // Snapshot the current rawBudget so future pin checks use the budget that
+    // was in effect when this window was chosen (Option 1: snapshot isolation).
+    const rawMessageCount = result.messages.length - input.prefix.length;
+    if (rawMessageCount > 0) {
       input.sessState.rawWindowCache = {
         sessionID: input.sessionID,
-        firstMessageID: rawStart.info.id,
+        pinnedRawCount: rawMessageCount,
+        pinnedTotalCount: input.messages.length,
+        pinnedBudget: input.rawBudget,
       };
     }
   }
@@ -1200,21 +1313,27 @@ function transformInner(input: {
 }): TransformResult {
   const cfg = config();
   const overhead = getOverhead();
+  // --- Session state (must precede budget computation) ---
+  const sid = input.sessionID ?? input.messages[0]?.info.sessionID;
+  const sessState = sid ? getSessionState(sid) : makeSessionState();
   // Usable = full context minus output reservation minus fixed overhead (system + tools)
   // minus LTM tokens already injected into the system prompt this turn.
+  // Read LTM tokens from per-session state to avoid cross-session contamination.
+  const sessLtmTokens = sid ? sessState.ltmTokens : ltmTokensFallback;
   const usable = Math.max(
     0,
-    contextLimit - outputReserved - overhead - ltmTokens,
+    contextLimit - outputReserved - overhead - sessLtmTokens,
   );
   const distilledBudget = Math.floor(usable * cfg.budget.distilled);
-  const rawBudget = Math.floor(usable * cfg.budget.raw);
+  // Base raw budget. May be overridden below for post-idle compact mode.
+  let rawBudget = Math.floor(usable * cfg.budget.raw);
   // --- Force escalation (reactive error recovery) ---
   // When the API previously rejected with "prompt is too long", skip layers
   // below the forced minimum to ensure enough trimming on the next attempt.
   // One-shot: consumed here and reset to 0 (both in-memory and on disk).
-  const sid = input.sessionID ?? input.messages[0]?.info.sessionID;
-  const sessState = sid ? getSessionState(sid) : makeSessionState();
   let effectiveMinLayer = sessState.forceMinLayer;
   sessState.forceMinLayer = 0;
   if (sid && effectiveMinLayer > 0) saveForceMinLayer(sid, 0);
@@ -1246,17 +1365,43 @@ function transformInner(input: {
   }
   // --- Sticky layer guard (Option C) ---
-  // After a compressed turn (layer >= 1), don't allow layer 0 re-entry until
+  // After a compressed turn (layer >= N), don't allow re-entry below N until
   // the session genuinely shrinks (e.g. after compaction deletes messages).
-  // Prevents the calibration oscillation: a compressed turn stores
-  // lastKnownInput=100K for a 50-message window, but the next turn's
-  // input.messages has 300 raw messages. The delta estimation treats the 250
-  // evicted messages as "new" and undercounts their tokens, producing an
-  // expectedInput that fits in layer 0 — but the actual tokens are ~190K.
+  // Prevents calibration oscillation AND layer-transition cache busts:
+  //   - 0→1→0: compressed turn stores lastKnownInput=100K for a 50-message
+  //     window, next turn's 300 raw messages produce an undercounted
+  //     expectedInput that "fits" in layer 0 but actually overflows.
+  //   - 1→2→1: layer 2 strips tool outputs (different bytes), bouncing back
+  //     to layer 1 restores them (different bytes again) → two busts.
+  // Pinning to the *actual* last layer prevents all downward oscillation.
   // Only applied when calibrated (same session, per-session state) to avoid
   // affecting other sessions including worker sessions.
   if (calibrated && sessState.lastLayer >= 1 && input.messages.length >= sessState.lastKnownMessageCount) {
+    effectiveMinLayer = Math.max(effectiveMinLayer, sessState.lastLayer) as SafetyLayer;
+  }
+  // --- Post-idle compact layer ---
+  // When the cache just went cold (onIdleResume fired), skip layer 0 full-raw
+  // passthrough and use a tighter raw budget. Rationale: the entire context is
+  // a cache WRITE regardless — a smaller total costs less to write, and
+  // aggressive idle distillation already captured older history in the prefix.
+  // The flag is one-shot: consumed here and reset so subsequent turns use
+  // normal budgets once the cache is warm.
+  const postIdleCompact = sessState.postIdleCompact;
+  if (postIdleCompact) {
+    sessState.postIdleCompact = false;
+    // Skip layer 0 — don't pass through all raw messages on a cold cache.
     effectiveMinLayer = Math.max(effectiveMinLayer, 1) as SafetyLayer;
+    // Use a tighter raw budget: 20% of usable instead of the normal 40%.
+    // The distilled prefix covers the older history; the raw window only
+    // needs the current turn + minimal recent context. This reduces the
+    // total cold-cache write cost by up to 20% of usable (~29K tokens on
+    // a 200K context model).
+    rawBudget = Math.floor(usable * 0.20);
+    log.info(
+      `post-idle compact: session=${sid} rawBudget=${rawBudget}` +
+      ` (${Math.floor(usable * cfg.budget.raw)}→${rawBudget})`,
+    );
   }
   let expectedInput: number;
@@ -1269,12 +1414,12 @@ function transformInner(input: {
       ? input.messages.filter((m) => !sessState.lastWindowMessageIDs.has(m.info.id))
       : input.messages.slice(-Math.max(0, input.messages.length - sessState.lastKnownMessageCount));
     const newMsgTokens = newMessages.reduce((s, m) => s + estimateMessage(m), 0);
-    const ltmDelta = ltmTokens - sessState.lastKnownLtm;
+    const ltmDelta = sessLtmTokens - sessState.lastKnownLtm;
     expectedInput = sessState.lastKnownInput + newMsgTokens + ltmDelta;
   } else {
     // First turn or session change: fall back to chars/3 estimate + overhead.
     const messageTokens = input.messages.reduce((s, m) => s + estimateMessage(m), 0);
-    expectedInput = messageTokens + overhead + ltmTokens;
+    expectedInput = messageTokens + overhead + sessLtmTokens;
   }
   // When uncalibrated, apply safety multiplier to the layer-0 decision too.
@@ -1299,8 +1444,8 @@ function transformInner(input: {
     // All messages fit — return unmodified to preserve append-only prompt-cache pattern.
     // Raw messages are strictly better context than lossy distilled summaries.
     const messageTokens = calibrated
-      ? expectedInput - (ltmTokens - sessState.lastKnownLtm)  // approximate raw portion
-      : expectedInput - overhead - ltmTokens;
+      ? expectedInput - (sessLtmTokens - sessState.lastKnownLtm)  // approximate raw portion
+      : expectedInput - overhead - sessLtmTokens;
     return {
       messages: input.messages,
       layer: 0,
@@ -1323,7 +1468,9 @@ function transformInner(input: {
   const dedupMessages = deduplicateToolOutputs(input.messages, turnStart);
-  const distillations = sid ? loadDistillations(input.projectPath, sid) : [];
+  const distillations = sid
+    ? loadDistillationsCached(input.projectPath, sid, input.messages, sessState)
+    : [];
   // Layer 1 uses the append-only cached prefix (Approach C) to keep the
   // distilled content byte-identical between distillation runs, preserving
@@ -1503,19 +1650,43 @@ export function transform(input: {
     // result fields above so a thrown transformInner doesn't update it.
     state.lastTurnAt = Date.now();
-    // --- Cache-bust diagnostics (LORE_DEBUG only) ---
+    // --- Cache-bust diagnostics ---
     // Track byte-identity of the message prefix. When the prefix hash changes
     // between consecutive turns, it means Anthropic's prompt cache is invalidated
     // and the entire context is re-written (12.5× cache-read price). This helps
     // identify which code paths are breaking byte-identity.
-    const prefixIds = result.messages.slice(0, 5).map((m) => m.info.id).join(",");
-    const prefixHash = `${result.layer}:${prefixIds}`;
+    //
+    // Use a content-based fingerprint (role + text snippet) rather than message
+    // IDs, since IDs can be unstable (gateway generates fresh UUIDs, OpenCode
+    // may regenerate messages in-place). Content hashes are a better proxy for
+    // Anthropic's actual byte-identity cache.
+    const prefixFingerprint = result.messages.slice(0, 5).map((m) => {
+      const text = m.parts
+        .map((p) => {
+          if (isTextPart(p)) return p.text?.slice(0, 40) ?? "";
+          if (isReasoningPart(p)) return p.text?.slice(0, 40) ?? "";
+          return p.type;
+        })
+        .join("|");
+      return `${m.info.role}:${text.slice(0, 60)}`;
+    }).join(",");
+    const prefixHash = `${result.layer}:${prefixFingerprint}`;
+    state.transformCount++;
     if (state.lastPrefixHash && state.lastPrefixHash !== prefixHash) {
+      state.bustCount++;
+      const rate = state.bustCount / state.transformCount;
       log.info(
-        `cache-bust detected: session=${sid} layer=${state.lastLayer}→${result.layer}` +
+        `cache-bust #${state.bustCount} (${(rate * 100).toFixed(0)}%): session=${sid}` +
+        ` layer=${state.lastLayer}→${result.layer}` +
         ` msgs=${state.lastTransformedCount}→${result.messages.length}` +
         ` prefix=${state.lastPrefixHash.slice(0, 30)}→${prefixHash.slice(0, 30)}`,
       );
+      if (state.transformCount >= 20 && rate > 0.5) {
+        log.warn(
+          `HIGH BUST RATE: session ${sid} has ${(rate * 100).toFixed(0)}% bust rate` +
+          ` (${state.bustCount}/${state.transformCount} transforms)`,
+        );
+      }
     }
     state.lastPrefixHash = prefixHash;