npm - @loreai/core - Versions diffs - 0.10.2 → 0.11.0 - Mend

@loreai/core 0.10.2 → 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (72) hide show

package/dist/bun/config.d.ts +8 -0
package/dist/bun/config.d.ts.map +1 -1
package/dist/bun/db.d.ts.map +1 -1
package/dist/bun/distillation.d.ts +74 -2
package/dist/bun/distillation.d.ts.map +1 -1
package/dist/bun/embedding.d.ts.map +1 -1
package/dist/bun/gradient.d.ts +72 -0
package/dist/bun/gradient.d.ts.map +1 -1
package/dist/bun/index.d.ts +4 -2
package/dist/bun/index.d.ts.map +1 -1
package/dist/bun/index.js +530 -67
package/dist/bun/index.js.map +4 -4
package/dist/bun/prompt.d.ts +8 -2
package/dist/bun/prompt.d.ts.map +1 -1
package/dist/bun/temporal.d.ts +31 -0
package/dist/bun/temporal.d.ts.map +1 -1
package/dist/bun/types.d.ts +9 -0
package/dist/bun/types.d.ts.map +1 -1
package/dist/bun/worker-model.d.ts +90 -0
package/dist/bun/worker-model.d.ts.map +1 -0
package/dist/node/config.d.ts +8 -0
package/dist/node/config.d.ts.map +1 -1
package/dist/node/db.d.ts.map +1 -1
package/dist/node/distillation.d.ts +74 -2
package/dist/node/distillation.d.ts.map +1 -1
package/dist/node/embedding.d.ts.map +1 -1
package/dist/node/gradient.d.ts +72 -0
package/dist/node/gradient.d.ts.map +1 -1
package/dist/node/index.d.ts +4 -2
package/dist/node/index.d.ts.map +1 -1
package/dist/node/index.js +530 -67
package/dist/node/index.js.map +4 -4
package/dist/node/prompt.d.ts +8 -2
package/dist/node/prompt.d.ts.map +1 -1
package/dist/node/temporal.d.ts +31 -0
package/dist/node/temporal.d.ts.map +1 -1
package/dist/node/types.d.ts +9 -0
package/dist/node/types.d.ts.map +1 -1
package/dist/node/worker-model.d.ts +90 -0
package/dist/node/worker-model.d.ts.map +1 -0
package/dist/types/config.d.ts +8 -0
package/dist/types/config.d.ts.map +1 -1
package/dist/types/db.d.ts.map +1 -1
package/dist/types/distillation.d.ts +74 -2
package/dist/types/distillation.d.ts.map +1 -1
package/dist/types/embedding.d.ts.map +1 -1
package/dist/types/gradient.d.ts +72 -0
package/dist/types/gradient.d.ts.map +1 -1
package/dist/types/index.d.ts +4 -2
package/dist/types/index.d.ts.map +1 -1
package/dist/types/prompt.d.ts +8 -2
package/dist/types/prompt.d.ts.map +1 -1
package/dist/types/temporal.d.ts +31 -0
package/dist/types/temporal.d.ts.map +1 -1
package/dist/types/types.d.ts +9 -0
package/dist/types/types.d.ts.map +1 -1
package/dist/types/worker-model.d.ts +90 -0
package/dist/types/worker-model.d.ts.map +1 -0
package/package.json +1 -1
package/src/config.ts +53 -6
package/src/db.ts +57 -1
package/src/distillation.ts +225 -28
package/src/embedding.ts +7 -0
package/src/gradient.ts +262 -8
package/src/index.ts +16 -0
package/src/lat-reader.ts +4 -4
package/src/ltm.ts +17 -17
package/src/prompt.ts +101 -0
package/src/recall.ts +4 -4
package/src/temporal.ts +41 -10
package/src/types.ts +9 -0
package/src/worker-model.ts +264 -0

package/src/gradient.ts CHANGED Viewed

@@ -4,6 +4,7 @@ import { db, ensureProject, loadForceMinLayer, saveForceMinLayer } from "./db";
 import { config } from "./config";
 import { formatDistillations } from "./prompt";
 import { normalize } from "./markdown";
+import * as log from "./log";
 type MessageWithParts = LoreMessageWithParts;
@@ -36,6 +37,15 @@ function estimateMessage(msg: MessageWithParts): number {
 let contextLimit = 200_000; // sensible default
 let outputReserved = 32_000;
+// Cost-aware layer-0 token cap. When > 0, the layer-0 passthrough gate uses
+// min(maxInput, maxLayer0Tokens) instead of maxInput alone. Derived from the
+// model's cache-read cost: cap = targetCostPerTurn / costPerToken. This prevents
+// expensive models from sending huge contexts at layer 0, where cache-read costs
+// compound linearly across turns. Set to 0 to disable (use full context).
+let maxLayer0Tokens = 0;
+const MIN_LAYER0_FLOOR = 40_000;
 // Conservative overhead reserve for first-turn (before calibration):
 // accounts for provider system prompt + AGENTS.md + tool definitions + env info
 const FIRST_TURN_OVERHEAD = 15_000;
@@ -83,6 +93,29 @@ type SessionState = {
   prefixCache: PrefixCache | null;
   /** Raw window pin cache (Approach B) */
   rawWindowCache: RawWindowCache | null;
+  /**
+   * Wall-clock timestamp (epoch ms) of the most recent transform() call for this
+   * session. Used by onIdleResume() to detect cold-cache resumption — when the
+   * gap between turns exceeds Anthropic's prompt cache eviction window (5 min
+   * default / 1 hour extended), the byte-identity caching subsystems
+   * (prefixCache, rawWindowCache) are providing no value because the cache is
+   * already cold. Refreshing them on resume lets us produce a better-fitting
+   * window without paying a cache cost we'd otherwise be trying to preserve.
+   * 0 = never set (first turn).
+   */
+  lastTurnAt: number;
+  /**
+   * Set true by onIdleResume() when an idle-resume reset just fired; consumed
+   * (and cleared) by the LTM degraded-recovery branch in the OpenCode hook to
+   * skip the conversation-vs-LTM token comparison. After idle eviction the
+   * cache-bust cost is effectively zero, so we should always recover LTM on
+   * the post-idle turn regardless of conversation size.
+   */
+  cameOutOfIdle: boolean;
+  /** Consecutive turns at layer >= 2. A compaction hint is logged once, when this reaches 3; any turn below layer 2 resets it to 0. */
+  consecutiveHighLayer: number;
+  /** Prefix key of the last transform output: `<layer>:<first five message IDs, comma-joined>` (a plain string, not a digest) — for cache-bust diagnostics. */
+  lastPrefixHash: string;
 };
 function makeSessionState(): SessionState {
@@ -97,6 +130,10 @@ function makeSessionState(): SessionState {
     lastTransformEstimate: 0,
     prefixCache: null,
     rawWindowCache: null,
+    lastTurnAt: 0,
+    cameOutOfIdle: false,
+    consecutiveHighLayer: 0,
+    lastPrefixHash: "",
   };
 }
@@ -116,6 +153,65 @@ function getSessionState(sessionID: string): SessionState {
   return state;
 }
+/**
+ * Detect cold-cache resumption and refresh byte-identity caches.
+ *
+ * Anthropic's prompt cache evicts entries after ~5 minutes (default tier) /
+ * ~1 hour (extended tier). When a session resumes after the eviction window,
+ * the cache is provably cold — every prefix we've been carefully keeping
+ * byte-stable (`prefixCache`, `rawWindowCache`, plus the host's per-session
+ * LTM cache) provides no benefit on this turn. Worse, the LTM block was
+ * scored against the conversation context as it was on the previous turn,
+ * which may have drifted significantly in N hours.
+ *
+ * On resume after `thresholdMs`:
+ *   - reset the distilled prefix cache (next turn re-renders from scratch)
+ *   - reset the raw window pin cache (next turn picks a fresh cutoff)
+ *   - set `cameOutOfIdle` so the OpenCode host can also clear `ltmSessionCache`
+ *     and bypass the conversation-vs-LTM cost comparison in the LTM
+ *     degraded-recovery branch
+ *
+ * Importantly, this does NOT touch:
+ *   - reasoning blocks (Anthropic's April 23 postmortem identifies dropping
+ *     reasoning blocks as the root cause of forgetfulness/repetition; Lore
+ *     preserves reasoning by policy across all gradient layers)
+ *   - the gradient layer (cold cache doesn't change token budgets;
+ *     calibration's actualInput = input + cache.read + cache.write already
+ *     accounts for cache misses correctly)
+ *   - calibration state (`lastKnownInput`, overhead EMA, message-ID set) —
+ *     the next API response will refresh these via the normal calibrate() path
+ *
+ * Set `thresholdMs <= 0` to disable. Returns true if a reset fired so the
+ * caller can log/observe.
+ */
+export function onIdleResume(
+  sessionID: string,
+  thresholdMs: number,
+  now: number = Date.now(),
+): { triggered: false } | { triggered: true; idleMs: number } {
+  // A non-positive threshold disables idle detection; bail out before
+  // touching (or lazily creating) the session state.
+  if (thresholdMs <= 0) {
+    return { triggered: false };
+  }
+  const session = getSessionState(sessionID);
+  const previousTurnAt = session.lastTurnAt;
+  const idleMs = now - previousTurnAt;
+  // previousTurnAt === 0 means no turn has been recorded yet (nothing to
+  // refresh); otherwise the gap must reach the threshold to count as idle.
+  if (previousTurnAt === 0 || idleMs < thresholdMs) {
+    return { triggered: false };
+  }
+  // Idle resume: drop both byte-identity caches and raise the one-shot flag
+  // that consumeCameOutOfIdle() reads and clears.
+  session.cameOutOfIdle = true;
+  session.rawWindowCache = null;
+  session.prefixCache = null;
+  return { triggered: true, idleMs };
+}
+/**
+ * One-shot read of the session's cameOutOfIdle flag.
+ *
+ * Returns true only when the session has state and the flag is currently
+ * set, clearing it in the same call so a second call returns false until
+ * onIdleResume() sets it again. Unknown sessions return false and no state
+ * is created for them.
+ */
+export function consumeCameOutOfIdle(sessionID: string): boolean {
+  const session = sessionStates.get(sessionID);
+  if (session?.cameOutOfIdle) {
+    session.cameOutOfIdle = false;
+    return true;
+  }
+  return false;
+}
 // LTM tokens injected via system transform hook this turn.
 // Set by setLtmTokens() after the system hook runs; consumed by transform().
 let ltmTokens = 0;
@@ -131,6 +227,27 @@ export function setModelLimits(limits: { context: number; output: number }) {
   outputReserved = Math.min(limits.output || 32_000, 32_000);
 }
+/**
+ * Install the cost-aware token cap for the layer-0 passthrough gate.
+ *
+ * While the stored cap is > 0 the gate compares against `min(maxInput, cap)`;
+ * a stored value of 0 disables the cap and the gate uses `maxInput` alone.
+ * Fractional inputs are floored and negative inputs are clamped to 0.
+ *
+ * Intended to be called by the host adapter with a cap derived from model
+ * pricing:
+ * `cap = max(targetCostPerTurn / model.cost.cache.read, MIN_LAYER0_FLOOR)`
+ */
+export function setMaxLayer0Tokens(tokens: number) {
+  const wholeTokens = Math.floor(tokens);
+  maxLayer0Tokens = Math.max(0, wholeTokens);
+}
+/**
+ * Compute the layer-0 token cap from a per-turn cost target and cache-read price.
+ *
+ * @param targetCostPerTurn - Cost budget for a single turn, in the same
+ *   currency unit as `cacheReadCostPerToken`.
+ * @param cacheReadCostPerToken - Price of reading one cached token.
+ * @returns `0` (cap disabled) when either input is non-positive or NaN, or
+ *   when their quotient is NaN; otherwise
+ *   `max(floor(targetCostPerTurn / cacheReadCostPerToken), MIN_LAYER0_FLOOR)`.
+ */
+export function computeLayer0Cap(
+  targetCostPerTurn: number,
+  cacheReadCostPerToken: number,
+): number {
+  // `!(x > 0)` instead of `x <= 0`: every comparison with NaN is false, so the
+  // `<=` form lets NaN (e.g. pricing that is undefined at runtime) fall
+  // through and Math.max would then propagate NaN to the caller.
+  if (!(targetCostPerTurn > 0) || !(cacheReadCostPerToken > 0)) return 0;
+  const rawCap = Math.floor(targetCostPerTurn / cacheReadCostPerToken);
+  // Infinity / Infinity is NaN; treat an uncomputable cap as "disabled".
+  if (Number.isNaN(rawCap)) return 0;
+  return Math.max(rawCap, MIN_LAYER0_FLOOR);
+}
 /** Called by the system transform hook after formatting LTM knowledge. */
 export function setLtmTokens(tokens: number) {
   ltmTokens = tokens;
@@ -251,6 +368,37 @@ export function resetCalibration(sessionID?: string) {
   }
 }
+/**
+ * For testing only — a narrow, read-only view of a session's cache-related
+ * fields that avoids exporting the full state type.
+ *
+ * Returns null when no state exists for `sessionID` (none is created).
+ * `hasPrefixCache` / `hasRawWindowCache` report whether the corresponding
+ * cache field is currently non-null, which is enough to assert that
+ * onIdleResume() cleared them; the other two fields are copied through.
+ */
+export function inspectSessionState(sessionID: string): {
+  hasPrefixCache: boolean;
+  hasRawWindowCache: boolean;
+  cameOutOfIdle: boolean;
+  lastTurnAt: number;
+} | null {
+  const session = sessionStates.get(sessionID);
+  if (!session) return null;
+  const { prefixCache, rawWindowCache, cameOutOfIdle, lastTurnAt } = session;
+  return {
+    hasPrefixCache: prefixCache !== null,
+    hasRawWindowCache: rawWindowCache !== null,
+    cameOutOfIdle,
+    lastTurnAt,
+  };
+}
+/**
+ * For testing only — overwrite the session's lastTurnAt timestamp so tests
+ * can simulate an idle gap without sleeping. The session state is obtained
+ * via getSessionState(), so a test does not have to seed it with a
+ * transform() call first.
+ */
+export function setLastTurnAtForTest(sessionID: string, ms: number): void {
+  const session = getSessionState(sessionID);
+  session.lastTurnAt = ms;
+}
 type Distillation = {
   id: string;
   observations: string;
@@ -320,20 +468,41 @@ function cleanParts(parts: LorePart[]): LorePart[] {
   return filtered.length > 0 ? filtered : parts;
 }
+// Upper bound on how much of the output the path-extraction regex scans.
+// Two mitigations for catastrophic backtracking in `PATH_RE`:
+//   1. Skip entirely if the input contains no '/' (a path requires at least
+//      one separator, so without one the regex has no possible match yet
+//      still backtracks O(n²) on long runs of [\w.-]).
+//   2. Cap the scanned slice at this limit so even crafted inputs with a
+//      '/' somewhere don't stall the worker. The annotation only needs a
+//      few representative paths — sampling the first 64KB is plenty.
+const ANNOTATION_PATH_SCAN_LIMIT = 64 * 1024;
+const PATH_RE = /(?:[\w.-]+\/)+[\w.-]+\.\w{1,5}/g;
 // Build a metadata annotation for a stripped tool output, preserving key signals
 // about what was lost without requiring an LLM call. Inspired by the per-token
 // scalar bias β from "Fast KV Compaction via Attention Matching" (Zweiger et al.,
 // 2025) — when tokens are removed, preserving metadata about the removed content
 // helps the model compensate for information loss and decide whether to recall.
 // Reference: https://arxiv.org/abs/2602.16284
-function toolStripAnnotation(toolName: string, output: string): string {
+export function toolStripAnnotation(toolName: string, output: string): string {
   const lines = output.split("\n").length;
-  const chars = output.length;
   // Detect key signals via lightweight heuristics — no LLM call
   const hasError = /\b(?:error|fail(?:ed|ure)?|exception|panic|traceback)\b/i.test(output);
-  const paths = output.match(/(?:[\w.-]+\/)+[\w.-]+\.\w{1,5}/g);
-  const uniquePaths = paths ? [...new Set(paths)].slice(0, 5) : [];
+  // Path extraction: skip entirely if no '/' is present (cheap O(n) check
+  // via indexOf) to avoid PATH_RE's O(n²) backtracking on long runs of
+  // [\w.-] without a separator. Otherwise sample the first N KB.
+  let uniquePaths: string[] = [];
+  if (output.indexOf("/") !== -1) {
+    const pathScan =
+      output.length > ANNOTATION_PATH_SCAN_LIMIT
+        ? output.slice(0, ANNOTATION_PATH_SCAN_LIMIT)
+        : output;
+    const paths = pathScan.match(PATH_RE);
+    if (paths) uniquePaths = [...new Set(paths)].slice(0, 5);
+  }
   let annotation = `[output omitted — ${toolName}: ${lines} lines`;
   if (hasError) annotation += ", contained errors";
@@ -1113,7 +1282,20 @@ function transformInner(input: {
   // estimated at 146K passes layer 0 but actually costs 214K → overflow.
   const layer0Input = calibrated ? expectedInput : expectedInput * UNCALIBRATED_SAFETY;
-  if (effectiveMinLayer === 0 && layer0Input <= maxInput) {
+  // Cost-aware layer-0 cap: use the smaller of the API limit and the cost-derived
+  // cap. When maxLayer0Tokens is 0 (disabled), fall back to pure maxInput.
+  let layer0Ceiling = maxLayer0Tokens > 0
+    ? Math.min(maxInput, maxLayer0Tokens)
+    : maxInput;
+  // Cold-cache awareness: on the first turn (uncalibrated = no prior API data),
+  // the entire context is a cache WRITE at 12.5× the cache-read price. Use 70%
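+  // of the normal cap to reduce the cold-write cost. The reduction only applies
+  // when the cost-derived cap is the binding limit (layer0Ceiling < maxInput).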
+  if (!calibrated && layer0Ceiling < maxInput) {
+    layer0Ceiling = Math.floor(layer0Ceiling * 0.7);
+  }
+  if (effectiveMinLayer === 0 && layer0Input <= layer0Ceiling) {
     // All messages fit — return unmodified to preserve append-only prompt-cache pattern.
     // Raw messages are strictly better context than lossy distilled summaries.
     const messageTokens = calibrated
@@ -1222,11 +1404,19 @@ function transformInner(input: {
     return { ...layer3!, layer: 3, usable, distilledBudget, rawBudget };
   }
-  // Layer 4: Emergency — last 2 distillations, last 3 raw messages with tool parts intact.
+  // Layer 4: Emergency — last 2 distillations + token-budget raw tail.
   // We do NOT strip tool parts here: doing so would cause an infinite tool-call loop because
   // the model would lose sight of its own in-progress tool calls and re-invoke them endlessly.
   // Instead, we aggressively drop old messages and rely on the `recall` tool (which the model
   // is always instructed to use) to retrieve any older details it needs.
+  //
+  // Token-budget tail (F7): instead of a fixed `slice(-3)`, size the raw
+  // tail using `clamp(usable * 0.25, 2_000, 8_000)` tokens — matching
+  // upstream OpenCode's tail-budget formula for compaction. The current
+  // agentic turn (from `currentTurnStart()`) is ALWAYS fully included even
+  // if it alone exceeds the tail budget — layer 4 is the terminal layer
+  // and must always return. Remaining budget is filled backward with older
+  // messages.
   urgentDistillation = true;
   const nuclearDistillations = distillations.slice(-2);
   const nuclearPrefix = distilledPrefix(nuclearDistillations);
@@ -1234,15 +1424,40 @@ function transformInner(input: {
     (sum, m) => sum + estimateMessage(m),
     0,
   );
-  const nuclearRaw = input.messages.slice(-3).map((m) => ({
+  // Token budget for the raw tail. clamp(usable * 0.25, 2K, 8K).
+  const tailBudget = Math.max(2_000, Math.min(8_000, Math.floor(usable * 0.25)));
+  // Current turn is always included (non-negotiable — dropping it causes
+  // the infinite tool-call loop). Clean parts but never strip tool outputs.
+  const nuclearTurnStart = currentTurnStart(input.messages);
+  const currentTurn = input.messages.slice(nuclearTurnStart).map((m) => ({
     info: m.info,
     parts: cleanParts(m.parts),
   }));
-  const nuclearRawTokens = nuclearRaw.reduce(
+  const currentTurnTokens = currentTurn.reduce(
     (sum, m) => sum + estimateMessage(m),
     0,
   );
+  // Fill remaining budget walking backward from the turn boundary.
+  const olderMessages: MessageWithParts[] = [];
+  let olderTokens = 0;
+  const remaining = Math.max(0, tailBudget - currentTurnTokens);
+  for (let i = nuclearTurnStart - 1; i >= 0 && olderTokens < remaining; i--) {
+    const msg = input.messages[i];
+    const est = estimateMessage(msg);
+    if (olderTokens + est > remaining) break;
+    olderMessages.unshift({
+      info: msg.info,
+      parts: cleanParts(msg.parts),
+    });
+    olderTokens += est;
+  }
+  const nuclearRaw = [...olderMessages, ...currentTurn];
+  const nuclearRawTokens = olderTokens + currentTurnTokens;
   return {
     messages: [...nuclearPrefix, ...nuclearRaw],
     layer: 4,
@@ -1282,6 +1497,45 @@ export function transform(input: {
     state.lastTransformEstimate = result.totalTokens;
     state.lastLayer = result.layer;
     state.lastWindowMessageIDs = new Set(result.messages.map((m) => m.info.id));
+    // Mark wall-clock for onIdleResume() — must record on every transform()
+    // so the next-turn idle check has an accurate baseline. Done after the
+    // result fields above so a thrown transformInner doesn't update it.
+    state.lastTurnAt = Date.now();
+    // --- Cache-bust diagnostics (LORE_DEBUG only) ---
+    // Track byte-identity of the message prefix. When the prefix hash changes
+    // between consecutive turns, it means Anthropic's prompt cache is invalidated
+    // and the entire context is re-written (12.5× cache-read price). This helps
+    // identify which code paths are breaking byte-identity.
+    const prefixIds = result.messages.slice(0, 5).map((m) => m.info.id).join(",");
+    const prefixHash = `${result.layer}:${prefixIds}`;
+    if (state.lastPrefixHash && state.lastPrefixHash !== prefixHash) {
+      log.info(
+        `cache-bust detected: session=${sid} layer=${state.lastLayer}→${result.layer}` +
+        ` msgs=${state.lastTransformedCount}→${result.messages.length}` +
+        ` prefix=${state.lastPrefixHash.slice(0, 30)}→${prefixHash.slice(0, 30)}`,
+      );
+    }
+    state.lastPrefixHash = prefixHash;
+    // --- Compaction hint ---
+    if (result.layer >= 2) {
+      state.consecutiveHighLayer++;
+      if (state.consecutiveHighLayer === 3) {
+        log.info(
+          `session ${sid} has been at gradient layer ${result.layer}+ for 3 consecutive turns.` +
+          ` Consider running /compact to reset the context window.`,
+        );
+      }
+    } else {
+      state.consecutiveHighLayer = 0;
+    }
+    log.info(
+      `gradient: session=${sid} layer=${result.layer} tokens=${result.totalTokens}` +
+      ` (distilled=${result.distilledTokens} raw=${result.rawTokens})` +
+      ` usable=${result.usable} cap=${maxLayer0Tokens || "off"}`,
+    );
   }
   return result;
 }

package/src/index.ts CHANGED Viewed

@@ -60,6 +60,8 @@ export {
 export {
   transform,
   setModelLimits,
+  setMaxLayer0Tokens,
+  computeLayer0Cap,
   needsUrgentDistillation,
   calibrate,
   setLtmTokens,
@@ -68,6 +70,13 @@ export {
   setForceMinLayer,
   getLastTransformedCount,
   getLastTransformEstimate,
+  toolStripAnnotation,
+  onIdleResume,
+  consumeCameOutOfIdle,
+  // Test-only — exposed at the barrel so host-package tests can simulate idle
+  // gaps without sleeping. Not part of the public API.
+  setLastTurnAtForTest,
+  inspectSessionState,
 } from "./gradient";
 export {
   formatKnowledge,
@@ -81,9 +90,16 @@ export {
   CONSOLIDATION_SYSTEM,
   consolidationUser,
   QUERY_EXPANSION_SYSTEM,
+  COMPACT_SUMMARY_TEMPLATE,
+  buildCompactPrompt,
 } from "./prompt";
 export { shouldImport, importFromFile, exportToFile } from "./agents-file";
 export { workerSessionIDs, isWorkerSession } from "./worker";
+export * as workerModel from "./worker-model";
+export {
+  WORKER_JUDGE_SYSTEM,
+  workerJudgeUser,
+} from "./worker-model";
 export {
   ftsQuery,
   ftsQueryOr,

package/src/lat-reader.ts CHANGED Viewed

@@ -290,8 +290,8 @@ export function searchScored(input: {
   const ftsSQL = `SELECT s.id, s.project_id, s.file, s.heading, s.depth, s.content,
          s.content_hash, s.first_paragraph, s.updated_at,
          bm25(lat_sections_fts, 6.0, 2.0) as rank
-       FROM lat_sections s
-       JOIN lat_sections_fts f ON s.rowid = f.rowid
+       FROM lat_sections_fts f
+       CROSS JOIN lat_sections s ON s.rowid = f.rowid
        WHERE lat_sections_fts MATCH ?
        AND s.project_id = ?
        ORDER BY rank LIMIT ?`;
@@ -335,8 +335,8 @@ export function scoreForSession(
         `SELECT s.id, s.project_id, s.file, s.heading, s.depth, s.content,
                 s.content_hash, s.first_paragraph, s.updated_at,
                 bm25(lat_sections_fts, 6.0, 2.0) as rank
-         FROM lat_sections s
-         JOIN lat_sections_fts f ON s.rowid = f.rowid
+         FROM lat_sections_fts f
+         CROSS JOIN lat_sections s ON s.rowid = f.rowid
          WHERE lat_sections_fts MATCH ?
          AND s.project_id = ?
          ORDER BY rank`,

package/src/ltm.ts CHANGED Viewed

@@ -215,11 +215,11 @@ function scoreEntriesFTS(sessionContext: string): Map<string, number> {
   try {
     const results = db()
       .query(
-        `SELECT k.id, bm25(knowledge_fts, ?, ?, ?) as rank
-         FROM knowledge k
-         JOIN knowledge_fts f ON k.rowid = f.rowid
-         WHERE knowledge_fts MATCH ?
-         AND k.confidence > 0.2`,
+         `SELECT k.id, bm25(knowledge_fts, ?, ?, ?) as rank
+          FROM knowledge_fts f
+          CROSS JOIN knowledge k ON k.rowid = f.rowid
+          WHERE knowledge_fts MATCH ?
+          AND k.confidence > 0.2`,
       )
       .all(title, content, category, q) as Array<{
       id: string;
@@ -460,14 +460,14 @@ export function search(input: {
   const pid = input.projectPath ? ensureProject(input.projectPath) : null;
   const ftsSQL = pid
-    ? `SELECT ${KNOWLEDGE_COLS_K} FROM knowledge k
-       JOIN knowledge_fts f ON k.rowid = f.rowid
+    ? `SELECT ${KNOWLEDGE_COLS_K} FROM knowledge_fts f
+       CROSS JOIN knowledge k ON k.rowid = f.rowid
        WHERE knowledge_fts MATCH ?
        AND (k.project_id = ? OR k.project_id IS NULL OR k.cross_project = 1)
        AND k.confidence > 0.2
        ORDER BY bm25(knowledge_fts, ?, ?, ?) LIMIT ?`
-    : `SELECT ${KNOWLEDGE_COLS_K} FROM knowledge k
-       JOIN knowledge_fts f ON k.rowid = f.rowid
+    : `SELECT ${KNOWLEDGE_COLS_K} FROM knowledge_fts f
+       CROSS JOIN knowledge k ON k.rowid = f.rowid
        WHERE knowledge_fts MATCH ?
        AND k.confidence > 0.2
        ORDER BY bm25(knowledge_fts, ?, ?, ?) LIMIT ?`;
@@ -517,14 +517,14 @@ export function searchScored(input: {
   const { title, content, category } = ftsWeights();
   const ftsSQL = pid
-    ? `SELECT ${KNOWLEDGE_COLS_K}, bm25(knowledge_fts, ?, ?, ?) as rank FROM knowledge k
-       JOIN knowledge_fts f ON k.rowid = f.rowid
+    ? `SELECT ${KNOWLEDGE_COLS_K}, bm25(knowledge_fts, ?, ?, ?) as rank FROM knowledge_fts f
+       CROSS JOIN knowledge k ON k.rowid = f.rowid
        WHERE knowledge_fts MATCH ?
        AND (k.project_id = ? OR k.project_id IS NULL OR k.cross_project = 1)
        AND k.confidence > 0.2
        ORDER BY rank LIMIT ?`
-    : `SELECT ${KNOWLEDGE_COLS_K}, bm25(knowledge_fts, ?, ?, ?) as rank FROM knowledge k
-       JOIN knowledge_fts f ON k.rowid = f.rowid
+    : `SELECT ${KNOWLEDGE_COLS_K}, bm25(knowledge_fts, ?, ?, ?) as rank FROM knowledge_fts f
+       CROSS JOIN knowledge k ON k.rowid = f.rowid
        WHERE knowledge_fts MATCH ?
        AND k.confidence > 0.2
        ORDER BY rank LIMIT ?`;
@@ -569,8 +569,8 @@ export function searchScoredOtherProjects(input: {
   // Find entries from other projects that are NOT cross-project (those are
   // already included in the normal search via the cross_project=1 filter).
   // Also exclude entries with no project_id (global) — already included.
-  const ftsSQL = `SELECT ${KNOWLEDGE_COLS_K}, bm25(knowledge_fts, ?, ?, ?) as rank FROM knowledge k
-     JOIN knowledge_fts f ON k.rowid = f.rowid
+  const ftsSQL = `SELECT ${KNOWLEDGE_COLS_K}, bm25(knowledge_fts, ?, ?, ?) as rank FROM knowledge_fts f
+     CROSS JOIN knowledge k ON k.rowid = f.rowid
      WHERE knowledge_fts MATCH ?
      AND k.project_id IS NOT NULL
      AND k.project_id != ?
@@ -819,8 +819,8 @@ export function check(projectPath: string): IntegrityIssue[] {
       const { title, content, category } = config().search.ftsWeights;
       const matches = db()
         .query(
-          `SELECT k.id, k.title FROM knowledge k
-           JOIN knowledge_fts f ON k.rowid = f.rowid
+          `SELECT k.id, k.title FROM knowledge_fts f
+           CROSS JOIN knowledge k ON k.rowid = f.rowid
            WHERE knowledge_fts MATCH ?
            AND k.id != ?
            AND k.confidence > 0.2

package/src/prompt.ts CHANGED Viewed

@@ -184,14 +184,30 @@ EXACT NUMBERS: When two segments report different numbers for what seems like th
 EARLY-SESSION CONTENT: Bug fixes, code changes, and decisions from the start of a session are just as important as later work. Never drop them just because the segment is short or old. If the first segment contains a specific bug fix with file paths and root cause, it MUST survive into the reflection.
+ANCHORED UPDATES: If the prompt includes a <previous-meta-summary> block, treat it as the current consolidated state. Update it using the NEW observation segments — preserve still-true details, remove stale details, and merge in new facts. Keep the same section headings. Do NOT re-derive unchanged sections verbatim unless the new segments contradict them.
 Output ONLY an <observations> block with the consolidated observations.`;
 export function recursiveUser(
   distillations: Array<{ observations: string }>,
+  previousMeta?: string,
 ): string {
   const entries = distillations.map(
     (d, i) => `Segment ${i + 1}:\n${d.observations}`,
   );
+  if (previousMeta) {
+    return `Update the anchored meta-summary below using the NEW observation segments. Preserve still-true details, remove stale details, and merge in new facts. Keep the same section headings.
+<previous-meta-summary>
+${previousMeta}
+</previous-meta-summary>
+---
+New observation segments to merge (chronological order):
+${entries.join("\n\n---\n\n")}`;
+  }
   return `Observation segments to consolidate (chronological order):
 ${entries.join("\n\n---\n\n")}`;
@@ -388,6 +404,91 @@ export function formatDistillations(
   return sections.join("\n\n");
 }
+// Strict Markdown skeleton for the /compact session summary, embedded verbatim
+// into the prompt by buildCompactPrompt(). Sections are task-oriented so the
+// next agent starting from the compacted context has a clear "where am I,
+// what's next, what's blocked" briefing. Derived from upstream OpenCode's
+// SUMMARY_TEMPLATE (session/compaction.ts in #23870) with a "(none)" directive
+// added for explicit empty sections and a closing "I'm ready to continue." sentinel to preserve Lore's post-compact UX.
+export const COMPACT_SUMMARY_TEMPLATE = `Output exactly this Markdown structure. Keep every section in this order, even when empty (use "(none)").
+---
+## Goal
+- [single-sentence task summary]
+## Constraints & Preferences
+- [user constraints, preferences, specs, or "(none)"]
+## Progress
+### Done
+- [completed work or "(none)"]
+### In Progress
+- [current work or "(none)"]
+### Blocked
+- [blockers or "(none)"]
+## Key Decisions
+- [decision and why, or "(none)"]
+## Next Steps
+- [ordered next actions or "(none)"]
+## Critical Context
+- [important technical facts, errors, open questions, or "(none)"]
+## Relevant Files
+- [file or directory path: why it matters, or "(none)"]
+---
+Rules:
+- Keep every section, even when empty.
+- Use terse bullets, not prose paragraphs.
+- Preserve exact file paths, commands, error strings, and identifiers when known.
+- Do not mention the summary process or that context was compacted.
+- End with "I'm ready to continue." on its own line after the closing "---".`;
+// Build the user-facing prompt passed to the compaction agent during /compact.
+// Lore injects pre-computed distillations as context separately; this prompt
+// just tells the model how to render its summary.
+//
+// `hasDistillations` is a boolean rather than the full array because this
+// function only cares about presence — the distillation bodies are pushed into
+// `output.context` separately by the caller. Passing the array shape would be
+// misleading dead weight.
+//
+// `previousSummary` is the prior `/compact` output text (typically from the
+// most recent assistant message with `info.summary === true`). When present,
+// the prompt asks the model to UPDATE the anchored summary in place rather
+// than re-derive from scratch — matching upstream OpenCode's behavior at
+// `compaction.ts:121-132` (`buildPrompt`). When absent, the prompt is
+// byte-identical to today's non-anchored output.
+//
+// F1b (this parameter) is OpenCode-specific: the retrieval path uses
+// `client.session.messages` to find the prior summary by `info.summary === true`.
+// See `findPreviousCompactSummary` in `packages/opencode/src/index.ts`.
+export function buildCompactPrompt(input: {
+  hasDistillations: boolean;
+  knowledge?: string;
+  previousSummary?: string;
+}): string {
+  const { hasDistillations, knowledge, previousSummary } = input;
+
+  // Optional note telling the model to trust the pre-computed distillations.
+  let distillSection = "";
+  if (hasDistillations) {
+    distillSection =
+      "Lore has pre-computed chunked summaries of the session history (injected above as context). Use them as the authoritative source — do NOT re-read raw conversation messages that conflict with them.\n\n";
+  }
+
+  // Optional anchor: ask for an in-place update of the prior summary.
+  let anchorBlock = "";
+  if (previousSummary) {
+    anchorBlock = `A prior compacted summary exists for this session. Update it using the conversation history above: preserve still-true details, remove stale details, and merge in new facts. Keep every section in place.\n\n<previous-summary>\n${previousSummary}\n</previous-summary>\n\n`;
+  }
+
+  // Optional knowledge text, appended after the template on its own lines.
+  let knowledgeBlock = "";
+  if (knowledge) {
+    knowledgeBlock = `\n${knowledge}\n`;
+  }
+
+  return `You are producing a compacted session summary for an AI coding agent. This summary will be the ONLY context available in the next part of the conversation.
+${distillSection}${anchorBlock}${COMPACT_SUMMARY_TEMPLATE}
+${knowledgeBlock}`;
+}
 // ~3 chars per token — validated as best heuristic against real API data.
 function estimateTokens(text: string): number {
   return Math.ceil(text.length / 3);

package/src/recall.ts CHANGED Viewed

@@ -116,14 +116,14 @@ function searchDistillationsScored(input: {
   const ftsSQL = input.sessionID
     ? `SELECT d.id, d.observations, d.generation, d.created_at, d.session_id, rank
-       FROM distillations d
-       JOIN distillation_fts f ON d.rowid = f.rowid
+       FROM distillation_fts f
+       CROSS JOIN distillations d ON d.rowid = f.rowid
        WHERE distillation_fts MATCH ?
        AND d.project_id = ? AND d.session_id = ?
        ORDER BY rank LIMIT ?`
     : `SELECT d.id, d.observations, d.generation, d.created_at, d.session_id, rank
-       FROM distillations d
-       JOIN distillation_fts f ON d.rowid = f.rowid
+       FROM distillation_fts f
+       CROSS JOIN distillations d ON d.rowid = f.rowid
        WHERE distillation_fts MATCH ?
        AND d.project_id = ?
        ORDER BY rank LIMIT ?`;