opencode-lore 0.2.4 → 0.2.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. package/package.json +1 -1
  2. package/src/gradient.ts +95 -21
  3. package/src/ltm.ts +96 -70
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "opencode-lore",
3
- "version": "0.2.4",
3
+ "version": "0.2.7",
4
4
  "type": "module",
5
5
  "license": "MIT",
6
6
  "description": "Three-tier memory architecture for OpenCode — distillation, not summarization",
package/src/gradient.ts CHANGED
@@ -63,6 +63,26 @@ export function getLastTransformedCount(): number {
63
63
  return lastTransformedCount;
64
64
  }
65
65
 
66
+ /** Returns the layer used by the most recent transform() call. For testing. */
67
+ export function getLastLayer(): SafetyLayer {
68
+ return lastLayer;
69
+ }
70
+
71
+ // The layer used by the most recent transform() call.
72
+ // Used for the sticky-layer guard: once gradient mode activates (layer >= 1),
73
+ // we don't allow fallback to layer 0 until the session genuinely shrinks
74
+ // (e.g. after compaction). This prevents the calibration oscillation where a
75
+ // compressed turn records 100K + 50-msg count, and the next turn's delta
76
+ // estimation treats 250 evicted messages as "new", undercounts their tokens
77
+ // via chars/4, and incorrectly concludes layer 0 passes.
78
+ let lastLayer: SafetyLayer = 0;
79
+
80
+ // The set of message IDs included in the most recent transform() output.
81
+ // Used for accurate delta estimation: instead of counting messages by index
82
+ // (which breaks after compression changes the window), we identify exactly
83
+ // which messages are genuinely new since the last window.
84
+ let lastWindowMessageIDs: Set<string> = new Set();
85
+
66
86
  // --- Force escalation ---
67
87
  // Set when the API returns "prompt is too long" — forces the transform to skip
68
88
  // layer 0 (and optionally layer 1) on the next call to ensure the context is
@@ -153,6 +173,8 @@ export function resetCalibration() {
153
173
  lastKnownMessageCount = 0;
154
174
  lastTransformedCount = 0;
155
175
  forceMinLayer = 0;
176
+ lastLayer = 0;
177
+ lastWindowMessageIDs = new Set();
156
178
  }
157
179
 
158
180
  type Distillation = {
@@ -724,7 +746,7 @@ function transformInner(input: {
724
746
  // When the API previously rejected with "prompt is too long", skip layers
725
747
  // below the forced minimum to ensure enough trimming on the next attempt.
726
748
  // One-shot: consumed here and reset to 0.
727
- const effectiveMinLayer = forceMinLayer;
749
+ let effectiveMinLayer = forceMinLayer;
728
750
  forceMinLayer = 0;
729
751
 
730
752
  // --- Approach A: Cache-preserving passthrough ---
@@ -754,13 +776,29 @@ function transformInner(input: {
754
776
  return result.totalTokens * UNCALIBRATED_SAFETY <= maxInput;
755
777
  }
756
778
 
779
+ // --- Sticky layer guard (Option C) ---
780
+ // After a compressed turn (layer >= 1), don't allow layer 0 re-entry until
781
+ // the session genuinely shrinks (e.g. after compaction deletes messages).
782
+ // Prevents the calibration oscillation: a compressed turn stores
783
+ // lastKnownInput=100K for a 50-message window, but the next turn's
784
+ // input.messages has 300 raw messages. The delta estimation treats the 250
785
+ // evicted messages as "new" and undercounts them via chars/4, producing an
786
+ // expectedInput that fits in layer 0 — but the actual tokens are ~190K.
787
+ // Only applied when calibrated (same session) to avoid affecting other sessions.
788
+ if (calibrated && lastLayer >= 1 && input.messages.length >= lastKnownMessageCount) {
789
+ effectiveMinLayer = Math.max(effectiveMinLayer, 1) as SafetyLayer;
790
+ }
791
+
757
792
  let expectedInput: number;
758
793
  if (calibrated) {
759
- // Exact approach: prior API count + estimate of only the new messages.
760
- const newMsgCount = Math.max(0, input.messages.length - lastKnownMessageCount);
761
- const newMsgTokens = newMsgCount > 0
762
- ? input.messages.slice(-newMsgCount).reduce((s, m) => s + estimateMessage(m), 0)
763
- : 0;
794
+ // Exact approach: prior API count + estimate of only genuinely new messages.
795
+ // Use message ID tracking (Option B) to identify new messages accurately.
796
+ // After compression, the "last window" is a subset of the full message array —
797
+ // counting by index would treat evicted messages as new (off-by-250 error).
798
+ const newMessages = lastWindowMessageIDs.size > 0
799
+ ? input.messages.filter((m) => !lastWindowMessageIDs.has(m.info.id))
800
+ : input.messages.slice(-Math.max(0, input.messages.length - lastKnownMessageCount));
801
+ const newMsgTokens = newMessages.reduce((s, m) => s + estimateMessage(m), 0);
764
802
  const ltmDelta = ltmTokens - lastKnownLtm;
765
803
  expectedInput = lastKnownInput + newMsgTokens + ltmDelta;
766
804
  } else {
@@ -918,6 +956,8 @@ export function transform(input: {
918
956
  }): TransformResult {
919
957
  const result = transformInner(input);
920
958
  lastTransformedCount = result.messages.length;
959
+ lastLayer = result.layer;
960
+ lastWindowMessageIDs = new Set(result.messages.map((m) => m.info.id));
921
961
  return result;
922
962
  }
923
963
 
@@ -926,6 +966,23 @@ export function estimateMessages(messages: MessageWithParts[]): number {
926
966
  return messages.reduce((sum, m) => sum + estimateMessage(m), 0);
927
967
  }
928
968
 
969
+ // Identify the current agentic turn: the last user message plus all subsequent
970
+ // assistant messages that share its ID as parentID. These messages form an atomic
971
+ // unit — the model must see all of them or it will lose track of its own prior
972
+ // tool calls and re-issue them in an infinite loop.
973
+ function currentTurnStart(messages: MessageWithParts[]): number {
974
+ // Find the last user message
975
+ let lastUserIdx = -1;
976
+ for (let i = messages.length - 1; i >= 0; i--) {
977
+ if (messages[i].info.role === "user") {
978
+ lastUserIdx = i;
979
+ break;
980
+ }
981
+ }
982
+ if (lastUserIdx === -1) return 0; // no user message — treat all as current turn
983
+ return lastUserIdx;
984
+ }
985
+
929
986
  function tryFit(input: {
930
987
  messages: MessageWithParts[];
931
988
  prefix: MessageWithParts[];
@@ -939,32 +996,49 @@ function tryFit(input: {
939
996
  if (input.prefixTokens > input.distilledBudget && input.prefix.length > 0)
940
997
  return null;
941
998
 
942
- // Walk backwards through messages, accumulating tokens within raw budget
943
- let rawTokens = 0;
944
- let cutoff = input.messages.length;
999
+ // Identify the current turn (last user message + all following assistant messages).
1000
+ // These are always included — they must never be evicted. If they alone exceed the
1001
+ // raw budget, escalate to the next layer (which strips tool outputs to reduce size).
1002
+ const turnStart = currentTurnStart(input.messages);
1003
+ const currentTurn = input.messages.slice(turnStart);
1004
+ const currentTurnTokens = currentTurn.reduce((s, m) => s + estimateMessage(m), 0);
1005
+
1006
+ if (currentTurnTokens > input.rawBudget) {
1007
+ // Current turn alone exceeds budget — can't fit even with everything else dropped.
1008
+ // Signal failure so the caller escalates to the next layer (tool-output stripping).
1009
+ return null;
1010
+ }
1011
+
1012
+ // Walk backwards through older messages (before the current turn),
1013
+ // filling the remaining budget after reserving space for the current turn.
1014
+ const olderMessages = input.messages.slice(0, turnStart);
1015
+ const remainingBudget = input.rawBudget - currentTurnTokens;
1016
+ let olderTokens = 0;
1017
+ let cutoff = olderMessages.length; // default: include none of the older messages
945
1018
  const protectedTurns = input.protectedTurns ?? 0;
946
- let turns = 0;
947
1019
 
948
- for (let i = input.messages.length - 1; i >= 0; i--) {
949
- const msg = input.messages[i];
950
- if (msg.info.role === "user") turns++;
1020
+ for (let i = olderMessages.length - 1; i >= 0; i--) {
1021
+ const msg = olderMessages[i];
951
1022
  const tokens = estimateMessage(msg);
952
- if (rawTokens + tokens > input.rawBudget) {
1023
+ if (olderTokens + tokens > remainingBudget) {
953
1024
  cutoff = i + 1;
954
1025
  break;
955
1026
  }
956
- rawTokens += tokens;
1027
+ olderTokens += tokens;
957
1028
  if (i === 0) cutoff = 0;
958
1029
  }
959
1030
 
960
- const raw = input.messages.slice(cutoff);
961
- // Must keep at least 1 raw message — otherwise this layer fails
962
- if (!raw.length) return null;
1031
+ const rawMessages = [...olderMessages.slice(cutoff), ...currentTurn];
1032
+ const rawTokens = olderTokens + currentTurnTokens;
963
1033
 
964
- // Apply system-reminder stripping + optional tool output stripping
965
- const processed = raw.map((msg, idx) => {
966
- const fromEnd = raw.length - idx;
1034
+ // Apply system-reminder stripping + optional tool output stripping.
1035
+ // The current turn (end of rawMessages) is always "protected" — never stripped.
1036
+ const currentTurnSet = new Set(currentTurn.map((m) => m.info.id));
1037
+ const processed = rawMessages.map((msg, idx) => {
1038
+ const fromEnd = rawMessages.length - idx;
1039
+ const isCurrentTurn = currentTurnSet.has(msg.info.id);
967
1040
  const isProtected =
1041
+ isCurrentTurn ||
968
1042
  input.strip === "none" ||
969
1043
  (input.strip === "old-tools" && fromEnd <= protectedTurns * 2);
970
1044
  const parts = isProtected
package/src/ltm.ts CHANGED
@@ -135,19 +135,72 @@ export function forProject(
135
135
  .all(pid) as KnowledgeEntry[];
136
136
  }
137
137
 
138
+ type Scored = { entry: KnowledgeEntry; score: number };
139
+
140
+ /** Max entries per pool to include on first turn when no session context exists. */
141
+ const NO_CONTEXT_FALLBACK_CAP = 10;
142
+
143
+ /** Number of top-confidence project entries always included as a safety net,
144
+ * even when they don't match any session context terms. This guards against
145
+ * the coarse term-overlap scoring accidentally excluding important project
146
+ * knowledge. */
147
+ const PROJECT_SAFETY_NET = 5;
148
+
149
+ /**
150
+ * Score entries by term overlap with session context.
151
+ * Returns score = (fraction of topTerms matched) * entry.confidence.
152
+ */
153
+ function scoreEntries(
154
+ entries: KnowledgeEntry[],
155
+ topTerms: string[],
156
+ ): Scored[] {
157
+ return entries.map((entry) => {
158
+ const haystack =
159
+ (entry.title + " " + entry.content).replace(/[^\w\s]/g, " ").toLowerCase();
160
+ let hits = 0;
161
+ for (const term of topTerms) {
162
+ if (haystack.includes(term)) hits++;
163
+ }
164
+ const relevance = topTerms.length > 0 ? hits / topTerms.length : 0;
165
+ return { entry, score: relevance * entry.confidence };
166
+ });
167
+ }
168
+
169
+ /**
170
+ * Extract the top 30 meaningful terms (>3 chars) from text, sorted by frequency.
171
+ */
172
+ function extractTopTerms(text: string): string[] {
173
+ const freq = text
174
+ .replace(/[^\w\s]/g, " ")
175
+ .toLowerCase()
176
+ .split(/\s+/)
177
+ .filter((w) => w.length > 3)
178
+ .reduce<Map<string, number>>((acc, w) => {
179
+ acc.set(w, (acc.get(w) ?? 0) + 1);
180
+ return acc;
181
+ }, new Map());
182
+
183
+ return [...freq.entries()]
184
+ .sort((a, b) => b[1] - a[1])
185
+ .slice(0, 30)
186
+ .map(([w]) => w);
187
+ }
188
+
138
189
  /**
139
190
  * Build a relevance-ranked, budget-capped list of knowledge entries for injection
140
191
  * into the system prompt of a live session.
141
192
  *
142
193
  * Strategy:
143
- * 1. Project-specific entries (project_id = current project, cross_project = 0)
144
- * always get priority they were curated specifically for this codebase.
145
- * 2. Cross-project entries are scored for relevance against recent session context
146
- * (last distillation + recent raw messages). Only entries that match are included.
147
- * 3. All candidates are ranked by score * confidence, then greedily packed into
148
- * the token budget (smallest-first within same score band to maximize count).
149
- * 4. If there's no session context yet (first turn), fall back to top entries by
150
- * confidence only.
194
+ * 1. Both project-specific and cross-project entries are scored for relevance
195
+ * against recent session context (last distillation + recent raw messages).
196
+ * 2. Project entries get a safety net: the top PROJECT_SAFETY_NET entries by
197
+ * confidence are always included even if they have zero relevance score.
198
+ * This ensures the most important project knowledge is never lost to
199
+ * coarse term-overlap scoring.
200
+ * 3. All scored entries are merged into a single pool and greedily packed
201
+ * into the token budget by score descending.
202
+ * 4. If there's no session context yet (first turn), fall back to top entries
203
+ * by confidence only (capped at NO_CONTEXT_FALLBACK_CAP per pool).
151
204
  *
152
205
  * @param projectPath Current project path
153
206
  * @param sessionID Current session ID (for context extraction)
@@ -160,7 +213,7 @@ export function forSession(
160
213
  ): KnowledgeEntry[] {
161
214
  const pid = ensureProject(projectPath);
162
215
 
163
- // --- 1. Load project-specific entries (always relevant) ---
216
+ // --- 1. Load project-specific entries ---
164
217
  const projectEntries = db()
165
218
  .query(
166
219
  `SELECT * FROM knowledge
@@ -181,7 +234,6 @@ export function forSession(
181
234
  if (!crossEntries.length && !projectEntries.length) return [];
182
235
 
183
236
  // --- 3. Build session context for relevance scoring ---
184
- // Combine the most recent distillation text + last ~10 raw messages for this session
185
237
  let sessionContext = "";
186
238
  if (sessionID) {
187
239
  const distRow = db()
@@ -206,79 +258,53 @@ export function forSession(
206
258
  }
207
259
  }
208
260
 
209
- // --- 4. Score cross-project entries by relevance ---
210
- // Use FTS5 matching: extract terms from session context and score each entry
211
- type Scored = { entry: KnowledgeEntry; score: number };
261
+ // --- 4. Score both pools by relevance ---
262
+ let scoredProject: Scored[];
212
263
  let scoredCross: Scored[];
213
264
 
214
265
  if (sessionContext.trim().length > 20) {
215
- // Build a term set from session context (top 30 meaningful words)
216
- const contextTerms = sessionContext
217
- .replace(/[^\w\s]/g, " ")
218
- .toLowerCase()
219
- .split(/\s+/)
220
- .filter((w) => w.length > 3)
221
- .reduce<Map<string, number>>((acc, w) => {
222
- acc.set(w, (acc.get(w) ?? 0) + 1);
223
- return acc;
224
- }, new Map());
225
-
226
- // Sort by frequency, take top 30 terms
227
- const topTerms = [...contextTerms.entries()]
228
- .sort((a, b) => b[1] - a[1])
229
- .slice(0, 30)
230
- .map(([w]) => w);
231
-
232
- scoredCross = crossEntries.map((entry) => {
233
- const haystack =
234
- (entry.title + " " + entry.content).replace(/[^\w\s]/g, " ").toLowerCase();
235
- let hits = 0;
236
- for (const term of topTerms) {
237
- // Count how many context terms appear in this entry (simple overlap)
238
- if (haystack.includes(term)) hits++;
239
- }
240
- // Score = fraction of top terms matched, weighted by confidence
241
- const relevance = topTerms.length > 0 ? hits / topTerms.length : 0;
242
- return { entry, score: relevance * entry.confidence };
243
- });
244
-
245
- // Only keep entries with at least one term match
246
- scoredCross = scoredCross.filter((s) => s.score > 0);
266
+ const topTerms = extractTopTerms(sessionContext);
267
+
268
+ // Score project entries — include matched + safety net of top-N by confidence
269
+ const rawScored = scoreEntries(projectEntries, topTerms);
270
+ const matched = rawScored.filter((s) => s.score > 0);
271
+ const matchedIds = new Set(matched.map((s) => s.entry.id));
272
+
273
+ // Safety net: top PROJECT_SAFETY_NET entries by confidence that weren't already matched.
274
+ // Given a tiny score (0.001 * confidence) so they sort below genuinely matched entries.
275
+ const safetyNet = projectEntries
276
+ .filter((e) => !matchedIds.has(e.id))
277
+ .slice(0, PROJECT_SAFETY_NET)
278
+ .map((e) => ({ entry: e, score: 0.001 * e.confidence }));
279
+
280
+ scoredProject = [...matched, ...safetyNet];
281
+
282
+ // Score cross-project entries — only include entries with at least one term match
283
+ scoredCross = scoreEntries(crossEntries, topTerms).filter((s) => s.score > 0);
247
284
  } else {
248
- // No session context yet — take top cross-project entries by confidence
249
- scoredCross = crossEntries.slice(0, 10).map((entry) => ({
250
- entry,
251
- score: entry.confidence,
252
- }));
285
+ // No session context — fall back to top entries by confidence, capped
286
+ scoredProject = projectEntries
287
+ .slice(0, NO_CONTEXT_FALLBACK_CAP)
288
+ .map((entry) => ({ entry, score: entry.confidence }));
289
+ scoredCross = crossEntries
290
+ .slice(0, NO_CONTEXT_FALLBACK_CAP)
291
+ .map((entry) => ({ entry, score: entry.confidence }));
253
292
  }
254
293
 
255
- // Sort cross-project by score desc
256
- scoredCross.sort((a, b) => b.score - a.score);
294
+ // --- 5. Merge and pack into token budget by score descending ---
295
+ const allScored = [...scoredProject, ...scoredCross];
296
+ allScored.sort((a, b) => b.score - a.score);
257
297
 
258
- // --- 5. Pack into token budget ---
259
- // Project entries get first pick (fully relevant); cross entries fill remaining budget.
260
- // Use a greedy fit: iterate candidates and include if they fit.
261
- const HEADER_OVERHEAD_TOKENS = 15; // "## Long-term Knowledge\n"
298
+ const HEADER_OVERHEAD_TOKENS = 15;
262
299
  let used = HEADER_OVERHEAD_TOKENS;
263
300
  const result: KnowledgeEntry[] = [];
264
301
 
265
- function tryAdd(entry: KnowledgeEntry): boolean {
302
+ for (const { entry } of allScored) {
303
+ if (used >= maxTokens) break;
266
304
  const cost = estimateTokens(entry.title + entry.content) + 10;
267
- if (used + cost > maxTokens) return false;
305
+ if (used + cost > maxTokens) continue;
268
306
  result.push(entry);
269
307
  used += cost;
270
- return true;
271
- }
272
-
273
- // Project-specific first
274
- for (const entry of projectEntries) {
275
- tryAdd(entry);
276
- }
277
-
278
- // Then cross-project by relevance score
279
- for (const { entry } of scoredCross) {
280
- if (used >= maxTokens) break;
281
- tryAdd(entry);
282
308
  }
283
309
 
284
310
  return result;