npm - opencode-lore - Versions diffs - 0.2.5 → 0.2.8 - Mend

opencode-lore 0.2.5 → 0.2.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (4)

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "opencode-lore",
-  "version": "0.2.5",
+  "version": "0.2.8",
   "type": "module",
   "license": "MIT",
   "description": "Three-tier memory architecture for OpenCode — distillation, not summarization",

package/src/gradient.ts CHANGED

@@ -63,6 +63,26 @@ export function getLastTransformedCount(): number {
   return lastTransformedCount;
 }
+/** Returns the layer used by the most recent transform() call. For testing. */
+export function getLastLayer(): SafetyLayer {
+  return lastLayer;
+}
+// The layer used by the most recent transform() call.
+// Used for the sticky-layer guard: once gradient mode activates (layer >= 1),
+// we don't allow fallback to layer 0 until the session genuinely shrinks
+// (e.g. after compaction). This prevents the calibration oscillation where a
+// compressed turn records 100K + 50-msg count, and the next turn's delta
+// estimation treats 250 evicted messages as "new", undercounts their tokens
+// via chars/4, and incorrectly concludes layer 0 passes.
+let lastLayer: SafetyLayer = 0;
+// The set of message IDs included in the most recent transform() output.
+// Used for accurate delta estimation: instead of counting messages by index
+// (which breaks after compression changes the window), we identify exactly
+// which messages are genuinely new since the last window.
+let lastWindowMessageIDs: Set<string> = new Set();
 // --- Force escalation ---
 // Set when the API returns "prompt is too long" — forces the transform to skip
 // layer 0 (and optionally layer 1) on the next call to ensure the context is
@@ -153,6 +173,8 @@ export function resetCalibration() {
   lastKnownMessageCount = 0;
   lastTransformedCount = 0;
   forceMinLayer = 0;
+  lastLayer = 0;
+  lastWindowMessageIDs = new Set();
 }
 type Distillation = {
@@ -724,7 +746,7 @@ function transformInner(input: {
   // When the API previously rejected with "prompt is too long", skip layers
   // below the forced minimum to ensure enough trimming on the next attempt.
   // One-shot: consumed here and reset to 0.
-  const effectiveMinLayer = forceMinLayer;
+  let effectiveMinLayer = forceMinLayer;
   forceMinLayer = 0;
   // --- Approach A: Cache-preserving passthrough ---
@@ -754,13 +776,29 @@ function transformInner(input: {
     return result.totalTokens * UNCALIBRATED_SAFETY <= maxInput;
   }
+  // --- Sticky layer guard (Option C) ---
+  // After a compressed turn (layer >= 1), don't allow layer 0 re-entry until
+  // the session genuinely shrinks (e.g. after compaction deletes messages).
+  // Prevents the calibration oscillation: a compressed turn stores
+  // lastKnownInput=100K for a 50-message window, but the next turn's
+  // input.messages has 300 raw messages. The delta estimation treats the 250
+  // evicted messages as "new" and undercounts them via chars/4, producing an
+  // expectedInput that fits in layer 0 — but the actual tokens are ~190K.
+  // Only applied when calibrated (same session) to avoid affecting other sessions.
+  if (calibrated && lastLayer >= 1 && input.messages.length >= lastKnownMessageCount) {
+    effectiveMinLayer = Math.max(effectiveMinLayer, 1) as SafetyLayer;
+  }
   let expectedInput: number;
   if (calibrated) {
-    // Exact approach: prior API count + estimate of only the new messages.
-    const newMsgCount = Math.max(0, input.messages.length - lastKnownMessageCount);
-    const newMsgTokens = newMsgCount > 0
-      ? input.messages.slice(-newMsgCount).reduce((s, m) => s + estimateMessage(m), 0)
-      : 0;
+    // Exact approach: prior API count + estimate of only genuinely new messages.
+    // Use message ID tracking (Option B) to identify new messages accurately.
+    // After compression, the "last window" is a subset of the full message array —
+    // counting by index would treat evicted messages as new (off-by-250 error).
+    const newMessages = lastWindowMessageIDs.size > 0
+      ? input.messages.filter((m) => !lastWindowMessageIDs.has(m.info.id))
+      : input.messages.slice(-Math.max(0, input.messages.length - lastKnownMessageCount));
+    const newMsgTokens = newMessages.reduce((s, m) => s + estimateMessage(m), 0);
     const ltmDelta = ltmTokens - lastKnownLtm;
     expectedInput = lastKnownInput + newMsgTokens + ltmDelta;
   } else {
@@ -918,6 +956,8 @@ export function transform(input: {
 }): TransformResult {
   const result = transformInner(input);
   lastTransformedCount = result.messages.length;
+  lastLayer = result.layer;
+  lastWindowMessageIDs = new Set(result.messages.map((m) => m.info.id));
   return result;
 }

package/src/index.ts CHANGED

@@ -395,28 +395,31 @@ export const LorePlugin: Plugin = async (ctx) => {
       // so the append-only sequence stays intact for prompt caching.
       if (result.layer > 0) {
         // The API requires the conversation to end with a user message.
-        // Drop trailing non-user messages, but stop if we hit an assistant message
-        // with an in-progress (non-completed) tool call — dropping it would cause
-        // the model to lose its pending tool invocation and re-issue it in an
-        // infinite loop. A completed tool part is safe to drop; a pending one is not.
+        // Drop trailing pure-text assistant messages (no tool parts), which would
+        // cause an Anthropic "does not support assistant message prefill" error.
+        //
+        // Crucially, assistant messages that contain tool parts (completed OR pending)
+        // must NOT be dropped:
+        // - Completed tool parts: OpenCode's SDK converts these into tool_result blocks
+        //   sent as user-role messages at the API level. The conversation already ends
+        //   with a user message — dropping would strip the entire current agentic turn
+        //   and cause an infinite tool-call loop (the model restarts from scratch).
+        // - Pending tool parts: the tool call hasn't returned yet; dropping would make
+        //   the model re-issue the same tool call on the next turn.
         while (
           result.messages.length > 0 &&
           result.messages.at(-1)!.info.role !== "user"
         ) {
           const last = result.messages.at(-1)!;
-          const hasPendingTool = last.parts.some(
-            (p) => p.type === "tool" && p.state.status !== "completed",
-          );
-          if (hasPendingTool) {
-            console.error(
-              "[lore] WARN: cannot drop trailing assistant message with pending tool call — may cause prefill error. id:",
-              last.info.id,
-            );
+          const hasToolParts = last.parts.some((p) => p.type === "tool");
+          if (hasToolParts) {
+            // Tool parts → tool_result (user-role) at the API level → no prefill error.
+            // Stop dropping; the conversation ends correctly as-is.
             break;
           }
           const dropped = result.messages.pop()!;
           console.error(
-            "[lore] WARN: dropping trailing",
+            "[lore] WARN: dropping trailing pure-text",
             dropped.info.role,
             "message to prevent prefill error. id:",
             dropped.info.id,

package/src/ltm.ts CHANGED

@@ -135,19 +135,72 @@ export function forProject(
     .all(pid) as KnowledgeEntry[];
 }
+type Scored = { entry: KnowledgeEntry; score: number };
+/** Max entries per pool to include on first turn when no session context exists. */
+const NO_CONTEXT_FALLBACK_CAP = 10;
+/** Number of top-confidence project entries always included as a safety net,
+ *  even when they don't match any session context terms. This guards against
+ *  the coarse term-overlap scoring accidentally excluding important project
+ *  knowledge. */
+const PROJECT_SAFETY_NET = 5;
+/**
+ * Score entries by term overlap with session context.
+ * Returns score = (fraction of topTerms matched) * entry.confidence.
+ */
+function scoreEntries(
+  entries: KnowledgeEntry[],
+  topTerms: string[],
+): Scored[] {
+  return entries.map((entry) => {
+    const haystack =
+      (entry.title + " " + entry.content).replace(/[^\w\s]/g, " ").toLowerCase();
+    let hits = 0;
+    for (const term of topTerms) {
+      if (haystack.includes(term)) hits++;
+    }
+    const relevance = topTerms.length > 0 ? hits / topTerms.length : 0;
+    return { entry, score: relevance * entry.confidence };
+  });
+}
+/**
+ * Extract the top 30 meaningful terms (>3 chars) from text, sorted by frequency.
+ */
+function extractTopTerms(text: string): string[] {
+  const freq = text
+    .replace(/[^\w\s]/g, " ")
+    .toLowerCase()
+    .split(/\s+/)
+    .filter((w) => w.length > 3)
+    .reduce<Map<string, number>>((acc, w) => {
+      acc.set(w, (acc.get(w) ?? 0) + 1);
+      return acc;
+    }, new Map());
+  return [...freq.entries()]
+    .sort((a, b) => b[1] - a[1])
+    .slice(0, 30)
+    .map(([w]) => w);
+}
 /**
  * Build a relevance-ranked, budget-capped list of knowledge entries for injection
  * into the system prompt of a live session.
  *
  * Strategy:
- * 1. Project-specific entries (project_id = current project, cross_project = 0)
- *    always get priority — they were curated specifically for this codebase.
- * 2. Cross-project entries are scored for relevance against recent session context
- *    (last distillation + recent raw messages). Only entries that match are included.
- * 3. All candidates are ranked by score * confidence, then greedily packed into
- *    the token budget (smallest-first within same score band to maximize count).
- * 4. If there's no session context yet (first turn), fall back to top entries by
- *    confidence only.
+ * 1. Both project-specific and cross-project entries are scored for relevance
+ *    against recent session context (last distillation + recent raw messages).
+ * 2. Project entries get a safety net: the top PROJECT_SAFETY_NET entries by
+ *    confidence are always included even if they have zero relevance score.
+ *    This ensures the most important project knowledge is never lost to
+ *    coarse term-overlap scoring.
+ * 3. All scored entries are merged into a single pool and greedily packed
+ *    into the token budget by score descending.
+ * 4. If there's no session context yet (first turn), fall back to top entries
+ *    by confidence only (capped at NO_CONTEXT_FALLBACK_CAP per pool).
  *
  * @param projectPath   Current project path
  * @param sessionID     Current session ID (for context extraction)
@@ -160,7 +213,7 @@ export function forSession(
 ): KnowledgeEntry[] {
   const pid = ensureProject(projectPath);
-  // --- 1. Load project-specific entries (always relevant) ---
+  // --- 1. Load project-specific entries ---
   const projectEntries = db()
     .query(
       `SELECT * FROM knowledge
@@ -181,7 +234,6 @@ export function forSession(
   if (!crossEntries.length && !projectEntries.length) return [];
   // --- 3. Build session context for relevance scoring ---
-  // Combine the most recent distillation text + last ~10 raw messages for this session
   let sessionContext = "";
   if (sessionID) {
     const distRow = db()
@@ -206,79 +258,53 @@ export function forSession(
     }
   }
-  // --- 4. Score cross-project entries by relevance ---
-  // Use FTS5 matching: extract terms from session context and score each entry
-  type Scored = { entry: KnowledgeEntry; score: number };
+  // --- 4. Score both pools by relevance ---
+  let scoredProject: Scored[];
   let scoredCross: Scored[];
   if (sessionContext.trim().length > 20) {
-    // Build a term set from session context (top 30 meaningful words)
-    const contextTerms = sessionContext
-      .replace(/[^\w\s]/g, " ")
-      .toLowerCase()
-      .split(/\s+/)
-      .filter((w) => w.length > 3)
-      .reduce<Map<string, number>>((acc, w) => {
-        acc.set(w, (acc.get(w) ?? 0) + 1);
-        return acc;
-      }, new Map());
-    // Sort by frequency, take top 30 terms
-    const topTerms = [...contextTerms.entries()]
-      .sort((a, b) => b[1] - a[1])
-      .slice(0, 30)
-      .map(([w]) => w);
-    scoredCross = crossEntries.map((entry) => {
-      const haystack =
-        (entry.title + " " + entry.content).replace(/[^\w\s]/g, " ").toLowerCase();
-      let hits = 0;
-      for (const term of topTerms) {
-        // Count how many context terms appear in this entry (simple overlap)
-        if (haystack.includes(term)) hits++;
-      }
-      // Score = fraction of top terms matched, weighted by confidence
-      const relevance = topTerms.length > 0 ? hits / topTerms.length : 0;
-      return { entry, score: relevance * entry.confidence };
-    });
-    // Only keep entries with at least one term match
-    scoredCross = scoredCross.filter((s) => s.score > 0);
+    const topTerms = extractTopTerms(sessionContext);
+    // Score project entries — include matched + safety net of top-N by confidence
+    const rawScored = scoreEntries(projectEntries, topTerms);
+    const matched = rawScored.filter((s) => s.score > 0);
+    const matchedIds = new Set(matched.map((s) => s.entry.id));
+    // Safety net: top PROJECT_SAFETY_NET entries by confidence that weren't already matched.
+    // Given a tiny score (0.001 * confidence) so they sort below genuinely matched entries.
+    const safetyNet = projectEntries
+      .filter((e) => !matchedIds.has(e.id))
+      .slice(0, PROJECT_SAFETY_NET)
+      .map((e) => ({ entry: e, score: 0.001 * e.confidence }));
+    scoredProject = [...matched, ...safetyNet];
+    // Score cross-project entries — only include entries with at least one term match
+    scoredCross = scoreEntries(crossEntries, topTerms).filter((s) => s.score > 0);
   } else {
-    // No session context yet — take top cross-project entries by confidence
-    scoredCross = crossEntries.slice(0, 10).map((entry) => ({
-      entry,
-      score: entry.confidence,
-    }));
+    // No session context — fall back to top entries by confidence, capped
+    scoredProject = projectEntries
+      .slice(0, NO_CONTEXT_FALLBACK_CAP)
+      .map((entry) => ({ entry, score: entry.confidence }));
+    scoredCross = crossEntries
+      .slice(0, NO_CONTEXT_FALLBACK_CAP)
+      .map((entry) => ({ entry, score: entry.confidence }));
   }
-  // Sort cross-project by score desc
-  scoredCross.sort((a, b) => b.score - a.score);
+  // --- 5. Merge and pack into token budget by score descending ---
+  const allScored = [...scoredProject, ...scoredCross];
+  allScored.sort((a, b) => b.score - a.score);
-  // --- 5. Pack into token budget ---
-  // Project entries get first pick (fully relevant); cross entries fill remaining budget.
-  // Use a greedy fit: iterate candidates and include if they fit.
-  const HEADER_OVERHEAD_TOKENS = 15; // "## Long-term Knowledge\n"
+  const HEADER_OVERHEAD_TOKENS = 15;
   let used = HEADER_OVERHEAD_TOKENS;
   const result: KnowledgeEntry[] = [];
-  function tryAdd(entry: KnowledgeEntry): boolean {
+  for (const { entry } of allScored) {
+    if (used >= maxTokens) break;
     const cost = estimateTokens(entry.title + entry.content) + 10;
-    if (used + cost > maxTokens) return false;
+    if (used + cost > maxTokens) continue;
     result.push(entry);
     used += cost;
-    return true;
-  }
-  // Project-specific first
-  for (const entry of projectEntries) {
-    tryAdd(entry);
-  }
-  // Then cross-project by relevance score
-  for (const { entry } of scoredCross) {
-    if (used >= maxTokens) break;
-    tryAdd(entry);
   }
   return result;