opencode-lore 0.2.2 → 0.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "opencode-lore",
3
- "version": "0.2.2",
3
+ "version": "0.2.4",
4
4
  "type": "module",
5
5
  "license": "MIT",
6
6
  "description": "Three-tier memory architecture for OpenCode — distillation, not summarization",
package/src/gradient.ts CHANGED
@@ -51,6 +51,18 @@ let lastKnownLtm = 0;
51
51
  let lastKnownSessionID: string | null = null;
52
52
  let lastKnownMessageCount = 0;
53
53
 
54
+ // Number of messages in the most recent transform() output — i.e. how many
55
+ // messages were actually sent to the model. On layer 0 this equals the full
56
+ // session length. On layers 1-4 it equals the compressed window size.
57
+ // Calibration must use this count (not the total DB message count) so that
58
+ // the delta on the next turn reflects only messages added since the last
59
+ // compressed window, not since the last DB snapshot.
60
+ let lastTransformedCount = 0;
61
+
62
+ export function getLastTransformedCount(): number {
63
+ return lastTransformedCount;
64
+ }
65
+
54
66
  // --- Force escalation ---
55
67
  // Set when the API returns "prompt is too long" — forces the transform to skip
56
68
  // layer 0 (and optionally layer 1) on the next call to ensure the context is
@@ -139,6 +151,7 @@ export function resetCalibration() {
139
151
  lastKnownLtm = 0;
140
152
  lastKnownSessionID = null;
141
153
  lastKnownMessageCount = 0;
154
+ lastTransformedCount = 0;
142
155
  forceMinLayer = 0;
143
156
  }
144
157
 
@@ -691,7 +704,7 @@ export function needsUrgentDistillation(): boolean {
691
704
  return v;
692
705
  }
693
706
 
694
- export function transform(input: {
707
+ function transformInner(input: {
695
708
  messages: MessageWithParts[];
696
709
  projectPath: string;
697
710
  sessionID?: string;
@@ -890,6 +903,24 @@ export function transform(input: {
890
903
  };
891
904
  }
892
905
 
906
+ // Public wrapper: records the compressed message count for calibration.
907
+ // Calibration needs to know how many messages were SENT to the model (the
908
+ // compressed window), not the total DB count. On layer 0 these are equal;
909
+ // on layers 1-4 the compressed window is smaller, and the delta on the next
910
+ // turn must be computed relative to the compressed count — otherwise the
911
+ // expected input on the next turn is anchored to the compressed input token
912
+ // count but the "new messages" delta is computed against the full DB count,
913
+ // making newMsgCount ≈ 0 and causing layer 0 passthrough on an overflowing session.
914
+ export function transform(input: {
915
+ messages: MessageWithParts[];
916
+ projectPath: string;
917
+ sessionID?: string;
918
+ }): TransformResult {
919
+ const result = transformInner(input);
920
+ lastTransformedCount = result.messages.length;
921
+ return result;
922
+ }
923
+
893
924
  // Compute our message-only estimate for a set of messages (for calibration use)
894
925
  export function estimateMessages(messages: MessageWithParts[]): number {
895
926
  return messages.reduce((sum, m) => sum + estimateMessage(m), 0);
package/src/index.ts CHANGED
@@ -14,6 +14,7 @@ import {
14
14
  setLtmTokens,
15
15
  getLtmBudget,
16
16
  setForceMinLayer,
17
+ getLastTransformedCount,
17
18
  } from "./gradient";
18
19
  import { formatKnowledge } from "./prompt";
19
20
  import { createRecallTool } from "./reflect";
@@ -219,7 +220,11 @@ export const LorePlugin: Plugin = async (ctx) => {
219
220
  const msgEstimate = estimateMessages(withParts);
220
221
  const actualInput =
221
222
  msg.tokens.input + msg.tokens.cache.read + msg.tokens.cache.write;
222
- calibrate(actualInput, msgEstimate, msg.sessionID, withParts.length);
223
+ // Use the compressed message count (from the last transform output),
224
+ // not the total DB count. On layer 0 these are equal. On layers 1-4,
225
+ // the model only saw the compressed window — calibrate must track that
226
+ // count so the next turn's delta is computed correctly.
227
+ calibrate(actualInput, msgEstimate, msg.sessionID, getLastTransformedCount() || withParts.length);
223
228
  }
224
229
  }
225
230
  }
@@ -390,12 +395,25 @@ export const LorePlugin: Plugin = async (ctx) => {
390
395
  // so the append-only sequence stays intact for prompt caching.
391
396
  if (result.layer > 0) {
392
397
  // The API requires the conversation to end with a user message.
393
- // Always drop trailing non-user messages even assistant messages with
394
- // tool parts. A hard API error is worse than the model re-invoking a tool.
398
+ // Drop trailing non-user messages, but stop if we hit an assistant message
399
+ // with an in-progress (non-completed) tool call — dropping it would cause
400
+ // the model to lose its pending tool invocation and re-issue it in an
401
+ // infinite loop. A completed tool part is safe to drop; a pending one is not.
395
402
  while (
396
403
  result.messages.length > 0 &&
397
404
  result.messages.at(-1)!.info.role !== "user"
398
405
  ) {
406
+ const last = result.messages.at(-1)!;
407
+ const hasPendingTool = last.parts.some(
408
+ (p) => p.type === "tool" && p.state.status !== "completed",
409
+ );
410
+ if (hasPendingTool) {
411
+ console.error(
412
+ "[lore] WARN: cannot drop trailing assistant message with pending tool call — may cause prefill error. id:",
413
+ last.info.id,
414
+ );
415
+ break;
416
+ }
399
417
  const dropped = result.messages.pop()!;
400
418
  console.error(
401
419
  "[lore] WARN: dropping trailing",