opencode-lore 0.4.2 → 0.4.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "opencode-lore",
3
- "version": "0.4.2",
3
+ "version": "0.4.4",
4
4
  "type": "module",
5
5
  "license": "MIT",
6
6
  "description": "Three-tier memory architecture for OpenCode — distillation, not summarization",
package/src/db.ts CHANGED
@@ -2,7 +2,7 @@ import { Database } from "bun:sqlite";
2
2
  import { join } from "path";
3
3
  import { mkdirSync } from "fs";
4
4
 
5
- const SCHEMA_VERSION = 3;
5
+ const SCHEMA_VERSION = 4;
6
6
 
7
7
  const MIGRATIONS: string[] = [
8
8
  `
@@ -130,6 +130,17 @@ const MIGRATIONS: string[] = [
130
130
  -- VACUUM must run outside a transaction and cannot be in a multi-statement
131
131
  -- exec, so it is handled specially in the migrate() function.
132
132
  `,
133
+ `
134
+ -- Version 4: Persistent session state for error recovery.
135
+ -- Stores forceMinLayer so it survives OpenCode restarts. Without this,
136
+ -- a "prompt too long" error recovery (escalate to layer 2) is lost if
137
+ -- the process restarts before the next turn.
138
+ CREATE TABLE IF NOT EXISTS session_state (
139
+ session_id TEXT PRIMARY KEY,
140
+ force_min_layer INTEGER NOT NULL DEFAULT 0,
141
+ updated_at INTEGER NOT NULL
142
+ );
143
+ `,
133
144
  ];
134
145
 
135
146
  function dataDir() {
@@ -229,3 +240,35 @@ export function isFirstRun(): boolean {
229
240
  .get() as { count: number };
230
241
  return row.count === 0;
231
242
  }
243
+
244
+ // ---------------------------------------------------------------------------
245
+ // Persistent session state (error recovery)
246
+ // ---------------------------------------------------------------------------
247
+
248
+ /**
249
+ * Load persisted forceMinLayer for a session. Returns 0 if none stored.
250
+ */
251
+ export function loadForceMinLayer(sessionID: string): number {
252
+ const row = db()
253
+ .query("SELECT force_min_layer FROM session_state WHERE session_id = ?")
254
+ .get(sessionID) as { force_min_layer: number } | null;
255
+ return row?.force_min_layer ?? 0;
256
+ }
257
+
258
+ /**
259
+ * Persist forceMinLayer for a session. Deletes the row when layer is 0
260
+ * (consumed) to avoid unbounded growth.
261
+ */
262
+ export function saveForceMinLayer(sessionID: string, layer: number): void {
263
+ if (layer === 0) {
264
+ db()
265
+ .query("DELETE FROM session_state WHERE session_id = ?")
266
+ .run(sessionID);
267
+ } else {
268
+ db()
269
+ .query(
270
+ "INSERT OR REPLACE INTO session_state (session_id, force_min_layer, updated_at) VALUES (?, ?, ?)",
271
+ )
272
+ .run(sessionID, layer, Date.now());
273
+ }
274
+ }
package/src/gradient.ts CHANGED
@@ -1,5 +1,5 @@
1
1
  import type { Message, Part } from "@opencode-ai/sdk";
2
- import { db, ensureProject } from "./db";
2
+ import { db, ensureProject, loadForceMinLayer, saveForceMinLayer } from "./db";
3
3
  import { config } from "./config";
4
4
  import { formatDistillations } from "./prompt";
5
5
  import { normalize } from "./markdown";
@@ -53,9 +53,12 @@ let calibratedOverhead: number | null = null;
53
53
  // lore-curator) from corrupting the main session's sticky-layer guard and
54
54
  // delta-estimation state when their transform() calls return layer 0.
55
55
  //
56
- // DB persistence is unnecessary: UNCALIBRATED_SAFETY=1.5 safely handles
57
- // the first turn of a resumed session. The Map is bounded — there are never
58
- // more than a handful of active sessions at once.
56
+ // forceMinLayer is the one field that MUST survive process restarts: when the
57
+ // API returns "prompt is too long", the error handler sets forceMinLayer=2.
58
+ // If OpenCode restarts before the next turn, the escalation is lost and the
59
+ // overflow repeats. forceMinLayer is persisted to SQLite (session_state table)
60
+ // and loaded on first access. All other state rebuilds from the first API
61
+ // response via UNCALIBRATED_SAFETY.
59
62
  // ---------------------------------------------------------------------------
60
63
 
61
64
  type SessionState = {
@@ -102,6 +105,11 @@ function getSessionState(sessionID: string): SessionState {
102
105
  let state = sessionStates.get(sessionID);
103
106
  if (!state) {
104
107
  state = makeSessionState();
108
+ // Restore persisted forceMinLayer from DB — survives process restarts.
109
+ // Critical for "prompt too long" recovery: the error handler sets
110
+ // forceMinLayer=2, but if OpenCode restarts before the next turn,
111
+ // the in-memory escalation would be lost without this.
112
+ state.forceMinLayer = loadForceMinLayer(sessionID) as SafetyLayer;
105
113
  sessionStates.set(sessionID, state);
106
114
  }
107
115
  return state;
@@ -213,10 +221,12 @@ export function getLastLayer(sessionID?: string): SafetyLayer {
213
221
  export function setForceMinLayer(layer: SafetyLayer, sessionID?: string) {
214
222
  if (sessionID) {
215
223
  getSessionState(sessionID).forceMinLayer = layer;
224
+ saveForceMinLayer(sessionID, layer);
216
225
  } else {
217
226
  // Fallback for tests / callers without session ID: set on all active sessions
218
- for (const state of sessionStates.values()) {
227
+ for (const [sid, state] of sessionStates.entries()) {
219
228
  state.forceMinLayer = layer;
229
+ saveForceMinLayer(sid, layer);
220
230
  }
221
231
  }
222
232
  }
@@ -225,8 +235,12 @@ export function setForceMinLayer(layer: SafetyLayer, sessionID?: string) {
225
235
  export function resetCalibration(sessionID?: string) {
226
236
  calibratedOverhead = null;
227
237
  if (sessionID) {
238
+ saveForceMinLayer(sessionID, 0); // clear persisted state
228
239
  sessionStates.delete(sessionID);
229
240
  } else {
241
+ for (const sid of sessionStates.keys()) {
242
+ saveForceMinLayer(sid, 0);
243
+ }
230
244
  sessionStates.clear();
231
245
  }
232
246
  }
@@ -812,11 +826,12 @@ function transformInner(input: {
812
826
  // --- Force escalation (reactive error recovery) ---
813
827
  // When the API previously rejected with "prompt is too long", skip layers
814
828
  // below the forced minimum to ensure enough trimming on the next attempt.
815
- // One-shot: consumed here and reset to 0.
829
+ // One-shot: consumed here and reset to 0 (both in-memory and on disk).
816
830
  const sid = input.sessionID ?? input.messages[0]?.info.sessionID;
817
831
  const sessState = sid ? getSessionState(sid) : makeSessionState();
818
832
  let effectiveMinLayer = sessState.forceMinLayer;
819
833
  sessState.forceMinLayer = 0;
834
+ if (sid && effectiveMinLayer > 0) saveForceMinLayer(sid, 0);
820
835
 
821
836
  // --- Approach A: Cache-preserving passthrough ---
822
837
  // Use exact token count from the previous API response when available.
@@ -876,7 +891,12 @@ function transformInner(input: {
876
891
  expectedInput = messageTokens + overhead + ltmTokens;
877
892
  }
878
893
 
879
- if (effectiveMinLayer === 0 && expectedInput <= maxInput) {
894
+ // When uncalibrated, apply safety multiplier to the layer-0 decision too.
895
+ // chars/3 undercounts by ~1.63x on real sessions — without this, a session
896
+ // estimated at 146K passes layer 0 but actually costs 214K → overflow.
897
+ const layer0Input = calibrated ? expectedInput : expectedInput * UNCALIBRATED_SAFETY;
898
+
899
+ if (effectiveMinLayer === 0 && layer0Input <= maxInput) {
880
900
  // All messages fit — return unmodified to preserve append-only prompt-cache pattern.
881
901
  // Raw messages are strictly better context than lossy distilled summaries.
882
902
  const messageTokens = calibrated
package/src/index.ts CHANGED
@@ -15,10 +15,51 @@ import {
15
15
  setForceMinLayer,
16
16
  getLastTransformedCount,
17
17
  } from "./gradient";
18
- import { formatKnowledge } from "./prompt";
18
+ import { formatKnowledge, formatDistillations } from "./prompt";
19
19
  import { createRecallTool } from "./reflect";
20
20
  import { shouldImport, importFromFile, exportToFile } from "./agents-file";
21
21
 
22
+ /**
23
+ * Detect whether an error from session.error is a context overflow ("prompt too long").
24
+ * Matches both APIError wrapper shape (error.data.message) and direct shape (error.message).
25
+ */
26
+ export function isContextOverflow(rawError: unknown): boolean {
27
+ const error = rawError as
28
+ | { name?: string; message?: string; data?: { message?: string } }
29
+ | undefined;
30
+ const errorMessage = error?.data?.message ?? error?.message ?? "";
31
+ return (
32
+ typeof errorMessage === "string" &&
33
+ (errorMessage.includes("prompt is too long") ||
34
+ errorMessage.includes("context length exceeded") ||
35
+ errorMessage.includes("maximum context length") ||
36
+ errorMessage.includes("ContextWindowExceededError") ||
37
+ errorMessage.includes("too many tokens"))
38
+ );
39
+ }
40
+
41
+ /**
42
+ * Build the synthetic recovery message injected after a context overflow.
43
+ * Contains the distilled session history so the model can continue.
44
+ */
45
+ export function buildRecoveryMessage(
46
+ summaries: Array<{ observations: string; generation: number }>,
47
+ ): string {
48
+ const historyText = summaries.length > 0
49
+ ? formatDistillations(summaries)
50
+ : "";
51
+
52
+ return [
53
+ "<system-reminder>",
54
+ "The previous turn failed with a context overflow error (prompt too long).",
55
+ "Lore has automatically compressed the conversation history.",
56
+ "Review the session history below and continue where you left off.",
57
+ "",
58
+ historyText || "(No distilled history available — check recent messages for context.)",
59
+ "</system-reminder>",
60
+ ].join("\n");
61
+ }
62
+
22
63
  export const LorePlugin: Plugin = async (ctx) => {
23
64
  const projectPath = ctx.worktree || ctx.directory;
24
65
  await load(ctx.directory);
@@ -226,45 +267,50 @@ export const LorePlugin: Plugin = async (ctx) => {
226
267
  | undefined;
227
268
  if (errorSessionID && await shouldSkip(errorSessionID)) return;
228
269
 
229
- // Detect "prompt is too long" API errors and auto-recover:
230
- // 1. Force the gradient transform to escalate on the next call (skip layer 0/1)
231
- // 2. Force distillation to capture all temporal data before compaction
232
- // 3. Trigger compaction so the session recovers without user intervention
270
+ // Detect "prompt is too long" API errors and auto-recover.
233
271
  const rawError = (event.properties as Record<string, unknown>).error;
234
- // Diagnostic: log the full error shape so we can verify our detection matches
235
272
  console.error("[lore] session.error received:", JSON.stringify(rawError, null, 2));
236
273
 
237
- const error = rawError as
238
- | { name?: string; message?: string; data?: { message?: string } }
239
- | undefined;
240
- // Match both shapes: error.data.message (APIError wrapper) and error.message (direct)
241
- const errorMessage = error?.data?.message ?? error?.message ?? "";
242
- const isPromptTooLong =
243
- typeof errorMessage === "string" &&
244
- (errorMessage.includes("prompt is too long") ||
245
- errorMessage.includes("context length exceeded") ||
246
- errorMessage.includes("maximum context length") ||
247
- errorMessage.includes("ContextWindowExceededError") ||
248
- errorMessage.includes("too many tokens"));
249
-
250
- console.error(
251
- `[lore] session.error isPromptTooLong=${isPromptTooLong} (name=${error?.name}, message=${errorMessage.substring(0, 120)})`,
252
- );
253
-
254
- if (isPromptTooLong) {
274
+ if (isContextOverflow(rawError) && errorSessionID) {
255
275
  console.error(
256
- `[lore] detected 'prompt too long' error — forcing distillation + layer escalation (session: ${errorSessionID?.substring(0, 16)})`,
276
  `[lore] detected context overflow — auto-recovering (session: ${errorSessionID.substring(0, 16)})`,
257
277
  );
258
- // Force layer 2 on next transform — layers 0 and 1 were already too large.
259
- // The gradient at layers 2-4 will compress the context enough for the next turn.
260
- // Do NOT call session.summarize() here — it sends all messages to the model,
261
- // which would overflow again and create a stuck compaction loop.
278
+
279
+ // 1. Force layer 2 on next transform (persisted to DB survives restarts).
262
280
  setForceMinLayer(2, errorSessionID);
263
281
 
264
- if (errorSessionID) {
265
- // Force distillation to capture all undistilled messages into the temporal
266
- // store so they're preserved even if the session is later compacted manually.
267
- await backgroundDistill(errorSessionID, true);
282
+ // 2. Distill all undistilled messages so nothing is lost.
283
+ await backgroundDistill(errorSessionID, true);
284
+
285
+ // 3. Auto-recover: inject a synthetic message that goes through the normal
286
+ // chat path. The gradient transform fires with forceMinLayer=2, compressing
287
+ // the context to fit. The model receives the distilled summaries and
288
+ // continues where it left off — no user intervention needed.
289
+ try {
290
+ const summaries = distillation.loadForSession(projectPath, errorSessionID);
291
+ const recoveryText = buildRecoveryMessage(
292
+ summaries.map(s => ({ observations: s.observations, generation: s.generation })),
293
+ );
294
+
295
+ console.error(
296
+ `[lore] sending auto-recovery message to session ${errorSessionID.substring(0, 16)}`,
297
+ );
298
+ await ctx.client.session.prompt({
299
+ path: { id: errorSessionID },
300
+ body: {
301
+ parts: [{ type: "text", text: recoveryText, synthetic: true }],
302
+ },
303
+ });
304
+ console.error(
305
+ `[lore] auto-recovery message sent successfully`,
306
+ );
307
+ } catch (recoveryError) {
308
+ // Recovery is best-effort — don't let it crash the event handler.
309
+ // The persisted forceMinLayer will still help on the user's next message.
310
+ console.error(
311
+ `[lore] auto-recovery failed (forceMinLayer still persisted):`,
312
+ recoveryError,
313
+ );
268
314
  }
269
315
  }
270
316
  }