opencode-lore 0.4.0 → 0.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/agents-file.ts +1 -1
- package/src/distillation.ts +1 -1
- package/src/gradient.ts +40 -19
- package/src/index.ts +21 -24
- package/src/ltm.ts +1 -2
- package/src/prompt.ts +1 -2
- package/src/temporal.ts +2 -2
package/package.json
CHANGED
package/src/agents-file.ts
CHANGED
@@ -18,7 +18,7 @@ import { serialize, inline, h, ul, liph, strong, t, root, unescapeMarkdown } fro
 // ---------------------------------------------------------------------------

 export const LORE_SECTION_START =
-  "<!-- This section is
+  "<!-- This section is maintained by the coding agent via lore (https://github.com/BYK/opencode-lore) -->";
 export const LORE_SECTION_END = "<!-- End lore-managed section -->";

 /** Regex matching a valid UUID (v4 or v7) — 8-4-4-4-12 hex groups. */
package/src/distillation.ts
CHANGED
@@ -153,7 +153,7 @@ function storeDistillation(input: {
   const pid = ensureProject(input.projectPath);
   const id = crypto.randomUUID();
   const sourceJson = JSON.stringify(input.sourceIDs);
-  const tokens = Math.ceil(input.observations.length / 4);
+  const tokens = Math.ceil(input.observations.length / 3);
   db()
     .query(
       `INSERT INTO distillations (id, project_id, session_id, narrative, facts, observations, source_ids, generation, token_count, created_at)
package/src/gradient.ts
CHANGED
@@ -6,9 +6,12 @@ import { normalize } from "./markdown";

 type MessageWithParts = { info: Message; parts: Part[] };

-//
+// Token estimate: ~3 chars per token. Validated against real API data across
+// 200+ turn-pairs: chars/3 gives ~1.68x ratio (actual/estimate), best among
+// heuristics tested. The gap is overhead (system prompt, tool definitions,
+// conversation structure) which calibratedOverhead captures via EMA.
 function estimate(text: string): number {
-  return Math.ceil(text.length / 4);
+  return Math.ceil(text.length / 3);
 }

 function estimateParts(parts: Part[]): number {
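To make the new heuristic concrete, here is a minimal, self-contained sketch of the estimator this hunk introduces, with an illustrative check of the ~1.68x actual/estimate ratio cited in the comment. The 9,000-char input is an assumed example, not from the package:

```ts
// Sketch of the chars/3 estimator added in this hunk (illustrative values).
function estimate(text: string): number {
  return Math.ceil(text.length / 3);
}

// A 9,000-char window estimates to 3,000 tokens. At the observed ~1.68x
// ratio the API would report ~5,040 input tokens; the ~2,040-token gap is
// the prompt/tool/structure overhead that calibratedOverhead tracks via EMA.
const est = estimate("x".repeat(9_000));
console.log(est, Math.round(est * 1.68)); // 3000 5040
```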
@@ -70,6 +73,8 @@ type SessionState = {
   lastWindowMessageIDs: Set<string>;
   /** One-shot force escalation: skip layers below this on the next transform() */
   forceMinLayer: SafetyLayer;
+  /** Token estimate from the most recent transform() output (compressed window) */
+  lastTransformEstimate: number;
   /** Distilled prefix cache (Approach C) */
   prefixCache: PrefixCache | null;
   /** Raw window pin cache (Approach B) */
@@ -85,6 +90,7 @@ function makeSessionState(): SessionState {
     lastLayer: 0,
     lastWindowMessageIDs: new Set(),
     forceMinLayer: 0,
+    lastTransformEstimate: 0,
     prefixCache: null,
     rawWindowCache: null,
   };
@@ -139,22 +145,36 @@ export function getLtmBudget(ltmFraction: number): number {
 }

 // Called after each assistant message completes with real token usage data.
-// actualInput = tokens.input + tokens.cache.read
-// messageEstimate = our chars/4 estimate of the messages we sent
+// actualInput = tokens.input + tokens.cache.read + tokens.cache.write
 // sessionID = session that produced this response (for exact-tracking validity)
 // messageCount = number of messages that were sent (for delta estimation)
+//
+// Overhead calibration uses lastTransformEstimate (the token estimate from the
+// compressed window that was actually sent to the model) instead of re-estimating
+// all session messages. On compressed sessions, all-message estimate >> actualInput,
+// which clamped overhead to 0 and broke budget calculations.
 export function calibrate(
   actualInput: number,
-  messageEstimate: number,
   sessionID?: string,
   messageCount?: number,
 ) {
+  // Use the transform's own estimate for the compressed window it produced.
+  // This is the correct baseline: it estimates the same messages the model saw.
+  const messageEstimate = sessionID
+    ? getSessionState(sessionID).lastTransformEstimate
+    : 0;
+
   // Update global overhead calibration (shared across sessions — model-level).
-
-
-
-
-
+  // Skip when actualInput > 0 but no transform estimate exists yet (no baseline
+  // to compare against). Allow when both are 0 (test setup to zero overhead) or
+  // when we have a real transform estimate.
+  if (messageEstimate > 0 || actualInput === 0) {
+    const overhead = Math.max(0, actualInput - messageEstimate);
+    calibratedOverhead =
+      calibratedOverhead === null
+        ? overhead
+        : Math.round(calibratedOverhead * 0.7 + overhead * 0.3);
+  }

   // Store per-session exact counts for the proactive layer 0 decision.
   if (sessionID !== undefined) {
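A worked example of the EMA update above may help. This is a hedged, standalone sketch rather than the package's actual module state, and the per-turn token values are assumed:

```ts
// Standalone sketch of the 70/30 EMA overhead calibration (values assumed).
let calibratedOverhead: number | null = null;

function updateOverhead(actualInput: number, messageEstimate: number): void {
  // Mirrors the guard above: only calibrate with a real baseline, or when
  // both values are zero (used by test setup to zero the overhead).
  if (messageEstimate > 0 || actualInput === 0) {
    const overhead = Math.max(0, actualInput - messageEstimate);
    calibratedOverhead =
      calibratedOverhead === null
        ? overhead // first sample seeds the average
        : Math.round(calibratedOverhead * 0.7 + overhead * 0.3);
  }
}

updateOverhead(52_000, 50_000); // seeds overhead at 2,000
updateOverhead(61_000, 58_000); // 0.7 * 2000 + 0.3 * 3000 = 2,300
console.log(calibratedOverhead); // 2300
```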
@@ -800,20 +820,20 @@ function transformInner(input: {

   // --- Approach A: Cache-preserving passthrough ---
   // Use exact token count from the previous API response when available.
-
-
+  // Only the delta (messages added since last call) uses chars/3 estimation,
+  // making the layer-0 decision highly accurate from the API's own tokenizer.
   // maxInput = absolute ceiling the API enforces: input_tokens + max_tokens <= context
   const maxInput = contextLimit - outputReserved;

   // True when we have real API token data from a previous turn in this session.
-  // When false (first turn / session change), chars/
-  //
-  // being used
+  // When false (first turn / session change), chars/3 estimates may still diverge
+  // from the real tokenizer — so tryFit output must be validated with a safety
+  // multiplier before being used.
   const calibrated = sessState.lastKnownInput > 0;

   // On uncalibrated turns, apply this multiplier to tryFit's estimated total to
-  // approximate the real token count.
-  //
+  // approximate the real token count. chars/3 undercounts by ~1.68x on real data,
+  // but overhead EMA captures most of the gap. 1.5 provides a safe margin.
   const UNCALIBRATED_SAFETY = 1.5;

   // Returns true if the tryFit result is safe to use: either we have calibrated
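As an illustrative check of how UNCALIBRATED_SAFETY might gate a tryFit result: the limits and estimate below are assumed numbers, and this gating code is a sketch rather than the hunk's actual implementation:

```ts
// Assumed figures: a 200K-context model reserving 32K for output.
const UNCALIBRATED_SAFETY = 1.5;
const maxInput = 200_000 - 32_000; // contextLimit - outputReserved

// Suppose tryFit estimated 120,000 tokens for a candidate window. On an
// uncalibrated turn the real count could run up to ~1.5x, so inflate first.
const estimatedTotal = 120_000;
const safeToUse = estimatedTotal * UNCALIBRATED_SAFETY <= maxInput;
console.log(safeToUse); // false: 180,000 > 168,000, so escalate instead
```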
@@ -830,7 +850,7 @@ function transformInner(input: {
   // Prevents the calibration oscillation: a compressed turn stores
   // lastKnownInput=100K for a 50-message window, but the next turn's
   // input.messages has 300 raw messages. The delta estimation treats the 250
-  // evicted messages as "new" and undercounts
+  // evicted messages as "new" and undercounts their tokens, producing an
   // expectedInput that fits in layer 0 — but the actual tokens are ~190K.
   // Only applied when calibrated (same session, per-session state) to avoid
   // affecting other sessions including worker sessions.
@@ -851,7 +871,7 @@
     const ltmDelta = ltmTokens - sessState.lastKnownLtm;
     expectedInput = sessState.lastKnownInput + newMsgTokens + ltmDelta;
   } else {
-    // First turn or session change: fall back to chars/
+    // First turn or session change: fall back to chars/3 estimate + overhead.
     const messageTokens = input.messages.reduce((s, m) => s + estimateMessage(m), 0);
     expectedInput = messageTokens + overhead + ltmTokens;
   }
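The calibrated branch's arithmetic is easy to trace with sample numbers. A hedged sketch follows; the state values are assumed, and only the formula comes from the hunk:

```ts
// Calibrated path: lastKnownInput is exact (from the API), so only the
// delta since the previous turn relies on chars/3 estimation.
const lastKnownInput = 100_000; // exact tokens from the previous response
const lastKnownLtm = 1_200;     // LTM tokens included in that response
const newMsgTokens = 2_500;     // chars/3 estimate of messages added since
const ltmTokens = 1_500;        // size of the current LTM injection

const ltmDelta = ltmTokens - lastKnownLtm;                      // 300
const expectedInput = lastKnownInput + newMsgTokens + ltmDelta; // 102,800
console.log(expectedInput);
```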
@@ -1009,6 +1029,7 @@ export function transform(input: {
   if (sid) {
     const state = getSessionState(sid);
     state.lastTransformedCount = result.messages.length;
+    state.lastTransformEstimate = result.totalTokens;
     state.lastLayer = result.layer;
     state.lastWindowMessageIDs = new Set(result.messages.map((m) => m.info.id));
   }
package/src/index.ts
CHANGED
@@ -10,7 +10,6 @@ import {
   setModelLimits,
   needsUrgentDistillation,
   calibrate,
-  estimateMessages,
   setLtmTokens,
   getLtmBudget,
   setForceMinLayer,
@@ -204,28 +203,15 @@ export const LorePlugin: Plugin = async (ctx) => {
         backgroundDistill(msg.sessionID);
       }

-      // Calibrate overhead
-      //
-      //
-      //
-      //
-      //
-      const
-
-
-      if (allMsgs.data) {
-        const withParts = allMsgs.data
-          .filter((m) => m.info.id !== msg.id)
-          .map((m) => ({ info: m.info, parts: m.parts }));
-        const msgEstimate = estimateMessages(withParts);
-        const actualInput =
-          msg.tokens.input + msg.tokens.cache.read + msg.tokens.cache.write;
-        // Use the compressed message count (from the last transform output),
-        // not the total DB count. On layer 0 these are equal. On layers 1-4,
-        // the model only saw the compressed window — calibrate must track that
-        // count so the next turn's delta is computed correctly.
-        calibrate(actualInput, msgEstimate, msg.sessionID, getLastTransformedCount(msg.sessionID) || withParts.length);
-      }
+      // Calibrate overhead using real token counts from the API response.
+      // actualInput = all tokens the model processed (input + cache.read + cache.write).
+      // The message estimate comes from the transform's own output (stored in
+      // session state as lastTransformEstimate), NOT from re-estimating all session
+      // messages. On compressed sessions, all-message estimate >> actualInput, which
+      // previously clamped overhead to 0 and broke budget calculations.
+      const actualInput =
+        msg.tokens.input + msg.tokens.cache.read + msg.tokens.cache.write;
+      calibrate(actualInput, msg.sessionID, getLastTransformedCount(msg.sessionID));
     }
   }
 } catch {
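Taken together with the gradient.ts changes, the calibration round-trip now looks roughly like this. A sketch under stated assumptions: `onAssistantMessageComplete` is a hypothetical wrapper for the plugin hook shown above, and the declared signatures follow the diff:

```ts
// Hypothetical wrapper illustrating the new calibrate() flow end to end.
declare function getLastTransformedCount(sessionID: string): number | undefined;
declare function calibrate(
  actualInput: number,
  sessionID?: string,
  messageCount?: number,
): void;

function onAssistantMessageComplete(msg: {
  sessionID: string;
  tokens: { input: number; cache: { read: number; write: number } };
}): void {
  // Everything the model actually processed, including cache reads/writes.
  const actualInput =
    msg.tokens.input + msg.tokens.cache.read + msg.tokens.cache.write;
  // calibrate() reads lastTransformEstimate (stored by transform()) from
  // session state, so the estimate matches what the model actually saw.
  calibrate(actualInput, msg.sessionID, getLastTransformedCount(msg.sessionID));
}
```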
@@ -394,13 +380,24 @@ export const LorePlugin: Plugin = async (ctx) => {
       if (formatted) {
         // Track how many tokens we actually consumed so the gradient manager
         // can deduct them from the usable budget for message injection.
-        // Use /3 (not /4) — consistent with ltm.ts and prompt.ts estimators.
         const ltmTokenCount = Math.ceil(formatted.length / 3);
         setLtmTokens(ltmTokenCount);
         output.system.push(formatted);
       } else {
         setLtmTokens(0);
       }
+
+      // Remind the agent to include the agents file in commits.
+      // It is always modified after the lore export runs (post-session) so it
+      // appears as unstaged when the agent goes to commit — the agent must not
+      // skip it just because it looks auto-generated.
+      if (cfg.agentsFile.enabled) {
+        output.system.push(
+          `When making git commits, always check if ${cfg.agentsFile.path} has ` +
+            `unstaged changes and include it in the commit. This file contains ` +
+            `shared project knowledge managed by lore and must be version-controlled.`,
+        );
+      }
     },

     // Transform message history: distilled prefix + raw recent.
package/src/ltm.ts
CHANGED
@@ -2,8 +2,7 @@ import { uuidv7 } from "uuidv7";
 import { db, ensureProject } from "./db";
 import { ftsQuery } from "./temporal";

-//
-// real tokenization of code terms and special chars runs ~3.0-3.5 chars/token, not 4).
+// ~3 chars per token — validated as best heuristic against real API data.
 function estimateTokens(text: string): number {
   return Math.ceil(text.length / 3);
 }
package/src/prompt.ts
CHANGED
@@ -359,8 +359,7 @@ export function formatDistillations(
   return sections.join("\n\n");
 }

-//
-// Uses ~3 chars/token (conservative for markdown-heavy technical text).
+// ~3 chars per token — validated as best heuristic against real API data.
 function estimateTokens(text: string): number {
   return Math.ceil(text.length / 3);
 }
package/src/temporal.ts
CHANGED
@@ -1,9 +1,9 @@
 import { db, ensureProject } from "./db";
 import type { Message, Part } from "@opencode-ai/sdk";

-//
+// ~3 chars per token — validated as best heuristic against real API data.
 function estimate(text: string): number {
-  return Math.ceil(text.length / 4);
+  return Math.ceil(text.length / 3);
 }

 function partsToText(parts: Part[]): string {