opencode-lore 0.2.2 → 0.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "opencode-lore",
3
- "version": "0.2.2",
3
+ "version": "0.2.4",
4
4
  "type": "module",
5
5
  "license": "MIT",
6
6
  "description": "Three-tier memory architecture for OpenCode — distillation, not summarization",
package/src/gradient.ts CHANGED
@@ -51,6 +51,18 @@ let lastKnownLtm = 0;
51
51
  let lastKnownSessionID: string | null = null;
52
52
  let lastKnownMessageCount = 0;
53
53
 
54
+ // Number of messages in the most recent transform() output — i.e. how many
55
+ // messages were actually sent to the model. On layer 0 this equals the full
56
+ // session length. On layers 1-4 it equals the compressed window size.
57
+ // Calibration must use this count (not the total DB message count) so that
58
+ // the delta on the next turn reflects only messages added since the last
59
+ // compressed window, not since the last DB snapshot.
60
+ let lastTransformedCount = 0;
61
+
62
+ export function getLastTransformedCount(): number {
63
+ return lastTransformedCount;
64
+ }
65
+
54
66
  // --- Force escalation ---
55
67
  // Set when the API returns "prompt is too long" — forces the transform to skip
56
68
  // layer 0 (and optionally layer 1) on the next call to ensure the context is
@@ -139,6 +151,7 @@ export function resetCalibration() {
139
151
  lastKnownLtm = 0;
140
152
  lastKnownSessionID = null;
141
153
  lastKnownMessageCount = 0;
154
+ lastTransformedCount = 0;
142
155
  forceMinLayer = 0;
143
156
  }
144
157
 
@@ -691,7 +704,7 @@ export function needsUrgentDistillation(): boolean {
691
704
  return v;
692
705
  }
693
706
 
694
- export function transform(input: {
707
+ function transformInner(input: {
695
708
  messages: MessageWithParts[];
696
709
  projectPath: string;
697
710
  sessionID?: string;
@@ -890,6 +903,24 @@ export function transform(input: {
890
903
  };
891
904
  }
892
905
 
906
+ // Public wrapper: records the compressed message count for calibration.
907
+ // Calibration needs to know how many messages were SENT to the model (the
908
+ // compressed window), not the total DB count. On layer 0 these are equal;
909
+ // on layers 1-4 the compressed window is smaller, and the delta on the next
910
+ // turn must be computed relative to the compressed count — otherwise the
911
+ // expected input on the next turn is anchored to the compressed input token
912
+ // count but the "new messages" delta is computed against the full DB count,
913
+ // making newMsgCount ≈ 0 and causing layer 0 passthrough on an overflowing session.
914
+ export function transform(input: {
915
+ messages: MessageWithParts[];
916
+ projectPath: string;
917
+ sessionID?: string;
918
+ }): TransformResult {
919
+ const result = transformInner(input);
920
+ lastTransformedCount = result.messages.length;
921
+ return result;
922
+ }
923
+
893
924
  // Compute our message-only estimate for a set of messages (for calibration use)
894
925
  export function estimateMessages(messages: MessageWithParts[]): number {
895
926
  return messages.reduce((sum, m) => sum + estimateMessage(m), 0);
package/src/index.ts CHANGED
@@ -14,6 +14,7 @@ import {
14
14
  setLtmTokens,
15
15
  getLtmBudget,
16
16
  setForceMinLayer,
17
+ getLastTransformedCount,
17
18
  } from "./gradient";
18
19
  import { formatKnowledge } from "./prompt";
19
20
  import { createRecallTool } from "./reflect";
@@ -219,7 +220,11 @@ export const LorePlugin: Plugin = async (ctx) => {
219
220
  const msgEstimate = estimateMessages(withParts);
220
221
  const actualInput =
221
222
  msg.tokens.input + msg.tokens.cache.read + msg.tokens.cache.write;
222
- calibrate(actualInput, msgEstimate, msg.sessionID, withParts.length);
223
+ // Use the compressed message count (from the last transform output),
224
+ // not the total DB count. On layer 0 these are equal. On layers 1-4,
225
+ // the model only saw the compressed window — calibrate must track that
226
+ // count so the next turn's delta is computed correctly.
227
+ calibrate(actualInput, msgEstimate, msg.sessionID, getLastTransformedCount() || withParts.length);
223
228
  }
224
229
  }
225
230
  }
@@ -390,12 +395,25 @@ export const LorePlugin: Plugin = async (ctx) => {
390
395
  // so the append-only sequence stays intact for prompt caching.
391
396
  if (result.layer > 0) {
392
397
  // The API requires the conversation to end with a user message.
393
- // Always drop trailing non-user messages even assistant messages with
394
- // tool parts. A hard API error is worse than the model re-invoking a tool.
398
+ // Drop trailing non-user messages, but stop if we hit an assistant message
399
+ // with an in-progress (non-completed) tool call — dropping it would cause
400
+ // the model to lose its pending tool invocation and re-issue it in an
401
+ // infinite loop. A completed tool part is safe to drop; a pending one is not.
395
402
  while (
396
403
  result.messages.length > 0 &&
397
404
  result.messages.at(-1)!.info.role !== "user"
398
405
  ) {
406
+ const last = result.messages.at(-1)!;
407
+ const hasPendingTool = last.parts.some(
408
+ (p) => p.type === "tool" && p.state.status !== "completed",
409
+ );
410
+ if (hasPendingTool) {
411
+ console.error(
412
+ "[lore] WARN: cannot drop trailing assistant message with pending tool call — may cause prefill error. id:",
413
+ last.info.id,
414
+ );
415
+ break;
416
+ }
399
417
  const dropped = result.messages.pop()!;
400
418
  console.error(
401
419
  "[lore] WARN: dropping trailing",