opencode-lore 0.2.8 → 0.2.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "opencode-lore",
3
- "version": "0.2.8",
3
+ "version": "0.2.9",
4
4
  "type": "module",
5
5
  "license": "MIT",
6
6
  "description": "Three-tier memory architecture for OpenCode — distillation, not summarization",
package/src/gradient.ts CHANGED
@@ -38,56 +38,68 @@ const FIRST_TURN_OVERHEAD = 15_000;
38
38
 
39
39
  // Calibrated overhead: actual tokens used minus our message estimate.
40
40
  // Null = not yet calibrated (first turn). Updated after every assistant response.
41
+ // Shared across all sessions — this is model-level overhead (system prompt,
42
+ // tool definitions, provider headers) that doesn't vary per session.
41
43
  let calibratedOverhead: number | null = null;
42
44
 
43
- // --- Exact token tracking ---
44
- // Stores the real input token count from the last successful API response.
45
- // Used for the layer 0 passthrough decision: instead of estimating the full
46
- // message array with chars/4, we take the exact count from the previous turn
47
- // and only estimate the small delta (new messages). 99%+ of the count is
48
- // exact from the API's own tokenizer, virtually eliminating overflow errors.
49
- let lastKnownInput = 0;
50
- let lastKnownLtm = 0;
51
- let lastKnownSessionID: string | null = null;
52
- let lastKnownMessageCount = 0;
53
-
54
- // Number of messages in the most recent transform() output — i.e. how many
55
- // messages were actually sent to the model. On layer 0 this equals the full
56
- // session length. On layers 1-4 it equals the compressed window size.
57
- // Calibration must use this count (not the total DB message count) so that
58
- // the delta on the next turn reflects only messages added since the last
59
- // compressed window, not since the last DB snapshot.
60
- let lastTransformedCount = 0;
61
-
62
- export function getLastTransformedCount(): number {
63
- return lastTransformedCount;
64
- }
45
+ // ---------------------------------------------------------------------------
46
+ // Per-session state
47
+ //
48
+ // All calibration, layer-tracking, and window-ID state is scoped per session
49
+ // using an in-memory Map. This prevents worker sessions (lore-distill,
50
+ // lore-curator) from corrupting the main session's sticky-layer guard and
51
+ // delta-estimation state when their transform() calls return layer 0.
52
+ //
53
+ // DB persistence is unnecessary: UNCALIBRATED_SAFETY=1.5 safely handles
54
+ // the first turn of a resumed session. The Map is bounded — there are never
55
+ // more than a handful of active sessions at once.
56
+ // ---------------------------------------------------------------------------
57
+
58
+ type SessionState = {
59
+ /** Exact input token count from the last successful API response */
60
+ lastKnownInput: number;
61
+ /** LTM tokens that were in-flight when lastKnownInput was recorded */
62
+ lastKnownLtm: number;
63
+ /** Total messages sent to the model in the last turn (compressed count on layers 1-4) */
64
+ lastKnownMessageCount: number;
65
+ /** Number of messages in the most recent transform() output */
66
+ lastTransformedCount: number;
67
+ /** Layer used by the most recent transform() call — sticky-layer guard */
68
+ lastLayer: SafetyLayer;
69
+ /** Message IDs in the most recent transform() output — ID-based delta estimation */
70
+ lastWindowMessageIDs: Set<string>;
71
+ /** One-shot force escalation: skip layers below this on the next transform() */
72
+ forceMinLayer: SafetyLayer;
73
+ /** Distilled prefix cache (Approach C) */
74
+ prefixCache: PrefixCache | null;
75
+ /** Raw window pin cache (Approach B) */
76
+ rawWindowCache: RawWindowCache | null;
77
+ };
65
78
 
66
- /** Returns the layer used by the most recent transform() call. For testing. */
67
- export function getLastLayer(): SafetyLayer {
68
- return lastLayer;
79
+ function makeSessionState(): SessionState {
80
+ return {
81
+ lastKnownInput: 0,
82
+ lastKnownLtm: 0,
83
+ lastKnownMessageCount: 0,
84
+ lastTransformedCount: 0,
85
+ lastLayer: 0,
86
+ lastWindowMessageIDs: new Set(),
87
+ forceMinLayer: 0,
88
+ prefixCache: null,
89
+ rawWindowCache: null,
90
+ };
69
91
  }
70
92
 
71
- // The layer used by the most recent transform() call.
72
- // Used for the sticky-layer guard: once gradient mode activates (layer >= 1),
73
- // we don't allow fallback to layer 0 until the session genuinely shrinks
74
- // (e.g. after compaction). This prevents the calibration oscillation where a
75
- // compressed turn records 100K + 50-msg count, and the next turn's delta
76
- // estimation treats 250 evicted messages as "new", undercounts their tokens
77
- // via chars/4, and incorrectly concludes layer 0 passes.
78
- let lastLayer: SafetyLayer = 0;
79
-
80
- // The set of message IDs included in the most recent transform() output.
81
- // Used for accurate delta estimation: instead of counting messages by index
82
- // (which breaks after compression changes the window), we identify exactly
83
- // which messages are genuinely new since the last window.
84
- let lastWindowMessageIDs: Set<string> = new Set();
85
-
86
- // --- Force escalation ---
87
- // Set when the API returns "prompt is too long" — forces the transform to skip
88
- // layer 0 (and optionally layer 1) on the next call to ensure the context is
89
- // trimmed enough to fit. Cleared after one use (one-shot).
90
- let forceMinLayer: SafetyLayer = 0;
93
+ const sessionStates = new Map<string, SessionState>();
94
+
95
+ function getSessionState(sessionID: string): SessionState {
96
+ let state = sessionStates.get(sessionID);
97
+ if (!state) {
98
+ state = makeSessionState();
99
+ sessionStates.set(sessionID, state);
100
+ }
101
+ return state;
102
+ }
91
103
 
92
104
  // LTM tokens injected via system transform hook this turn.
93
105
  // Set by setLtmTokens() after the system hook runs; consumed by transform().
@@ -137,18 +149,20 @@ export function calibrate(
137
149
  sessionID?: string,
138
150
  messageCount?: number,
139
151
  ) {
140
- // Store exact counts for the proactive layer 0 decision.
141
- lastKnownInput = actualInput;
142
- lastKnownLtm = ltmTokens;
143
- if (sessionID !== undefined) lastKnownSessionID = sessionID;
144
- if (messageCount !== undefined) lastKnownMessageCount = messageCount;
145
-
152
+ // Update global overhead calibration (shared across sessions — model-level).
146
153
  const overhead = Math.max(0, actualInput - messageEstimate);
147
- // Smooth with EMA (alpha=0.3) once calibrated, or set directly on first call
148
154
  calibratedOverhead =
149
155
  calibratedOverhead === null
150
156
  ? overhead
151
157
  : Math.round(calibratedOverhead * 0.7 + overhead * 0.3);
158
+
159
+ // Store per-session exact counts for the proactive layer 0 decision.
160
+ if (sessionID !== undefined) {
161
+ const state = getSessionState(sessionID);
162
+ state.lastKnownInput = actualInput;
163
+ state.lastKnownLtm = ltmTokens;
164
+ if (messageCount !== undefined) state.lastKnownMessageCount = messageCount;
165
+ }
152
166
  }
153
167
 
154
168
  export function getOverhead(): number {
@@ -156,25 +170,45 @@ export function getOverhead(): number {
156
170
  }
157
171
 
158
172
  /**
159
- * Force the next transform() call to use at least the given layer.
173
+ * Returns the number of messages in the most recent transform() output for
174
+ * the given session. Used by calibrate() to track the compressed window size.
175
+ */
176
+ export function getLastTransformedCount(sessionID: string): number {
177
+ return sessionStates.get(sessionID)?.lastTransformedCount ?? 0;
178
+ }
179
+
180
+ /** Returns the layer used by the most recent transform() call. For testing. */
181
+ export function getLastLayer(sessionID?: string): SafetyLayer {
182
+ if (sessionID) return sessionStates.get(sessionID)?.lastLayer ?? 0;
183
+ // Fallback for tests: return from the first (and usually only) session state
184
+ const first = sessionStates.values().next().value;
185
+ return first?.lastLayer ?? 0;
186
+ }
187
+
188
+ /**
189
+ * Force the next transform() call for this session to use at least the given layer.
160
190
  * Called when the API returns "prompt is too long" so the next attempt
161
191
  * trims the context enough to fit within the model's context window.
162
192
  */
163
- export function setForceMinLayer(layer: SafetyLayer) {
164
- forceMinLayer = layer;
193
+ export function setForceMinLayer(layer: SafetyLayer, sessionID?: string) {
194
+ if (sessionID) {
195
+ getSessionState(sessionID).forceMinLayer = layer;
196
+ } else {
197
+ // Fallback for tests / callers without session ID: set on all active sessions
198
+ for (const state of sessionStates.values()) {
199
+ state.forceMinLayer = layer;
200
+ }
201
+ }
165
202
  }
166
203
 
167
204
  // For testing only — reset all calibration and force-escalation state
168
- export function resetCalibration() {
205
+ export function resetCalibration(sessionID?: string) {
169
206
  calibratedOverhead = null;
170
- lastKnownInput = 0;
171
- lastKnownLtm = 0;
172
- lastKnownSessionID = null;
173
- lastKnownMessageCount = 0;
174
- lastTransformedCount = 0;
175
- forceMinLayer = 0;
176
- lastLayer = 0;
177
- lastWindowMessageIDs = new Set();
207
+ if (sessionID) {
208
+ sessionStates.delete(sessionID);
209
+ } else {
210
+ sessionStates.clear();
211
+ }
178
212
  }
179
213
 
180
214
  type Distillation = {
@@ -496,28 +530,29 @@ type PrefixCache = {
496
530
  prefixTokens: number;
497
531
  };
498
532
 
499
- let prefixCache: PrefixCache | null = null;
500
-
501
533
  /**
502
534
  * Return the distilled prefix messages, reusing cached content when possible.
535
+ * Uses per-session state from sessState.prefixCache (no module-level cache).
503
536
  *
504
537
  * Cache hit — no new rows: returns the exact same prefixMessages object
505
538
  * (byte-identical content, prompt cache preserved).
506
539
  * Cache miss — new rows appended: renders only the delta, appends to cached
507
540
  * text, updates cache.
508
- * Full reset — session changed, or rows were rewritten by meta-distillation:
541
+ * Full reset — first call, or rows were rewritten by meta-distillation:
509
542
  * renders everything from scratch.
510
543
  */
511
544
  function distilledPrefixCached(
512
545
  distillations: Distillation[],
513
546
  sessionID: string,
547
+ sessState: SessionState,
514
548
  ): { messages: MessageWithParts[]; tokens: number } {
515
549
  if (!distillations.length) {
516
- prefixCache = null;
550
+ sessState.prefixCache = null;
517
551
  return { messages: [], tokens: 0 };
518
552
  }
519
553
 
520
554
  const lastRow = distillations[distillations.length - 1];
555
+ const prefixCache = sessState.prefixCache;
521
556
 
522
557
  // Cache is valid when: same session, row count only grew (no rewrites),
523
558
  // and the last previously-cached row still exists at the same position.
@@ -551,7 +586,7 @@ function distilledPrefixCached(
551
586
  const fullText = prefixCache!.cachedText + "\n\n" + deltaText;
552
587
  const messages = buildPrefixMessages(fullText);
553
588
  const tokens = messages.reduce((sum, m) => sum + estimateMessage(m), 0);
554
- prefixCache = {
589
+ sessState.prefixCache = {
555
590
  sessionID,
556
591
  lastDistillationID: lastRow.id,
557
592
  rowCount: distillations.length,
@@ -563,7 +598,7 @@ function distilledPrefixCached(
563
598
  }
564
599
  }
565
600
 
566
- // Full re-render: first call, session change, or meta-distillation rewrote rows
601
+ // Full re-render: first call or meta-distillation rewrote rows
567
602
  const now = new Date();
568
603
  const annotated = distillations.map((d) => ({
569
604
  ...d,
@@ -571,13 +606,13 @@ function distilledPrefixCached(
571
606
  }));
572
607
  const fullText = formatDistillations(annotated);
573
608
  if (!fullText) {
574
- prefixCache = null;
609
+ sessState.prefixCache = null;
575
610
  return { messages: [], tokens: 0 };
576
611
  }
577
612
 
578
613
  const messages = buildPrefixMessages(fullText);
579
614
  const tokens = messages.reduce((sum, m) => sum + estimateMessage(m), 0);
580
- prefixCache = {
615
+ sessState.prefixCache = {
581
616
  sessionID,
582
617
  lastDistillationID: lastRow.id,
583
618
  rowCount: distillations.length,
@@ -588,9 +623,14 @@ function distilledPrefixCached(
588
623
  return { messages, tokens };
589
624
  }
590
625
 
591
- // For testing only — reset prefix cache state
592
- export function resetPrefixCache() {
593
- prefixCache = null;
626
+ // For testing only — reset prefix cache state for a specific session (or all)
627
+ export function resetPrefixCache(sessionID?: string) {
628
+ if (sessionID) {
629
+ const state = sessionStates.get(sessionID);
630
+ if (state) state.prefixCache = null;
631
+ } else {
632
+ for (const state of sessionStates.values()) state.prefixCache = null;
633
+ }
594
634
  }
595
635
 
596
636
  // --- Approach B: Lazy raw window eviction ---
@@ -616,14 +656,19 @@ type RawWindowCache = {
616
656
  firstMessageID: string;
617
657
  };
618
658
 
619
- let rawWindowCache: RawWindowCache | null = null;
620
-
621
- export function resetRawWindowCache() {
622
- rawWindowCache = null;
659
+ // For testing only — reset raw window cache state for a specific session (or all)
660
+ export function resetRawWindowCache(sessionID?: string) {
661
+ if (sessionID) {
662
+ const state = sessionStates.get(sessionID);
663
+ if (state) state.rawWindowCache = null;
664
+ } else {
665
+ for (const state of sessionStates.values()) state.rawWindowCache = null;
666
+ }
623
667
  }
624
668
 
625
669
  /**
626
670
  * Layer-1 tryFit with lazy eviction.
671
+ * Uses per-session rawWindowCache from sessState (no module-level cache).
627
672
  *
628
673
  * Attempts to reuse the previous raw window cutoff before falling back to a
629
674
  * full backward scan. If the pinned window fits, returns it unchanged (same
@@ -638,11 +683,13 @@ function tryFitStable(input: {
638
683
  distilledBudget: number;
639
684
  rawBudget: number;
640
685
  sessionID: string;
686
+ sessState: SessionState;
641
687
  }): Omit<TransformResult, "layer" | "usable" | "distilledBudget" | "rawBudget"> | null {
642
688
  // If the prefix already overflows its budget there's no point trying.
643
689
  if (input.prefixTokens > input.distilledBudget && input.prefix.length > 0)
644
690
  return null;
645
691
 
692
+ const rawWindowCache = input.sessState.rawWindowCache;
646
693
  const cacheValid =
647
694
  rawWindowCache !== null && rawWindowCache.sessionID === input.sessionID;
648
695
 
@@ -694,7 +741,7 @@ function tryFitStable(input: {
694
741
  // raw message in the new window. Pin to its ID for the next turn.
695
742
  const rawStart = result.messages[input.prefix.length];
696
743
  if (rawStart) {
697
- rawWindowCache = {
744
+ input.sessState.rawWindowCache = {
698
745
  sessionID: input.sessionID,
699
746
  firstMessageID: rawStart.info.id,
700
747
  };
@@ -746,8 +793,10 @@ function transformInner(input: {
746
793
  // When the API previously rejected with "prompt is too long", skip layers
747
794
  // below the forced minimum to ensure enough trimming on the next attempt.
748
795
  // One-shot: consumed here and reset to 0.
749
- let effectiveMinLayer = forceMinLayer;
750
- forceMinLayer = 0;
796
+ const sid = input.sessionID ?? input.messages[0]?.info.sessionID;
797
+ const sessState = sid ? getSessionState(sid) : makeSessionState();
798
+ let effectiveMinLayer = sessState.forceMinLayer;
799
+ sessState.forceMinLayer = 0;
751
800
 
752
801
  // --- Approach A: Cache-preserving passthrough ---
753
802
  // Use exact token count from the previous API response when available.
@@ -755,13 +804,12 @@ function transformInner(input: {
755
804
  // making the layer-0 decision 99%+ accurate from the API's own tokenizer.
756
805
  // maxInput = absolute ceiling the API enforces: input_tokens + max_tokens <= context
757
806
  const maxInput = contextLimit - outputReserved;
758
- const sid = input.sessionID ?? input.messages[0]?.info.sessionID;
759
807
 
760
808
  // True when we have real API token data from a previous turn in this session.
761
809
  // When false (first turn / session change), chars/4 estimates can undercount by
762
810
  // up to 1.8x — so tryFit output must be validated with a safety multiplier before
763
811
  // being used, to prevent sending an apparently-fitting window that actually overflows.
764
- const calibrated = lastKnownInput > 0 && sid === lastKnownSessionID;
812
+ const calibrated = sessState.lastKnownInput > 0;
765
813
 
766
814
  // On uncalibrated turns, apply this multiplier to tryFit's estimated total to
767
815
  // approximate the real token count. 1.5 is conservative but not so aggressive
@@ -784,8 +832,9 @@ function transformInner(input: {
784
832
  // input.messages has 300 raw messages. The delta estimation treats the 250
785
833
  // evicted messages as "new" and undercounts them via chars/4, producing an
786
834
  // expectedInput that fits in layer 0 — but the actual tokens are ~190K.
787
- // Only applied when calibrated (same session) to avoid affecting other sessions.
788
- if (calibrated && lastLayer >= 1 && input.messages.length >= lastKnownMessageCount) {
835
+ // Only applied when calibrated (same session, per-session state) to avoid
836
+ // affecting other sessions including worker sessions.
837
+ if (calibrated && sessState.lastLayer >= 1 && input.messages.length >= sessState.lastKnownMessageCount) {
789
838
  effectiveMinLayer = Math.max(effectiveMinLayer, 1) as SafetyLayer;
790
839
  }
791
840
 
@@ -795,12 +844,12 @@ function transformInner(input: {
795
844
  // Use message ID tracking (Option B) to identify new messages accurately.
796
845
  // After compression, the "last window" is a subset of the full message array —
797
846
  // counting by index would treat evicted messages as new (off-by-250 error).
798
- const newMessages = lastWindowMessageIDs.size > 0
799
- ? input.messages.filter((m) => !lastWindowMessageIDs.has(m.info.id))
800
- : input.messages.slice(-Math.max(0, input.messages.length - lastKnownMessageCount));
847
+ const newMessages = sessState.lastWindowMessageIDs.size > 0
848
+ ? input.messages.filter((m) => !sessState.lastWindowMessageIDs.has(m.info.id))
849
+ : input.messages.slice(-Math.max(0, input.messages.length - sessState.lastKnownMessageCount));
801
850
  const newMsgTokens = newMessages.reduce((s, m) => s + estimateMessage(m), 0);
802
- const ltmDelta = ltmTokens - lastKnownLtm;
803
- expectedInput = lastKnownInput + newMsgTokens + ltmDelta;
851
+ const ltmDelta = ltmTokens - sessState.lastKnownLtm;
852
+ expectedInput = sessState.lastKnownInput + newMsgTokens + ltmDelta;
804
853
  } else {
805
854
  // First turn or session change: fall back to chars/4 + overhead.
806
855
  const messageTokens = input.messages.reduce((s, m) => s + estimateMessage(m), 0);
@@ -810,8 +859,8 @@ function transformInner(input: {
810
859
  if (effectiveMinLayer === 0 && expectedInput <= maxInput) {
811
860
  // All messages fit — return unmodified to preserve append-only prompt-cache pattern.
812
861
  // Raw messages are strictly better context than lossy distilled summaries.
813
- const messageTokens = lastKnownInput > 0 && sid === lastKnownSessionID
814
- ? expectedInput - (ltmTokens - lastKnownLtm) // approximate raw portion
862
+ const messageTokens = calibrated
863
+ ? expectedInput - (ltmTokens - sessState.lastKnownLtm) // approximate raw portion
815
864
  : expectedInput - overhead - ltmTokens;
816
865
  return {
817
866
  messages: input.messages,
@@ -834,7 +883,7 @@ function transformInner(input: {
834
883
  // the prompt cache. Layers 2-4 already cause full cache invalidation via
835
884
  // tool stripping / message restructuring, so they use the non-cached path.
836
885
  const cached = sid
837
- ? distilledPrefixCached(distillations, sid)
886
+ ? distilledPrefixCached(distillations, sid, sessState)
838
887
  : (() => {
839
888
  const msgs = distilledPrefix(distillations);
840
889
  return { messages: msgs, tokens: msgs.reduce((sum, m) => sum + estimateMessage(m), 0) };
@@ -854,6 +903,7 @@ function transformInner(input: {
854
903
  distilledBudget,
855
904
  rawBudget,
856
905
  sessionID: sid,
906
+ sessState,
857
907
  })
858
908
  : tryFit({
859
909
  messages: input.messages,
@@ -868,7 +918,7 @@ function transformInner(input: {
868
918
 
869
919
  // Layer 1 didn't fit (or was force-skipped) — reset the raw window cache.
870
920
  // Layers 2-4 use full scans and already break the prompt cache.
871
- rawWindowCache = null;
921
+ sessState.rawWindowCache = null;
872
922
 
873
923
  // Layer 2: Strip tool outputs from older messages, keep last 2 turns
874
924
  // Skipped when force-escalated to layer 3+.
@@ -955,9 +1005,13 @@ export function transform(input: {
955
1005
  sessionID?: string;
956
1006
  }): TransformResult {
957
1007
  const result = transformInner(input);
958
- lastTransformedCount = result.messages.length;
959
- lastLayer = result.layer;
960
- lastWindowMessageIDs = new Set(result.messages.map((m) => m.info.id));
1008
+ const sid = input.sessionID ?? input.messages[0]?.info.sessionID;
1009
+ if (sid) {
1010
+ const state = getSessionState(sid);
1011
+ state.lastTransformedCount = result.messages.length;
1012
+ state.lastLayer = result.layer;
1013
+ state.lastWindowMessageIDs = new Set(result.messages.map((m) => m.info.id));
1014
+ }
961
1015
  return result;
962
1016
  }
963
1017
 
package/src/index.ts CHANGED
@@ -224,7 +224,7 @@ export const LorePlugin: Plugin = async (ctx) => {
224
224
  // not the total DB count. On layer 0 these are equal. On layers 1-4,
225
225
  // the model only saw the compressed window — calibrate must track that
226
226
  // count so the next turn's delta is computed correctly.
227
- calibrate(actualInput, msgEstimate, msg.sessionID, getLastTransformedCount() || withParts.length);
227
+ calibrate(actualInput, msgEstimate, msg.sessionID, getLastTransformedCount(msg.sessionID) || withParts.length);
228
228
  }
229
229
  }
230
230
  }
@@ -270,7 +270,7 @@ export const LorePlugin: Plugin = async (ctx) => {
270
270
  // The gradient at layers 2-4 will compress the context enough for the next turn.
271
271
  // Do NOT call session.summarize() here — it sends all messages to the model,
272
272
  // which would overflow again and create a stuck compaction loop.
273
- setForceMinLayer(2);
273
+ setForceMinLayer(2, sessionID);
274
274
 
275
275
  if (sessionID) {
276
276
  // Force distillation to capture all undistilled messages into the temporal
@@ -384,6 +384,16 @@ export const LorePlugin: Plugin = async (ctx) => {
384
384
 
385
385
  const sessionID = output.messages[0]?.info.sessionID;
386
386
 
387
+ // Skip gradient transform for lore worker sessions (lore-distill, lore-curator).
388
+ // Worker sessions are small (typically 5-15 messages) and don't need context
389
+ // management. More importantly, allowing them through would overwrite the
390
+ // per-session state for the MAIN session if they happen to share a session ID —
391
+ // and before per-session state was introduced, module-level variables were
392
+ // corrupted this way, causing calibration oscillation and layer 0 passthrough
393
+ // on the main session's next step. Belt-and-suspenders: even with per-session
394
+ // state, worker sessions waste CPU on transform() for no benefit.
395
+ if (sessionID && await shouldSkip(sessionID)) return;
396
+
387
397
  const result = transform({
388
398
  messages: output.messages,
389
399
  projectPath,