@loreai/core 0.12.0 → 0.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87)
  1. package/dist/bun/agents-file.d.ts +29 -8
  2. package/dist/bun/agents-file.d.ts.map +1 -1
  3. package/dist/bun/config.d.ts +1 -0
  4. package/dist/bun/config.d.ts.map +1 -1
  5. package/dist/bun/db.d.ts.map +1 -1
  6. package/dist/bun/distillation.d.ts +29 -0
  7. package/dist/bun/distillation.d.ts.map +1 -1
  8. package/dist/bun/embedding.d.ts +15 -1
  9. package/dist/bun/embedding.d.ts.map +1 -1
  10. package/dist/bun/gradient.d.ts +53 -5
  11. package/dist/bun/gradient.d.ts.map +1 -1
  12. package/dist/bun/index.d.ts +4 -4
  13. package/dist/bun/index.d.ts.map +1 -1
  14. package/dist/bun/index.js +696 -243
  15. package/dist/bun/index.js.map +4 -4
  16. package/dist/bun/pattern-extract.d.ts +36 -0
  17. package/dist/bun/pattern-extract.d.ts.map +1 -0
  18. package/dist/bun/recall.d.ts +1 -0
  19. package/dist/bun/recall.d.ts.map +1 -1
  20. package/dist/bun/search.d.ts +13 -1
  21. package/dist/bun/search.d.ts.map +1 -1
  22. package/dist/bun/types.d.ts +41 -1
  23. package/dist/bun/types.d.ts.map +1 -1
  24. package/dist/bun/worker-model.d.ts +22 -0
  25. package/dist/bun/worker-model.d.ts.map +1 -1
  26. package/dist/node/agents-file.d.ts +29 -8
  27. package/dist/node/agents-file.d.ts.map +1 -1
  28. package/dist/node/config.d.ts +1 -0
  29. package/dist/node/config.d.ts.map +1 -1
  30. package/dist/node/db.d.ts.map +1 -1
  31. package/dist/node/distillation.d.ts +29 -0
  32. package/dist/node/distillation.d.ts.map +1 -1
  33. package/dist/node/embedding.d.ts +15 -1
  34. package/dist/node/embedding.d.ts.map +1 -1
  35. package/dist/node/gradient.d.ts +53 -5
  36. package/dist/node/gradient.d.ts.map +1 -1
  37. package/dist/node/index.d.ts +4 -4
  38. package/dist/node/index.d.ts.map +1 -1
  39. package/dist/node/index.js +696 -243
  40. package/dist/node/index.js.map +4 -4
  41. package/dist/node/pattern-extract.d.ts +36 -0
  42. package/dist/node/pattern-extract.d.ts.map +1 -0
  43. package/dist/node/recall.d.ts +1 -0
  44. package/dist/node/recall.d.ts.map +1 -1
  45. package/dist/node/search.d.ts +13 -1
  46. package/dist/node/search.d.ts.map +1 -1
  47. package/dist/node/types.d.ts +41 -1
  48. package/dist/node/types.d.ts.map +1 -1
  49. package/dist/node/worker-model.d.ts +22 -0
  50. package/dist/node/worker-model.d.ts.map +1 -1
  51. package/dist/types/agents-file.d.ts +29 -8
  52. package/dist/types/agents-file.d.ts.map +1 -1
  53. package/dist/types/config.d.ts +1 -0
  54. package/dist/types/config.d.ts.map +1 -1
  55. package/dist/types/db.d.ts.map +1 -1
  56. package/dist/types/distillation.d.ts +29 -0
  57. package/dist/types/distillation.d.ts.map +1 -1
  58. package/dist/types/embedding.d.ts +15 -1
  59. package/dist/types/embedding.d.ts.map +1 -1
  60. package/dist/types/gradient.d.ts +53 -5
  61. package/dist/types/gradient.d.ts.map +1 -1
  62. package/dist/types/index.d.ts +4 -4
  63. package/dist/types/index.d.ts.map +1 -1
  64. package/dist/types/pattern-extract.d.ts +36 -0
  65. package/dist/types/pattern-extract.d.ts.map +1 -0
  66. package/dist/types/recall.d.ts +1 -0
  67. package/dist/types/recall.d.ts.map +1 -1
  68. package/dist/types/search.d.ts +13 -1
  69. package/dist/types/search.d.ts.map +1 -1
  70. package/dist/types/types.d.ts +41 -1
  71. package/dist/types/types.d.ts.map +1 -1
  72. package/dist/types/worker-model.d.ts +22 -0
  73. package/dist/types/worker-model.d.ts.map +1 -1
  74. package/package.json +3 -2
  75. package/src/agents-file.ts +111 -28
  76. package/src/config.ts +25 -18
  77. package/src/curator.ts +2 -2
  78. package/src/db.ts +19 -2
  79. package/src/distillation.ts +152 -15
  80. package/src/embedding.ts +158 -14
  81. package/src/gradient.ts +398 -227
  82. package/src/index.ts +13 -5
  83. package/src/pattern-extract.ts +108 -0
  84. package/src/recall.ts +124 -6
  85. package/src/search.ts +37 -1
  86. package/src/types.ts +41 -1
  87. package/src/worker-model.ts +142 -5
package/src/gradient.ts CHANGED
@@ -72,6 +72,13 @@ let calibratedOverhead: number | null = null;
72
72
  // response via UNCALIBRATED_SAFETY.
73
73
  // ---------------------------------------------------------------------------
74
74
 
75
+ type DistillationSnapshot = {
76
+ /** Cached distillation rows from the most recent DB read */
77
+ rows: Distillation[];
78
+ /** ID of the last user message when this snapshot was taken */
79
+ lastUserMsgId: string | null;
80
+ };
81
+
75
82
  type SessionState = {
76
83
  /** Exact input token count from the last successful API response */
77
84
  lastKnownInput: number;
@@ -89,6 +96,8 @@ type SessionState = {
89
96
  forceMinLayer: SafetyLayer;
90
97
  /** Token estimate from the most recent transform() output (compressed window) */
91
98
  lastTransformEstimate: number;
99
+ /** LTM tokens injected for this session's current turn (per-session isolation) */
100
+ ltmTokens: number;
92
101
  /** Distilled prefix cache (Approach C) */
93
102
  prefixCache: PrefixCache | null;
94
103
  /** Raw window pin cache (Approach B) */
@@ -112,10 +121,36 @@ type SessionState = {
112
121
  * the post-idle turn regardless of conversation size.
113
122
  */
114
123
  cameOutOfIdle: boolean;
124
+ /**
125
+ * Set true by onIdleResume() alongside cameOutOfIdle; consumed (and cleared)
126
+ * by transformInner() to activate the post-idle compact layer. When true AND
127
+ * distillations exist, transform skips layer 0 (full-raw passthrough) and
128
+ * uses a tighter raw budget for layer 1. Rationale: on a cold cache the
129
+ * entire context is a cache WRITE — a smaller total means lower write cost,
130
+ * and aggressive idle distillation already captured the older history.
131
+ */
132
+ postIdleCompact: boolean;
115
133
  /** Consecutive turns at layer >= 2. When >= 3, log a compaction hint. */
116
134
  consecutiveHighLayer: number;
117
135
  /** Hash of the first message IDs in the last transform output — for cache-bust diagnostics. */
118
136
  lastPrefixHash: string;
137
+ /** Cumulative cache-bust count for this session (prefix hash changed between turns). */
138
+ bustCount: number;
139
+ /** Total transform() calls for this session — used with bustCount for rate calculation. */
140
+ transformCount: number;
141
+ /**
142
+ * Distillation row snapshot — cached to avoid hitting the DB on every
143
+ * transform() call. Refreshed only at turn boundaries (when a new user
144
+ * message appears) or on first call / idle resume. During autonomous
145
+ * tool-call chains this stays frozen, keeping the distilled prefix
146
+ * byte-identical across consecutive API calls and preserving the prompt
147
+ * cache.
148
+ *
149
+ * Cost context: each prefix refresh costs context_size × cache_write_price
150
+ * (~$1.88 per bust at 500K Sonnet). New distillations have near-zero
151
+ * marginal value mid-chain since the model already has raw messages.
152
+ */
153
+ distillationSnapshot: DistillationSnapshot | null;
119
154
  };
120
155
 
121
156
  function makeSessionState(): SessionState {
@@ -128,12 +163,17 @@ function makeSessionState(): SessionState {
128
163
  lastWindowMessageIDs: new Set(),
129
164
  forceMinLayer: 0,
130
165
  lastTransformEstimate: 0,
166
+ ltmTokens: 0,
131
167
  prefixCache: null,
132
168
  rawWindowCache: null,
133
169
  lastTurnAt: 0,
134
170
  cameOutOfIdle: false,
171
+ postIdleCompact: false,
135
172
  consecutiveHighLayer: 0,
136
173
  lastPrefixHash: "",
174
+ bustCount: 0,
175
+ transformCount: 0,
176
+ distillationSnapshot: null,
137
177
  };
138
178
  }
139
179
 
@@ -196,10 +236,22 @@ export function onIdleResume(
196
236
  if (idleMs < thresholdMs) return { triggered: false };
197
237
  state.prefixCache = null;
198
238
  state.rawWindowCache = null;
239
+ state.distillationSnapshot = null;
199
240
  state.cameOutOfIdle = true;
241
+ state.postIdleCompact = true;
200
242
  return { triggered: true, idleMs };
201
243
  }
202
244
 
245
+ /**
246
+ * Return the wall-clock timestamp (epoch ms) of the most recent transform()
247
+ * call for this session. Returns 0 if the session has never been transformed.
248
+ * Used by callers (e.g. meta-distillation gating) to check whether the
249
+ * upstream prompt cache is likely still warm.
250
+ */
251
+ export function getLastTurnAt(sessionID: string): number {
252
+ return sessionStates.get(sessionID)?.lastTurnAt ?? 0;
253
+ }
254
+
203
255
  /**
204
256
  * Read-and-clear the cameOutOfIdle flag. The OpenCode host's LTM degraded-
205
257
  * recovery branch consumes this to decide whether to bypass the
@@ -213,8 +265,9 @@ export function consumeCameOutOfIdle(sessionID: string): boolean {
213
265
  }
214
266
 
215
267
  // LTM tokens injected via system transform hook this turn.
216
- // Set by setLtmTokens() after the system hook runs; consumed by transform().
217
- let ltmTokens = 0;
268
+ // Per-session when a sessionID is provided (preferred), with a module-level
269
+ // fallback for callers that don't have a session ID.
270
+ let ltmTokensFallback = 0;
218
271
 
219
272
  export function setModelLimits(limits: { context: number; output: number }) {
220
273
  contextLimit = limits.context || 200_000;
@@ -248,14 +301,25 @@ export function computeLayer0Cap(
248
301
  return Math.max(rawCap, MIN_LAYER0_FLOOR);
249
302
  }
250
303
 
251
- /** Called by the system transform hook after formatting LTM knowledge. */
252
- export function setLtmTokens(tokens: number) {
253
- ltmTokens = tokens;
304
+ /** Called by the system transform hook after formatting LTM knowledge.
305
+ * When sessionID is provided, stores on per-session state to prevent
306
+ * cross-session budget contamination. Falls back to module-level global
307
+ * for callers without a session ID. */
308
+ export function setLtmTokens(tokens: number, sessionID?: string) {
309
+ if (sessionID) {
310
+ getSessionState(sessionID).ltmTokens = tokens;
311
+ }
312
+ ltmTokensFallback = tokens;
254
313
  }
255
314
 
256
- /** Returns the current LTM token count (for tests and diagnostics). */
257
- export function getLtmTokens(): number {
258
- return ltmTokens;
315
+ /** Returns the LTM token count for the given session, falling back to
316
+ * the module-level global when no session ID is provided. */
317
+ export function getLtmTokens(sessionID?: string): number {
318
+ if (sessionID) {
319
+ const state = sessionStates.get(sessionID);
320
+ if (state) return state.ltmTokens;
321
+ }
322
+ return ltmTokensFallback;
259
323
  }
260
324
 
261
325
  /**
@@ -306,7 +370,7 @@ export function calibrate(
306
370
  if (sessionID !== undefined) {
307
371
  const state = getSessionState(sessionID);
308
372
  state.lastKnownInput = actualInput;
309
- state.lastKnownLtm = ltmTokens;
373
+ state.lastKnownLtm = state.ltmTokens;
310
374
  if (messageCount !== undefined) state.lastKnownMessageCount = messageCount;
311
375
  }
312
376
  }
@@ -378,7 +442,9 @@ export function inspectSessionState(sessionID: string): {
378
442
  hasPrefixCache: boolean;
379
443
  hasRawWindowCache: boolean;
380
444
  cameOutOfIdle: boolean;
445
+ postIdleCompact: boolean;
381
446
  lastTurnAt: number;
447
+ distillationSnapshot: DistillationSnapshot | null;
382
448
  } | null {
383
449
  const state = sessionStates.get(sessionID);
384
450
  if (!state) return null;
@@ -386,7 +452,9 @@ export function inspectSessionState(sessionID: string): {
386
452
  hasPrefixCache: state.prefixCache !== null,
387
453
  hasRawWindowCache: state.rawWindowCache !== null,
388
454
  cameOutOfIdle: state.cameOutOfIdle,
455
+ postIdleCompact: state.postIdleCompact,
389
456
  lastTurnAt: state.lastTurnAt,
457
+ distillationSnapshot: state.distillationSnapshot,
390
458
  };
391
459
  }
392
460
 
@@ -425,6 +493,46 @@ function loadDistillations(
425
493
  .all(...params) as Distillation[];
426
494
  }
427
495
 
496
+ // Cached distillation loader — avoids hitting the DB on every transform() call.
497
+ // Refreshed only at turn boundaries (when a new user message appears), on first
498
+ // call (null snapshot), or after idle resume (snapshot cleared by onIdleResume).
499
+ // During autonomous tool-call chains (consecutive assistant→tool→assistant with
500
+ // the same last user message), returns the cached rows so the distilled prefix
501
+ // stays byte-identical and preserves the Anthropic prompt cache.
502
+ function loadDistillationsCached(
503
+ projectPath: string,
504
+ sessionID: string,
505
+ messages: MessageWithParts[],
506
+ sessState: SessionState,
507
+ ): Distillation[] {
508
+ // Find the last user message ID in the input
509
+ let lastUserMsgId: string | null = null;
510
+ for (let i = messages.length - 1; i >= 0; i--) {
511
+ if (messages[i].info.role === "user") {
512
+ lastUserMsgId = messages[i].info.id;
513
+ break;
514
+ }
515
+ }
516
+
517
+ const snapshot = sessState.distillationSnapshot;
518
+
519
+ // Cache hit: same user message = still in the same tool-call chain
520
+ if (snapshot && snapshot.lastUserMsgId === lastUserMsgId) {
521
+ return snapshot.rows;
522
+ }
523
+
524
+ // Cache miss: new user message (turn boundary), first call, or post-idle
525
+ const rows = loadDistillations(projectPath, sessionID);
526
+ sessState.distillationSnapshot = { rows, lastUserMsgId };
527
+
528
+ log.info(
529
+ `distillation refresh: ${rows.length} rows` +
530
+ ` (user msg ${lastUserMsgId?.substring(0, 16) ?? "none"})`,
531
+ );
532
+
533
+ return rows;
534
+ }
535
+
428
536
  // Strip all <system-reminder>...</system-reminder> blocks from message text.
429
537
  // For the user-message wrapper pattern, extracts the actual user text.
430
538
  // For all other reminders (build-switch, plan reminders, etc.), drops them entirely.
@@ -534,6 +642,15 @@ function simpleHash(str: string): number {
534
642
  return hash;
535
643
  }
536
644
 
645
+ /** Parsed read-tool input: file path plus optional line range. */
646
+ type ReadRange = {
647
+ path: string;
648
+ /** 1-based start line. undefined = from beginning. */
649
+ offset: number | undefined;
650
+ /** Number of lines to read. undefined = to end. */
651
+ limit: number | undefined;
652
+ };
653
+
537
654
  /** Extract file path from a tool's input JSON.
538
655
  * Handles common formats: {"path": "/foo.ts"}, {"filePath": "/foo.ts"},
539
656
  * and plain text fallback. */
@@ -548,10 +665,72 @@ function extractFilePath(input: string): string | undefined {
548
665
  }
549
666
  }
550
667
 
668
+ /** Extract file path + line range from a read tool's input. */
669
+ function extractReadRange(input: string): ReadRange | undefined {
670
+ try {
671
+ const parsed = JSON.parse(input);
672
+ const path = parsed.path || parsed.filePath || parsed.file;
673
+ if (!path) return undefined;
674
+ const offset = typeof parsed.offset === "number" ? parsed.offset : undefined;
675
+ const limit = typeof parsed.limit === "number" ? parsed.limit : undefined;
676
+ return { path, offset, limit };
677
+ } catch {
678
+ const match = input.match(/(?:[\w.-]+\/)+[\w.-]+\.\w{1,5}/);
679
+ if (!match) return undefined;
680
+ return { path: match[0], offset: undefined, limit: undefined };
681
+ }
682
+ }
683
+
684
+ /**
685
+ * Does `later` cover the line range of `earlier`?
686
+ *
687
+ * Coverage rules:
688
+ * - Full-file read (no offset/limit) covers everything for the same path.
689
+ * - A ranged read covers another ranged read when its [offset, offset+limit)
690
+ * interval is a superset of (or equal to) the other's interval.
691
+ * - A ranged read does NOT cover a full-file read.
692
+ */
693
+ export function laterReadCovers(later: ReadRange, earlier: ReadRange): boolean {
694
+ if (later.path !== earlier.path) return false;
695
+
696
+ // Full-file read covers everything for the same path.
697
+ if (later.offset === undefined && later.limit === undefined) return true;
698
+
699
+ // Later is a ranged read but earlier is full-file — can't cover.
700
+ if (earlier.offset === undefined && earlier.limit === undefined) return false;
701
+
702
+ // Both have ranges. Compute intervals.
703
+ const laterStart = later.offset ?? 1;
704
+ const earlierStart = earlier.offset ?? 1;
705
+
706
+ // An open-ended later read (no limit) covers if its start <= earlier start.
707
+ if (later.limit === undefined) return laterStart <= earlierStart;
708
+
709
+ // Earlier is open-ended but later isn't — later can't cover infinite range.
710
+ if (earlier.limit === undefined) return false;
711
+
712
+ // Both bounded: [start, start+limit) superset check.
713
+ const laterEnd = laterStart + later.limit;
714
+ const earlierEnd = earlierStart + earlier.limit;
715
+ return laterStart <= earlierStart && laterEnd >= earlierEnd;
716
+ }
717
+
718
+ /** Format a range label for dedup annotations. */
719
+ function rangeLabel(range: ReadRange): string {
720
+ if (range.offset !== undefined && range.limit !== undefined) {
721
+ return ` lines ${range.offset}-${range.offset + range.limit - 1}`;
722
+ }
723
+ if (range.offset !== undefined) {
724
+ return ` from line ${range.offset}`;
725
+ }
726
+ return "";
727
+ }
728
+
551
729
  /** Annotation for deduplicated tool output — follows the toolStripAnnotation() pattern. */
552
- function dedupAnnotation(toolName: string, filePath?: string): string {
730
+ function dedupAnnotation(toolName: string, filePath?: string, range?: ReadRange): string {
553
731
  if (filePath) {
554
- return `[earlier version of ${filePath} see latest read below for current content]`;
732
+ const rl = range ? rangeLabel(range) : "";
733
+ return `[earlier read of ${filePath}${rl} — see latest read below for current content]`;
555
734
  }
556
735
  return `[duplicate output — same content as later ${toolName} in this session — use recall for details]`;
557
736
  }
@@ -563,7 +742,9 @@ function dedupAnnotation(toolName: string, filePath?: string): string {
563
742
  *
564
743
  * Deduplicates by:
565
744
  * 1. Exact content hash: identical tool outputs (same file read twice, same command output)
566
- * 2. Same-file reads: read_file outputs for the same path (content may differ due to edits)
745
+ * 2. Range-aware file reads: read_file/read outputs for the same path where a later
746
+ * read covers the same or wider line range (full-file covers everything; a ranged
747
+ * read only covers another ranged read when its interval is a superset).
567
748
  *
568
749
  * The current turn (from currentTurnIdx onward) is never touched — the model
569
750
  * needs full context for its active work. Tool parts are never removed entirely;
@@ -577,11 +758,13 @@ export function deduplicateToolOutputs(
577
758
  ): MessageWithParts[] {
578
759
  // Track latest occurrence: contentKey → latest message index
579
760
  const contentLatest = new Map<string, number>();
580
- // Track latest read by file path: "read:path" → latest message index
581
- const fileLatest = new Map<string, number>();
582
761
 
583
- // Also include current-turn reads in the "latest" tracking so we properly
584
- // recognize earlier reads as duplicates of current-turn content.
762
+ // Track all read ranges per file path, ordered by message index (ascending).
763
+ // Each entry records the range and the message index so the second pass can
764
+ // check whether any later read covers the current read's range.
765
+ const fileReads = new Map<string, Array<{ range: ReadRange; msgIdx: number }>>();
766
+
767
+ // First pass: scan all messages (including current turn) to build tracking maps.
585
768
  for (let i = 0; i < messages.length; i++) {
586
769
  for (const part of messages[i].parts) {
587
770
  if (!isToolPart(part) || part.state.status !== "completed") continue;
@@ -591,13 +774,20 @@ export function deduplicateToolOutputs(
591
774
  const key = `${part.tool}:${simpleHash(output)}`;
592
775
  contentLatest.set(key, i);
593
776
 
594
- // For read-type tools, also track by file path
777
+ // For read-type tools, record the full range info
595
778
  if (part.tool === "read_file" || part.tool === "read") {
596
779
  const inputStr = typeof part.state.input === "string"
597
780
  ? part.state.input
598
781
  : JSON.stringify(part.state.input);
599
- const fp = extractFilePath(inputStr);
600
- if (fp) fileLatest.set(`read:${fp}`, i);
782
+ const range = extractReadRange(inputStr);
783
+ if (range) {
784
+ let entries = fileReads.get(range.path);
785
+ if (!entries) {
786
+ entries = [];
787
+ fileReads.set(range.path, entries);
788
+ }
789
+ entries.push({ range, msgIdx: i });
790
+ }
601
791
  }
602
792
  }
603
793
  }
@@ -617,19 +807,31 @@ export function deduplicateToolOutputs(
617
807
  const contentKey = `${part.tool}:${simpleHash(output)}`;
618
808
  const isLatestContent = contentLatest.get(contentKey) === msgIdx;
619
809
 
620
- // Check file-path dedup for read tools: is this the latest read of this file?
621
- let filePath: string | undefined;
622
- let isLatestFile = true;
810
+ // Check range-aware file dedup for read tools: does any later read
811
+ // of the same file cover this read's range?
812
+ let readRange: ReadRange | undefined;
813
+ let coveredByLater = false;
623
814
  if (part.tool === "read_file" || part.tool === "read") {
624
815
  const inputStr = typeof part.state.input === "string"
625
816
  ? part.state.input
626
817
  : JSON.stringify(part.state.input);
627
- filePath = extractFilePath(inputStr);
628
- if (filePath) isLatestFile = fileLatest.get(`read:${filePath}`) === msgIdx;
818
+ readRange = extractReadRange(inputStr);
819
+ if (readRange) {
820
+ const entries = fileReads.get(readRange.path);
821
+ if (entries) {
822
+ // Check if any entry with a higher message index covers this range
823
+ for (const entry of entries) {
824
+ if (entry.msgIdx > msgIdx && laterReadCovers(entry.range, readRange)) {
825
+ coveredByLater = true;
826
+ break;
827
+ }
828
+ }
829
+ }
830
+ }
629
831
  }
630
832
 
631
- // Keep if this is both the latest content AND latest file read (or not a read tool)
632
- if (isLatestContent && isLatestFile) return part;
833
+ // Keep if this is both the latest content AND not covered by a later read
834
+ if (isLatestContent && !coveredByLater) return part;
633
835
 
634
836
  // This is a duplicate — replace with compact annotation
635
837
  partsChanged = true;
@@ -637,7 +839,7 @@ export function deduplicateToolOutputs(
637
839
  ...part,
638
840
  state: {
639
841
  ...part.state,
640
- output: dedupAnnotation(part.tool, filePath),
842
+ output: dedupAnnotation(part.tool, readRange?.path, readRange),
641
843
  },
642
844
  } as LorePart;
643
845
  });
@@ -671,8 +873,14 @@ function sanitizeToolParts(
671
873
  if (status === "completed" || status === "error") return part;
672
874
 
673
875
  // pending or running → convert to error so SDK emits tool_result
876
+ // Use a deterministic timestamp (0) instead of Date.now() so that
877
+ // repeated transform() calls on the same stale pending part produce
878
+ // identical bytes. OpenCode's prompt-loop cache fix (e148f00aa)
879
+ // preserves old pending parts across iterations; Date.now() here
880
+ // would re-stamp them each call → different bytes → cache bust.
674
881
  partsChanged = true;
675
- const now = Date.now();
882
+ const existingStart =
883
+ "time" in part.state ? part.state.time.start : 0;
676
884
  return {
677
885
  ...part,
678
886
  state: {
@@ -682,8 +890,8 @@ function sanitizeToolParts(
682
890
  metadata:
683
891
  "metadata" in part.state ? part.state.metadata : undefined,
684
892
  time: {
685
- start: "time" in part.state ? part.state.time.start : now,
686
- end: now,
893
+ start: existingStart,
894
+ end: existingStart,
687
895
  },
688
896
  },
689
897
  } as LorePart;
@@ -728,134 +936,6 @@ function stripToTextOnly(parts: LorePart[]): LorePart[] {
728
936
  return stripped;
729
937
  }
730
938
 
731
- // --- Phase 2: Temporal anchoring at read time ---
732
-
733
- function formatRelativeTime(date: Date, now: Date): string {
734
- const diffMs = now.getTime() - date.getTime();
735
- const diffDays = Math.floor(diffMs / (1000 * 60 * 60 * 24));
736
- if (diffDays === 0) return "today";
737
- if (diffDays === 1) return "yesterday";
738
- if (diffDays < 7) return `${diffDays} days ago`;
739
- if (diffDays < 14) return "1 week ago";
740
- if (diffDays < 30) return `${Math.floor(diffDays / 7)} weeks ago`;
741
- if (diffDays < 60) return "1 month ago";
742
- if (diffDays < 365) return `${Math.floor(diffDays / 30)} months ago`;
743
- return `${Math.floor(diffDays / 365)} year${Math.floor(diffDays / 365) > 1 ? "s" : ""} ago`;
744
- }
745
-
746
- function parseDateFromContent(s: string): Date | null {
747
- // "Month Day, Year" e.g. "January 15, 2026"
748
- const simple = s.match(/([A-Z][a-z]+)\s+(\d{1,2}),?\s+(\d{4})/);
749
- if (simple) {
750
- const d = new Date(`${simple[1]} ${simple[2]}, ${simple[3]}`);
751
- if (!isNaN(d.getTime())) return d;
752
- }
753
- // "Month D-D, Year" range — use start
754
- const range = s.match(/([A-Z][a-z]+)\s+(\d{1,2})-\d{1,2},?\s+(\d{4})/);
755
- if (range) {
756
- const d = new Date(`${range[1]} ${range[2]}, ${range[3]}`);
757
- if (!isNaN(d.getTime())) return d;
758
- }
759
- // "late/early/mid Month Year"
760
- const vague = s.match(/(late|early|mid)[- ]?([A-Z][a-z]+)\s+(\d{4})/i);
761
- if (vague) {
762
- const day =
763
- vague[1].toLowerCase() === "early"
764
- ? 7
765
- : vague[1].toLowerCase() === "late"
766
- ? 23
767
- : 15;
768
- const d = new Date(`${vague[2]} ${day}, ${vague[3]}`);
769
- if (!isNaN(d.getTime())) return d;
770
- }
771
- return null;
772
- }
773
-
774
- // Expand "(meaning DATE)" and "(estimated DATE)" annotations with a relative offset.
775
- // Past future-intent lines get "(likely already happened)" appended.
776
- function expandInlineEstimatedDates(text: string, now: Date): string {
777
- return text.replace(
778
- /\(((?:meaning|estimated)\s+)([^)]+\d{4})\)/gi,
779
- (match, prefix: string, dateContent: string) => {
780
- const d = parseDateFromContent(dateContent);
781
- if (!d) return match;
782
- const rel = formatRelativeTime(d, now);
783
- // Detect future-intent by looking backwards on the same line
784
- const matchIdx = text.indexOf(match);
785
- const lineStart = text.lastIndexOf("\n", matchIdx) + 1;
786
- const linePrefix = text.slice(lineStart, matchIdx);
787
- const isFutureIntent =
788
- /\b(?:will|plans?\s+to|planning\s+to|going\s+to|intends?\s+to)\b/i.test(
789
- linePrefix,
790
- );
791
- if (d < now && isFutureIntent)
792
- return `(${prefix}${dateContent} — ${rel}, likely already happened)`;
793
- return `(${prefix}${dateContent} — ${rel})`;
794
- },
795
- );
796
- }
797
-
798
- // Add relative time annotations to "Date: Month D, Year" section headers
799
- // and gap markers between non-consecutive dates.
800
- function addRelativeTimeToObservations(text: string, now: Date): string {
801
- // First pass: expand inline "(meaning DATE)" annotations
802
- const withInline = expandInlineEstimatedDates(text, now);
803
-
804
- // Second pass: annotate date headers and add gap markers
805
- const dateHeaderRe = /^(Date:\s*)([A-Z][a-z]+ \d{1,2}, \d{4})$/gm;
806
- const found: Array<{
807
- index: number;
808
- date: Date;
809
- full: string;
810
- prefix: string;
811
- ds: string;
812
- }> = [];
813
- let m: RegExpExecArray | null;
814
- while ((m = dateHeaderRe.exec(withInline)) !== null) {
815
- const d = new Date(m[2]);
816
- if (!isNaN(d.getTime()))
817
- found.push({
818
- index: m.index,
819
- date: d,
820
- full: m[0],
821
- prefix: m[1],
822
- ds: m[2],
823
- });
824
- }
825
- if (!found.length) return withInline;
826
-
827
- let result = "";
828
- let last = 0;
829
- for (let i = 0; i < found.length; i++) {
830
- const curr = found[i];
831
- const prev = found[i - 1];
832
- result += withInline.slice(last, curr.index);
833
- // Gap marker between non-consecutive dates
834
- if (prev) {
835
- const gapDays = Math.floor(
836
- (curr.date.getTime() - prev.date.getTime()) / 86400000,
837
- );
838
- if (gapDays > 1) {
839
- const gap =
840
- gapDays < 7
841
- ? `[${gapDays} days later]`
842
- : gapDays < 14
843
- ? "[1 week later]"
844
- : gapDays < 30
845
- ? `[${Math.floor(gapDays / 7)} weeks later]`
846
- : gapDays < 60
847
- ? "[1 month later]"
848
- : `[${Math.floor(gapDays / 30)} months later]`;
849
- result += `\n${gap}\n\n`;
850
- }
851
- }
852
- result += `${curr.prefix}${curr.ds} (${formatRelativeTime(curr.date, now)})`;
853
- last = curr.index + curr.full.length;
854
- }
855
- result += withInline.slice(last);
856
- return result;
857
- }
858
-
859
939
  // Build synthetic user/assistant message pair wrapping formatted distillation text.
860
940
  // Shared by the cached and non-cached prefix paths.
861
941
  function buildPrefixMessages(formatted: string): MessageWithParts[] {
@@ -917,12 +997,7 @@ function buildPrefixMessages(formatted: string): MessageWithParts[] {
917
997
  // Non-cached path — used by layers 2-4 which already cause full cache invalidation.
918
998
  function distilledPrefix(distillations: Distillation[]): MessageWithParts[] {
919
999
  if (!distillations.length) return [];
920
- const now = new Date();
921
- const annotated = distillations.map((d) => ({
922
- ...d,
923
- observations: addRelativeTimeToObservations(d.observations, now),
924
- }));
925
- const formatted = formatDistillations(annotated);
1000
+ const formatted = formatDistillations(distillations);
926
1001
  if (!formatted) return [];
927
1002
  return buildPrefixMessages(formatted);
928
1003
  }
@@ -995,12 +1070,7 @@ function distilledPrefixCached(
995
1070
 
996
1071
  // New rows appended — render only the delta and append to cached text
997
1072
  const newRows = distillations.slice(prefixCache!.rowCount);
998
- const now = new Date();
999
- const annotated = newRows.map((d) => ({
1000
- ...d,
1001
- observations: addRelativeTimeToObservations(d.observations, now),
1002
- }));
1003
- const deltaText = formatDistillations(annotated);
1073
+ const deltaText = formatDistillations(newRows);
1004
1074
 
1005
1075
  if (deltaText) {
1006
1076
  const fullText = prefixCache!.cachedText + "\n\n" + deltaText;
@@ -1019,12 +1089,7 @@ function distilledPrefixCached(
1019
1089
  }
1020
1090
 
1021
1091
  // Full re-render: first call or meta-distillation rewrote rows
1022
- const now = new Date();
1023
- const annotated = distillations.map((d) => ({
1024
- ...d,
1025
- observations: addRelativeTimeToObservations(d.observations, now),
1026
- }));
1027
- const fullText = formatDistillations(annotated);
1092
+ const fullText = formatDistillations(distillations);
1028
1093
  if (!fullText) {
1029
1094
  sessState.prefixCache = null;
1030
1095
  return { messages: [], tokens: 0 };
@@ -1053,6 +1118,16 @@ export function resetPrefixCache(sessionID?: string) {
1053
1118
  }
1054
1119
  }
1055
1120
 
1121
+ // For testing only — reset distillation snapshot for a specific session (or all)
1122
+ export function resetDistillationSnapshot(sessionID?: string) {
1123
+ if (sessionID) {
1124
+ const state = sessionStates.get(sessionID);
1125
+ if (state) state.distillationSnapshot = null;
1126
+ } else {
1127
+ for (const state of sessionStates.values()) state.distillationSnapshot = null;
1128
+ }
1129
+ }
1130
+
1056
1131
  // --- Approach B: Lazy raw window eviction ---
1057
1132
  //
1058
1133
  // Tracks the ID of the first (oldest) message in the previous raw window.
@@ -1072,8 +1147,14 @@ export function resetPrefixCache(sessionID?: string) {
1072
1147
 
1073
1148
  type RawWindowCache = {
1074
1149
  sessionID: string;
1075
- /** ID of the first message in the pinned raw window */
1076
- firstMessageID: string;
1150
+ /** Number of raw messages (excluding prefix) in the pinned window at creation. */
1151
+ pinnedRawCount: number;
1152
+ /** Total number of messages in the input array when the pin was created.
1153
+ * Used to compute how many new messages were appended since. */
1154
+ pinnedTotalCount: number;
1155
+ /** rawBudget that was in effect when the pin was created — used for the
1156
+ * pin-validity check so that global budget fluctuations don't evict the pin. */
1157
+ pinnedBudget: number;
1077
1158
  };
1078
1159
 
1079
1160
  // For testing only — reset raw window cache state for a specific session (or all)
@@ -1114,36 +1195,63 @@ function tryFitStable(input: {
1114
1195
  rawWindowCache !== null && rawWindowCache.sessionID === input.sessionID;
1115
1196
 
1116
1197
  if (cacheValid) {
1117
- const pinnedIdx = input.messages.findIndex(
1118
- (m) => m.info.id === rawWindowCache!.firstMessageID,
1198
+ // Compute the pinned index from the stored raw count + new message growth.
1199
+ // newMessages = messages appended since pin creation (typically 2 per turn).
1200
+ // The pinned window grows to include them: pinnedRawCount + newMessages.
1201
+ // This is resilient to front-trimming by the host (e.g. OpenCode evicting
1202
+ // old messages) because the offset is relative to the tail.
1203
+ const newMessages = Math.max(0, input.messages.length - rawWindowCache!.pinnedTotalCount);
1204
+ const windowSize = rawWindowCache!.pinnedRawCount + newMessages;
1205
+ const pinnedIdx = Math.max(0, input.messages.length - windowSize);
1206
+
1207
+ // Measure the token cost of the pinned window.
1208
+ const pinnedWindow = input.messages.slice(pinnedIdx);
1209
+ const pinnedTokens = pinnedWindow.reduce(
1210
+ (sum, m) => sum + estimateMessage(m),
1211
+ 0,
1119
1212
  );
1120
1213
 
1121
- if (pinnedIdx !== -1) {
1122
- // Measure the token cost of the pinned window.
1123
- const pinnedWindow = input.messages.slice(pinnedIdx);
1124
- const pinnedTokens = pinnedWindow.reduce(
1125
- (sum, m) => sum + estimateMessage(m),
1126
- 0,
1127
- );
1128
-
1129
- if (pinnedTokens <= input.rawBudget) {
1130
- // Pinned window still fits — keep it. Apply system-reminder cleanup
1131
- // only (strip:"none" is the layer-1 mode), returning the same message
1132
- // object references wherever nothing changed.
1133
- const processed = pinnedWindow.map((msg) => {
1134
- const parts = cleanParts(msg.parts);
1135
- return parts !== msg.parts ? { info: msg.info, parts } : msg;
1136
- });
1137
- const total = input.prefixTokens + pinnedTokens;
1138
- return {
1139
- messages: [...input.prefix, ...processed],
1140
- distilledTokens: input.prefixTokens,
1141
- rawTokens: pinnedTokens,
1142
- totalTokens: total,
1214
+ // Use the higher of the pin-time and current raw budgets, widened by a 15%
1215
+ // hysteresis margin so that small budget fluctuations from overhead drift
1216
+ // and deduplicateToolOutputs() token-estimate changes don't evict the pin.
1217
+ // The high-water mark (max of pinned and current budgets) prevents overhead
1218
+ // EMA drift from shrinking the effective budget below what was valid when
1219
+ // the pin was created — the budget shrank due to overhead drift, not because
1220
+ // the context limit changed.
1221
+ const highWaterBudget = Math.max(rawWindowCache!.pinnedBudget, input.rawBudget);
1222
+ const effectiveBudget = highWaterBudget * 1.15;
1223
+ if (pinnedTokens <= effectiveBudget) {
1224
+ // Pinned window still fits within the hysteresis margin of the high-water
1225
+ // budget. Re-pin at the current budget when the old hysteresis is exceeded
1226
+ // so that next turn's check uses a fresh baseline.
1227
+ if (pinnedTokens > rawWindowCache!.pinnedBudget * 1.15) {
1228
+ input.sessState.rawWindowCache = {
1229
+ ...rawWindowCache!,
1230
+ pinnedRawCount: pinnedWindow.length,
1231
+ pinnedTotalCount: input.messages.length,
1232
+ pinnedBudget: input.rawBudget,
1143
1233
  };
1144
1234
  }
1145
- // Pinned window is too large — fall through to the normal scan below.
1235
+ // Apply system-reminder cleanup only (strip:"none" is the layer-1 mode),
1236
+ // returning the same message object references wherever nothing changed.
1237
+ const processed = pinnedWindow.map((msg) => {
1238
+ const parts = cleanParts(msg.parts);
1239
+ return parts !== msg.parts ? { info: msg.info, parts } : msg;
1240
+ });
1241
+ const total = input.prefixTokens + pinnedTokens;
1242
+ return {
1243
+ messages: [...input.prefix, ...processed],
1244
+ distilledTokens: input.prefixTokens,
1245
+ rawTokens: pinnedTokens,
1246
+ totalTokens: total,
1247
+ };
1146
1248
  }
1249
+ // Pinned window is too large for both budgets — fall through to rescan.
1250
+ log.info(
1251
+ `pin-overflow: session=${input.sessionID} pinnedTokens=${pinnedTokens} ` +
1252
+ `pinnedBudget=${rawWindowCache!.pinnedBudget} effectiveBudget=${Math.round(effectiveBudget)} ` +
1253
+ `currentRawBudget=${input.rawBudget} windowSize=${pinnedWindow.length}`,
1254
+ );
1147
1255
  }
1148
1256
 
1149
1257
  // Normal backward scan to find the tightest fitting cutoff.
@@ -1157,13 +1265,18 @@ function tryFitStable(input: {
1157
1265
  });
1158
1266
 
1159
1267
  if (result) {
1160
- // Update the raw window cache: the first non-prefix message is the oldest
1161
- // raw message in the new window. Pin to its ID for the next turn.
1162
- const rawStart = result.messages[input.prefix.length];
1163
- if (rawStart) {
1268
+ // Update the raw window cache: store the raw message count and total message
1269
+ // count so we can reconstruct the window position on the next turn even after
1270
+ // front-trimming by the host (e.g. OpenCode evicting old messages).
1271
+ // Snapshot the current rawBudget so future pin checks use the budget that
1272
+ // was in effect when this window was chosen (Option 1: snapshot isolation).
1273
+ const rawMessageCount = result.messages.length - input.prefix.length;
1274
+ if (rawMessageCount > 0) {
1164
1275
  input.sessState.rawWindowCache = {
1165
1276
  sessionID: input.sessionID,
1166
- firstMessageID: rawStart.info.id,
1277
+ pinnedRawCount: rawMessageCount,
1278
+ pinnedTotalCount: input.messages.length,
1279
+ pinnedBudget: input.rawBudget,
1167
1280
  };
1168
1281
  }
1169
1282
  }
@@ -1200,21 +1313,27 @@ function transformInner(input: {
1200
1313
  }): TransformResult {
1201
1314
  const cfg = config();
1202
1315
  const overhead = getOverhead();
1316
+
1317
+ // --- Session state (must precede budget computation) ---
1318
+ const sid = input.sessionID ?? input.messages[0]?.info.sessionID;
1319
+ const sessState = sid ? getSessionState(sid) : makeSessionState();
1320
+
1203
1321
  // Usable = full context minus output reservation minus fixed overhead (system + tools)
1204
1322
  // minus LTM tokens already injected into the system prompt this turn.
1323
+ // Read LTM tokens from per-session state to avoid cross-session contamination.
1324
+ const sessLtmTokens = sid ? sessState.ltmTokens : ltmTokensFallback;
1205
1325
  const usable = Math.max(
1206
1326
  0,
1207
- contextLimit - outputReserved - overhead - ltmTokens,
1327
+ contextLimit - outputReserved - overhead - sessLtmTokens,
1208
1328
  );
1209
1329
  const distilledBudget = Math.floor(usable * cfg.budget.distilled);
1210
- const rawBudget = Math.floor(usable * cfg.budget.raw);
1330
+ // Base raw budget. May be overridden below for post-idle compact mode.
1331
+ let rawBudget = Math.floor(usable * cfg.budget.raw);
1211
1332
 
1212
1333
  // --- Force escalation (reactive error recovery) ---
1213
1334
  // When the API previously rejected with "prompt is too long", skip layers
1214
1335
  // below the forced minimum to ensure enough trimming on the next attempt.
1215
1336
  // One-shot: consumed here and reset to 0 (both in-memory and on disk).
1216
- const sid = input.sessionID ?? input.messages[0]?.info.sessionID;
1217
- const sessState = sid ? getSessionState(sid) : makeSessionState();
1218
1337
  let effectiveMinLayer = sessState.forceMinLayer;
1219
1338
  sessState.forceMinLayer = 0;
1220
1339
  if (sid && effectiveMinLayer > 0) saveForceMinLayer(sid, 0);
@@ -1246,17 +1365,43 @@ function transformInner(input: {
1246
1365
  }
1247
1366
 
1248
1367
  // --- Sticky layer guard (Option C) ---
1249
- // After a compressed turn (layer >= 1), don't allow layer 0 re-entry until
1368
+ // After a compressed turn (layer >= N), don't allow re-entry below N until
1250
1369
  // the session genuinely shrinks (e.g. after compaction deletes messages).
1251
- // Prevents the calibration oscillation: a compressed turn stores
1252
- // lastKnownInput=100K for a 50-message window, but the next turn's
1253
- // input.messages has 300 raw messages. The delta estimation treats the 250
1254
- // evicted messages as "new" and undercounts their tokens, producing an
1255
- // expectedInput that fits in layer 0 but the actual tokens are ~190K.
1370
+ // Prevents calibration oscillation AND layer-transition cache busts:
1371
+ // - 0→1→0: compressed turn stores lastKnownInput=100K for a 50-message
1372
+ // window, next turn's 300 raw messages produce an undercounted
1373
+ // expectedInput that "fits" in layer 0 but actually overflows.
1374
+ // - 1→2→1: layer 2 strips tool outputs (different bytes), bouncing back
1375
+ // to layer 1 restores them (different bytes again) → two busts.
1376
+ // Pinning to the *actual* last layer prevents all downward oscillation.
1256
1377
  // Only applied when calibrated (same session, per-session state) to avoid
1257
1378
  // affecting other sessions including worker sessions.
1258
1379
  if (calibrated && sessState.lastLayer >= 1 && input.messages.length >= sessState.lastKnownMessageCount) {
1380
+ effectiveMinLayer = Math.max(effectiveMinLayer, sessState.lastLayer) as SafetyLayer;
1381
+ }
1382
+
1383
+ // --- Post-idle compact layer ---
1384
+ // When the cache just went cold (onIdleResume fired), skip layer 0 full-raw
1385
+ // passthrough and use a tighter raw budget. Rationale: the entire context is
1386
+ // a cache WRITE regardless — a smaller total costs less to write, and
1387
+ // aggressive idle distillation already captured older history in the prefix.
1388
+ // The flag is one-shot: consumed here and reset so subsequent turns use
1389
+ // normal budgets once the cache is warm.
1390
+ const postIdleCompact = sessState.postIdleCompact;
1391
+ if (postIdleCompact) {
1392
+ sessState.postIdleCompact = false;
1393
+ // Skip layer 0 — don't pass through all raw messages on a cold cache.
1259
1394
  effectiveMinLayer = Math.max(effectiveMinLayer, 1) as SafetyLayer;
1395
+ // Use a tighter raw budget: 20% of usable instead of the normal 40%.
1396
+ // The distilled prefix covers the older history; the raw window only
1397
+ // needs the current turn + minimal recent context. This reduces the
1398
+ // total cold-cache write cost by up to 20% of usable (~29K tokens on
1399
+ // a 200K context model).
1400
+ rawBudget = Math.floor(usable * 0.20);
1401
+ log.info(
1402
+ `post-idle compact: session=${sid} rawBudget=${rawBudget}` +
1403
+ ` (${Math.floor(usable * cfg.budget.raw)}→${rawBudget})`,
1404
+ );
1260
1405
  }
1261
1406
 
1262
1407
  let expectedInput: number;
@@ -1269,12 +1414,12 @@ function transformInner(input: {
1269
1414
  ? input.messages.filter((m) => !sessState.lastWindowMessageIDs.has(m.info.id))
1270
1415
  : input.messages.slice(-Math.max(0, input.messages.length - sessState.lastKnownMessageCount));
1271
1416
  const newMsgTokens = newMessages.reduce((s, m) => s + estimateMessage(m), 0);
1272
- const ltmDelta = ltmTokens - sessState.lastKnownLtm;
1417
+ const ltmDelta = sessLtmTokens - sessState.lastKnownLtm;
1273
1418
  expectedInput = sessState.lastKnownInput + newMsgTokens + ltmDelta;
1274
1419
  } else {
1275
1420
  // First turn or session change: fall back to chars/3 estimate + overhead.
1276
1421
  const messageTokens = input.messages.reduce((s, m) => s + estimateMessage(m), 0);
1277
- expectedInput = messageTokens + overhead + ltmTokens;
1422
+ expectedInput = messageTokens + overhead + sessLtmTokens;
1278
1423
  }
1279
1424
 
1280
1425
  // When uncalibrated, apply safety multiplier to the layer-0 decision too.
@@ -1299,8 +1444,8 @@ function transformInner(input: {
1299
1444
  // All messages fit — return unmodified to preserve append-only prompt-cache pattern.
1300
1445
  // Raw messages are strictly better context than lossy distilled summaries.
1301
1446
  const messageTokens = calibrated
1302
- ? expectedInput - (ltmTokens - sessState.lastKnownLtm) // approximate raw portion
1303
- : expectedInput - overhead - ltmTokens;
1447
+ ? expectedInput - (sessLtmTokens - sessState.lastKnownLtm) // approximate raw portion
1448
+ : expectedInput - overhead - sessLtmTokens;
1304
1449
  return {
1305
1450
  messages: input.messages,
1306
1451
  layer: 0,
@@ -1323,7 +1468,9 @@ function transformInner(input: {
1323
1468
  const dedupMessages = deduplicateToolOutputs(input.messages, turnStart);
1324
1469
 
1325
1470
 
1326
- const distillations = sid ? loadDistillations(input.projectPath, sid) : [];
1471
+ const distillations = sid
1472
+ ? loadDistillationsCached(input.projectPath, sid, input.messages, sessState)
1473
+ : [];
1327
1474
 
1328
1475
  // Layer 1 uses the append-only cached prefix (Approach C) to keep the
1329
1476
  // distilled content byte-identical between distillation runs, preserving
@@ -1503,19 +1650,43 @@ export function transform(input: {
1503
1650
  // result fields above so a thrown transformInner doesn't update it.
1504
1651
  state.lastTurnAt = Date.now();
1505
1652
 
1506
- // --- Cache-bust diagnostics (LORE_DEBUG only) ---
1653
+ // --- Cache-bust diagnostics ---
1507
1654
  // Track byte-identity of the message prefix. When the prefix hash changes
1508
1655
  // between consecutive turns, it means Anthropic's prompt cache is invalidated
1509
1656
  // and the entire context is re-written (12.5× cache-read price). This helps
1510
1657
  // identify which code paths are breaking byte-identity.
1511
- const prefixIds = result.messages.slice(0, 5).map((m) => m.info.id).join(",");
1512
- const prefixHash = `${result.layer}:${prefixIds}`;
1658
+ //
1659
+ // Use a content-based fingerprint (role + text snippet) rather than message
1660
+ // IDs, since IDs can be unstable (gateway generates fresh UUIDs, OpenCode
1661
+ // may regenerate messages in-place). Content hashes are a better proxy for
1662
+ // Anthropic's actual byte-identity cache.
1663
+ const prefixFingerprint = result.messages.slice(0, 5).map((m) => {
1664
+ const text = m.parts
1665
+ .map((p) => {
1666
+ if (isTextPart(p)) return p.text?.slice(0, 40) ?? "";
1667
+ if (isReasoningPart(p)) return p.text?.slice(0, 40) ?? "";
1668
+ return p.type;
1669
+ })
1670
+ .join("|");
1671
+ return `${m.info.role}:${text.slice(0, 60)}`;
1672
+ }).join(",");
1673
+ const prefixHash = `${result.layer}:${prefixFingerprint}`;
1674
+ state.transformCount++;
1513
1675
  if (state.lastPrefixHash && state.lastPrefixHash !== prefixHash) {
1676
+ state.bustCount++;
1677
+ const rate = state.bustCount / state.transformCount;
1514
1678
  log.info(
1515
- `cache-bust detected: session=${sid} layer=${state.lastLayer}→${result.layer}` +
1679
+ `cache-bust #${state.bustCount} (${(rate * 100).toFixed(0)}%): session=${sid}` +
1680
+ ` layer=${state.lastLayer}→${result.layer}` +
1516
1681
  ` msgs=${state.lastTransformedCount}→${result.messages.length}` +
1517
1682
  ` prefix=${state.lastPrefixHash.slice(0, 30)}→${prefixHash.slice(0, 30)}`,
1518
1683
  );
1684
+ if (state.transformCount >= 20 && rate > 0.5) {
1685
+ log.warn(
1686
+ `HIGH BUST RATE: session ${sid} has ${(rate * 100).toFixed(0)}% bust rate` +
1687
+ ` (${state.bustCount}/${state.transformCount} transforms)`,
1688
+ );
1689
+ }
1519
1690
  }
1520
1691
  state.lastPrefixHash = prefixHash;
1521
1692