@loreai/core 0.20.0 → 0.20.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/gradient.ts CHANGED
@@ -37,167 +37,129 @@ function estimateMessage(msg: MessageWithParts): number {
37
37
  let contextLimit = 200_000; // sensible default
38
38
  let outputReserved = 32_000;
39
39
 
40
- // Cost-aware layer-0 token cap. When > 0, the layer-0 passthrough gate uses
41
- // min(maxInput, maxLayer0Tokens) instead of maxInput alone. Derived from the
42
- // model's cache-read cost: cap = targetCostPerTurn / costPerToken. This prevents
43
- // expensive models from sending huge contexts at layer 0, where cache-read costs
44
- // compound linearly across turns. Set to 0 to disable (use full context).
45
- let maxLayer0Tokens = 0;
46
-
47
- const MIN_LAYER0_FLOOR = 40_000;
48
-
49
40
  // ---------------------------------------------------------------------------
50
- // Cost-aware context token cap (layer 1+)
41
+ // Tier-based context management
42
+ //
43
+ // Three quality tiers based on empirical model effectiveness:
44
+ // Tier 1: 0 – 200K tokens (best quality, preferred operating range)
45
+ // Tier 2: 200K – 500K tokens (acceptable quality)
46
+ // Tier 3: 500K – model context limit (degraded, compress when economical)
51
47
  //
52
- // Limits total tokens (distilled + raw) to keep per-bust cache write cost
53
- // bounded. For opus-4-6 at $6.25/M write, a $1.00 target yields a 160K cap.
54
- // For sonnet-4 at $3.75/M write, the cap is 267K (effectively uncapped).
48
+ // At each tier boundary, a per-turn economic comparison decides whether to
49
+ // compress (bust the cache) or continue growing:
50
+ // bustCost = compressedSize × cacheWriteCostPerToken
51
+ // continueCost = currentSize × cacheReadCostPerToken
52
+ // If bustCost ≥ threshold × continueCost, don't compress — reads are cheap.
55
53
  //
56
- // The cap is further adjusted dynamically per session via bust rate EMA and
57
- // inter-bust interval tracking: tighten when busts are frequent, relax when
58
- // the cache is working well. Asymmetric rates: tighten fast, relax slowly.
54
+ // Rolling bust detection: if 5+ consecutive turns bust the cache, stop trying
55
+ to compress — something structural is causing busts, and compression just
56
+ // adds cost on top.
59
57
  // ---------------------------------------------------------------------------
60
58
 
61
- /** Static ceiling for total context tokens, derived from model pricing.
62
- * 0 = disabled (no cap). Set via setMaxContextTokens(). */
63
- let maxContextTokensCeiling = 0;
59
+ /** Tier boundary tokens (declared `as const`; not runtime-configurable). */
60
+ const TIER_BOUNDARIES = [200_000, 500_000] as const;
64
61
 
65
- const MIN_CONTEXT_FLOOR = 130_000;
62
+ /** Cache pricing per token (USD). Set by host adapter via setCachePricing(). */
63
+ let cacheWriteCostPerToken = 0;
64
+ let cacheReadCostPerToken = 0;
66
65
 
67
- /** Compute the context ceiling from a per-bust cost target and cache-write price per token. */
68
- export function computeContextCap(
69
- targetBustCost: number,
70
- cacheWriteCostPerToken: number,
71
- ): number {
72
- if (targetBustCost <= 0 || cacheWriteCostPerToken <= 0) return 0;
73
- return Math.max(MIN_CONTEXT_FLOOR, Math.floor(targetBustCost / cacheWriteCostPerToken));
66
+ /**
67
+ * Set cache pricing for the current model. Called by the host adapter after
68
+ * looking up model cost data. Required for tier-based bust-vs-continue
69
+ * decisions. When not set (both 0), tier decisions fall back to conservative
70
+ * defaults: always compress at tier boundaries.
71
+ */
72
+ export function setCachePricing(writeCost: number, readCost: number) {
73
+ cacheWriteCostPerToken = Math.max(0, writeCost);
74
+ cacheReadCostPerToken = Math.max(0, readCost);
74
75
  }
75
76
 
76
- /** Set the static context ceiling. Called by the host adapter after computing
77
- * from model pricing. The effective per-session cap may be lower due to
78
- * dynamic adaptation (bust rate EMA). */
79
- export function setMaxContextTokens(tokens: number) {
80
- maxContextTokensCeiling = Math.max(0, Math.floor(tokens));
77
+ /** Returns current pricing (for tests). */
78
+ export function getCachePricing(): { write: number; read: number } {
79
+ return { write: cacheWriteCostPerToken, read: cacheReadCostPerToken };
81
80
  }
82
81
 
83
- /** Returns the current static ceiling (for external callers / tests). */
84
- export function getMaxContextTokens(): number {
85
- return maxContextTokensCeiling;
82
+ // Cost-aware layer-0 token cap. When > 0, the layer-0 passthrough gate uses
83
+ // min(maxInput, maxLayer0Tokens) instead of maxInput alone. Derived from the
84
+ // model's cache-read cost: cap = targetCostPerTurn / costPerToken. This prevents
85
+ // expensive models from sending huge contexts at layer 0, where cache-read costs
86
+ // compound linearly across turns. Set to 0 to disable (use full context).
87
+ let maxLayer0Tokens = 0;
88
+
89
+ const MIN_LAYER0_FLOOR = 40_000;
90
+
91
+ /**
92
+ * Decide whether compression is economical at a tier boundary.
93
+ *
94
+ * @param currentTokens - expected input tokens if we stay at the current layer
95
+ * @param compressedTokens - expected tokens after compression
96
+ * @param consecutiveBusts - how many turns in a row we've busted the cache
97
+ * @param threshold - bust cost must be < threshold × continue cost to compress (default 0.85)
98
+ * @returns true if compression is worth it
99
+ */
100
+ export function shouldCompress(
101
+ currentTokens: number,
102
+ compressedTokens: number,
103
+ consecutiveBusts: number,
104
+ threshold = 0.85,
105
+ ): boolean {
106
+ // Rolling bust detection: if we've been busting 5+ turns in a row,
107
+ // stop trying to compress — it's clearly not helping.
108
+ if (consecutiveBusts >= 5) return false;
109
+
110
+ // If no pricing data, fall back to conservative: do NOT compress.
111
+ // Compression busts the cache, which is expensive. Without pricing data
112
+ // we can't prove it's worthwhile, so err on the side of keeping the cache.
113
+ if (cacheWriteCostPerToken <= 0 || cacheReadCostPerToken <= 0) return false;
114
+
115
+ const bustCost = compressedTokens * cacheWriteCostPerToken;
116
+ const continueCost = currentTokens * cacheReadCostPerToken;
117
+
118
+ // Compress only if the bust cost is meaningfully less than continuing
119
+ return bustCost < threshold * continueCost;
120
+ }
121
+
122
+ /**
123
+ * Determine which tier the given token count falls into.
124
+ * Returns 0, 1, or 2 corresponding to the tier index.
125
+ */
126
+ export function getTier(tokens: number): number {
127
+ if (tokens <= TIER_BOUNDARIES[0]) return 0;
128
+ if (tokens <= TIER_BOUNDARIES[1]) return 1;
129
+ return 2;
86
130
  }
87
131
 
88
132
  /**
89
- * Feed cache usage data after each API response. Updates the per-session
90
- * bust rate EMA and inter-bust interval, which adjust the effective context
91
- * cap dynamically.
133
+ * Record cache usage from an API response. Tracks consecutive busts for
134
+ * the rolling bust detection used by shouldCompress().
135
+ *
136
+ * A "bust" is when cache_write > 50% of total input tokens.
92
137
  *
93
138
  * @param cacheWrite - cache_creation_input_tokens from the API response
94
139
  * @param cacheRead - cache_read_input_tokens from the API response
140
+ * @param inputTokens - total input_tokens from the API response (includes uncached)
95
141
  * @param sessionID - session that produced this response
96
142
  */
97
- export function updateBustRate(
143
+ export function recordCacheUsage(
98
144
  cacheWrite: number,
99
145
  cacheRead: number,
146
+ inputTokens: number,
100
147
  sessionID?: string,
101
- lastLayer?: number,
102
148
  ): void {
103
149
  if (!sessionID) return;
104
150
  const state = getSessionState(sessionID);
105
151
 
106
- // Layer 4 (emergency) is structurally a full cache write — feeding its
107
- // bust stats into the EMA and cap adaptation creates a death spiral where
108
- // the cap ratchets down to MIN_CONTEXT_FLOOR and prevents the session from
109
- // ever fitting in layers 1-3 again. Skip EMA updates entirely.
110
- // This check is BEFORE the total===0 guard so that the consecutiveLayer4
111
- // counter is always updated regardless of whether usage was reported.
112
- if (lastLayer === 4) {
113
- state.consecutiveLayer4++;
114
-
115
- // Recovery hatch: after 5+ consecutive Layer 4 turns, the shrunken cap
116
- // may be what's trapping us. Relax it by 10% per turn to give layers
117
- // 1-3 a chance to fit. From 130K floor: turns 5-9 → 143K→157K→173K→190K→209K.
118
- if (
119
- state.consecutiveLayer4 >= 5 &&
120
- state.dynamicContextCap > 0 &&
121
- maxContextTokensCeiling > 0
122
- ) {
123
- state.dynamicContextCap = Math.min(
124
- maxContextTokensCeiling,
125
- Math.floor(state.dynamicContextCap * 1.10),
126
- );
127
- }
128
- return;
129
- }
130
-
131
- // Non-Layer-4 turn: reset the consecutive counter (also before total===0
132
- // guard — a zero-usage non-L4 turn must not leave a stale count).
133
- if (lastLayer !== undefined) {
134
- state.consecutiveLayer4 = 0;
135
- }
136
-
137
- const total = cacheWrite + cacheRead;
138
- if (total === 0) return;
139
-
140
- // Bust ratio: fraction of total input that was cache-written (0 = all reads, 1 = all writes)
141
- const bustRatio = cacheWrite / total;
142
-
143
- // EMA update (α = 0.3 for smoothing — responsive but not twitchy)
144
- state.bustRateEMA =
145
- state.bustRateEMA < 0
146
- ? bustRatio // first observation
147
- : state.bustRateEMA * 0.7 + bustRatio * 0.3;
148
-
149
- // Inter-bust interval tracking: a "bust" is when >50% of input is writes
150
- const now = Date.now();
151
- if (bustRatio > 0.5) {
152
- if (state.lastBustAt > 0) {
153
- const interval = now - state.lastBustAt;
154
- state.interBustIntervalEMA =
155
- state.interBustIntervalEMA < 0
156
- ? interval
157
- : state.interBustIntervalEMA * 0.7 + interval * 0.3;
158
- }
159
- state.lastBustAt = now;
160
- }
161
-
162
- // Adapt per-session cap based on bust rate and interval
163
- adaptContextCap(state);
164
- }
165
-
166
- /** Adapt the per-session context cap based on bust rate and break frequency. */
167
- function adaptContextCap(state: SessionState): void {
168
- if (maxContextTokensCeiling <= 0) return; // disabled
169
-
170
- const cap = state.dynamicContextCap > 0
171
- ? state.dynamicContextCap
172
- : maxContextTokensCeiling;
173
-
174
- let newCap = cap;
175
-
176
- // Primary signal: bust rate EMA
177
- if (state.bustRateEMA > 0.8) {
178
- // Mostly writes — tighten by 10%
179
- newCap = Math.floor(cap * 0.90);
180
- } else if (state.bustRateEMA < 0.3) {
181
- // Mostly reads — relax by 5% (slower than tightening)
182
- newCap = Math.floor(cap * 1.05);
183
- }
184
-
185
- // Secondary signal: inter-bust interval
186
- if (state.interBustIntervalEMA > 0) {
187
- if (state.interBustIntervalEMA < 2 * 60_000) {
188
- // Busts less than 2 min apart — proactively tighten by extra 5%
189
- newCap = Math.floor(newCap * 0.95);
190
- } else if (state.interBustIntervalEMA > 10 * 60_000) {
191
- // Busts more than 10 min apart — allow extra relaxation
192
- newCap = Math.floor(newCap * 1.03);
152
+ // Use total input tokens as denominator (includes uncached input),
153
+ // not just cacheWrite + cacheRead, to avoid inflated bust ratios
154
+ // when a large fraction of tokens is uncached.
155
+ const total = inputTokens > 0 ? inputTokens : cacheWrite + cacheRead;
156
+ if (total > 0) {
157
+ if (cacheWrite / total > 0.5) {
158
+ state.consecutiveBusts++;
159
+ } else {
160
+ state.consecutiveBusts = 0;
193
161
  }
194
162
  }
195
-
196
- // Clamp to [floor, ceiling]
197
- state.dynamicContextCap = Math.max(
198
- MIN_CONTEXT_FLOOR,
199
- Math.min(maxContextTokensCeiling, newCap),
200
- );
201
163
  }
202
164
 
203
165
  // Conservative overhead reserve for first-turn (before calibration):
@@ -286,22 +248,10 @@ type SessionState = {
286
248
  postIdleCompact: boolean;
287
249
  /** Consecutive turns at layer >= 2. When >= 3, log a compaction hint. */
288
250
  consecutiveHighLayer: number;
289
- /** Consecutive Layer 4 turns used to skip bust-rate EMA updates
290
- * (Layer 4 busts are structural, not a caching signal) and to trigger
291
- * a recovery hatch that relaxes dynamicContextCap after prolonged trapping. */
292
- consecutiveLayer4: number;
293
-
294
- // --- Cost-aware context cap dynamic state ---
295
-
296
- /** EMA of bust ratio (cacheWrite / total). -1 = uninitialized. */
297
- bustRateEMA: number;
298
- /** EMA of time between full busts (ms). -1 = uninitialized. */
299
- interBustIntervalEMA: number;
300
- /** Epoch ms of the last full bust (cacheWrite > 50% of total). 0 = never. */
301
- lastBustAt: number;
302
- /** Per-session dynamic context cap (tokens). Adjusted by adaptContextCap().
303
- * 0 = use the static ceiling (maxContextTokensCeiling). */
304
- dynamicContextCap: number;
251
+ /** Consecutive turns where the cache was busted (>50% writes).
252
+ * Used for rolling bust detection: after 5+ consecutive busts, stop
253
+ * trying to compress and warn that the conversation is unsustainable. */
254
+ consecutiveBusts: number;
305
255
 
306
256
  /**
307
257
  * Distillation row snapshot — cached to avoid hitting the DB on every
@@ -335,12 +285,7 @@ function makeSessionState(): SessionState {
335
285
  cameOutOfIdle: false,
336
286
  postIdleCompact: false,
337
287
  consecutiveHighLayer: 0,
338
- consecutiveLayer4: 0,
339
-
340
- bustRateEMA: -1,
341
- interBustIntervalEMA: -1,
342
- lastBustAt: 0,
343
- dynamicContextCap: 0,
288
+ consecutiveBusts: 0,
344
289
 
345
290
  distillationSnapshot: null,
346
291
  };
@@ -359,9 +304,8 @@ function getSessionState(sessionID: string): SessionState {
359
304
  state.forceMinLayer = loadForceMinLayer(sessionID) as SafetyLayer;
360
305
 
361
306
  // Restore gradient calibration state from DB (v24) — avoids uncalibrated
362
- // first turns after restart. Without this, dynamicContextCap reverts to
363
- // the static ceiling, bustRateEMA is uninitialized, and lastTurnAt=0
364
- // prevents onIdleResume() from detecting idle gaps.
307
+ // first turns after restart. Without this, lastTurnAt=0 prevents
308
+ // onIdleResume() from detecting idle gaps.
365
309
  //
366
310
  // Atomic restore: lastTurnAt > 0 is the proxy for "gradient state was
367
311
  // ever flushed to DB". Restore all fields together or none — avoids
@@ -369,13 +313,12 @@ function getSessionState(sessionID: string): SessionState {
369
313
  // could be mistaken for "never persisted".
370
314
  const persisted = loadSessionTracking(sessionID);
371
315
  if (persisted && persisted.lastTurnAt > 0) {
372
- state.dynamicContextCap = persisted.dynamicContextCap;
373
- state.bustRateEMA = persisted.bustRateEMA;
374
- state.interBustIntervalEMA = persisted.interBustIntervalEMA;
375
316
  state.lastLayer = persisted.lastLayer as SafetyLayer;
376
317
  state.lastKnownInput = persisted.lastKnownInput;
377
318
  state.lastTurnAt = persisted.lastTurnAt;
378
- state.lastBustAt = persisted.lastBustAt;
319
+ // consecutiveBusts is persisted in the dynamicContextCap column
320
+ // (repurposed, see saveGradientState).
321
+ state.consecutiveBusts = persisted.dynamicContextCap;
379
322
  }
380
323
 
381
324
  sessionStates.set(sessionID, state);
@@ -619,6 +562,8 @@ export function setForceMinLayer(layer: SafetyLayer, sessionID?: string) {
619
562
  // For testing only — reset all calibration and force-escalation state
620
563
  export function resetCalibration(sessionID?: string) {
621
564
  calibratedOverhead = null;
565
+ cacheWriteCostPerToken = 0;
566
+ cacheReadCostPerToken = 0;
622
567
  if (sessionID) {
623
568
  saveForceMinLayer(sessionID, 0); // clear persisted state
624
569
  sessionStates.delete(sessionID);
@@ -643,9 +588,7 @@ export function inspectSessionState(sessionID: string): {
643
588
  postIdleCompact: boolean;
644
589
  lastTurnAt: number;
645
590
  distillationSnapshot: DistillationSnapshot | null;
646
- bustRateEMA: number;
647
- dynamicContextCap: number;
648
- consecutiveLayer4: number;
591
+ consecutiveBusts: number;
649
592
  } | null {
650
593
  const state = sessionStates.get(sessionID);
651
594
  if (!state) return null;
@@ -656,9 +599,7 @@ export function inspectSessionState(sessionID: string): {
656
599
  postIdleCompact: state.postIdleCompact,
657
600
  lastTurnAt: state.lastTurnAt,
658
601
  distillationSnapshot: state.distillationSnapshot,
659
- bustRateEMA: state.bustRateEMA,
660
- dynamicContextCap: state.dynamicContextCap,
661
- consecutiveLayer4: state.consecutiveLayer4,
602
+ consecutiveBusts: state.consecutiveBusts,
662
603
  };
663
604
  }
664
605
 
@@ -683,13 +624,12 @@ export function saveGradientState(sessionID: string): void {
683
624
  if (!state) return;
684
625
 
685
626
  saveSessionTracking(sessionID, {
686
- dynamicContextCap: state.dynamicContextCap,
687
- bustRateEMA: state.bustRateEMA,
688
- interBustIntervalEMA: state.interBustIntervalEMA,
689
627
  lastLayer: state.lastLayer,
690
628
  lastKnownInput: state.lastKnownInput,
691
629
  lastTurnAt: state.lastTurnAt,
692
- lastBustAt: state.lastBustAt,
630
+ // Repurpose the dead dynamicContextCap column (v24, always 0 now)
631
+ // to persist consecutiveBusts — avoids a new DB migration.
632
+ dynamicContextCap: state.consecutiveBusts,
693
633
  });
694
634
  }
695
635
 
@@ -1591,6 +1531,10 @@ export type TransformResult = {
1591
1531
  // relevance scoring. Set on Layer 4 (emergency) where the context is
1592
1532
  // fully reset and mid-session knowledge may have changed relevance.
1593
1533
  refreshLtm: boolean;
1534
+ /** When set, the conversation is growing unsustainably — 5+ consecutive
1535
+ * cache busts detected. The pipeline should inject a warning message
1536
+ * advising the user to compact or start a new conversation. */
1537
+ unsustainable?: boolean;
1594
1538
  };
1595
1539
 
1596
1540
  // Per-session urgent distillation tracking.
@@ -1624,17 +1568,11 @@ function transformInner(input: {
1624
1568
  contextLimit - outputReserved - overhead - sessLtmTokens,
1625
1569
  );
1626
1570
 
1627
- // Cost-aware context cap: limit total distilled + raw tokens to keep
1628
- // per-bust cache write cost bounded. On opus-4-6 at $6.25/M, a $1.00
1629
- // target yields a 160K ceiling; on sonnet-4 at $3.75/M, 267K (effectively
1630
- // uncapped at 200K context). Per-session dynamic adaptation may reduce
1631
- // this further based on observed bust rate and break frequency.
1632
- const effectiveCap = sid && sessState.dynamicContextCap > 0
1633
- ? sessState.dynamicContextCap
1634
- : maxContextTokensCeiling;
1635
- const usable = effectiveCap > 0 && usableRaw > effectiveCap
1636
- ? effectiveCap
1637
- : usableRaw;
1571
+ // No EMA-driven adaptive cap use the full available context budget.
1572
+ // The layer-0 cap (maxLayer0Tokens) still applies for per-turn read cost,
1573
+ // and tier-based bust-vs-continue decisions control whether to compress
1574
+ // at quality boundaries.
1575
+ const usable = usableRaw;
1638
1576
 
1639
1577
  const distilledBudget = Math.floor(usable * cfg.budget.distilled);
1640
1578
  // Base raw budget. May be overridden below for post-idle compact mode.
@@ -1705,11 +1643,8 @@ function transformInner(input: {
1705
1643
  sessState.postIdleCompact = false;
1706
1644
  // Skip layer 0 — don't pass through all raw messages on a cold cache.
1707
1645
  effectiveMinLayer = Math.max(effectiveMinLayer, 1) as SafetyLayer;
1708
- // Use a tighter raw budget. When the cost-aware context cap is active,
1709
- // total write size is already bounded — use a moderate 30%. Without
1710
- // the cap, use a tighter 20% to limit cold-write cost directly.
1711
- const postIdleRawFraction = effectiveCap > 0 ? 0.30 : 0.20;
1712
- rawBudget = Math.floor(usable * postIdleRawFraction);
1646
+ // Use a tighter raw budget on cold cache to limit write cost.
1647
+ rawBudget = Math.floor(usable * 0.20);
1713
1648
  log.info(
1714
1649
  `post-idle compact: session=${sid} rawBudget=${rawBudget}` +
1715
1650
  ` (${Math.floor(usable * cfg.budget.raw)}→${rawBudget})`,
@@ -1771,6 +1706,46 @@ function transformInner(input: {
1771
1706
  };
1772
1707
  }
1773
1708
 
1709
+ // --- Tier-based bust-vs-continue gate ---
1710
+ // When expectedInput exceeds the layer-0 cap but still fits in the model's
1711
+ // context window, check whether compression is economically justified.
1712
+ // If not (bust cost ≥ 85% of continue cost), skip compression and pass
1713
+ // through at layer 0 — the cache reads are cheap enough to justify the
1714
+ // larger context, and raw messages are better quality than distilled.
1715
+ if (
1716
+ effectiveMinLayer === 0 &&
1717
+ layer0Input > layer0Ceiling &&
1718
+ layer0Input <= maxInput &&
1719
+ sid
1720
+ ) {
1721
+ const busts = getSessionState(sid).consecutiveBusts;
1722
+ // For compression, estimate the compressed size as the layer-1 budget
1723
+ // (distilled + raw fractions). This is a rough upper bound — actual
1724
+ // compressed output may be smaller.
1725
+ const compressedEstimate = distilledBudget + rawBudget;
1726
+ if (!shouldCompress(Math.round(layer0Input), compressedEstimate, busts)) {
1727
+ const messageTokens = calibrated
1728
+ ? expectedInput - (sessLtmTokens - sessState.lastKnownLtm)
1729
+ : expectedInput - overhead - sessLtmTokens;
1730
+ log.info(
1731
+ `tier gate: session=${sid} skipping compression — bustCost not justified` +
1732
+ ` (input=${Math.round(layer0Input)} compressed=${compressedEstimate} busts=${busts})`,
1733
+ );
1734
+ return {
1735
+ messages: input.messages,
1736
+ layer: 0,
1737
+ distilledTokens: 0,
1738
+ rawTokens: Math.max(0, messageTokens),
1739
+ totalTokens: Math.max(0, messageTokens),
1740
+ usable,
1741
+ distilledBudget,
1742
+ rawBudget,
1743
+ refreshLtm: false,
1744
+ unsustainable: busts >= 5,
1745
+ };
1746
+ }
1747
+ }
1748
+
1774
1749
  // --- Gradient mode: context exhausted (or force-escalated), compress older messages ---
1775
1750
 
1776
1751
  // Pre-pass: deduplicate repeated tool outputs before layer selection.
@@ -1916,6 +1891,8 @@ function transformInner(input: {
1916
1891
  const nuclearRaw = [...olderMessages, ...currentTurn];
1917
1892
  const nuclearRawTokens = olderTokens + currentTurnTokens;
1918
1893
 
1894
+ const unsustainable = sid ? getSessionState(sid).consecutiveBusts >= 5 : false;
1895
+
1919
1896
  return {
1920
1897
  messages: [...nuclearPrefix, ...nuclearRaw],
1921
1898
  layer: 4,
@@ -1926,6 +1903,7 @@ function transformInner(input: {
1926
1903
  distilledBudget,
1927
1904
  rawBudget,
1928
1905
  refreshLtm: true,
1906
+ unsustainable,
1929
1907
  };
1930
1908
  }
1931
1909
 
@@ -1977,7 +1955,7 @@ export function transform(input: {
1977
1955
  log.info(
1978
1956
  `gradient: session=${sid} layer=${result.layer} tokens=${result.totalTokens}` +
1979
1957
  ` (distilled=${result.distilledTokens} raw=${result.rawTokens})` +
1980
- ` usable=${result.usable} cap=${maxLayer0Tokens || "off"}`,
1958
+ ` usable=${result.usable} tier=${getTier(result.totalTokens)} l0cap=${maxLayer0Tokens || "off"}`,
1981
1959
  );
1982
1960
  }
1983
1961
  return result;
package/src/index.ts CHANGED
@@ -94,10 +94,11 @@ export {
94
94
  setModelLimits,
95
95
  setMaxLayer0Tokens,
96
96
  computeLayer0Cap,
97
- setMaxContextTokens,
98
- computeContextCap,
99
- getMaxContextTokens,
100
- updateBustRate,
97
+ setCachePricing,
98
+ getCachePricing,
99
+ shouldCompress,
100
+ getTier,
101
+ recordCacheUsage,
101
102
  needsUrgentDistillation,
102
103
  calibrate,
103
104
  setLtmTokens,