claude-code-cache-fix 1.3.0 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. package/README.md +1 -0
  2. package/package.json +1 -1
  3. package/preload.mjs +113 -7
package/README.md CHANGED
@@ -132,6 +132,7 @@ Logs are written to `~/.claude/cache-fix-debug.log`. Look for:
132
132
  - `FALSE RATE LIMIT: synthetic model detected` — client-side false rate limit
133
133
  - `GROWTHBOOK FLAGS: {...}` — server-controlled feature flags on first call
134
134
  - `PROMPT SIZE: system=N tools=N injected=N (skills=N mcp=N ...)` — per-call prompt size breakdown
135
+ - `CACHE TTL: tier=1h create=N read=N hit=N% (1h=N 5m=N)` — TTL tier and cache hit rate per call
135
136
  - `SKIPPED: resume relocation (not a resume or already correct)` — no fix needed
136
137
 
137
138
  ### Prefix diff mode
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "claude-code-cache-fix",
3
- "version": "1.3.0",
3
+ "version": "1.4.0",
4
4
  "description": "Fixes prompt cache regression in Claude Code that causes up to 20x cost increase on resumed sessions",
5
5
  "type": "module",
6
6
  "exports": "./preload.mjs",
package/preload.mjs CHANGED
@@ -784,20 +784,126 @@ globalThis.fetch = async function (url, options) {
784
784
  const overage = response.headers.get("anthropic-ratelimit-unified-overage-status");
785
785
 
786
786
  if (h5 || h7d) {
787
- const quota = {
788
- timestamp: new Date().toISOString(),
789
- five_hour: h5 ? { utilization: parseFloat(h5), pct: Math.round(parseFloat(h5) * 100), resets_at: reset5h ? parseInt(reset5h) : null } : null,
790
- seven_day: h7d ? { utilization: parseFloat(h7d), pct: Math.round(parseFloat(h7d) * 100), resets_at: reset7d ? parseInt(reset7d) : null } : null,
791
- status: status || null,
792
- overage_status: overage || null,
793
- };
794
787
  const quotaFile = join(homedir(), ".claude", "quota-status.json");
788
+ let quota = {};
789
+ try { quota = JSON.parse(readFileSync(quotaFile, "utf8")); } catch {}
790
+ quota.timestamp = new Date().toISOString();
791
+ quota.five_hour = h5 ? { utilization: parseFloat(h5), pct: Math.round(parseFloat(h5) * 100), resets_at: reset5h ? parseInt(reset5h) : null } : quota.five_hour;
792
+ quota.seven_day = h7d ? { utilization: parseFloat(h7d), pct: Math.round(parseFloat(h7d) * 100), resets_at: reset7d ? parseInt(reset7d) : null } : quota.seven_day;
793
+ quota.status = status || null;
794
+ quota.overage_status = overage || null;
795
795
  writeFileSync(quotaFile, JSON.stringify(quota, null, 2));
796
796
  }
797
797
  } catch {
798
798
  // Non-critical — don't break the response
799
799
  }
800
+
801
+ // Clone response to extract TTL tier from usage (SSE stream)
802
+ try {
803
+ const clone = response.clone();
804
+ drainTTLFromClone(clone).catch(() => {});
805
+ } catch {
806
+ // clone() failure is non-fatal
807
+ }
800
808
  }
801
809
 
802
810
  return response;
803
811
  };
812
+
813
+ // --------------------------------------------------------------------------
814
+ // TTL tier extraction from SSE response stream
815
+ // --------------------------------------------------------------------------
816
+
817
/**
 * Drain a cloned SSE response to extract the cache TTL tier from the usage
 * object of the `message_start` event.
 *
 * `usage.cache_creation` carries `ephemeral_1h_input_tokens` and
 * `ephemeral_5m_input_tokens`; whichever bucket received tokens reveals the
 * TTL tier that was applied. The result is merged into
 * `~/.claude/quota-status.json` under the `cache` key (other keys are kept)
 * and logged through `debugLog`.
 *
 * Reading stops at the first `message_start` event that carries usage. The
 * clone is always cancelled and its reader lock released on the way out, so
 * an abandoned branch does not keep buffering the response.
 *
 * @param {{ body: ReadableStream<Uint8Array> | null }} clone - A cloned
 *   Response (or any object exposing a byte `ReadableStream` as `body`).
 * @returns {Promise<void>} Resolves once the event was recorded or the stream
 *   ended. Rejects only if reading the stream itself fails; callers are
 *   expected to attach a rejection handler.
 */
async function drainTTLFromClone(clone) {
  if (!clone.body) return;

  const reader = clone.body.getReader();
  const decoder = new TextDecoder();
  let buffer = "";

  try {
    while (true) {
      const { done, value } = await reader.read();
      // At end-of-stream, flush whatever bytes the decoder is still holding.
      buffer += done ? decoder.decode() : decoder.decode(value, { stream: true });

      let newlineIdx;
      while ((newlineIdx = buffer.indexOf("\n")) !== -1) {
        const line = buffer.slice(0, newlineIdx);
        buffer = buffer.slice(newlineIdx + 1);
        // Got what we need — stop reading.
        if (recordCacheTTLFromSSELine(line)) return;
      }

      if (done) {
        // The final line may legitimately lack a trailing newline.
        recordCacheTTLFromSSELine(buffer);
        return;
      }
    }
  } finally {
    // Fire-and-forget: cancelling one branch of a cloned (teed) body may not
    // settle until the other branch is cancelled too, so it must not be
    // awaited. The rejection handler prevents an unhandled rejection.
    try {
      reader.cancel().catch(() => {});
    } catch {
      // cancel() failure is non-fatal
    }
    try {
      reader.releaseLock();
    } catch {
      // releaseLock() failure is non-fatal
    }
  }
}

/**
 * Inspect one SSE line and, if it is a `message_start` event carrying usage,
 * log the cache TTL data and merge it into `~/.claude/quota-status.json`.
 *
 * @param {string} rawLine - One line of the SSE stream (line ending optional).
 * @returns {boolean} `true` when the line was a `message_start` event with
 *   usage (even if logging or persisting failed), otherwise `false`.
 */
function recordCacheTTLFromSSELine(rawLine) {
  const line = rawLine.trim();
  if (!line.startsWith("data: ") || line === "data: [DONE]") return false;

  let event;
  try {
    event = JSON.parse(line.slice(6));
  } catch {
    // Skip malformed SSE lines
    return false;
  }

  const usage = event?.type === "message_start" ? event.message?.usage : undefined;
  if (!usage) return false;

  // Everything below is best-effort diagnostics and must never throw.
  try {
    const breakdown = usage.cache_creation || {};
    const e1h = breakdown.ephemeral_1h_input_tokens ?? 0;
    const e5m = breakdown.ephemeral_5m_input_tokens ?? 0;
    const cacheCreate = usage.cache_creation_input_tokens ?? 0;
    const cacheRead = usage.cache_read_input_tokens ?? 0;

    // Read the status file once: it supplies the previous tier and is the
    // object the new cache section is merged into.
    const quotaFile = join(homedir(), ".claude", "quota-status.json");
    let quota = {};
    try {
      const parsed = JSON.parse(readFileSync(quotaFile, "utf8"));
      if (parsed !== null && typeof parsed === "object" && !Array.isArray(parsed)) {
        quota = parsed;
      }
    } catch {
      // Missing or unreadable file — start from an empty object.
    }

    const ttlTier = classifyCacheTTLTier(e1h, e5m, cacheCreate, quota.cache?.ttl_tier);

    const totalCached = cacheRead + cacheCreate;
    const hitRate = totalCached > 0
      ? (cacheRead / totalCached * 100).toFixed(1)
      : "N/A";
    // Only append "%" to a real number; "N/A%" is misleading in the log.
    const hitLabel = hitRate === "N/A" ? hitRate : `${hitRate}%`;

    debugLog(
      `CACHE TTL: tier=${ttlTier}`,
      `create=${cacheCreate} read=${cacheRead} hit=${hitLabel}`,
      `(1h=${e1h} 5m=${e5m})`
    );

    // Merge TTL data into quota-status.json
    quota.cache = {
      ttl_tier: ttlTier,
      cache_creation: cacheCreate,
      cache_read: cacheRead,
      ephemeral_1h: e1h,
      ephemeral_5m: e5m,
      hit_rate: hitRate,
      timestamp: new Date().toISOString(),
    };
    writeFileSync(quotaFile, JSON.stringify(quota, null, 2));
  } catch {
    // Non-critical — logging/persisting must not affect the caller.
  }

  return true;
}

/**
 * Determine the TTL tier from which ephemeral bucket received tokens.
 *
 * @param {number} e1h - Tokens written to the 1-hour bucket.
 * @param {number} e5m - Tokens written to the 5-minute bucket.
 * @param {number} cacheCreate - Total cache creation tokens.
 * @param {string | undefined} previousTier - Tier recorded by an earlier call.
 * @returns {string} `"1h"`, `"5m"`, `"mixed"`, or `"unknown"`. When nothing
 *   was created (fully cached) the previous tier is kept, defaulting to `"1h"`.
 */
function classifyCacheTTLTier(e1h, e5m, cacheCreate, previousTier) {
  if (e1h > 0 && e5m === 0) return "1h";
  if (e5m > 0 && e1h === 0) return "5m";
  if (e1h > 0 && e5m > 0) return "mixed";
  // Fully cached — no creation to determine tier. Preserve previous.
  if (e1h === 0 && e5m === 0 && cacheCreate === 0) return previousTier || "1h";
  return "unknown";
}