claude-code-cache-fix 1.3.0 → 1.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. package/README.md +6 -0
  2. package/package.json +1 -1
  3. package/preload.mjs +128 -7
package/README.md CHANGED
@@ -114,6 +114,10 @@ On the first API call, the interceptor reads `~/.claude.json` and logs the curre
114
114
 
115
115
  Response headers are parsed for `anthropic-ratelimit-unified-5h-utilization` and `7d-utilization`, saved to `~/.claude/quota-status.json` for consumption by status line hooks or other tools.
116
116
 
117
+ ### Peak hour detection
118
+
119
+ Anthropic applies elevated quota drain rates during weekday peak hours (13:00–19:00 UTC, Mon–Fri). The interceptor detects peak windows and writes `peak_hour: true/false` to `quota-status.json`. See `docs/peak-hours-reference.md` for sources and details.
120
+
117
121
  ## Debug mode
118
122
 
119
123
  Enable debug logging to verify the fix is working:
@@ -132,6 +136,8 @@ Logs are written to `~/.claude/cache-fix-debug.log`. Look for:
132
136
  - `FALSE RATE LIMIT: synthetic model detected` — client-side false rate limit
133
137
  - `GROWTHBOOK FLAGS: {...}` — server-controlled feature flags on first call
134
138
  - `PROMPT SIZE: system=N tools=N injected=N (skills=N mcp=N ...)` — per-call prompt size breakdown
139
+ - `CACHE TTL: tier=1h create=N read=N hit=N% (1h=N 5m=N)` — TTL tier and cache hit rate per call
140
+ - `PEAK HOUR: weekday 13:00-19:00 UTC` — Anthropic peak hour throttling active
135
141
  - `SKIPPED: resume relocation (not a resume or already correct)` — no fix needed
136
142
 
137
143
  ### Prefix diff mode
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "claude-code-cache-fix",
3
- "version": "1.3.0",
3
+ "version": "1.4.1",
4
4
  "description": "Fixes prompt cache regression in Claude Code that causes up to 20x cost increase on resumed sessions",
5
5
  "type": "module",
6
6
  "exports": "./preload.mjs",
package/preload.mjs CHANGED
@@ -784,20 +784,141 @@ globalThis.fetch = async function (url, options) {
784
784
  const overage = response.headers.get("anthropic-ratelimit-unified-overage-status");
785
785
 
786
786
  if (h5 || h7d) {
787
- const quota = {
788
- timestamp: new Date().toISOString(),
789
- five_hour: h5 ? { utilization: parseFloat(h5), pct: Math.round(parseFloat(h5) * 100), resets_at: reset5h ? parseInt(reset5h) : null } : null,
790
- seven_day: h7d ? { utilization: parseFloat(h7d), pct: Math.round(parseFloat(h7d) * 100), resets_at: reset7d ? parseInt(reset7d) : null } : null,
791
- status: status || null,
792
- overage_status: overage || null,
793
- };
794
787
  const quotaFile = join(homedir(), ".claude", "quota-status.json");
788
+ let quota = {};
789
+ try { quota = JSON.parse(readFileSync(quotaFile, "utf8")); } catch {}
790
+ quota.timestamp = new Date().toISOString();
791
+ quota.five_hour = h5 ? { utilization: parseFloat(h5), pct: Math.round(parseFloat(h5) * 100), resets_at: reset5h ? parseInt(reset5h) : null } : quota.five_hour;
792
+ quota.seven_day = h7d ? { utilization: parseFloat(h7d), pct: Math.round(parseFloat(h7d) * 100), resets_at: reset7d ? parseInt(reset7d) : null } : quota.seven_day;
793
+ quota.status = status || null;
794
+ quota.overage_status = overage || null;
795
+
796
+ // Peak hour detection — Anthropic applies higher quota drain rate during
797
+ // weekday peak hours: 13:00–19:00 UTC (Mon–Fri).
798
+ // Source: Thariq (Anthropic) via X, 2026-03-26; confirmed by The Register,
799
+ // PCWorld, Piunikaweb. No specific multiplier disclosed.
800
+ const now = new Date();
801
+ const utcHour = now.getUTCHours();
802
+ const utcDay = now.getUTCDay(); // 0=Sun, 6=Sat
803
+ const isPeak = utcDay >= 1 && utcDay <= 5 && utcHour >= 13 && utcHour < 19;
804
+ quota.peak_hour = isPeak;
805
+
795
806
  writeFileSync(quotaFile, JSON.stringify(quota, null, 2));
807
+
808
+ if (DEBUG && isPeak) {
809
+ debugLog("PEAK HOUR: weekday 13:00-19:00 UTC — quota drains at elevated rate");
810
+ }
796
811
  }
797
812
  } catch {
798
813
  // Non-critical — don't break the response
799
814
  }
815
+
816
+ // Clone response to extract TTL tier from usage (SSE stream)
817
+ try {
818
+ const clone = response.clone();
819
+ drainTTLFromClone(clone).catch(() => {});
820
+ } catch {
821
+ // clone() failure is non-fatal
822
+ }
800
823
  }
801
824
 
802
825
  return response;
803
826
  };
827
// --------------------------------------------------------------------------
// TTL tier extraction from SSE response stream
// --------------------------------------------------------------------------

/**
 * Drain a cloned SSE response to extract the prompt-cache TTL tier from the
 * usage object. The `message_start` event carries `usage.cache_creation` with
 * `ephemeral_1h_input_tokens` / `ephemeral_5m_input_tokens` counts, revealing
 * which TTL tier the server applied to this request.
 *
 * Side effects: merges a `cache` record into ~/.claude/quota-status.json and
 * emits a `CACHE TTL:` debug log line. Best-effort telemetry — failures must
 * never break the response path (the caller also attaches a `.catch`).
 *
 * @param {Response} clone - clone of the fetch Response; its body is consumed
 *   (and cancelled as soon as the message_start event has been handled).
 * @returns {Promise<void>}
 */
async function drainTTLFromClone(clone) {
  if (!clone.body) return;

  const reader = clone.body.getReader();
  const decoder = new TextDecoder();
  let buffer = "";

  // Parse one SSE line; returns true once message_start has been handled,
  // false for anything else (non-data lines, [DONE], malformed JSON).
  const handleLine = (rawLine) => {
    const line = rawLine.trim();
    if (!line.startsWith("data: ") || line === "data: [DONE]") return false;

    let event;
    try {
      event = JSON.parse(line.slice(6));
    } catch {
      return false; // skip malformed SSE lines only — other errors propagate
    }
    if (event.type !== "message_start" || !event.message?.usage) return false;

    const u = event.message.usage;
    const cc = u.cache_creation || {};
    const e1h = cc.ephemeral_1h_input_tokens ?? 0;
    const e5m = cc.ephemeral_5m_input_tokens ?? 0;
    const cacheCreate = u.cache_creation_input_tokens ?? 0;
    const cacheRead = u.cache_read_input_tokens ?? 0;

    const quotaFile = join(homedir(), ".claude", "quota-status.json");

    // Read existing quota state ONCE — used both for tier inference on a
    // fully-warm cache and as the merge base for the write below (the
    // original read the file twice, leaving a window for inconsistency).
    let quota = {};
    try { quota = JSON.parse(readFileSync(quotaFile, "utf8")); } catch {}

    // Determine TTL tier from which ephemeral bucket received tokens.
    let ttlTier = "unknown";
    if (e1h > 0 && e5m > 0) ttlTier = "mixed";
    else if (e1h > 0) ttlTier = "1h";
    else if (e5m > 0) ttlTier = "5m";
    else if (cacheCreate === 0) {
      // Fully cached — no creation this call to reveal the tier, so carry
      // the previous tier forward (default "1h" when no prior record).
      ttlTier = quota.cache?.ttl_tier || "1h";
    }

    const totalCacheTokens = cacheRead + cacheCreate;
    const hitRate = totalCacheTokens > 0
      ? (cacheRead / totalCacheTokens * 100).toFixed(1)
      : "N/A";

    debugLog(
      `CACHE TTL: tier=${ttlTier}`,
      `create=${cacheCreate} read=${cacheRead} hit=${hitRate}%`,
      `(1h=${e1h} 5m=${e5m})`
    );

    // Merge TTL data into quota-status.json (best-effort; a write failure
    // must not abort the drain).
    try {
      quota.cache = {
        ttl_tier: ttlTier,
        cache_creation: cacheCreate,
        cache_read: cacheRead,
        ephemeral_1h: e1h,
        ephemeral_5m: e5m,
        hit_rate: hitRate,
        timestamp: new Date().toISOString(),
      };
      writeFileSync(quotaFile, JSON.stringify(quota, null, 2));
    } catch {}

    return true;
  };

  try {
    while (true) {
      const { done, value } = await reader.read();
      if (done) break;
      buffer += decoder.decode(value, { stream: true });

      let newlineIdx;
      while ((newlineIdx = buffer.indexOf("\n")) !== -1) {
        const line = buffer.slice(0, newlineIdx);
        buffer = buffer.slice(newlineIdx + 1);
        if (handleLine(line)) {
          // Got what we need — stop reading. Don't leave the cancel()
          // promise floating (the original never awaited or caught it).
          await reader.cancel().catch(() => {});
          return;
        }
      }
    }
    // Fix: the original dropped a final line that arrived without a trailing
    // newline. Flush the decoder and process any remainder after the stream
    // ends so a terminal message_start is still observed.
    buffer += decoder.decode();
    if (buffer) handleLine(buffer);
  } finally {
    try { reader.releaseLock(); } catch {}
  }
}