claude-code-cache-fix 1.3.0 → 1.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +6 -0
- package/package.json +1 -1
- package/preload.mjs +128 -7
package/README.md
CHANGED
|
@@ -114,6 +114,10 @@ On the first API call, the interceptor reads `~/.claude.json` and logs the curre
|
|
|
114
114
|
|
|
115
115
|
Response headers are parsed for `anthropic-ratelimit-unified-5h-utilization` and `7d-utilization`, saved to `~/.claude/quota-status.json` for consumption by status line hooks or other tools.
|
|
116
116
|
|
|
117
|
+
### Peak hour detection
|
|
118
|
+
|
|
119
|
+
Anthropic applies elevated quota drain rates during weekday peak hours (13:00–19:00 UTC, Mon–Fri). The interceptor detects peak windows and writes `peak_hour: true/false` to `quota-status.json`. See `docs/peak-hours-reference.md` for sources and details.
|
|
120
|
+
|
|
117
121
|
## Debug mode
|
|
118
122
|
|
|
119
123
|
Enable debug logging to verify the fix is working:
|
|
@@ -132,6 +136,8 @@ Logs are written to `~/.claude/cache-fix-debug.log`. Look for:
|
|
|
132
136
|
- `FALSE RATE LIMIT: synthetic model detected` — client-side false rate limit
|
|
133
137
|
- `GROWTHBOOK FLAGS: {...}` — server-controlled feature flags on first call
|
|
134
138
|
- `PROMPT SIZE: system=N tools=N injected=N (skills=N mcp=N ...)` — per-call prompt size breakdown
|
|
139
|
+
- `CACHE TTL: tier=1h create=N read=N hit=N% (1h=N 5m=N)` — TTL tier and cache hit rate per call
|
|
140
|
+
- `PEAK HOUR: weekday 13:00-19:00 UTC` — Anthropic peak hour throttling active
|
|
135
141
|
- `SKIPPED: resume relocation (not a resume or already correct)` — no fix needed
|
|
136
142
|
|
|
137
143
|
### Prefix diff mode
|
package/package.json
CHANGED
package/preload.mjs
CHANGED
|
@@ -784,20 +784,141 @@ globalThis.fetch = async function (url, options) {
|
|
|
784
784
|
const overage = response.headers.get("anthropic-ratelimit-unified-overage-status");
|
|
785
785
|
|
|
786
786
|
if (h5 || h7d) {
|
|
787
|
-
const quota = {
|
|
788
|
-
timestamp: new Date().toISOString(),
|
|
789
|
-
five_hour: h5 ? { utilization: parseFloat(h5), pct: Math.round(parseFloat(h5) * 100), resets_at: reset5h ? parseInt(reset5h) : null } : null,
|
|
790
|
-
seven_day: h7d ? { utilization: parseFloat(h7d), pct: Math.round(parseFloat(h7d) * 100), resets_at: reset7d ? parseInt(reset7d) : null } : null,
|
|
791
|
-
status: status || null,
|
|
792
|
-
overage_status: overage || null,
|
|
793
|
-
};
|
|
794
787
|
const quotaFile = join(homedir(), ".claude", "quota-status.json");
|
|
788
|
+
let quota = {};
|
|
789
|
+
try { quota = JSON.parse(readFileSync(quotaFile, "utf8")); } catch {}
|
|
790
|
+
quota.timestamp = new Date().toISOString();
|
|
791
|
+
quota.five_hour = h5 ? { utilization: parseFloat(h5), pct: Math.round(parseFloat(h5) * 100), resets_at: reset5h ? parseInt(reset5h) : null } : quota.five_hour;
|
|
792
|
+
quota.seven_day = h7d ? { utilization: parseFloat(h7d), pct: Math.round(parseFloat(h7d) * 100), resets_at: reset7d ? parseInt(reset7d) : null } : quota.seven_day;
|
|
793
|
+
quota.status = status || null;
|
|
794
|
+
quota.overage_status = overage || null;
|
|
795
|
+
|
|
796
|
+
// Peak hour detection — Anthropic applies higher quota drain rate during
|
|
797
|
+
// weekday peak hours: 13:00–19:00 UTC (Mon–Fri).
|
|
798
|
+
// Source: Thariq (Anthropic) via X, 2026-03-26; confirmed by The Register,
|
|
799
|
+
// PCWorld, Piunikaweb. No specific multiplier disclosed.
|
|
800
|
+
const now = new Date();
|
|
801
|
+
const utcHour = now.getUTCHours();
|
|
802
|
+
const utcDay = now.getUTCDay(); // 0=Sun, 6=Sat
|
|
803
|
+
const isPeak = utcDay >= 1 && utcDay <= 5 && utcHour >= 13 && utcHour < 19;
|
|
804
|
+
quota.peak_hour = isPeak;
|
|
805
|
+
|
|
795
806
|
writeFileSync(quotaFile, JSON.stringify(quota, null, 2));
|
|
807
|
+
|
|
808
|
+
if (DEBUG && isPeak) {
|
|
809
|
+
debugLog("PEAK HOUR: weekday 13:00-19:00 UTC — quota drains at elevated rate");
|
|
810
|
+
}
|
|
796
811
|
}
|
|
797
812
|
} catch {
|
|
798
813
|
// Non-critical — don't break the response
|
|
799
814
|
}
|
|
815
|
+
|
|
816
|
+
// Clone response to extract TTL tier from usage (SSE stream)
|
|
817
|
+
try {
|
|
818
|
+
const clone = response.clone();
|
|
819
|
+
drainTTLFromClone(clone).catch(() => {});
|
|
820
|
+
} catch {
|
|
821
|
+
// clone() failure is non-fatal
|
|
822
|
+
}
|
|
800
823
|
}
|
|
801
824
|
|
|
802
825
|
return response;
|
|
803
826
|
};
|
|
827
|
+
|
|
828
|
+
// --------------------------------------------------------------------------
|
|
829
|
+
// TTL tier extraction from SSE response stream
|
|
830
|
+
// --------------------------------------------------------------------------
|
|
831
|
+
|
|
832
|
+
/**
 * Drain a cloned SSE response to extract the cache TTL tier from the usage
 * object. The message_start event carries usage.cache_creation with
 * ephemeral_1h_input_tokens / ephemeral_5m_input_tokens counts, revealing
 * which TTL tier the server applied to the prompt cache.
 *
 * Writes the TTL tier and cache hit-rate stats to
 * ~/.claude/quota-status.json (merged with any existing contents) and emits
 * a `CACHE TTL: ...` debug-log line. File I/O failures are deliberately
 * swallowed — this inspects a clone of the live response and must never
 * disturb the caller's stream.
 *
 * @param {Response} clone - clone of the fetch Response (SSE body expected)
 * @returns {Promise<void>}
 */
async function drainTTLFromClone(clone) {
  if (!clone.body) return;

  const reader = clone.body.getReader();
  const decoder = new TextDecoder();
  let buffer = "";

  // Parse one SSE line; returns true once the TTL info has been captured
  // so the caller can stop reading the clone.
  const processLine = (line) => {
    if (!line.startsWith("data: ") || line === "data: [DONE]") return false;

    let event;
    try {
      event = JSON.parse(line.slice(6));
    } catch {
      // Skip malformed SSE lines (catch is narrowed to the parse so real
      // logic errors are not silently swallowed here).
      return false;
    }
    if (event.type !== "message_start" || !event.message?.usage) return false;

    const u = event.message.usage;
    const cc = u.cache_creation || {};
    const e1h = cc.ephemeral_1h_input_tokens ?? 0;
    const e5m = cc.ephemeral_5m_input_tokens ?? 0;
    const cacheCreate = u.cache_creation_input_tokens ?? 0;
    const cacheRead = u.cache_read_input_tokens ?? 0;

    // Read the existing quota file ONCE: it supplies both the previously
    // recorded tier (for the fully-warm case below) and the base object
    // for the merge, instead of two separate reads.
    const quotaFile = join(homedir(), ".claude", "quota-status.json");
    let quota = {};
    try { quota = JSON.parse(readFileSync(quotaFile, "utf8")); } catch {}

    // Determine TTL tier from which ephemeral bucket received tokens.
    let ttlTier = "unknown";
    if (e1h > 0 && e5m > 0) {
      ttlTier = "mixed";
    } else if (e1h > 0) {
      ttlTier = "1h";
    } else if (e5m > 0) {
      ttlTier = "5m";
    } else if (cacheCreate === 0) {
      // Fully cached — no creation this call, so the response gives no
      // signal about the tier. Preserve the previously recorded tier,
      // defaulting to "1h".
      ttlTier = quota.cache?.ttl_tier || "1h";
    }
    // (e1h === 0 && e5m === 0 && cacheCreate > 0 stays "unknown": creation
    // happened but the ephemeral breakdown is absent.)

    const hitRate = (cacheRead + cacheCreate) > 0
      ? (cacheRead / (cacheRead + cacheCreate) * 100).toFixed(1)
      : "N/A";

    debugLog(
      `CACHE TTL: tier=${ttlTier}`,
      `create=${cacheCreate} read=${cacheRead} hit=${hitRate}%`,
      `(1h=${e1h} 5m=${e5m})`
    );

    // Merge TTL data into quota-status.json (non-fatal on failure).
    try {
      quota.cache = {
        ttl_tier: ttlTier,
        cache_creation: cacheCreate,
        cache_read: cacheRead,
        ephemeral_1h: e1h,
        ephemeral_5m: e5m,
        hit_rate: hitRate,
        timestamp: new Date().toISOString(),
      };
      writeFileSync(quotaFile, JSON.stringify(quota, null, 2));
    } catch {}

    return true;
  };

  try {
    while (true) {
      const { done, value } = await reader.read();
      // Flush the decoder at end-of-stream (decode() with no argument) so a
      // multi-byte character split across the final chunk boundary is not
      // dropped.
      buffer += done
        ? decoder.decode()
        : decoder.decode(value, { stream: true });

      let newlineIdx;
      while ((newlineIdx = buffer.indexOf("\n")) !== -1) {
        const line = buffer.slice(0, newlineIdx).trim();
        buffer = buffer.slice(newlineIdx + 1);
        if (processLine(line)) {
          // Got what we need — stop reading the clone. Await + catch so the
          // cancel promise can never surface as an unhandled rejection.
          await reader.cancel().catch(() => {});
          return;
        }
      }

      if (done) {
        // Process a trailing line that arrived without a final newline
        // (previously this data was silently discarded).
        processLine(buffer.trim());
        break;
      }
    }
  } finally {
    try { reader.releaseLock(); } catch {}
  }
}
|