claude-code-cache-fix 1.3.0 → 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -0
- package/package.json +1 -1
- package/preload.mjs +113 -7
package/README.md
CHANGED
|
@@ -132,6 +132,7 @@ Logs are written to `~/.claude/cache-fix-debug.log`. Look for:
|
|
|
132
132
|
- `FALSE RATE LIMIT: synthetic model detected` — client-side false rate limit
|
|
133
133
|
- `GROWTHBOOK FLAGS: {...}` — server-controlled feature flags on first call
|
|
134
134
|
- `PROMPT SIZE: system=N tools=N injected=N (skills=N mcp=N ...)` — per-call prompt size breakdown
|
|
135
|
+
- `CACHE TTL: tier=1h create=N read=N hit=N% (1h=N 5m=N)` — TTL tier and cache hit rate per call
|
|
135
136
|
- `SKIPPED: resume relocation (not a resume or already correct)` — no fix needed
|
|
136
137
|
|
|
137
138
|
### Prefix diff mode
|
package/package.json
CHANGED
package/preload.mjs
CHANGED
|
@@ -784,20 +784,126 @@ globalThis.fetch = async function (url, options) {
|
|
|
784
784
|
const overage = response.headers.get("anthropic-ratelimit-unified-overage-status");
|
|
785
785
|
|
|
786
786
|
if (h5 || h7d) {
|
|
787
|
-
const quota = {
|
|
788
|
-
timestamp: new Date().toISOString(),
|
|
789
|
-
five_hour: h5 ? { utilization: parseFloat(h5), pct: Math.round(parseFloat(h5) * 100), resets_at: reset5h ? parseInt(reset5h) : null } : null,
|
|
790
|
-
seven_day: h7d ? { utilization: parseFloat(h7d), pct: Math.round(parseFloat(h7d) * 100), resets_at: reset7d ? parseInt(reset7d) : null } : null,
|
|
791
|
-
status: status || null,
|
|
792
|
-
overage_status: overage || null,
|
|
793
|
-
};
|
|
794
787
|
const quotaFile = join(homedir(), ".claude", "quota-status.json");
|
|
788
|
+
let quota = {};
|
|
789
|
+
try { quota = JSON.parse(readFileSync(quotaFile, "utf8")); } catch {}
|
|
790
|
+
quota.timestamp = new Date().toISOString();
|
|
791
|
+
quota.five_hour = h5 ? { utilization: parseFloat(h5), pct: Math.round(parseFloat(h5) * 100), resets_at: reset5h ? parseInt(reset5h) : null } : quota.five_hour;
|
|
792
|
+
quota.seven_day = h7d ? { utilization: parseFloat(h7d), pct: Math.round(parseFloat(h7d) * 100), resets_at: reset7d ? parseInt(reset7d) : null } : quota.seven_day;
|
|
793
|
+
quota.status = status || null;
|
|
794
|
+
quota.overage_status = overage || null;
|
|
795
795
|
writeFileSync(quotaFile, JSON.stringify(quota, null, 2));
|
|
796
796
|
}
|
|
797
797
|
} catch {
|
|
798
798
|
// Non-critical — don't break the response
|
|
799
799
|
}
|
|
800
|
+
|
|
801
|
+
// Clone response to extract TTL tier from usage (SSE stream)
|
|
802
|
+
try {
|
|
803
|
+
const clone = response.clone();
|
|
804
|
+
drainTTLFromClone(clone).catch(() => {});
|
|
805
|
+
} catch {
|
|
806
|
+
// clone() failure is non-fatal
|
|
807
|
+
}
|
|
800
808
|
}
|
|
801
809
|
|
|
802
810
|
return response;
|
|
803
811
|
};
|
|
812
|
+
|
|
813
|
+
// --------------------------------------------------------------------------
|
|
814
|
+
// TTL tier extraction from SSE response stream
|
|
815
|
+
// --------------------------------------------------------------------------
|
|
816
|
+
|
|
817
|
+
/**
 * Drain a cloned SSE response to extract the cache TTL tier from its usage object.
 *
 * The stream is scanned line by line for the first `message_start` event that
 * carries `message.usage`. That usage object holds `cache_creation` with
 * `ephemeral_1h_input_tokens` / `ephemeral_5m_input_tokens`, which reveal the
 * TTL tier the server applied. The result is logged via `debugLog` and merged
 * into `~/.claude/quota-status.json` under the `cache` key (other keys in the
 * file are preserved).
 *
 * Everything here is best-effort: malformed lines are skipped and file errors
 * never propagate to the caller.
 *
 * @param {Response} clone - Clone of the API response; its body is consumed here.
 * @returns {Promise<void>} Resolves once the usage event has been recorded or
 *   the stream has ended without one.
 */
async function drainTTLFromClone(clone) {
  if (!clone.body) return;

  const reader = clone.body.getReader();
  const decoder = new TextDecoder();
  let buffer = "";

  try {
    while (true) {
      const { done, value } = await reader.read();

      // At end-of-stream, flush the decoder and terminate the buffer so a
      // final line that arrived without a trailing newline is still examined.
      buffer += done
        ? `${decoder.decode()}\n`
        : decoder.decode(value, { stream: true });

      let newlineIdx;
      while ((newlineIdx = buffer.indexOf("\n")) !== -1) {
        const line = buffer.slice(0, newlineIdx).trim();
        buffer = buffer.slice(newlineIdx + 1);

        if (recordTTLFromSSELine(line)) {
          // Got what we need — stop reading. Await the cancellation so a
          // rejection cannot surface as an unhandled promise rejection.
          try {
            await reader.cancel();
          } catch {
            // Cancelling an already errored/closed stream is harmless.
          }
          return;
        }
      }

      if (done) break;
    }
  } finally {
    try {
      reader.releaseLock();
    } catch {
      // The lock may already be released; nothing to clean up.
    }
  }
}

/**
 * Inspect one SSE line; if it is a `message_start` event with usage data, log
 * the cache TTL breakdown and merge it into the quota status file.
 *
 * @param {string} line - A single, already trimmed line of the SSE stream.
 * @returns {boolean} `true` when the usage event was found and handled.
 */
function recordTTLFromSSELine(line) {
  // The SSE format makes the space after "data:" optional, so accept both.
  if (!line.startsWith("data:")) return false;
  const payload = line.slice("data:".length).trim();
  if (payload === "" || payload === "[DONE]") return false;

  try {
    const event = JSON.parse(payload);
    if (event?.type !== "message_start" || !event.message?.usage) return false;

    const usage = event.message.usage;
    const breakdown = usage.cache_creation || {};
    const e1h = breakdown.ephemeral_1h_input_tokens ?? 0;
    const e5m = breakdown.ephemeral_5m_input_tokens ?? 0;
    const cacheCreate = usage.cache_creation_input_tokens ?? 0;
    const cacheRead = usage.cache_read_input_tokens ?? 0;

    // Read the status file once: it supplies both the previous tier (needed
    // when the cache is fully warm) and the object the new data is merged into.
    const quotaFile = join(homedir(), ".claude", "quota-status.json");
    const quota = readQuotaStatus(quotaFile);
    const ttlTier = classifyTTLTier(e1h, e5m, cacheCreate, quota.cache?.ttl_tier);

    const cachedTotal = cacheRead + cacheCreate;
    const hitRate = cachedTotal > 0
      ? ((cacheRead / cachedTotal) * 100).toFixed(1)
      : "N/A";

    debugLog(
      `CACHE TTL: tier=${ttlTier}`,
      `create=${cacheCreate} read=${cacheRead} hit=${hitRate}%`,
      `(1h=${e1h} 5m=${e5m})`
    );

    quota.cache = {
      ttl_tier: ttlTier,
      cache_creation: cacheCreate,
      cache_read: cacheRead,
      ephemeral_1h: e1h,
      ephemeral_5m: e5m,
      hit_rate: hitRate,
      timestamp: new Date().toISOString(),
    };
    try {
      writeFileSync(quotaFile, JSON.stringify(quota, null, 2));
    } catch (err) {
      // Non-critical, but leave a trace instead of failing silently.
      debugLog(`CACHE TTL: could not update ${quotaFile}: ${err?.message ?? err}`);
    }
    return true;
  } catch {
    // Skip malformed SSE lines
    return false;
  }
}

/**
 * Decide which TTL tier the server applied from the ephemeral token buckets.
 *
 * @param {number} e1h - Tokens written to the 1-hour ephemeral cache.
 * @param {number} e5m - Tokens written to the 5-minute ephemeral cache.
 * @param {number} cacheCreate - Total cache-creation input tokens.
 * @param {string} [previousTier] - Tier recorded by an earlier call, if any.
 * @returns {string} `"1h"`, `"5m"`, `"mixed"`, or `"unknown"`.
 */
function classifyTTLTier(e1h, e5m, cacheCreate, previousTier) {
  if (e1h > 0 && e5m > 0) return "mixed";
  if (e1h > 0 && e5m === 0) return "1h";
  if (e5m > 0 && e1h === 0) return "5m";
  if (e1h === 0 && e5m === 0 && cacheCreate === 0) {
    // Fully cached — no creation to determine tier. Preserve previous.
    return previousTier || "1h";
  }
  return "unknown";
}

/**
 * Load the quota status file as a plain object.
 *
 * @param {string} quotaFile - Absolute path of `quota-status.json`.
 * @returns {object} The parsed content, or `{}` when the file is missing,
 *   unreadable, or does not contain a JSON object.
 */
function readQuotaStatus(quotaFile) {
  try {
    const parsed = JSON.parse(readFileSync(quotaFile, "utf8"));
    const isRecord =
      parsed !== null && typeof parsed === "object" && !Array.isArray(parsed);
    // Valid JSON that is not an object (e.g. `null`) cannot take a `cache`
    // key; start over from an empty record rather than failing on every call.
    return isRecord ? parsed : {};
  } catch {
    // Missing file on first run or corrupt JSON: start from an empty record.
    return {};
  }
}
|