claude-code-cache-fix 1.5.1 → 1.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  English | [中文](./README.zh.md)
4
4
 
5
- Fixes prompt cache regressions in [Claude Code](https://github.com/anthropics/claude-code) that cause **up to 20x cost increase** on resumed sessions, plus monitoring for silent context degradation. Confirmed through v2.1.92.
5
+ Fixes prompt cache regressions in [Claude Code](https://github.com/anthropics/claude-code) that cause **up to 20x cost increase** on resumed sessions, plus monitoring for silent context degradation. Confirmed through v2.1.97.
6
6
 
7
7
  ## The problem
8
8
 
package/README.zh.md CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  [English](./README.md) | 中文
4
4
 
5
- 修复 [Claude Code](https://github.com/anthropics/claude-code) 中导致恢复会话时**成本增加高达 20 倍**的提示缓存回归问题,同时监控静默上下文降级。已在 v2.1.92 至 v2.1.96 上验证。
5
+ 修复 [Claude Code](https://github.com/anthropics/claude-code) 中导致恢复会话时**成本增加高达 20 倍**的提示缓存回归问题,同时监控静默上下文降级。已在 v2.1.92 至 v2.1.97 上验证。
6
6
 
7
7
  ## 问题描述
8
8
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "claude-code-cache-fix",
3
- "version": "1.5.1",
3
+ "version": "1.6.1",
4
4
  "description": "Fixes prompt cache regression in Claude Code that causes up to 20x cost increase on resumed sessions",
5
5
  "type": "module",
6
6
  "exports": "./preload.mjs",
package/preload.mjs CHANGED
@@ -834,6 +834,41 @@ globalThis.fetch = async function (url, options) {
834
834
  }
835
835
  }
836
836
 
837
+ // Bug 5: 1h TTL enforcement
838
+ // The client gates 1h cache TTL behind a GrowthBook allowlist that checks
839
+ // querySource against patterns like "repl_main_thread*", "sdk", "auto_mode".
840
+ // Interactive CLI sessions may not match any pattern, causing the client to
841
+ // send cache_control without ttl (defaulting to 5m server-side).
842
+ // The server honors whatever TTL the client requests — so we inject it.
843
+ // Discovered by @TigerKay1926 on #42052 using our GrowthBook flag dump.
844
+ if (payload.system) {
845
+ let ttlInjected = 0;
846
+ payload.system = payload.system.map((block) => {
847
+ if (block.cache_control?.type === "ephemeral" && !block.cache_control.ttl) {
848
+ ttlInjected++;
849
+ return { ...block, cache_control: { ...block.cache_control, ttl: "1h" } };
850
+ }
851
+ return block;
852
+ });
853
+ // Also check messages for cache_control blocks (conversation history breakpoints)
854
+ if (payload.messages) {
855
+ for (const msg of payload.messages) {
856
+ if (!Array.isArray(msg.content)) continue;
857
+ for (let i = 0; i < msg.content.length; i++) {
858
+ const b = msg.content[i];
859
+ if (b.cache_control?.type === "ephemeral" && !b.cache_control.ttl) {
860
+ msg.content[i] = { ...b, cache_control: { ...b.cache_control, ttl: "1h" } };
861
+ ttlInjected++;
862
+ }
863
+ }
864
+ }
865
+ }
866
+ if (ttlInjected > 0) {
867
+ modified = true;
868
+ debugLog(`APPLIED: 1h TTL injected on ${ttlInjected} cache_control block(s)`);
869
+ }
870
+ }
871
+
837
872
  if (modified) {
838
873
  options = { ...options, body: JSON.stringify(payload) };
839
874
  debugLog("Request body rewritten");
@@ -927,12 +962,18 @@ globalThis.fetch = async function (url, options) {
927
962
  }
928
963
 
929
964
  // Clone response to extract TTL tier and usage telemetry from SSE stream.
930
- // Pass the model from the request so we can log a complete usage record.
965
+ // Pass the model and quota headers so we can log a complete usage record.
931
966
  try {
932
967
  let reqModel = "unknown";
933
968
  try { reqModel = JSON.parse(options?.body)?.model || "unknown"; } catch {}
969
+ const quotaHeaders = {
970
+ q5h: parseFloat(response.headers.get("anthropic-ratelimit-unified-5h-utilization") || "0"),
971
+ q7d: parseFloat(response.headers.get("anthropic-ratelimit-unified-7d-utilization") || "0"),
972
+ status: response.headers.get("anthropic-ratelimit-unified-status") || null,
973
+ overage: response.headers.get("anthropic-ratelimit-unified-overage-status") || null,
974
+ };
934
975
  const clone = response.clone();
935
- drainTTLFromClone(clone, reqModel).catch(() => {});
976
+ drainTTLFromClone(clone, reqModel, quotaHeaders).catch(() => {});
936
977
  } catch {
937
978
  // clone() failure is non-fatal
938
979
  }
@@ -953,7 +994,7 @@ globalThis.fetch = async function (url, options) {
953
994
  * Writes TTL tier to ~/.claude/quota-status.json (merges with existing data)
954
995
  * and logs to debug log.
955
996
  */
956
- async function drainTTLFromClone(clone, model) {
997
+ async function drainTTLFromClone(clone, model, quotaHeaders) {
957
998
  if (!clone.body) return;
958
999
 
959
1000
  const reader = clone.body.getReader();
@@ -1047,8 +1088,11 @@ async function drainTTLFromClone(clone, model) {
1047
1088
  if (startUsage) {
1048
1089
  try {
1049
1090
  const cc = startUsage.cache_creation || {};
1091
+ const now = new Date();
1092
+ const utcHour = now.getUTCHours();
1093
+ const utcDay = now.getUTCDay();
1050
1094
  const record = {
1051
- timestamp: new Date().toISOString(),
1095
+ timestamp: now.toISOString(),
1052
1096
  model: model || "unknown",
1053
1097
  input_tokens: startUsage.input_tokens ?? 0,
1054
1098
  output_tokens: deltaUsage?.output_tokens ?? 0,
@@ -1057,6 +1101,9 @@ async function drainTTLFromClone(clone, model) {
1057
1101
  ephemeral_1h_input_tokens: cc.ephemeral_1h_input_tokens ?? 0,
1058
1102
  ephemeral_5m_input_tokens: cc.ephemeral_5m_input_tokens ?? 0,
1059
1103
  ttl_tier: ttlTier,
1104
+ q5h_pct: quotaHeaders ? Math.round(quotaHeaders.q5h * 100) : null,
1105
+ q7d_pct: quotaHeaders ? Math.round(quotaHeaders.q7d * 100) : null,
1106
+ peak_hour: utcDay >= 1 && utcDay <= 5 && utcHour >= 13 && utcHour < 19,
1060
1107
  };
1061
1108
  appendFileSync(USAGE_JSONL, JSON.stringify(record) + "\n");
1062
1109
  } catch {
@@ -0,0 +1,249 @@
1
#!/bin/bash
# cache-test.sh — Test Claude Code cache behavior with and without interceptor.
#
# Runs four scenarios and captures cache stats for each:
#   1. One-shot WITHOUT interceptor (baseline)
#   2. One-shot WITH interceptor
#   3. Multi-turn WITHOUT interceptor (conversation + resume)
#   4. Multi-turn WITH interceptor (conversation + resume)
#
# Outputs a summary report comparing TTL tier, cache hit rates, and
# whether the interceptor's fixes fired.
#
# Usage:
#   ./cache-test.sh [--skip-resume]   # --skip-resume skips the resume tests
#
# Environment overrides (each falls back to the default path shown below):
#   CLAUDE_CLI  path to Claude Code's cli.js
#   PRELOAD     path to the cache-fix preload module
#
# Requires: Claude Code installed via npm, claude-code-cache-fix installed,
# node on PATH, and python3 (used to read quota-status.json).

set -euo pipefail

# The defaults match the original hard-coded locations; the overrides exist
# because the npm global prefix is not the same on every machine.
CLAUDE_CLI="${CLAUDE_CLI:-$HOME/.npm-global/lib/node_modules/@anthropic-ai/claude-code/cli.js}"
PRELOAD="${PRELOAD:-$HOME/.claude/cache-fix-preload.mjs}"
QUOTA_FILE="$HOME/.claude/quota-status.json"
USAGE_LOG="$HOME/.claude/usage.jsonl"
DEBUG_LOG="$HOME/.claude/cache-fix-debug.log"
REPORT_DIR="/tmp/cache-test-$(date +%Y%m%d_%H%M%S)"
SKIP_RESUME=false

for arg in "$@"; do
  case "$arg" in
    --skip-resume) SKIP_RESUME=true ;;
    # A mistyped flag used to be dropped silently, which starts the long
    # resume tests the caller meant to skip. Keep going, but say so.
    *) echo "WARNING: ignoring unknown argument: $arg" >&2 ;;
  esac
done

# Verify prerequisites
if ! command -v node >/dev/null 2>&1; then
  echo "ERROR: node not found on PATH" >&2
  exit 1
fi

if [ ! -f "$CLAUDE_CLI" ]; then
  echo "ERROR: Claude Code not found at $CLAUDE_CLI" >&2
  echo "Install with: npm install -g @anthropic-ai/claude-code" >&2
  exit 1
fi

if [ ! -f "$PRELOAD" ]; then
  echo "ERROR: cache-fix preload not found at $PRELOAD" >&2
  echo "Install with: npm install -g claude-code-cache-fix" >&2
  exit 1
fi

# python3 is only used to pretty-print cache fields; the script still runs
# without it, so this is a warning rather than a hard failure.
if ! command -v python3 >/dev/null 2>&1; then
  echo "WARNING: python3 not found; cache fields will be shown as '?'" >&2
fi

# With errexit + pipefail a failing version probe would end the script with
# no message (stderr is discarded), so tolerate it and fall back to "unknown".
CC_VERSION=$(node "$CLAUDE_CLI" --version 2>/dev/null | head -1) || true
CC_VERSION="${CC_VERSION:-unknown}"
echo "=========================================="
echo " CACHE BEHAVIOR TEST"
echo " Claude Code: $CC_VERSION"
echo " Report dir: $REPORT_DIR"
echo "=========================================="
echo ""

mkdir -p "$REPORT_DIR"
56
+
57
# Helper: snapshot cache state from quota-status.json.
#
# Copies $QUOTA_FILE to $REPORT_DIR/<label>.json and prints a one-line summary
# of the "cache" object found in that copy.
#
# Arguments:
#   $1 - label used for the snapshot file name and the summary line
# Outputs:
#   One summary line on stdout. Every field is shown as "?" when the snapshot
#   cannot be parsed (or python3 is unavailable).
snapshot_cache() {
  local label="$1"
  local outfile="$REPORT_DIR/${label}.json"
  local fields tier create cache_read e1h e5m hit

  if [ ! -f "$QUOTA_FILE" ]; then
    echo " [$label] No quota-status.json found"
    return 0
  fi

  cp "$QUOTA_FILE" "$outfile"

  # Parse the saved copy once so every printed field comes from the same file
  # state that was archived. The path travels via argv instead of being pasted
  # into the Python source, so a quote in the path cannot break the program.
  if fields=$(python3 -c '
import json, sys
cache = json.load(open(sys.argv[1])).get("cache", {})
for key, default in (("ttl_tier", "?"), ("cache_creation", 0), ("cache_read", 0),
                     ("ephemeral_1h", 0), ("ephemeral_5m", 0), ("hit_rate", "?")):
    print(cache.get(key, default))
' "$outfile" 2>/dev/null); then
    {
      read -r tier
      read -r create
      read -r cache_read
      read -r e1h
      read -r e5m
      read -r hit
    } <<< "$fields" || true
  else
    tier="?"; create="?"; cache_read="?"; e1h="?"; e5m="?"; hit="?"
  fi

  echo " [$label] TTL=$tier create=$create read=$cache_read 1h=$e1h 5m=$e5m hit=$hit%"
}
74
+
75
# Helper: count usage.jsonl entries.
#
# Prints the number of lines in $USAGE_LOG, or "0" when the file does not
# exist. Spaces are stripped from the wc output before printing.
count_usage() {
  [ -f "$USAGE_LOG" ] || { echo "0"; return; }
  wc -l < "$USAGE_LOG" | tr -d ' '
}
83
+
84
# Helper: capture debug log entries.
#
# Copies $DEBUG_LOG to $REPORT_DIR/<label>-debug.log when the log exists;
# does nothing (and succeeds) when it does not.
#
# Arguments:
#   $1 - label used as the prefix of the copied file's name
snapshot_debug() {
  local tag="$1"
  [ -f "$DEBUG_LOG" ] || return 0
  cp "$DEBUG_LOG" "$REPORT_DIR/${tag}-debug.log"
}
91
+
92
# ─── Test 1: One-shot WITHOUT interceptor ────────────────────────────────────
#
# Two one-shot prompts with no preload: call "a" is the cold start, call "b"
# should get a cache hit. A cache snapshot is taken after each call.

echo "--- Test 1: One-shot WITHOUT interceptor ---"
rm -f "$DEBUG_LOG"
usage_before=$(count_usage)

for call in a b; do
  # Pause 2 seconds between the two calls (not before the first one).
  if [ "$call" = b ]; then
    sleep 2
  fi

  output_file="$REPORT_DIR/test1${call}-output.txt"
  # `set -e` is active: without the `||` branch a single failed CLI call would
  # end the whole run with no message (its output goes to a file) and the
  # summary would never be printed.
  node "$CLAUDE_CLI" -p "respond with exactly: cache-test-1${call}" --dangerously-skip-permissions \
    > "$output_file" 2>&1 \
    || echo " WARNING: claude exited with status $? (see $output_file)" >&2
  snapshot_cache "test1${call}-no-interceptor"
done

usage_after=$(count_usage)
echo " Usage entries added: $((usage_after - usage_before))"
echo ""
112
+
113
# ─── Test 2: One-shot WITH interceptor ───────────────────────────────────────
#
# Same two calls as the baseline, but with the preload injected through
# NODE_OPTIONS and CACHE_FIX_DEBUG=1. The debug log is archived after each
# call in addition to the cache snapshot.

echo "--- Test 2: One-shot WITH interceptor ---"
rm -f "$DEBUG_LOG"
usage_before=$(count_usage)

for call in a b; do
  # Pause 2 seconds between the two calls (not before the first one).
  if [ "$call" = b ]; then
    sleep 2
  fi

  output_file="$REPORT_DIR/test2${call}-output.txt"
  # `set -e` is active: tolerate a failing CLI call so the remaining
  # snapshots and the summary are still produced.
  CACHE_FIX_DEBUG=1 NODE_OPTIONS="--import $PRELOAD" \
    node "$CLAUDE_CLI" -p "respond with exactly: cache-test-2${call}" --dangerously-skip-permissions \
    > "$output_file" 2>&1 \
    || echo " WARNING: claude exited with status $? (see $output_file)" >&2
  snapshot_cache "test2${call}-with-interceptor"
  snapshot_debug "test2${call}"
done

usage_after=$(count_usage)
echo " Usage entries added: $((usage_after - usage_before))"
echo ""
136
+
137
# ─── Test 3 & 4: Multi-turn + Resume ────────────────────────────────────────
#
# Each test runs three turns: turn 1 is started with `-n <session name>`,
# turns 2 and 3 are run with `-c` (the resume steps), with a 2 second pause
# before each resume. Test 3 runs without the preload, Test 4 with it.

if [ "$SKIP_RESUME" = true ]; then
  echo "--- Tests 3 & 4: SKIPPED (--skip-resume) ---"
  echo ""
else
  # Test 3: Multi-turn WITHOUT interceptor
  echo "--- Test 3: Multi-turn + Resume WITHOUT interceptor ---"
  rm -f "$DEBUG_LOG"
  usage_before=$(count_usage)

  SESSION_NAME="cache-test-no-fix-$$"

  for turn in 1 2 3; do
    if [ "$turn" -eq 1 ]; then
      session_args=(-n "$SESSION_NAME")
    else
      # Turn 2 is the first resume; turn 3 is the second resume — this is
      # where scatter typically shows.
      session_args=(-c)
      sleep 2
    fi

    output_file="$REPORT_DIR/test3-turn${turn}-output.txt"
    # `set -e` is active: tolerate a failing CLI call so later turns, Test 4
    # and the summary still run.
    node "$CLAUDE_CLI" -p "respond with exactly: turn${turn}-done" \
      --dangerously-skip-permissions "${session_args[@]}" \
      > "$output_file" 2>&1 \
      || echo " WARNING: claude exited with status $? (see $output_file)" >&2
    snapshot_cache "test3-turn${turn}-no-interceptor"
  done

  usage_after=$(count_usage)
  echo " Usage entries added: $((usage_after - usage_before))"
  echo ""

  # Test 4: Multi-turn WITH interceptor
  echo "--- Test 4: Multi-turn + Resume WITH interceptor ---"
  rm -f "$DEBUG_LOG"
  usage_before=$(count_usage)

  SESSION_NAME="cache-test-with-fix-$$"

  for turn in 1 2 3; do
    if [ "$turn" -eq 1 ]; then
      session_args=(-n "$SESSION_NAME")
    else
      session_args=(-c)
      sleep 2
    fi

    output_file="$REPORT_DIR/test4-turn${turn}-output.txt"
    CACHE_FIX_DEBUG=1 CACHE_FIX_PREFIXDIFF=1 NODE_OPTIONS="--import $PRELOAD" \
      node "$CLAUDE_CLI" -p "respond with exactly: turn${turn}-done" \
      --dangerously-skip-permissions "${session_args[@]}" \
      > "$output_file" 2>&1 \
      || echo " WARNING: claude exited with status $? (see $output_file)" >&2
    snapshot_cache "test4-turn${turn}-with-interceptor"
    snapshot_debug "test4-turn${turn}"
  done

  usage_after=$(count_usage)
  echo " Usage entries added: $((usage_after - usage_before))"
  echo ""
fi
216
+
217
# ─── Summary ────────────────────────────────────────────────────────────────

# Print one table row for a saved snapshot file.
#
# Arguments:
#   $1 - path to a snapshot .json file
# Outputs:
#   A fixed-width row: label (file name without .json) followed by the
#   ttl_tier, cache_creation, cache_read, ephemeral_1h and ephemeral_5m values
#   of the file's "cache" object. Every value is "?" when the file cannot be
#   parsed.
print_snapshot_row() {
  local file="$1"
  local label fields tier create cache_read e1h e5m

  label=$(basename "$file" .json)

  # One interpreter launch per file; the path is passed via argv rather than
  # pasted into the Python source, so quotes in the path are harmless.
  if fields=$(python3 -c '
import json, sys
cache = json.load(open(sys.argv[1])).get("cache", {})
for key, default in (("ttl_tier", "?"), ("cache_creation", 0), ("cache_read", 0),
                     ("ephemeral_1h", 0), ("ephemeral_5m", 0)):
    print(cache.get(key, default))
' "$file" 2>/dev/null); then
    {
      read -r tier
      read -r create
      read -r cache_read
      read -r e1h
      read -r e5m
    } <<< "$fields" || true
  else
    tier="?"; create="?"; cache_read="?"; e1h="?"; e5m="?"
  fi

  printf " %-40s TTL=%-4s create=%-6s read=%-6s 1h=%-6s 5m=%-6s\n" "$label" "$tier" "$create" "$cache_read" "$e1h" "$e5m"
}

echo "=========================================="
echo " SUMMARY"
echo "=========================================="
echo ""
echo "All snapshots saved to: $REPORT_DIR"
echo ""
echo "Cache snapshots:"
for f in "$REPORT_DIR"/*.json; do
  # When nothing matches, the glob is left as a literal pattern; skip it
  # instead of printing a bogus row for a file named "*".
  [ -f "$f" ] || continue
  print_snapshot_row "$f"
done
235
+
236
# Check for interceptor actions in debug logs

# Print how many lines of a file contain a fixed marker string.
#
# Arguments:
#   $1 - text to look for (matched literally)
#   $2 - file to search
# Outputs:
#   A single integer on stdout; "0" when nothing matches or the file cannot
#   be read.
count_matches() {
  local needle="$1"
  local file="$2"
  local n

  # `grep -c` prints "0" AND exits 1 when there is no match, so the common
  # `$(grep -c … || echo 0)` idiom captures two lines ("0" twice). Keep grep's
  # own output and only fall back when it printed nothing at all.
  n=$(grep -cF -- "$needle" "$file" 2>/dev/null) || n="${n:-0}"
  echo "${n:-0}"
}

echo ""
echo "Interceptor actions:"
for f in "$REPORT_DIR"/*-debug.log; do
  [ -f "$f" ] || continue
  label=$(basename "$f" -debug.log)
  applied=$(count_matches "APPLIED:" "$f")
  skipped=$(count_matches "SKIPPED:" "$f")
  pins=$(count_matches "CONTENT PIN:" "$f")
  echo " $label: $applied applied, $skipped skipped, $pins content pins"
done

echo ""
echo "Done. Review $REPORT_DIR for full details."