claude-code-cache-fix 1.5.1 → 1.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/README.zh.md +1 -1
- package/package.json +1 -1
- package/preload.mjs +35 -0
- package/tools/cache-test.sh +249 -0
package/README.md
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
English | [中文](./README.zh.md)
|
|
4
4
|
|
|
5
|
-
Fixes prompt cache regressions in [Claude Code](https://github.com/anthropics/claude-code) that cause **up to 20x cost increase** on resumed sessions, plus monitoring for silent context degradation. Confirmed through v2.1.
|
|
5
|
+
Fixes prompt cache regressions in [Claude Code](https://github.com/anthropics/claude-code) that cause **up to 20x cost increase** on resumed sessions, plus monitoring for silent context degradation. Confirmed through v2.1.97.
|
|
6
6
|
|
|
7
7
|
## The problem
|
|
8
8
|
|
package/README.zh.md
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
[English](./README.md) | 中文
|
|
4
4
|
|
|
5
|
-
修复 [Claude Code](https://github.com/anthropics/claude-code) 中导致恢复会话时**成本增加高达 20 倍**的提示缓存回归问题,同时监控静默上下文降级。已在 v2.1.92 至 v2.1.
|
|
5
|
+
修复 [Claude Code](https://github.com/anthropics/claude-code) 中导致恢复会话时**成本增加高达 20 倍**的提示缓存回归问题,同时监控静默上下文降级。已在 v2.1.92 至 v2.1.97 上验证。
|
|
6
6
|
|
|
7
7
|
## 问题描述
|
|
8
8
|
|
package/package.json
CHANGED
package/preload.mjs
CHANGED
|
@@ -834,6 +834,41 @@ globalThis.fetch = async function (url, options) {
|
|
|
834
834
|
}
|
|
835
835
|
}
|
|
836
836
|
|
|
837
|
+
// Bug 5: 1h TTL enforcement
// The client gates 1h cache TTL behind a GrowthBook allowlist that checks
// querySource against patterns like "repl_main_thread*", "sdk", "auto_mode".
// Interactive CLI sessions may not match any pattern, causing the client to
// send cache_control without ttl (defaulting to 5m server-side).
// The server honors whatever TTL the client requests — so we inject it.
// Discovered by @TigerKay1926 on #42052 using our GrowthBook flag dump.
//
// Only blocks whose cache_control is { type: "ephemeral" } with no ttl are
// rewritten; a ttl the client set explicitly is left untouched. Sets
// `modified` and writes a debug line when at least one block was changed.
{
  let ttlInjected = 0;

  // True when the block carries an ephemeral breakpoint that has no ttl yet.
  // Optional chaining keeps null/primitive entries from throwing.
  const lacksTtl = (block) =>
    block?.cache_control?.type === "ephemeral" && !block.cache_control.ttl;

  // Copy of the block with ttl "1h" merged into its cache_control.
  const withOneHourTtl = (block) => ({
    ...block,
    cache_control: { ...block.cache_control, ttl: "1h" },
  });

  // The Messages API also accepts `system` as a plain string, which has no
  // .map() and cannot carry cache_control — only the array form is rewritten.
  if (Array.isArray(payload.system)) {
    payload.system = payload.system.map((block) => {
      if (!lacksTtl(block)) return block;
      ttlInjected++;
      return withOneHourTtl(block);
    });
  }

  // Also check messages for cache_control blocks (conversation history
  // breakpoints). Done independently of `system`, so requests without a
  // system prompt still get their message breakpoints upgraded.
  if (Array.isArray(payload.messages)) {
    for (const msg of payload.messages) {
      if (!Array.isArray(msg?.content)) continue;
      for (let i = 0; i < msg.content.length; i++) {
        const b = msg.content[i];
        if (lacksTtl(b)) {
          msg.content[i] = withOneHourTtl(b);
          ttlInjected++;
        }
      }
    }
  }

  if (ttlInjected > 0) {
    modified = true;
    debugLog(`APPLIED: 1h TTL injected on ${ttlInjected} cache_control block(s)`);
  }
}
|
|
871
|
+
|
|
837
872
|
if (modified) {
|
|
838
873
|
options = { ...options, body: JSON.stringify(payload) };
|
|
839
874
|
debugLog("Request body rewritten");
|
|
@@ -0,0 +1,249 @@
|
|
|
1
|
+
#!/bin/bash
|
|
2
|
+
# cache-test.sh — Test Claude Code cache behavior with and without interceptor.
|
|
3
|
+
#
|
|
4
|
+
# Runs four scenarios and captures cache stats for each:
|
|
5
|
+
# 1. One-shot WITHOUT interceptor (baseline)
|
|
6
|
+
# 2. One-shot WITH interceptor
|
|
7
|
+
# 3. Multi-turn WITHOUT interceptor (conversation + resume)
|
|
8
|
+
# 4. Multi-turn WITH interceptor (conversation + resume)
|
|
9
|
+
#
|
|
10
|
+
# Outputs a summary report comparing TTL tier, cache hit rates, and
|
|
11
|
+
# whether the interceptor's fixes fired.
|
|
12
|
+
#
|
|
13
|
+
# Usage:
|
|
14
|
+
# ./cache-test.sh [--skip-resume] # --skip-resume skips the resume tests
|
|
15
|
+
#
|
|
16
|
+
# Requires: Claude Code installed via npm, claude-code-cache-fix installed.
|
|
17
|
+
|
|
18
|
+
set -euo pipefail
|
|
19
|
+
|
|
20
|
+
# Paths used by the test run.
# CLAUDE_CLI and PRELOAD may be overridden from the environment for installs
# that do not use the ~/.npm-global prefix; when unset they keep their
# previous default values.
CLAUDE_CLI="${CLAUDE_CLI:-$HOME/.npm-global/lib/node_modules/@anthropic-ai/claude-code/cli.js}"
PRELOAD="${PRELOAD:-$HOME/.claude/cache-fix-preload.mjs}"
QUOTA_FILE="$HOME/.claude/quota-status.json"
USAGE_LOG="$HOME/.claude/usage.jsonl"
DEBUG_LOG="$HOME/.claude/cache-fix-debug.log"
# One report directory per run, named after the start time.
REPORT_DIR="/tmp/cache-test-$(date +%Y%m%d_%H%M%S)"
SKIP_RESUME=false

# Argument parsing. Unknown arguments are still ignored (so existing
# invocations keep working) but are now reported, so that a mistyped
# --skip-resume does not silently run the resume tests.
for arg in "$@"; do
  case "$arg" in
    --skip-resume) SKIP_RESUME=true ;;
    *) echo "WARNING: ignoring unknown argument: $arg" >&2 ;;
  esac
done
|
|
33
|
+
|
|
34
|
+
# Verify prerequisites
|
|
35
|
+
if [ ! -f "$CLAUDE_CLI" ]; then
|
|
36
|
+
echo "ERROR: Claude Code not found at $CLAUDE_CLI" >&2
|
|
37
|
+
echo "Install with: npm install -g @anthropic-ai/claude-code" >&2
|
|
38
|
+
exit 1
|
|
39
|
+
fi
|
|
40
|
+
|
|
41
|
+
if [ ! -f "$PRELOAD" ]; then
|
|
42
|
+
echo "ERROR: cache-fix preload not found at $PRELOAD" >&2
|
|
43
|
+
echo "Install with: npm install -g claude-code-cache-fix" >&2
|
|
44
|
+
exit 1
|
|
45
|
+
fi
|
|
46
|
+
|
|
47
|
+
CC_VERSION=$(node "$CLAUDE_CLI" --version 2>/dev/null | head -1)
|
|
48
|
+
echo "=========================================="
|
|
49
|
+
echo " CACHE BEHAVIOR TEST"
|
|
50
|
+
echo " Claude Code: $CC_VERSION"
|
|
51
|
+
echo " Report dir: $REPORT_DIR"
|
|
52
|
+
echo "=========================================="
|
|
53
|
+
echo ""
|
|
54
|
+
|
|
55
|
+
mkdir -p "$REPORT_DIR"
|
|
56
|
+
|
|
57
|
+
# Helper: print one field of the "cache" object in a quota-status JSON file.
#   $1 = path to the JSON file, $2 = field name, $3 = value printed when the
#   field is absent. Prints "?" when the file cannot be read or parsed.
# The path is handed to Python as an argument rather than pasted into the
# Python source, so paths containing quotes or backslashes are handled.
_quota_cache_field() {
  python3 -c '
import json, sys
path, key, default = sys.argv[1:4]
with open(path) as fh:
    data = json.load(fh)
print(data.get("cache", {}).get(key, default))
' "$1" "$2" "$3" 2>/dev/null || echo "?"
}

# Helper: snapshot cache state from quota-status.json
#   $1 = label; the current $QUOTA_FILE is copied to $REPORT_DIR/<label>.json
#   and a one-line summary of its cache fields is printed. When $QUOTA_FILE
#   does not exist, only a notice is printed and nothing is copied.
snapshot_cache() {
  local label="$1"
  local outfile="$REPORT_DIR/${label}.json"

  if [ ! -f "$QUOTA_FILE" ]; then
    echo " [$label] No quota-status.json found"
    return 0
  fi

  cp "$QUOTA_FILE" "$outfile"

  # Declared separately from the assignments so `local` does not mask the
  # exit status of the command substitutions.
  local tier create read e1h e5m hit
  tier=$(_quota_cache_field "$QUOTA_FILE" ttl_tier '?')
  create=$(_quota_cache_field "$QUOTA_FILE" cache_creation 0)
  read=$(_quota_cache_field "$QUOTA_FILE" cache_read 0)
  e1h=$(_quota_cache_field "$QUOTA_FILE" ephemeral_1h 0)
  e5m=$(_quota_cache_field "$QUOTA_FILE" ephemeral_5m 0)
  hit=$(_quota_cache_field "$QUOTA_FILE" hit_rate '?')
  echo " [$label] TTL=$tier create=$create read=$read 1h=$e1h 5m=$e5m hit=$hit%"
}
|
|
74
|
+
|
|
75
|
+
# Helper: count usage.jsonl entries
#   Prints the number of lines in $USAGE_LOG with spaces stripped from the
#   wc output, or "0" when the log file does not exist.
count_usage() {
  # Guard clause: nothing logged yet.
  [ -f "$USAGE_LOG" ] || { echo "0"; return; }
  wc -l < "$USAGE_LOG" | tr -d ' '
}
|
|
83
|
+
|
|
84
|
+
# Helper: capture debug log entries
#   $1 = label; copies $DEBUG_LOG to $REPORT_DIR/<label>-debug.log.
#   Does nothing (and succeeds) when $DEBUG_LOG does not exist.
snapshot_debug() {
  local tag="$1"
  local dest="$REPORT_DIR/${tag}-debug.log"
  [ ! -f "$DEBUG_LOG" ] || cp "$DEBUG_LOG" "$dest"
}
|
|
91
|
+
|
|
92
|
+
# ─── Test 1: One-shot WITHOUT interceptor ────────────────────────────────────
|
|
93
|
+
|
|
94
|
+
echo "--- Test 1: One-shot WITHOUT interceptor ---"
|
|
95
|
+
rm -f "$DEBUG_LOG"
|
|
96
|
+
usage_before=$(count_usage)
|
|
97
|
+
|
|
98
|
+
# Call 1: cold start
|
|
99
|
+
node "$CLAUDE_CLI" -p "respond with exactly: cache-test-1a" --dangerously-skip-permissions > "$REPORT_DIR/test1a-output.txt" 2>&1
|
|
100
|
+
snapshot_cache "test1a-no-interceptor"
|
|
101
|
+
|
|
102
|
+
# Wait 2 seconds for any async writes
|
|
103
|
+
sleep 2
|
|
104
|
+
|
|
105
|
+
# Call 2: should get cache hit
|
|
106
|
+
node "$CLAUDE_CLI" -p "respond with exactly: cache-test-1b" --dangerously-skip-permissions > "$REPORT_DIR/test1b-output.txt" 2>&1
|
|
107
|
+
snapshot_cache "test1b-no-interceptor"
|
|
108
|
+
|
|
109
|
+
usage_after=$(count_usage)
|
|
110
|
+
echo " Usage entries added: $((usage_after - usage_before))"
|
|
111
|
+
echo ""
|
|
112
|
+
|
|
113
|
+
# ─── Test 2: One-shot WITH interceptor ───────────────────────────────────────
|
|
114
|
+
|
|
115
|
+
echo "--- Test 2: One-shot WITH interceptor ---"
|
|
116
|
+
rm -f "$DEBUG_LOG"
|
|
117
|
+
usage_before=$(count_usage)
|
|
118
|
+
|
|
119
|
+
# Call 1: cold start with interceptor
|
|
120
|
+
CACHE_FIX_DEBUG=1 NODE_OPTIONS="--import $PRELOAD" \
|
|
121
|
+
node "$CLAUDE_CLI" -p "respond with exactly: cache-test-2a" --dangerously-skip-permissions > "$REPORT_DIR/test2a-output.txt" 2>&1
|
|
122
|
+
snapshot_cache "test2a-with-interceptor"
|
|
123
|
+
snapshot_debug "test2a"
|
|
124
|
+
|
|
125
|
+
sleep 2
|
|
126
|
+
|
|
127
|
+
# Call 2: should get cache hit
|
|
128
|
+
CACHE_FIX_DEBUG=1 NODE_OPTIONS="--import $PRELOAD" \
|
|
129
|
+
node "$CLAUDE_CLI" -p "respond with exactly: cache-test-2b" --dangerously-skip-permissions > "$REPORT_DIR/test2b-output.txt" 2>&1
|
|
130
|
+
snapshot_cache "test2b-with-interceptor"
|
|
131
|
+
snapshot_debug "test2b"
|
|
132
|
+
|
|
133
|
+
usage_after=$(count_usage)
|
|
134
|
+
echo " Usage entries added: $((usage_after - usage_before))"
|
|
135
|
+
echo ""
|
|
136
|
+
|
|
137
|
+
# ─── Test 3 & 4: Multi-turn + Resume ────────────────────────────────────────
|
|
138
|
+
|
|
139
|
+
if [ "$SKIP_RESUME" = true ]; then
|
|
140
|
+
echo "--- Tests 3 & 4: SKIPPED (--skip-resume) ---"
|
|
141
|
+
echo ""
|
|
142
|
+
else
|
|
143
|
+
# Test 3: Multi-turn WITHOUT interceptor
|
|
144
|
+
echo "--- Test 3: Multi-turn + Resume WITHOUT interceptor ---"
|
|
145
|
+
rm -f "$DEBUG_LOG"
|
|
146
|
+
usage_before=$(count_usage)
|
|
147
|
+
|
|
148
|
+
# Start a session with a named session, do 2 turns, exit, then resume
|
|
149
|
+
SESSION_NAME="cache-test-no-fix-$$"
|
|
150
|
+
|
|
151
|
+
# Turn 1
|
|
152
|
+
node "$CLAUDE_CLI" -p "respond with exactly: turn1-done" \
|
|
153
|
+
--dangerously-skip-permissions -n "$SESSION_NAME" \
|
|
154
|
+
> "$REPORT_DIR/test3-turn1-output.txt" 2>&1
|
|
155
|
+
snapshot_cache "test3-turn1-no-interceptor"
|
|
156
|
+
|
|
157
|
+
sleep 2
|
|
158
|
+
|
|
159
|
+
# Turn 2 (resume)
|
|
160
|
+
node "$CLAUDE_CLI" -p "respond with exactly: turn2-done" \
|
|
161
|
+
--dangerously-skip-permissions -c \
|
|
162
|
+
> "$REPORT_DIR/test3-turn2-output.txt" 2>&1
|
|
163
|
+
snapshot_cache "test3-turn2-no-interceptor"
|
|
164
|
+
|
|
165
|
+
sleep 2
|
|
166
|
+
|
|
167
|
+
# Turn 3 (second resume — this is where scatter typically shows)
|
|
168
|
+
node "$CLAUDE_CLI" -p "respond with exactly: turn3-done" \
|
|
169
|
+
--dangerously-skip-permissions -c \
|
|
170
|
+
> "$REPORT_DIR/test3-turn3-output.txt" 2>&1
|
|
171
|
+
snapshot_cache "test3-turn3-no-interceptor"
|
|
172
|
+
|
|
173
|
+
usage_after=$(count_usage)
|
|
174
|
+
echo " Usage entries added: $((usage_after - usage_before))"
|
|
175
|
+
echo ""
|
|
176
|
+
|
|
177
|
+
# Test 4: Multi-turn WITH interceptor
|
|
178
|
+
echo "--- Test 4: Multi-turn + Resume WITH interceptor ---"
|
|
179
|
+
rm -f "$DEBUG_LOG"
|
|
180
|
+
usage_before=$(count_usage)
|
|
181
|
+
|
|
182
|
+
SESSION_NAME="cache-test-with-fix-$$"
|
|
183
|
+
|
|
184
|
+
# Turn 1
|
|
185
|
+
CACHE_FIX_DEBUG=1 CACHE_FIX_PREFIXDIFF=1 NODE_OPTIONS="--import $PRELOAD" \
|
|
186
|
+
node "$CLAUDE_CLI" -p "respond with exactly: turn1-done" \
|
|
187
|
+
--dangerously-skip-permissions -n "$SESSION_NAME" \
|
|
188
|
+
> "$REPORT_DIR/test4-turn1-output.txt" 2>&1
|
|
189
|
+
snapshot_cache "test4-turn1-with-interceptor"
|
|
190
|
+
snapshot_debug "test4-turn1"
|
|
191
|
+
|
|
192
|
+
sleep 2
|
|
193
|
+
|
|
194
|
+
# Turn 2 (resume)
|
|
195
|
+
CACHE_FIX_DEBUG=1 CACHE_FIX_PREFIXDIFF=1 NODE_OPTIONS="--import $PRELOAD" \
|
|
196
|
+
node "$CLAUDE_CLI" -p "respond with exactly: turn2-done" \
|
|
197
|
+
--dangerously-skip-permissions -c \
|
|
198
|
+
> "$REPORT_DIR/test4-turn2-output.txt" 2>&1
|
|
199
|
+
snapshot_cache "test4-turn2-with-interceptor"
|
|
200
|
+
snapshot_debug "test4-turn2"
|
|
201
|
+
|
|
202
|
+
sleep 2
|
|
203
|
+
|
|
204
|
+
# Turn 3 (second resume)
|
|
205
|
+
CACHE_FIX_DEBUG=1 CACHE_FIX_PREFIXDIFF=1 NODE_OPTIONS="--import $PRELOAD" \
|
|
206
|
+
node "$CLAUDE_CLI" -p "respond with exactly: turn3-done" \
|
|
207
|
+
--dangerously-skip-permissions -c \
|
|
208
|
+
> "$REPORT_DIR/test4-turn3-output.txt" 2>&1
|
|
209
|
+
snapshot_cache "test4-turn3-with-interceptor"
|
|
210
|
+
snapshot_debug "test4-turn3"
|
|
211
|
+
|
|
212
|
+
usage_after=$(count_usage)
|
|
213
|
+
echo " Usage entries added: $((usage_after - usage_before))"
|
|
214
|
+
echo ""
|
|
215
|
+
fi
|
|
216
|
+
|
|
217
|
+
# ─── Summary ────────────────────────────────────────────────────────────────
|
|
218
|
+
|
|
219
|
+
echo "=========================================="
|
|
220
|
+
echo " SUMMARY"
|
|
221
|
+
echo "=========================================="
|
|
222
|
+
echo ""
|
|
223
|
+
echo "All snapshots saved to: $REPORT_DIR"
|
|
224
|
+
echo ""
|
|
225
|
+
echo "Cache snapshots:"
|
|
226
|
+
# Print one field of the "cache" object in a snapshot file.
#   $1 = snapshot path, $2 = field name, $3 = value printed when the field is
#   absent. Prints "?" when the file cannot be read or parsed. The path is
#   passed as an argument instead of being pasted into the Python source.
summary_cache_field() {
  python3 -c '
import json, sys
path, key, default = sys.argv[1:4]
with open(path) as fh:
    data = json.load(fh)
print(data.get("cache", {}).get(key, default))
' "$1" "$2" "$3" 2>/dev/null || echo "?"
}

# One table row per saved cache snapshot.
for f in "$REPORT_DIR"/*.json; do
  # When no snapshot was saved the glob stays literal; skip it instead of
  # printing a bogus "*" row full of "?".
  [ -f "$f" ] || continue
  label=$(basename "$f" .json)
  tier=$(summary_cache_field "$f" ttl_tier '?')
  create=$(summary_cache_field "$f" cache_creation 0)
  read=$(summary_cache_field "$f" cache_read 0)
  e1h=$(summary_cache_field "$f" ephemeral_1h 0)
  e5m=$(summary_cache_field "$f" ephemeral_5m 0)
  printf " %-40s TTL=%-4s create=%-6s read=%-6s 1h=%-6s 5m=%-6s\n" "$label" "$tier" "$create" "$read" "$e1h" "$e5m"
done
|
|
235
|
+
|
|
236
|
+
# Check for interceptor actions in debug logs
|
|
237
|
+
echo ""
|
|
238
|
+
echo "Interceptor actions:"
|
|
239
|
+
# One line per captured debug log: how many fixes were applied or skipped
# and how many content pins were logged.
for f in "$REPORT_DIR"/*-debug.log; do
  [ -f "$f" ] || continue
  label=$(basename "$f" -debug.log)
  # grep -c already prints "0" when nothing matches, but exits with status 1.
  # `|| echo 0` would therefore append a second "0" line to the count;
  # `|| true` keeps the single count and still satisfies `set -e`.
  applied=$(grep -c "APPLIED:" "$f" 2>/dev/null || true)
  skipped=$(grep -c "SKIPPED:" "$f" 2>/dev/null || true)
  pins=$(grep -c "CONTENT PIN:" "$f" 2>/dev/null || true)
  # The :-0 fallback covers grep failing outright (e.g. unreadable file),
  # in which case it prints nothing.
  echo " $label: ${applied:-0} applied, ${skipped:-0} skipped, ${pins:-0} content pins"
done
|
|
247
|
+
|
|
248
|
+
echo ""
|
|
249
|
+
echo "Done. Review $REPORT_DIR for full details."
|