claude-code-cache-fix 3.0.0 → 3.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +121 -428
- package/package.json +2 -2
- package/preload.mjs +2 -2
- package/proxy/pipeline.mjs +2 -1
package/README.md
CHANGED
|
@@ -4,199 +4,158 @@
|
|
|
4
4
|
|
|
5
5
|
English | [中文](./README.zh.md) | [한국어](./README.ko.md) | [Português](./docs/guia-pt-br.md)
|
|
6
6
|
|
|
7
|
-
|
|
7
|
+
Cache optimization proxy for [Claude Code](https://github.com/anthropics/claude-code). Fixes prompt cache bugs that cause excessive quota burn, stabilizes the request prefix, and monitors for silent regressions. Works with all CC versions including the v2.1.113+ Bun binary.
|
|
8
8
|
|
|
9
|
-
> **
|
|
9
|
+
> **v3.0.1** — Local HTTP proxy with 7 hot-reloadable extensions. A/B tested on v2.1.117: **95.5% cache hit rate through proxy vs 82.3% direct** on first warm turn. [Full release notes →](https://github.com/cnighswonger/claude-code-cache-fix/releases/tag/v3.0.0)
|
|
10
10
|
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
> **This interceptor patches `globalThis.fetch`.** By design, it has full read/write access to all API requests and responses in the Claude Code process. This is inherent to the approach — any fetch interceptor, proxy, or gateway has this position.
|
|
14
|
-
|
|
15
|
-
**What it does:** Modifies outgoing request structure (block order, fingerprint, TTL, git-status) to fix cache bugs. Reads response headers and SSE usage data for monitoring.
|
|
16
|
-
|
|
17
|
-
**What it does NOT do:** No network calls from the interceptor. All telemetry is written to local files under `~/.claude/`. No data leaves your machine unless you explicitly opt in to [claude-code-meter](https://github.com/cnighswonger/claude-code-meter) sharing (separate package, requires interactive consent).
|
|
11
|
+
> **Opus 4.7 advisory:** Metered data shows 4.7 burns Q5h quota at **~2.4x the rate of 4.6** for equivalent visible token counts ([independently confirmed by @ArkNill](https://github.com/ArkNill/claude-code-hidden-problem-analysis/blob/main/16_OPUS-47-ADVISORY.md)). Two factors: a new tokenizer (up to 35% more tokens, [documented](https://platform.claude.com/docs/en/about-claude/models/whats-new-claude-4-7)) and adaptive thinking overhead (~105%, not documented in usage response). The Q5h impact compounds into **Q7d** — the weekly quota ceiling that most heavy users will hit first. Workaround: `CLAUDE_CODE_DISABLE_ADAPTIVE_THINKING=1` reduces burn by ~3.3x but may reduce quality on complex tasks. See [Discussion #25](https://github.com/cnighswonger/claude-code-cache-fix/discussions/25) (initial observation) and [Discussion #42](https://github.com/cnighswonger/claude-code-cache-fix/discussions/42) (controlled A/B data + Q7d analysis).
|
|
18
12
|
|
|
19
|
-
|
|
13
|
+
## Quick Start: Proxy (recommended)
|
|
20
14
|
|
|
21
|
-
|
|
15
|
+
The proxy works with any CC version — Node.js or Bun binary. It sits between Claude Code and the Anthropic API, applying cache fixes as hot-reloadable extensions.
|
|
22
16
|
|
|
23
|
-
|
|
17
|
+
```bash
|
|
18
|
+
# Install
|
|
19
|
+
npm install -g claude-code-cache-fix
|
|
24
20
|
|
|
25
|
-
|
|
21
|
+
# Start the proxy (runs on localhost:9801)
|
|
22
|
+
node "$(npm root -g)/claude-code-cache-fix/proxy/server.mjs" &
|
|
26
23
|
|
|
27
|
-
|
|
24
|
+
# Launch Claude Code through it
|
|
25
|
+
ANTHROPIC_BASE_URL=http://127.0.0.1:9801 claude
|
|
26
|
+
```
|
|
28
27
|
|
|
29
|
-
|
|
28
|
+
That's it. The proxy applies all 7 cache-fix extensions automatically. No wrapper scripts, no `NODE_OPTIONS`, no preload.
|
|
30
29
|
|
|
31
|
-
|
|
30
|
+
### What the proxy does
|
|
32
31
|
|
|
33
|
-
|
|
32
|
+
On every `/v1/messages` request, 7 extensions run in order:
|
|
34
33
|
|
|
35
|
-
|
|
34
|
+
| Extension | What it fixes |
|
|
35
|
+
|-----------|--------------|
|
|
36
|
+
| `fingerprint-strip` | Removes unstable cc_version fingerprint from system prompt |
|
|
37
|
+
| `sort-stabilization` | Deterministic ordering of tool and MCP definitions |
|
|
38
|
+
| `ttl-management` | Detects server TTL tier, injects correct cache_control markers |
|
|
39
|
+
| `identity-normalization` | Normalizes message identity fields for prefix stability |
|
|
40
|
+
| `fresh-session-sort` | Fixes non-deterministic ordering on first turn |
|
|
41
|
+
| `cache-control-normalize` | Normalizes cache_control markers across messages |
|
|
42
|
+
| `cache-telemetry` | Extracts cache stats from response headers → `~/.claude/quota-status.json` |
|
|
36
43
|
|
|
37
|
-
|
|
44
|
+
Extensions are hot-reloadable — add, remove, or modify `.mjs` files in `proxy/extensions/` and changes apply to the next request without restarting. Configuration in `proxy/extensions.json`.
|
|
38
45
|
|
|
39
|
-
|
|
46
|
+
### Running as a service
|
|
40
47
|
|
|
41
|
-
|
|
42
|
-
npm install -g claude-code-cache-fix
|
|
43
|
-
```
|
|
48
|
+
**Linux (systemd — recommended):**
|
|
44
49
|
|
|
45
|
-
|
|
50
|
+
Create `~/.config/systemd/user/cache-fix-proxy.service`:
|
|
46
51
|
|
|
47
|
-
|
|
52
|
+
```ini
|
|
53
|
+
[Unit]
|
|
54
|
+
Description=Claude Code Cache Fix Proxy (v3.x)
|
|
55
|
+
After=network.target
|
|
48
56
|
|
|
49
|
-
|
|
57
|
+
[Service]
|
|
58
|
+
Type=simple
|
|
59
|
+
ExecStart=/usr/local/bin/node /path/to/claude-code-cache-fix/proxy/server.mjs
|
|
60
|
+
Restart=on-failure
|
|
61
|
+
RestartSec=5
|
|
62
|
+
Environment=CACHE_FIX_PROXY_PORT=9801
|
|
50
63
|
|
|
51
|
-
|
|
64
|
+
[Install]
|
|
65
|
+
WantedBy=default.target
|
|
66
|
+
```
|
|
52
67
|
|
|
53
68
|
```bash
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
CLAUDE_NPM_CLI="$NPM_GLOBAL_ROOT/@anthropic-ai/claude-code/cli.js"
|
|
58
|
-
CACHE_FIX="$NPM_GLOBAL_ROOT/claude-code-cache-fix/preload.mjs"
|
|
69
|
+
systemctl --user daemon-reload
|
|
70
|
+
systemctl --user enable --now cache-fix-proxy
|
|
59
71
|
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
echo "Install with: npm install -g @anthropic-ai/claude-code" >&2
|
|
63
|
-
exit 1
|
|
64
|
-
fi
|
|
65
|
-
|
|
66
|
-
if [ ! -f "$CACHE_FIX" ]; then
|
|
67
|
-
echo "Error: claude-code-cache-fix not found at $CACHE_FIX" >&2
|
|
68
|
-
echo "Install with: npm install -g claude-code-cache-fix" >&2
|
|
69
|
-
exit 1
|
|
70
|
-
fi
|
|
71
|
-
|
|
72
|
-
exec env NODE_OPTIONS="--import $CACHE_FIX" node "$CLAUDE_NPM_CLI" "$@"
|
|
72
|
+
# Optional: start on boot (before login)
|
|
73
|
+
sudo loginctl enable-linger $USER
|
|
73
74
|
```
|
|
74
75
|
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
76
|
+
A `cache-fix-proxy install-service` subcommand is planned for v3.1.0 ([#48](https://github.com/cnighswonger/claude-code-cache-fix/issues/48)).
|
|
77
|
+
|
|
78
|
+
**Fallback (any OS):**
|
|
78
79
|
|
|
79
|
-
Adjust `CLAUDE_NPM_CLI` if your npm global prefix differs. Find it with:
|
|
80
80
|
```bash
|
|
81
|
-
npm root -g
|
|
81
|
+
nohup node "$(npm root -g)/claude-code-cache-fix/proxy/server.mjs" > /tmp/cache-fix-proxy.log 2>&1 &
|
|
82
|
+
echo 'export ANTHROPIC_BASE_URL=http://127.0.0.1:9801' >> ~/.bashrc
|
|
82
83
|
```
|
|
83
84
|
|
|
84
|
-
###
|
|
85
|
+
### Health check
|
|
85
86
|
|
|
86
87
|
```bash
|
|
87
|
-
|
|
88
|
+
curl http://127.0.0.1:9801/health
|
|
89
|
+
# {"status":"ok"}
|
|
88
90
|
```
|
|
89
91
|
|
|
90
|
-
|
|
92
|
+
## Quick Start: Preload (CC v2.1.112 and earlier)
|
|
93
|
+
|
|
94
|
+
If you're on a Node.js-based CC version (v2.1.112 or earlier), the preload interceptor works without a proxy:
|
|
91
95
|
|
|
92
96
|
```bash
|
|
97
|
+
npm install -g claude-code-cache-fix
|
|
93
98
|
NODE_OPTIONS="--import claude-code-cache-fix" claude
|
|
94
99
|
```
|
|
95
100
|
|
|
96
|
-
> **Note
|
|
97
|
-
|
|
98
|
-
### Windows users
|
|
99
|
-
|
|
100
|
-
On Windows, `NODE_OPTIONS="--import ..."` doesn't work the same way as on Linux/macOS. Use the included `claude-fixed.bat` wrapper instead:
|
|
101
|
-
|
|
102
|
-
1. After installing both packages globally:
|
|
103
|
-
```bat
|
|
104
|
-
npm install -g claude-code-cache-fix
|
|
105
|
-
npm install -g @anthropic-ai/claude-code
|
|
106
|
-
```
|
|
101
|
+
> **Note:** The preload does NOT work on CC v2.1.113+ (Bun binary). Use the proxy above.
|
|
107
102
|
|
|
108
|
-
|
|
109
|
-
```bat
|
|
110
|
-
copy "%NPM_ROOT%\claude-code-cache-fix\claude-fixed.bat" C:\Users\%USERNAME%\bin\
|
|
111
|
-
```
|
|
112
|
-
Or find the file manually at your npm global root (run `npm root -g` to locate it).
|
|
113
|
-
|
|
114
|
-
3. Run Claude Code with the interceptor active:
|
|
115
|
-
```bat
|
|
116
|
-
claude-fixed [any claude args...]
|
|
117
|
-
```
|
|
118
|
-
|
|
119
|
-
The wrapper dynamically resolves your npm global root, constructs a `file:///` URL for the preload module (converting backslashes to forward slashes for Node.js), and launches Claude Code with the interceptor loaded. All environment variables (`CACHE_FIX_DEBUG`, `CACHE_FIX_IMAGE_KEEP_LAST`, etc.) work the same as on Linux/macOS.
|
|
120
|
-
|
|
121
|
-
Credit: [@TomTheMenace](https://github.com/anthropics/claude-code/issues/38335) contributed the Windows wrapper and validated the interceptor across a 7.5-hour, 536-call Opus 4.6 session on Windows — 98.4% cache hit rate, 81% of calls had fingerprint instability that the interceptor corrected.
|
|
103
|
+
See [docs/preload-setup.md](docs/preload-setup.md) for wrapper scripts, shell aliases, Windows instructions, and VS Code preload-mode integration.
|
|
122
104
|
|
|
123
105
|
## VS Code Extension
|
|
124
106
|
|
|
125
|
-
|
|
107
|
+
The [VS Code extension](https://github.com/cnighswonger/claude-code-cache-fix-vscode) (v0.5.0) supports both proxy and preload modes:
|
|
126
108
|
|
|
127
|
-
|
|
109
|
+
**Proxy mode (recommended):**
|
|
110
|
+
1. Start the proxy (see above)
|
|
111
|
+
2. In VS Code command palette: **Claude Code Cache Fix: Enable Proxy Mode**
|
|
112
|
+
3. Restart any active Claude Code session
|
|
128
113
|
|
|
129
|
-
|
|
114
|
+
**Preload mode (CC ≤v2.1.112):**
|
|
115
|
+
1. `npm install -g claude-code-cache-fix`
|
|
130
116
|
2. Download the VSIX from [GitHub Releases](https://github.com/cnighswonger/claude-code-cache-fix-vscode/releases/latest)
|
|
131
|
-
3. Install: `code --install-extension claude-code-cache-fix-0.
|
|
132
|
-
|
|
133
|
-
4. Restart any active Claude Code session
|
|
117
|
+
3. Install: `code --install-extension claude-code-cache-fix-0.5.0.vsix`
|
|
118
|
+
4. Command palette: **Claude Code Cache Fix: Enable**
|
|
134
119
|
|
|
135
|
-
|
|
120
|
+
For manual VS Code wrapper setup (without the VSIX), see [docs/preload-setup.md](docs/preload-setup.md#vs-code-preload-mode).
|
|
136
121
|
|
|
137
|
-
|
|
138
|
-
- **Claude Code Cache Fix: Enable** / **Disable** / **Show Status**
|
|
139
|
-
|
|
140
|
-
### Option B: Manual wrapper (if you prefer not to install the VSIX)
|
|
122
|
+
## Security model
|
|
141
123
|
|
|
142
|
-
The
|
|
124
|
+
> **The proxy and interceptor have full read/write access to API requests and responses.** This is inherent to the approach — any fetch interceptor, proxy, or gateway has this position.
|
|
143
125
|
|
|
144
|
-
**
|
|
126
|
+
**What it does:** Modifies outgoing request structure (block order, fingerprint, TTL, git-status) to fix cache bugs. Reads response headers and SSE usage data for monitoring.
|
|
145
127
|
|
|
146
|
-
|
|
147
|
-
#!/bin/bash
|
|
148
|
-
NPM_ROOT="$(npm root -g 2>/dev/null)"
|
|
149
|
-
PRELOAD="$NPM_ROOT/claude-code-cache-fix/preload.mjs"
|
|
150
|
-
shift # VS Code passes the original claude path as $1
|
|
151
|
-
export NODE_OPTIONS="--import $PRELOAD"
|
|
152
|
-
exec node "$NPM_ROOT/@anthropic-ai/claude-code/cli.js" "$@"
|
|
153
|
-
```
|
|
128
|
+
**What it does NOT do:** No network calls from the proxy or interceptor. All telemetry is written to local files under `~/.claude/`. No data leaves your machine unless you explicitly opt in to [claude-code-meter](https://github.com/cnighswonger/claude-code-meter) sharing (separate package, requires interactive consent).
|
|
154
129
|
|
|
155
|
-
|
|
156
|
-
chmod +x ~/bin/claude-vscode-wrapper
|
|
157
|
-
```
|
|
130
|
+
**Supply chain:** Proxy mode: 7 small extension modules in `proxy/extensions/` (each under 200 lines). Preload mode: single unminified file (`preload.mjs`, ~1,700 lines). One dev dependency (`zod` for schema validation in tests only). Review before installing. npm provenance links each published version to its source commit.
|
|
158
131
|
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
```json
|
|
162
|
-
{
|
|
163
|
-
"claudeCode.claudeProcessWrapper": "/home/YOUR_USERNAME/bin/claude-vscode-wrapper"
|
|
164
|
-
}
|
|
165
|
-
```
|
|
132
|
+
**Independent audit:** [Assessed as "LEGITIMATE TOOL"](https://github.com/anthropics/claude-code/issues/38335#issuecomment-4244413605) by @TheAuditorTool (2026-04-14).
|
|
166
133
|
|
|
167
|
-
|
|
134
|
+
## The problem
|
|
168
135
|
|
|
169
|
-
|
|
170
|
-
cl tools\claude-vscode-wrapper.c /Fe:claude-vscode-wrapper.exe
|
|
171
|
-
```
|
|
136
|
+
When you use `--resume` or `/resume` in Claude Code, the prompt cache breaks silently. Instead of reading cached tokens (cheap), the API rebuilds them from scratch on every turn (expensive). A session that should cost ~$0.50/hour can burn through $5–10/hour with no visible indication anything is wrong.
|
|
172
137
|
|
|
173
|
-
|
|
138
|
+
Three bugs cause this:
|
|
174
139
|
|
|
175
|
-
|
|
176
|
-
{
|
|
177
|
-
"claudeCode.claudeProcessWrapper": "C:\\path\\to\\claude-vscode-wrapper.exe"
|
|
178
|
-
}
|
|
179
|
-
```
|
|
140
|
+
1. **Partial block scatter** — Attachment blocks (skills listing, MCP servers, deferred tools, hooks) are supposed to live in `messages[0]`. On resume, some or all drift to later messages, changing the cache prefix.
|
|
180
141
|
|
|
181
|
-
|
|
142
|
+
2. **Fingerprint instability** — The `cc_version` fingerprint (e.g. `2.1.92.a3f`) is computed from `messages[0]` content including meta/attachment blocks. When those blocks shift, the fingerprint changes, the system prompt changes, and the cache busts.
|
|
182
143
|
|
|
183
|
-
|
|
144
|
+
3. **Non-deterministic tool ordering** — Tool definitions can arrive in different orders between turns, changing request bytes and invalidating the cache key.
|
|
184
145
|
|
|
185
|
-
|
|
146
|
+
Additionally, images read via the Read tool persist as base64 in conversation history and are sent on every subsequent API call, compounding token costs silently.
|
|
186
147
|
|
|
187
148
|
## How it works
|
|
188
149
|
|
|
189
|
-
|
|
150
|
+
**Proxy mode** (v3.0.0+): An HTTP server on `localhost:9801` intercepts `POST /v1/messages` requests. Seven extension modules process each request through a pipeline — normalizing block order, stripping fingerprints, stabilizing tool sort, managing TTL markers. Extensions are hot-reloadable `.mjs` files configured in `proxy/extensions.json`. All other traffic passes through untouched.
|
|
190
151
|
|
|
191
|
-
|
|
192
|
-
2. **Sorts tool definitions** alphabetically by name for deterministic ordering
|
|
193
|
-
3. **Recomputes the cc_version fingerprint** from the real user message text instead of meta/attachment content
|
|
152
|
+
**Preload mode** (v2.x): A Node.js `--import` module that patches `globalThis.fetch` before Claude Code makes API calls. Applies the same fixes inline — scans user messages for relocated blocks, sorts tools, recomputes fingerprints, injects TTL markers.
|
|
194
153
|
|
|
195
|
-
|
|
154
|
+
Both modes are idempotent — if nothing needs fixing, the request passes through unmodified. Neither mode modifies your conversation; they only normalize the request structure before it hits the API.
|
|
196
155
|
|
|
197
|
-
## Graduating from
|
|
156
|
+
## Graduating from fixes
|
|
198
157
|
|
|
199
|
-
The interceptor serves three purposes with different lifecycles:
|
|
158
|
+
The package serves three purposes with different lifecycles:
|
|
200
159
|
|
|
201
160
|
| Purpose | Examples | When to disable |
|
|
202
161
|
|---------|----------|-----------------|
|
|
@@ -204,7 +163,7 @@ The interceptor serves three purposes with different lifecycles:
|
|
|
204
163
|
| **Monitoring** | Quota tracking, microcompact detection, GrowthBook flags | Keep permanently — these detect future regressions |
|
|
205
164
|
| **Optimizations** | Image stripping, output efficiency rewrite | Keep as long as they help your workflow |
|
|
206
165
|
|
|
207
|
-
### Health status
|
|
166
|
+
### Health status (preload mode)
|
|
208
167
|
|
|
209
168
|
On first API call, the interceptor logs a health status line (requires `CACHE_FIX_DEBUG=1`):
|
|
210
169
|
|
|
@@ -212,25 +171,14 @@ On first API call, the interceptor logs a health status line (requires `CACHE_FI
|
|
|
212
171
|
cache-fix health: relocate=active(2h ago) fingerprint=dormant(5 clean sessions) tool_sort=active ttl=active identity=waiting
|
|
213
172
|
```
|
|
214
173
|
|
|
215
|
-
Status meanings:
|
|
216
174
|
- **active(Xh ago)** — fix was applied recently
|
|
217
|
-
- **dormant(N clean sessions)** — bug not detected in N
|
|
218
|
-
- **safety-blocked(Nx)** — round-trip verification failed;
|
|
175
|
+
- **dormant(N clean sessions)** — bug not detected in N sessions; CC may have fixed it
|
|
176
|
+
- **safety-blocked(Nx)** — round-trip verification failed; fix auto-disabled
|
|
219
177
|
- **waiting** — fix hasn't been triggered yet
|
|
220
178
|
|
|
221
|
-
When a fix shows `dormant`, you can safely disable it:
|
|
222
|
-
```bash
|
|
223
|
-
export CACHE_FIX_SKIP_RELOCATE=1 # example
|
|
224
|
-
```
|
|
225
|
-
|
|
226
|
-
To disable all fixes but keep monitoring:
|
|
227
|
-
```bash
|
|
228
|
-
export CACHE_FIX_DISABLED=1
|
|
229
|
-
```
|
|
230
|
-
|
|
231
179
|
### Regression detection
|
|
232
180
|
|
|
233
|
-
If cache_read ratio drops below 50% across 5+ calls after disabling fixes
|
|
181
|
+
If the cache_read ratio drops below 50% across 5+ calls after disabling fixes:
|
|
234
182
|
```
|
|
235
183
|
REGRESSION WARNING: cache_read ratio averaged 12% across last 5 calls.
|
|
236
184
|
Fixes are disabled — consider re-enabling to recover cache performance.
|
|
@@ -240,10 +188,7 @@ Fixes are disabled — consider re-enabling to recover cache performance.
|
|
|
240
188
|
|
|
241
189
|
### Fingerprint round-trip verification
|
|
242
190
|
|
|
243
|
-
Before rewriting the `cc_version` fingerprint, the interceptor verifies that its
|
|
244
|
-
hardcoded salt and character indices reproduce the fingerprint Claude Code sent.
|
|
245
|
-
If verification fails (CC changed its algorithm), the rewrite is skipped automatically.
|
|
246
|
-
This ensures the interceptor can never make cache performance *worse* than stock CC.
|
|
191
|
+
Before rewriting the `cc_version` fingerprint, the interceptor verifies that its hardcoded salt and character indices reproduce the fingerprint Claude Code sent. If verification fails (CC changed its algorithm), the rewrite is skipped automatically. This ensures the interceptor can never make cache performance *worse* than stock CC.
|
|
247
192
|
|
|
248
193
|
### Fail-safe design
|
|
249
194
|
|
|
@@ -257,21 +202,18 @@ The interceptor can only *help* or *do nothing*. It cannot make things worse.
|
|
|
257
202
|
|
|
258
203
|
## Status line — quota warnings in real time
|
|
259
204
|
|
|
260
|
-
|
|
205
|
+
Both proxy and preload modes write quota state to `~/.claude/quota-status.json` on every API call. The included `tools/quota-statusline.sh` script displays a live status line showing:
|
|
261
206
|
|
|
262
207
|
- **Q5h %** with burn rate (%/min)
|
|
263
208
|
- **Q7d %** with burn rate (%/hr)
|
|
264
|
-
- **TTL tier** —
|
|
209
|
+
- **TTL tier** — `TTL:1h` when healthy, **`TTL:5m` in red when the server has downgraded you** (typically at Q5h ≥ 100%)
|
|
265
210
|
- **PEAK** in yellow during weekday peak hours (13:00–19:00 UTC)
|
|
266
211
|
- **Cache hit rate %**
|
|
267
212
|
- **OVERAGE** flag when active
|
|
268
213
|
|
|
269
214
|
### Setup
|
|
270
215
|
|
|
271
|
-
Copy the script and configure Claude Code to use it:
|
|
272
|
-
|
|
273
216
|
```bash
|
|
274
|
-
# Copy from the npm package to Claude Code's hooks directory
|
|
275
217
|
mkdir -p ~/.claude/hooks
|
|
276
218
|
cp "$(npm root -g)/claude-code-cache-fix/tools/quota-statusline.sh" ~/.claude/hooks/
|
|
277
219
|
chmod +x ~/.claude/hooks/quota-statusline.sh
|
|
@@ -288,305 +230,57 @@ Add to `~/.claude/settings.json`:
|
|
|
288
230
|
}
|
|
289
231
|
```
|
|
290
232
|
|
|
291
|
-
### Recommended: disable git-status injection
|
|
292
|
-
|
|
293
|
-
Claude Code injects live `git status` output into the system prompt on every call. Any file edit changes the git status, which changes the system prompt, which busts the entire prefix cache. Disabling this saves ~1,800 tokens per call and fully stabilizes the system prompt across file edits:
|
|
294
|
-
|
|
295
|
-
```bash
|
|
296
|
-
export CLAUDE_CODE_DISABLE_GIT_INSTRUCTIONS=1
|
|
297
|
-
```
|
|
298
|
-
|
|
299
|
-
Or add `"includeGitInstructions": false` to `~/.claude/settings.json`. Claude Code can still run `git status` via the Bash tool when it needs git context — it just won't pre-inject it into every system prompt.
|
|
300
|
-
|
|
301
|
-
The flag also shrinks the Bash tool description by ~6,364 chars (the Bash tool includes git-related instructions that are stripped when the flag is set), for a total prefix savings of ~7,180 chars (~1,800 tokens) per call.
|
|
302
|
-
|
|
303
|
-
Community-validated by [@wadabum](https://github.com/cnighswonger/claude-code-cache-fix/issues/11): 18-token cache creation across git state changes (vs thousands without the flag). See [#11](https://github.com/cnighswonger/claude-code-cache-fix/issues/11) for the full telemetry comparison.
|
|
304
|
-
|
|
305
|
-
**Note:** this flag does not address the `"Primary working directory"` line in the system prompt, which changes per git worktree. A v1.9.0 interceptor fix to strip/normalize both is planned ([#11](https://github.com/cnighswonger/claude-code-cache-fix/issues/11)).
|
|
306
|
-
|
|
307
233
|
### Why the status line matters
|
|
308
234
|
|
|
309
|
-
When the server downgrades your TTL to 5m (
|
|
310
|
-
|
|
311
|
-
## Image stripping
|
|
312
|
-
|
|
313
|
-
Images read via the Read tool are encoded as base64 and stored in `tool_result` blocks in conversation history. They ride along on **every subsequent API call** until compaction. A single 500KB image costs ~62,500 tokens per turn on Opus 4.6, and potentially **~85,000+ tokens on Opus 4.7** due to the new tokenizer (up to 35% inflation) and high-res image support (2576px max, up from 1568px). Image stripping is strongly recommended on 4.7.
|
|
314
|
-
|
|
315
|
-
Enable image stripping to remove old images from tool results:
|
|
316
|
-
|
|
317
|
-
```bash
|
|
318
|
-
export CACHE_FIX_IMAGE_KEEP_LAST=3
|
|
319
|
-
```
|
|
320
|
-
|
|
321
|
-
This keeps images in the last 3 user messages and replaces older ones with a text placeholder. Only targets images inside `tool_result` blocks (Read tool output) — user-pasted images are never touched. Files remain on disk for re-reading if needed.
|
|
322
|
-
|
|
323
|
-
Set to `0` (default) to disable.
|
|
324
|
-
|
|
325
|
-
## System prompt rewrite (optional)
|
|
326
|
-
|
|
327
|
-
The interceptor can also rewrite Claude Code's `# Output efficiency` system-prompt section before the request is sent.
|
|
328
|
-
|
|
329
|
-
This feature is **optional** and **disabled by default**. If `CACHE_FIX_OUTPUT_EFFICIENCY_REPLACEMENT` is unset, nothing is changed.
|
|
330
|
-
|
|
331
|
-
Enable it by setting a replacement text:
|
|
332
|
-
|
|
333
|
-
```bash
|
|
334
|
-
export CACHE_FIX_OUTPUT_EFFICIENCY_REPLACEMENT=$'# Output efficiency\n\n...'
|
|
335
|
-
```
|
|
336
|
-
|
|
337
|
-
The rewrite is intentionally narrow:
|
|
338
|
-
|
|
339
|
-
- Only Claude Code's `# Output efficiency` section is replaced
|
|
340
|
-
- Other system prompt sections are preserved
|
|
341
|
-
- Existing system block structure and fields such as `cache_control` are preserved
|
|
342
|
-
|
|
343
|
-
This may be useful for users who want to stay on current Claude Code versions but experiment with a different `Output efficiency` instruction set instead of downgrading to an earlier release.
|
|
344
|
-
|
|
345
|
-
### Prompt variants
|
|
346
|
-
|
|
347
|
-
<details>
|
|
348
|
-
<summary>Anthropic internal / <code>USER_TYPE=ant</code> version</summary>
|
|
349
|
-
|
|
350
|
-
```text
|
|
351
|
-
# Output efficiency
|
|
352
|
-
|
|
353
|
-
When sending user-facing text, you're writing for a person, not logging to a console. Assume users can't see most tool calls or thinking - only your text output. Before your first tool call, briefly state what you're about to do. While working, give short updates at key moments: when you find something load-bearing (a bug, a root cause), when changing direction, when you've made progress without an update.
|
|
354
|
-
|
|
355
|
-
When you give updates, assume the recipient may have stepped away and lost the thread. They do not know your internal shorthand, codenames, or half-formed plan. Write in complete, grammatical sentences that can be understood cold. Spell out technical terms when helpful. If unsure, err on the side of a bit more explanation. Adapt to the user's expertise: experts can handle denser updates, but don't make novice users reconstruct context on their own.
|
|
356
|
-
|
|
357
|
-
User-facing text should read like natural prose. Avoid clipped sentence fragments, excessive dashes, symbolic shorthand, or formatting that reads like console output. Use tables only when they genuinely improve scanability, such as compact facts (files, lines, pass/fail) or quantitative comparisons. Keep explanatory reasoning in prose around the table, not inside it. Avoid semantic backtracking: structure sentences so the user can follow them linearly without having to reinterpret earlier clauses after reading later ones.
|
|
358
|
-
|
|
359
|
-
Optimize for fast human comprehension, not minimal surface area. If the user has to reread your summary or ask a follow-up just to understand what happened, you saved the wrong tokens. Match the level of structure to the task: for a simple question, answer in plain prose without unnecessary headings or numbered lists. While staying clear and direct, also be concise and avoid fluff. Skip filler, obvious restatements, and throat-clearing. Get to the point. Don't over-focus on low-signal details from your process. When it helps, use an inverted pyramid structure with the conclusion first and details later.
|
|
360
|
-
|
|
361
|
-
These user-facing text instructions do not apply to code or tool calls.
|
|
362
|
-
```
|
|
363
|
-
|
|
364
|
-
</details>
|
|
365
|
-
|
|
366
|
-
<details>
|
|
367
|
-
<summary>Public / default Claude Code version</summary>
|
|
368
|
-
|
|
369
|
-
```text
|
|
370
|
-
# Output efficiency
|
|
371
|
-
|
|
372
|
-
IMPORTANT: Go straight to the point. Try the simplest approach first without going in circles. Do not overdo it. Be extra concise.
|
|
373
|
-
|
|
374
|
-
Your text output is brief, direct, and to the point. Lead with the answer or action, not the reasoning. Omit filler, preamble, and unnecessary transitions. Do not restate the user's request; move directly to the work. When explanation is needed, include only what helps the user understand the outcome.
|
|
375
|
-
|
|
376
|
-
Prioritize user-facing text for:
|
|
377
|
-
- decisions that require user input
|
|
378
|
-
- high-signal progress updates at natural milestones
|
|
379
|
-
- errors or blockers that change the plan
|
|
380
|
-
|
|
381
|
-
If a sentence can do the job, do not turn it into three. Favor short, direct constructions over long explanatory prose. These instructions do not apply to code or tool calls.
|
|
382
|
-
```
|
|
383
|
-
|
|
384
|
-
</details>
|
|
385
|
-
|
|
386
|
-
<details>
|
|
387
|
-
<summary>Example custom replacement(A middle-ground version combining the two versions above)</summary>
|
|
388
|
-
|
|
389
|
-
```text
|
|
390
|
-
# Output efficiency
|
|
391
|
-
|
|
392
|
-
When sending user-facing text, write for a person, not a log file. Assume the user cannot see most tool calls or hidden reasoning - only your text output.
|
|
393
|
-
|
|
394
|
-
Keep user-facing text clear, direct, and reasonably concise. Lead with the answer or action. Skip filler, repetition, and unnecessary preamble.
|
|
395
|
-
|
|
396
|
-
Explain enough for the user to understand the reasoning, tradeoffs, or root cause when that would help them learn or make a decision, but do not turn simple answers into long writeups.
|
|
397
|
-
|
|
398
|
-
These instructions apply to user-facing text only. They do not apply to investigation, code reading, tool use, or verification.
|
|
399
|
-
|
|
400
|
-
Before making changes, read the relevant code and understand the surrounding context. Check types, signatures, call sites, and error causes before editing. Do not confuse brevity with rushing, and do not replace understanding with trial and error.
|
|
235
|
+
When the server downgrades your TTL to 5m (quota-aware downgrade at Q5h ≥ 100%), **every idle period longer than 5 minutes causes a full context rebuild**. Without the status line, this is invisible. With it, the red `TTL:5m` warning tells you: **stop working, wait for the Q5h window to reset, then resume**. Powering through overage compounds the drain; pausing breaks the cycle.
|
|
401
236
|
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
When reporting results, be accurate and concrete. If you did not verify something, say so plainly. If a check failed, say that plainly too.
|
|
405
|
-
```
|
|
406
|
-
|
|
407
|
-
</details>
|
|
408
|
-
|
|
409
|
-
## Monitoring
|
|
410
|
-
|
|
411
|
-
The interceptor includes monitoring for several additional issues identified by the community:
|
|
412
|
-
|
|
413
|
-
### Microcompact / budget enforcement
|
|
414
|
-
|
|
415
|
-
Claude Code silently replaces old tool results with `[Old tool result content cleared]` via server-controlled mechanisms (GrowthBook flags). A 200,000-character aggregate cap and per-tool caps (Bash: 30K, Grep: 20K) truncate older results without notification. There is no `DISABLE_MICROCOMPACT` environment variable.
|
|
416
|
-
|
|
417
|
-
The interceptor detects cleared tool results and logs counts. When total tool result characters approach the 200K threshold, a warning is logged.
|
|
418
|
-
|
|
419
|
-
### False rate limiter
|
|
420
|
-
|
|
421
|
-
The client can generate synthetic "Rate limit reached" errors without making an API call, identifiable by `"model": "<synthetic>"`. The interceptor logs these events.
|
|
422
|
-
|
|
423
|
-
### GrowthBook flag dump
|
|
424
|
-
|
|
425
|
-
On the first API call, the interceptor reads `~/.claude.json` and logs the current state of cost/cache-relevant server-controlled flags (hawthorn_window, pewter_kestrel, slate_heron, session_memory, etc.).
|
|
426
|
-
|
|
427
|
-
### Quota tracking
|
|
428
|
-
|
|
429
|
-
Response headers are parsed for `anthropic-ratelimit-unified-5h-utilization` and `7d-utilization`, saved to `~/.claude/quota-status.json` for consumption by status line hooks or other tools.
|
|
430
|
-
|
|
431
|
-
### Peak hour detection
|
|
432
|
-
|
|
433
|
-
Anthropic applies elevated quota drain rates during weekday peak hours (13:00–19:00 UTC, Mon–Fri). The interceptor detects peak windows and writes `peak_hour: true/false` to `quota-status.json`. See `docs/peak-hours-reference.md` for sources and details.
|
|
434
|
-
|
|
435
|
-
### Usage telemetry and cost reporting
|
|
237
|
+
### Recommended: disable git-status injection
|
|
436
238
|
|
|
437
|
-
|
|
239
|
+
Claude Code injects live `git status` into the system prompt on every call. Any file edit changes the git status, which busts the entire prefix cache. Disabling this saves ~1,800 tokens per call:
|
|
438
240
|
|
|
439
241
|
```bash
|
|
440
|
-
|
|
441
|
-
node tools/cost-report.mjs --date 2026-04-08 # specific date
|
|
442
|
-
node tools/cost-report.mjs --since 2h # last 2 hours
|
|
443
|
-
node tools/cost-report.mjs --admin-key <key> # cross-reference with Admin API
|
|
242
|
+
export CLAUDE_CODE_DISABLE_GIT_INSTRUCTIONS=1
|
|
444
243
|
```
|
|
445
244
|
|
|
446
|
-
|
|
245
|
+
Or add `"includeGitInstructions": false` to `~/.claude/settings.json`. Claude Code can still run `git status` via the Bash tool when it needs context. Community-validated by [@wadabum](https://github.com/cnighswonger/claude-code-cache-fix/issues/11): 18-token cache creation across git state changes (vs thousands without the flag).
|
|
447
246
|
|
|
448
|
-
|
|
247
|
+
## Image stripping (preload mode)
|
|
449
248
|
|
|
450
|
-
|
|
249
|
+
Images read via the Read tool persist as base64 in conversation history, riding along on every subsequent API call. A single 500KB image costs ~62,500 tokens per turn on Opus 4.6, and **~85,000+ on Opus 4.7** due to the new tokenizer. Image stripping is strongly recommended on 4.7.
|
|
451
250
|
|
|
452
251
|
```bash
|
|
453
|
-
|
|
454
|
-
node tools/quota-analysis.mjs --since 24h # last 24 hours only
|
|
455
|
-
node tools/quota-analysis.mjs --json # machine-readable output
|
|
252
|
+
export CACHE_FIX_IMAGE_KEEP_LAST=3
|
|
456
253
|
```
|
|
457
254
|
|
|
458
|
-
|
|
255
|
+
Keeps images in the last 3 user messages and replaces older ones with a text placeholder. Only targets `tool_result` blocks — user-pasted images are never touched.
|
|
459
256
|
|
|
460
|
-
|
|
461
|
-
2. **Do peak hours cost more quota per token?** Splits windows into peak-dominant (≥80% peak calls) and off-peak-dominant (≤20%) and compares the implied 100% quota under the best-fit model.
|
|
462
|
-
3. **What is your account's effective 5-hour quota in token-equivalents?** Reports a concrete number you can compare against your subscription tier or against what other users measure.
|
|
257
|
+
## System prompt rewrite (preload mode, optional)
|
|
463
258
|
|
|
464
|
-
|
|
259
|
+
The interceptor can rewrite Claude Code's `# Output efficiency` system-prompt section. Disabled by default. Enable with `CACHE_FIX_OUTPUT_EFFICIENCY_REPLACEMENT`. See [docs/output-efficiency-prompts.md](docs/output-efficiency-prompts.md) for the three known prompt variants and usage instructions.
|
|
465
260
|
|
|
466
|
-
|
|
261
|
+
## Monitoring & diagnostics
|
|
467
262
|
|
|
468
|
-
|
|
263
|
+
The preload interceptor includes monitoring for microcompact degradation, false rate limiters, GrowthBook flag state, usage telemetry, and cost reporting. Quota tracking works in both proxy and preload modes via `~/.claude/quota-status.json`.
|
|
469
264
|
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
```bash
|
|
473
|
-
CACHE_FIX_DEBUG=1 claude-fixed
|
|
474
|
-
```
|
|
475
|
-
|
|
476
|
-
Logs are written to `~/.claude/cache-fix-debug.log`. Look for:
|
|
477
|
-
- `APPLIED: resume message relocation` — block scatter was detected and fixed
|
|
478
|
-
- `APPLIED: tool order stabilization` — tools were reordered
|
|
479
|
-
- `APPLIED: fingerprint stabilized from XXX to YYY` — fingerprint was corrected
|
|
480
|
-
- `APPLIED: stripped N images from old tool results` — images were stripped
|
|
481
|
-
- `APPLIED: output efficiency section rewritten` — output-efficiency section was replaced
|
|
482
|
-
- `MICROCOMPACT: N/M tool results cleared` — microcompact degradation detected
|
|
483
|
-
- `BUDGET WARNING: tool result chars at N / 200,000 threshold` — approaching budget cap
|
|
484
|
-
- `FALSE RATE LIMIT: synthetic model detected` — client-side false rate limit
|
|
485
|
-
- `GROWTHBOOK FLAGS: {...}` — server-controlled feature flags on first call
|
|
486
|
-
- `PROMPT SIZE: system=N tools=N injected=N (skills=N mcp=N ...)` — per-call prompt size breakdown
|
|
487
|
-
- `CACHE TTL: tier=1h create=N read=N hit=N% (1h=N 5m=N)` — TTL tier and cache hit rate per call
|
|
488
|
-
- `PEAK HOUR: weekday 13:00-19:00 UTC` — Anthropic peak hour throttling active
|
|
489
|
-
- `SKIPPED: resume relocation (not a resume or already correct)` — no fix needed
|
|
490
|
-
- `SKIPPED: output efficiency rewrite (section not found)` — no matching output-efficiency section found
|
|
491
|
-
|
|
492
|
-
### Prefix diff mode
|
|
493
|
-
|
|
494
|
-
Enable cross-process prefix snapshot diffing to diagnose cache busts on restart:
|
|
495
|
-
|
|
496
|
-
```bash
|
|
497
|
-
CACHE_FIX_PREFIXDIFF=1 claude-fixed
|
|
498
|
-
```
|
|
499
|
-
|
|
500
|
-
Snapshots are saved to `~/.claude/cache-fix-snapshots/` and diff reports are generated on the first API call after a restart.
|
|
501
|
-
|
|
502
|
-
## Environment variables
|
|
503
|
-
|
|
504
|
-
| Variable | Default | Description |
|
|
505
|
-
|----------|---------|-------------|
|
|
506
|
-
| `CACHE_FIX_DEBUG` | `0` | Enable debug logging to `~/.claude/cache-fix-debug.log` |
|
|
507
|
-
| `CACHE_FIX_PREFIXDIFF` | `0` | Enable prefix snapshot diffing |
|
|
508
|
-
| `CACHE_FIX_IMAGE_KEEP_LAST` | `0` | Keep images in last N user messages (0 = disabled) |
|
|
509
|
-
| `CACHE_FIX_OUTPUT_EFFICIENCY_REPLACEMENT` | unset | Replace Claude Code's `# Output efficiency` system-prompt section before the request is sent |
|
|
510
|
-
| `CACHE_FIX_USAGE_LOG` | `~/.claude/usage.jsonl` | Path for per-call usage telemetry log |
|
|
511
|
-
| `CACHE_FIX_DISABLED` | `0` | Disable all bug fixes; keep monitoring + optimizations active |
|
|
512
|
-
| `CACHE_FIX_SKIP_RELOCATE` | `0` | Skip block relocation fix (Bug 1) |
|
|
513
|
-
| `CACHE_FIX_SKIP_FINGERPRINT` | `0` | Skip fingerprint stabilization (Bug 2b) |
|
|
514
|
-
| `CACHE_FIX_SKIP_TOOL_SORT` | `0` | Skip tool ordering stabilization (Bug 2a) |
|
|
515
|
-
| `CACHE_FIX_SKIP_TTL` | `0` | Skip TTL injection (Bug 5) |
|
|
516
|
-
| `CACHE_FIX_SKIP_IDENTITY` | `0` | Skip identity normalization (Bug 6) |
|
|
517
|
-
| `CACHE_FIX_SKIP_GIT_STATUS` | `0` | Skip git-status stripping |
|
|
518
|
-
| `CACHE_FIX_STRIP_GIT_STATUS` | `0` | Strip volatile git-status from system prompt for prefix stability. Model can still run `git status` via Bash. |
|
|
519
|
-
| `CACHE_FIX_TTL_MAIN` | `1h` | TTL for main-thread requests: `1h`, `5m`, or `none` (pass-through) |
|
|
520
|
-
| `CACHE_FIX_TTL_SUBAGENT` | `1h` | TTL for subagent requests: `1h`, `5m`, or `none` (pass-through) |
|
|
521
|
-
| `CACHE_FIX_DUMP_BREAKPOINTS` | unset | Path to dump cache breakpoint structure (diagnostic for #12) |
|
|
265
|
+
See [docs/monitoring.md](docs/monitoring.md) for full details, debug mode, prefix diffing, environment variables, and the bundled quota analysis tool.
|
|
522
266
|
|
|
523
267
|
## Limitations
|
|
524
268
|
|
|
525
|
-
- **
|
|
526
|
-
- **Overage TTL downgrade** — Exceeding 100% of the 5-hour quota triggers a server-enforced TTL downgrade from 1h to 5m. This is
|
|
527
|
-
- **Microcompact is not preventable** — The monitoring features detect context degradation but cannot prevent it.
|
|
528
|
-
- **System prompt rewrite is experimental** —
|
|
269
|
+
- **Proxy requires a running process** — The proxy must be started before Claude Code. If it's not running and `ANTHROPIC_BASE_URL` points to it, CC will fail to connect. We recommend running it as a systemd service or with a health-checking wrapper script.
|
|
270
|
+
- **Overage TTL downgrade** — Exceeding 100% of the 5-hour quota triggers a server-enforced TTL downgrade from 1h to 5m. This is server-side and cannot be fixed client-side. The proxy/interceptor prevents the cache instability that can push you into overage in the first place.
|
|
271
|
+
- **Microcompact is not preventable** — The monitoring features detect context degradation but cannot prevent it. Microcompact and budget enforcement are server-controlled via GrowthBook flags with no client-side disable option.
|
|
272
|
+
- **System prompt rewrite is experimental** — Preload-only, opt-in. The `# Output efficiency` section has not been proven to cause the behavior differences discussed in community reports. Use at your own risk.
|
|
529
273
|
- **Version coupling** — The fingerprint salt and block detection heuristics are derived from Claude Code internals. A major refactor could require an update to this package.
|
|
530
274
|
|
|
531
275
|
## Tracked issues
|
|
532
276
|
|
|
533
|
-
|
|
534
|
-
- [#40524](https://github.com/anthropics/claude-code/issues/40524) — Within-session fingerprint invalidation, image persistence
|
|
535
|
-
- [#42052](https://github.com/anthropics/claude-code/issues/42052) — Community interceptor development, TTL downgrade discovery
|
|
536
|
-
- [#43044](https://github.com/anthropics/claude-code/issues/43044) — Resume loads 0% context on v2.1.91
|
|
537
|
-
- [#43657](https://github.com/anthropics/claude-code/issues/43657) — Resume cache invalidation confirmed on v2.1.92
|
|
538
|
-
- [#44045](https://github.com/anthropics/claude-code/issues/44045) — SDK-level reproduction with token measurements
|
|
539
|
-
- [#32508](https://github.com/anthropics/claude-code/issues/32508) — Community discussion around the `Output efficiency` system-prompt change and its possible effect on model behavior
|
|
277
|
+
We monitor 30+ upstream Claude Code issues related to cache, quota, and context bugs. See [TRACKED_ISSUES.md](TRACKED_ISSUES.md) for the full list with our involvement, community research, and key contributors.
|
|
540
278
|
|
|
541
279
|
## Related research
|
|
542
280
|
|
|
543
|
-
- **[@ArkNill/claude-code-hidden-problem-analysis](https://github.com/ArkNill/claude-code-hidden-problem-analysis)** —
|
|
544
|
-
- **[@Renvect/X-Ray-Claude-Code-Interceptor](https://github.com/Renvect/X-Ray-Claude-Code-Interceptor)** — Diagnostic HTTPS proxy with real-time dashboard, system prompt section diffing, per-tool stripping thresholds
|
|
545
|
-
- **[@fgrosswig/claude-usage-dashboard](https://github.com/fgrosswig/claude-usage-dashboard)** — Self-hosted forensic dashboard with SSE live monitoring, multi-host aggregation, cache-health scoring
|
|
546
|
-
|
|
547
|
-
## Works with @fgrosswig's dashboard
|
|
548
|
-
|
|
549
|
-
This interceptor and [@fgrosswig](https://github.com/fgrosswig)'s
|
|
550
|
-
[claude-usage-dashboard](https://github.com/fgrosswig/claude-usage-dashboard)
|
|
551
|
-
solve strongly complementary problems. The interceptor captures per-call API
|
|
552
|
-
data from inside the Node.js process — cache metrics, quota state, TTL tier,
|
|
553
|
-
rewrites applied. The dashboard provides the visualization layer — historical
|
|
554
|
-
trending, per-day charts, multi-host aggregation, cache-health scoring.
|
|
555
|
-
|
|
556
|
-
Running both gives you the best of both tools, and the integration is a
|
|
557
|
-
one-liner thanks to the dashboard's tolerant NDJSON ingest and our new
|
|
558
|
-
`usage-to-dashboard-ndjson` translator.
|
|
559
|
-
|
|
560
|
-
### Quick setup
|
|
561
|
-
|
|
562
|
-
```bash
|
|
563
|
-
# Install both tools
|
|
564
|
-
npm install -g claude-code-cache-fix
|
|
565
|
-
# (follow fgrosswig's dashboard install: https://github.com/fgrosswig/claude-usage-dashboard)
|
|
566
|
-
|
|
567
|
-
# One-shot translation (reads ~/.claude/usage.jsonl, writes to
|
|
568
|
-
# ~/.claude/anthropic-proxy-logs/proxy-YYYY-MM-DD.ndjson, which his
|
|
569
|
-
# dashboard already watches)
|
|
570
|
-
node $(npm root -g)/claude-code-cache-fix/tools/usage-to-dashboard-ndjson.mjs
|
|
571
|
-
|
|
572
|
-
# Or keep it live-updating as the interceptor logs new calls
|
|
573
|
-
node $(npm root -g)/claude-code-cache-fix/tools/usage-to-dashboard-ndjson.mjs --follow &
|
|
574
|
-
```
|
|
575
|
-
|
|
576
|
-
No configuration required on the dashboard side — fgrosswig's
|
|
577
|
-
`collectProxyNdjsonFiles()` auto-discovers files in
|
|
578
|
-
`~/.claude/anthropic-proxy-logs/` (or `$ANTHROPIC_PROXY_LOG_DIR`), and our
|
|
579
|
-
translator writes to exactly that path with the expected `proxy-YYYY-MM-DD.ndjson`
|
|
580
|
-
filename convention. The dashboard's tolerant ingestion layer ignores unknown
|
|
581
|
-
fields, so interceptor-specific extras (`ttl_tier`, `ephemeral_1h_input_tokens`,
|
|
582
|
-
`ephemeral_5m_input_tokens`, `peak_hour`, quota state) pass through cleanly
|
|
583
|
-
and remain available to downstream consumers that know to read them.
|
|
584
|
-
|
|
585
|
-
The `cost_factor` metric in `tools/cost-report.mjs` also comes from
|
|
586
|
-
fgrosswig's methodology — the `(input + output + cache_read + cache_creation) / output`
|
|
587
|
-
ratio that gives a single-number measure of how much context is being paid
|
|
588
|
-
per useful output token. A rising cost factor across a long session is the
|
|
589
|
-
measurable signature of cache-efficiency degradation.
|
|
281
|
+
- **[@ArkNill/claude-code-hidden-problem-analysis](https://github.com/ArkNill/claude-code-hidden-problem-analysis)** — 38,996-request proxy-based analysis: 7 bugs (microcompact, budget caps, false rate limiter, JSONL duplication, extended thinking), GrowthBook feature flag causal testing, Opus 4.7 burn rate advisory. The monitoring features in v1.1.0 are informed by this research.
|
|
282
|
+
- **[@Renvect/X-Ray-Claude-Code-Interceptor](https://github.com/Renvect/X-Ray-Claude-Code-Interceptor)** — Diagnostic HTTPS proxy with real-time dashboard, system prompt section diffing, per-tool stripping thresholds. Works with any Claude client that supports `ANTHROPIC_BASE_URL`.
|
|
283
|
+
- **[@fgrosswig/claude-usage-dashboard](https://github.com/fgrosswig/claude-usage-dashboard)** — Self-hosted forensic dashboard with SSE live monitoring, multi-host aggregation, cache-health scoring. Complementary to our proxy's vantage point. See [docs/dashboard-integration.md](docs/dashboard-integration.md) for the interop setup.
|
|
590
284
|
|
|
591
285
|
## Used in production
|
|
592
286
|
|
|
@@ -597,17 +291,16 @@ measurable signature of cache-efficiency degradation.
|
|
|
597
291
|
- **[@VictorSun92](https://github.com/VictorSun92)** — Original monkey-patch fix for v2.1.88, identified partial scatter on v2.1.90, contributed forward-scan detection, correct block ordering, tighter block matchers, and the optional output-efficiency rewrite hook
|
|
598
292
|
- **[@bilby91](https://github.com/bilby91)** ([Crunchloop DAP](https://dap.crunchloop.ai)) — Agent SDK / DAP production environment validation, 1h cache TTL confirmation, tool ordering jitter discovery via debug trace (fixed in v1.5.1), fresh-session sort bug discovery via SKILLS SORT diagnostic (fixed in v1.6.2). First production team to roll the interceptor to trunk.
|
|
599
293
|
- **[@jmarianski](https://github.com/jmarianski)** — Root cause analysis via MITM proxy capture and Ghidra reverse engineering, multi-mode cache test script
|
|
600
|
-
- **[@cnighswonger](https://github.com/cnighswonger)** — Fingerprint stabilization, tool ordering fix, image stripping, monitoring features, overage TTL downgrade discovery, package maintainer
|
|
601
|
-
- **[@ArkNill](https://github.com/ArkNill)** — Microcompact mechanism analysis, GrowthBook flag documentation, false rate limiter identification
|
|
294
|
+
- **[@cnighswonger](https://github.com/cnighswonger)** — Fingerprint stabilization, tool ordering fix, image stripping, monitoring features, overage TTL downgrade discovery, proxy architecture, package maintainer
|
|
295
|
+
- **[@ArkNill](https://github.com/ArkNill)** — Microcompact mechanism analysis, GrowthBook flag documentation, false rate limiter identification, fingerprint verification fix for CC v2.1.108+ (PR #21), Korean README (PR #22), [claude-code-hidden-problem-analysis](https://github.com/ArkNill/claude-code-hidden-problem-analysis) research
|
|
602
296
|
- **[@Renvect](https://github.com/Renvect)** — Image duplication discovery, cross-project directory contamination analysis
|
|
603
297
|
- **[@fgrosswig](https://github.com/fgrosswig)** — [claude-usage-dashboard](https://github.com/fgrosswig/claude-usage-dashboard) forensic methodology: cost-factor overhead ratio metric, `anthropic-*` header capture pattern, proxy NDJSON schema that informed our dashboard interop layer
|
|
604
|
-
- **[@TomTheMenace](https://github.com/TomTheMenace)** — Windows `.bat` wrapper
|
|
298
|
+
- **[@TomTheMenace](https://github.com/TomTheMenace)** — Windows `.bat` wrapper, first Windows platform validation (7.5h/536-call Opus 4.6 session, 98.4% cache hit rate)
|
|
605
299
|
- **[@arjansingh](https://github.com/arjansingh)** — nvm-compatible wrapper script with dynamic `npm root -g` path resolution (PR #15)
|
|
606
300
|
- **[@beekamai](https://github.com/beekamai)** — Windows URL-encoding fix for `claude-fixed.bat` when npm root contains spaces (PR #17)
|
|
607
301
|
- **[@JEONG-JIWOO](https://github.com/JEONG-JIWOO)** — VS Code extension investigation: discovered `claudeCode.claudeProcessWrapper` as the working integration path, wrote the C wrapper for Windows (#16)
|
|
608
302
|
- **[@X-15](https://github.com/X-15)** — VS Code extension validation, per-fix health status analysis confirming safety check behavior on v2.1.105 (#16)
|
|
609
|
-
- **[@
|
|
610
|
-
- **[@deafsquad](https://github.com/deafsquad)** — Universal smoosh_split un-smoosh fix (PR #26), source-level function attribution of resume scatter bug (anthropics/claude-code#43657), OTEL telemetry discovery
|
|
303
|
+
- **[@deafsquad](https://github.com/deafsquad)** — Universal smoosh_split un-smoosh fix (PR #26), source-level function attribution of resume scatter bug (anthropics/claude-code#43657), OTEL telemetry discovery, proposed and built proxy architecture for v3.0.0
|
|
611
304
|
|
|
612
305
|
If you contributed to the community effort on these issues and aren't listed here, please open an issue or PR — we want to credit everyone properly.
|
|
613
306
|
|
package/package.json
CHANGED
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "claude-code-cache-fix",
|
|
3
|
-
"version": "3.0.0",
|
|
3
|
+
"version": "3.0.2",
|
|
4
4
|
"description": "Cache optimization proxy and interceptor for Claude Code. Fixes prompt cache bugs, stabilizes prefix, reduces quota burn.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"exports": "./preload.mjs",
|
|
7
7
|
"main": "./preload.mjs",
|
|
8
8
|
"bin": {
|
|
9
|
-
"
|
|
9
|
+
"cache-fix-proxy": "./bin/claude-via-proxy.mjs"
|
|
10
10
|
},
|
|
11
11
|
"files": [
|
|
12
12
|
"preload.mjs",
|
package/preload.mjs
CHANGED
|
@@ -2268,7 +2268,7 @@ globalThis.fetch = async function (url, options) {
|
|
|
2268
2268
|
}
|
|
2269
2269
|
return true;
|
|
2270
2270
|
});
|
|
2271
|
-
if (kept.length !== msg.content.length) msg.content = kept;
|
|
2271
|
+
if (kept.length !== msg.content.length && kept.length > 0) msg.content = kept;
|
|
2272
2272
|
}
|
|
2273
2273
|
if (trailerStripped > 0) {
|
|
2274
2274
|
modified = true;
|
|
@@ -2340,7 +2340,7 @@ globalThis.fetch = async function (url, options) {
|
|
|
2340
2340
|
}
|
|
2341
2341
|
return true;
|
|
2342
2342
|
});
|
|
2343
|
-
if (kept.length !== msg.content.length) msg.content = kept;
|
|
2343
|
+
if (kept.length !== msg.content.length && kept.length > 0) msg.content = kept;
|
|
2344
2344
|
}
|
|
2345
2345
|
if (reminderStripped > 0) {
|
|
2346
2346
|
modified = true;
|
package/proxy/pipeline.mjs
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import { readdir, readFile } from "node:fs/promises";
|
|
2
2
|
import { join } from "node:path";
|
|
3
|
+
import { pathToFileURL } from "node:url";
|
|
3
4
|
|
|
4
5
|
let registry = [];
|
|
5
6
|
|
|
@@ -16,7 +17,7 @@ export async function loadExtensions(dir, configPath) {
|
|
|
16
17
|
const extensions = [];
|
|
17
18
|
for (const file of mjsFiles) {
|
|
18
19
|
try {
|
|
19
|
-
const mod = await import(join(dir, file) + "?t=" + Date.now());
|
|
20
|
+
const mod = await import(pathToFileURL(join(dir, file)).href + "?t=" + Date.now());
|
|
20
21
|
const ext = mod.default;
|
|
21
22
|
if (!ext || !ext.name) continue;
|
|
22
23
|
|