claude-code-cache-fix 3.0.3 → 3.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +14 -0
- package/package.json +1 -1
- package/proxy/extensions/cache-telemetry.mjs +89 -1
- package/tools/quota-statusline.sh +40 -79
package/README.md
CHANGED
|
@@ -89,6 +89,20 @@ curl http://127.0.0.1:9801/health
|
|
|
89
89
|
# {"status":"ok"}
|
|
90
90
|
```
|
|
91
91
|
|
|
92
|
+
### Proxy configuration
|
|
93
|
+
|
|
94
|
+
All proxy settings are controlled via environment variables. Set them before starting the proxy server.
|
|
95
|
+
|
|
96
|
+
| Variable | Default | Description |
|
|
97
|
+
|----------|---------|-------------|
|
|
98
|
+
| `CACHE_FIX_PROXY_PORT` | `9801` | Listen port |
|
|
99
|
+
| `CACHE_FIX_PROXY_BIND` | `127.0.0.1` | Bind address |
|
|
100
|
+
| `CACHE_FIX_PROXY_UPSTREAM` | `https://api.anthropic.com` | Upstream URL. Change to chain another proxy (e.g. `http://localhost:8080`) |
|
|
101
|
+
| `CACHE_FIX_PROXY_TIMEOUT` | `600000` | Request timeout in milliseconds |
|
|
102
|
+
| `CACHE_FIX_EXTENSIONS_DIR` | `proxy/extensions/` | Directory for extension `.mjs` files |
|
|
103
|
+
| `CACHE_FIX_EXTENSIONS_CONFIG` | `proxy/extensions.json` | Extension configuration file |
|
|
104
|
+
| `CACHE_FIX_DEBUG` | `0` | Enable debug logging |
|
|
105
|
+
|
|
92
106
|
### Corporate environments (proxies, custom CAs)
|
|
93
107
|
|
|
94
108
|
The proxy honors the following environment variables when forwarding to `api.anthropic.com`. Behind Zscaler / Netskope / Forcepoint / Bluecoat / corporate squid, set these in the proxy's environment.
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "claude-code-cache-fix",
|
|
3
|
-
"version": "3.0.3",
|
|
3
|
+
"version": "3.0.5",
|
|
4
4
|
"description": "Cache optimization proxy and interceptor for Claude Code. Fixes prompt cache bugs, stabilizes prefix, reduces quota burn.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"exports": "./preload.mjs",
|
package/proxy/extensions/cache-telemetry.mjs
CHANGED
|
@@ -1,8 +1,63 @@
|
|
|
1
|
+
import { writeFileSync, mkdirSync } from "node:fs";
|
|
2
|
+
import { join } from "node:path";
|
|
3
|
+
import { homedir } from "node:os";
|
|
4
|
+
|
|
5
|
+
const QUOTA_PATH = join(homedir(), ".claude", "quota-status.json");
|
|
6
|
+
|
|
7
|
+
/**
 * Build a quota snapshot from the `anthropic-ratelimit-unified-*` response
 * headers.
 *
 * @param {Object<string, string>} headers - Plain object of response headers.
 *   Names are looked up exactly as spelled here (lowercase), so the caller
 *   must supply lowercase keys.
 * @param {Date} [now=new Date()] - Instant used to compute `peak_hour`.
 *   Defaults to the current time; injectable so the flag can be tested
 *   deterministically.
 * @returns {?{five_hour: object, seven_day: object, status: string,
 *   overage_status: string, peak_hour: boolean, all_headers: object}}
 *   The snapshot, or `null` when `headers` is missing or neither the 5h nor
 *   the 7d reset header parses to a non-zero integer.
 */
function parseHeaders(headers, now = new Date()) {
  if (!headers) return null;

  const PREFIX = "anthropic-ratelimit-unified-";
  const text = (name) => headers[`${PREFIX}${name}`] || "";
  // Missing or unparsable values collapse to 0 rather than NaN.
  const asFloat = (name) => Number.parseFloat(text(name)) || 0;
  // Explicit radix: reset values are decimal, never hex/octal literals.
  const asInt = (name) => Number.parseInt(text(name), 10) || 0;

  const fiveHourReset = asInt("5h-reset");
  const sevenDayReset = asInt("7d-reset");

  // Without at least one reset value the response carries no usable quota data.
  if (!fiveHourReset && !sevenDayReset) return null;

  const fiveHourUtil = asFloat("5h-utilization");
  const sevenDayUtil = asFloat("7d-utilization");

  // Prefer the unified status; fall back to the 5h-specific one.
  const status = text("status") || text("5h-status");
  const overageStatus = text("overage-status");

  // Peak window as coded here: Monday-Friday, 13:00-18:59 UTC.
  const hour = now.getUTCHours();
  const day = now.getUTCDay();
  const isPeak = day >= 1 && day <= 5 && hour >= 13 && hour < 19;

  // Keep the raw Anthropic headers plus the two request identifiers.
  const allHeaders = Object.fromEntries(
    Object.entries(headers).filter(
      ([name]) =>
        name.startsWith("anthropic-") ||
        name === "cf-ray" ||
        name === "request-id",
    ),
  );

  return {
    five_hour: {
      utilization: fiveHourUtil,
      pct: Math.round(fiveHourUtil * 100),
      resets_at: fiveHourReset,
    },
    seven_day: {
      utilization: sevenDayUtil,
      pct: Math.round(sevenDayUtil * 100),
      resets_at: sevenDayReset,
    },
    status: status || "unknown",
    overage_status: overageStatus || "unknown",
    peak_hour: isPeak,
    all_headers: allHeaders,
  };
}
|
|
46
|
+
|
|
1
47
|
export default {
|
|
2
48
|
name: "cache-telemetry",
|
|
3
|
-
description: "Extract cache
|
|
49
|
+
description: "Extract cache stats from response stream, persist quota state to ~/.claude/quota-status.json",
|
|
4
50
|
order: 600,
|
|
5
51
|
|
|
52
|
+
async onResponseStart(ctx) {
|
|
53
|
+
if (!ctx.headers) return;
|
|
54
|
+
|
|
55
|
+
const quota = parseHeaders(ctx.headers);
|
|
56
|
+
if (!quota) return;
|
|
57
|
+
|
|
58
|
+
ctx.meta._quotaData = quota;
|
|
59
|
+
},
|
|
60
|
+
|
|
6
61
|
async onStreamEvent(ctx) {
|
|
7
62
|
const { event, telemetry } = ctx;
|
|
8
63
|
if (!event || !telemetry) return;
|
|
@@ -19,6 +74,39 @@ export default {
|
|
|
19
74
|
if (event.type === "message_delta" && event.usage) {
|
|
20
75
|
if (!ctx.meta.cacheStats) ctx.meta.cacheStats = {};
|
|
21
76
|
ctx.meta.cacheStats.outputTokens = event.usage.output_tokens || 0;
|
|
77
|
+
|
|
78
|
+
const stats = ctx.meta.cacheStats;
|
|
79
|
+
const quota = ctx.meta._quotaData;
|
|
80
|
+
if (!quota) return;
|
|
81
|
+
|
|
82
|
+
const cr = stats.cacheRead || 0;
|
|
83
|
+
const cc = stats.cacheCreation || 0;
|
|
84
|
+
const total = cr + cc;
|
|
85
|
+
const hitRate = total > 0 ? ((cr / total) * 100).toFixed(1) : "N/A";
|
|
86
|
+
|
|
87
|
+
const ephemeral1h = cc;
|
|
88
|
+
const ephemeral5m = 0;
|
|
89
|
+
|
|
90
|
+
const ttl = cr > 0 ? "1h" : (cc > 0 ? "5m" : "unknown");
|
|
91
|
+
|
|
92
|
+
const output = {
|
|
93
|
+
cache: {
|
|
94
|
+
ttl_tier: ttl,
|
|
95
|
+
cache_creation: cc,
|
|
96
|
+
cache_read: cr,
|
|
97
|
+
ephemeral_1h: ephemeral1h,
|
|
98
|
+
ephemeral_5m: ephemeral5m,
|
|
99
|
+
hit_rate: hitRate,
|
|
100
|
+
timestamp: new Date().toISOString(),
|
|
101
|
+
},
|
|
102
|
+
timestamp: new Date().toISOString(),
|
|
103
|
+
...quota,
|
|
104
|
+
};
|
|
105
|
+
|
|
106
|
+
try {
|
|
107
|
+
mkdirSync(join(homedir(), ".claude"), { recursive: true });
|
|
108
|
+
writeFileSync(QUOTA_PATH, JSON.stringify(output, null, 2));
|
|
109
|
+
} catch {}
|
|
22
110
|
}
|
|
23
111
|
},
|
|
24
112
|
};
|
package/tools/quota-statusline.sh
CHANGED
|
@@ -1,68 +1,43 @@
|
|
|
1
1
|
#!/bin/bash
|
|
2
|
-
# Status line: show quota % and burn rate from
|
|
3
|
-
#
|
|
4
|
-
# No prev file needed — each reading is self-contained
|
|
2
|
+
# Status line: show quota % and burn rate from quota-status.json
|
|
3
|
+
# Written by cache-fix proxy's cache-telemetry extension on every API call.
|
|
5
4
|
|
|
6
5
|
input=$(cat)
|
|
7
6
|
|
|
8
|
-
JSONL="$HOME/.claude/claude-meter.jsonl"
|
|
9
7
|
QS="$HOME/.claude/quota-status.json"
|
|
10
8
|
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
elif [ -f "$QS" ]; then
|
|
16
|
-
# Translate quota-status.json into the same shape the Python expects
|
|
17
|
-
last=$(python3 -c "
|
|
18
|
-
import json, pathlib
|
|
19
|
-
qs = json.load(open(pathlib.Path.home() / '.claude' / 'quota-status.json'))
|
|
20
|
-
fh = qs.get('five_hour', {})
|
|
21
|
-
sd = qs.get('seven_day', {})
|
|
22
|
-
print(json.dumps({
|
|
23
|
-
'q5h': fh.get('utilization', 0),
|
|
24
|
-
'q7d': sd.get('utilization', 0),
|
|
25
|
-
'q5h_reset': fh.get('resets_at', 0),
|
|
26
|
-
'q7d_reset': sd.get('resets_at', 0),
|
|
27
|
-
'qoverage': qs.get('overage_status', ''),
|
|
28
|
-
'ts': qs.get('timestamp', ''),
|
|
29
|
-
}))
|
|
30
|
-
" 2>/dev/null)
|
|
31
|
-
else
|
|
32
|
-
exit 0
|
|
33
|
-
fi
|
|
34
|
-
|
|
35
|
-
if [ -z "$last" ]; then exit 0; fi
|
|
9
|
+
if [ -f "$QS" ]; then
|
|
10
|
+
result=$(python3 -c "
|
|
11
|
+
import sys, json, os
|
|
12
|
+
from datetime import datetime, timezone, timedelta
|
|
36
13
|
|
|
37
|
-
|
|
38
|
-
import sys, json
|
|
39
|
-
from datetime import datetime, timezone
|
|
14
|
+
qs = json.load(open(os.path.expanduser('~/.claude/quota-status.json')))
|
|
40
15
|
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
16
|
+
q5h = qs.get('five_hour', {}).get('pct', 0)
|
|
17
|
+
q7d = qs.get('seven_day', {}).get('pct', 0)
|
|
18
|
+
q5h_reset = qs.get('five_hour', {}).get('resets_at', 0)
|
|
19
|
+
q7d_reset = qs.get('seven_day', {}).get('resets_at', 0)
|
|
20
|
+
status = qs.get('status', '')
|
|
21
|
+
overage = qs.get('overage_status', '')
|
|
22
|
+
ts = qs.get('timestamp', '')
|
|
48
23
|
|
|
49
|
-
now = datetime.fromisoformat(ts.replace('Z', '+00:00'))
|
|
24
|
+
now = datetime.fromisoformat(ts.replace('Z', '+00:00')) if ts else datetime.now(timezone.utc)
|
|
50
25
|
|
|
51
|
-
# Q5h
|
|
26
|
+
# Q5h burn rate
|
|
52
27
|
rate5 = ''
|
|
53
|
-
if q5h_reset > 0:
|
|
54
|
-
window_start = datetime.fromtimestamp(q5h_reset, tz=timezone.utc) -
|
|
28
|
+
if q5h_reset > 0 and q5h > 0:
|
|
29
|
+
window_start = datetime.fromtimestamp(q5h_reset, tz=timezone.utc) - timedelta(hours=5)
|
|
55
30
|
elapsed_min = (now - window_start).total_seconds() / 60
|
|
56
|
-
if elapsed_min > 1
|
|
31
|
+
if elapsed_min > 1:
|
|
57
32
|
rate5 = '{:+.1f}'.format(q5h / elapsed_min)
|
|
58
33
|
|
|
59
|
-
# Q7d
|
|
34
|
+
# Q7d burn rate
|
|
60
35
|
rate7 = ''
|
|
61
|
-
if q7d_reset > 0:
|
|
62
|
-
window_start_7d = datetime.fromtimestamp(q7d_reset, tz=timezone.utc) -
|
|
63
|
-
|
|
64
|
-
if
|
|
65
|
-
rate7 = '{:+.1f}'.format(q7d /
|
|
36
|
+
if q7d_reset > 0 and q7d > 0:
|
|
37
|
+
window_start_7d = datetime.fromtimestamp(q7d_reset, tz=timezone.utc) - timedelta(days=7)
|
|
38
|
+
elapsed_hr = (now - window_start_7d).total_seconds() / 3600
|
|
39
|
+
if elapsed_hr > 0.1:
|
|
40
|
+
rate7 = '{:+.1f}'.format(q7d / elapsed_hr)
|
|
66
41
|
|
|
67
42
|
label = 'Q5h: {}%'.format(q5h)
|
|
68
43
|
if rate5:
|
|
@@ -73,37 +48,23 @@ if rate7:
|
|
|
73
48
|
if overage == 'active':
|
|
74
49
|
label += ' | OVERAGE'
|
|
75
50
|
|
|
76
|
-
#
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
prefix = cache_cr + cache_rd
|
|
91
|
-
if prefix > 0:
|
|
92
|
-
if prefix >= 1_000_000:
|
|
93
|
-
label += ' \033[31m\u26A0 idle >5m = {:.1f}M rebuild\033[0m'.format(prefix / 1_000_000)
|
|
94
|
-
else:
|
|
95
|
-
label += ' \033[31m\u26A0 idle >5m = {:.0f}K rebuild\033[0m'.format(prefix / 1_000)
|
|
96
|
-
else:
|
|
97
|
-
label += ' | TTL:' + ttl
|
|
98
|
-
if hit and hit != 'N/A':
|
|
99
|
-
label += ' ' + hit + '%'
|
|
100
|
-
peak = qs.get('peak_hour', False)
|
|
101
|
-
if peak:
|
|
102
|
-
label += ' | \033[33mPEAK\033[0m' # yellow
|
|
103
|
-
except:
|
|
104
|
-
pass
|
|
51
|
+
# TTL and cache stats
|
|
52
|
+
ttl = qs.get('cache', {}).get('ttl_tier', '')
|
|
53
|
+
hit = qs.get('cache', {}).get('hit_rate', '')
|
|
54
|
+
if ttl:
|
|
55
|
+
if ttl == '5m':
|
|
56
|
+
label += ' | \033[31mTTL:5m\033[0m'
|
|
57
|
+
else:
|
|
58
|
+
label += ' | TTL:' + ttl
|
|
59
|
+
if hit and hit != 'N/A':
|
|
60
|
+
label += ' ' + hit + '%'
|
|
61
|
+
|
|
62
|
+
peak = qs.get('peak_hour', False)
|
|
63
|
+
if peak:
|
|
64
|
+
label += ' | \033[33mPEAK\033[0m'
|
|
105
65
|
|
|
106
66
|
print(label)
|
|
107
67
|
" 2>/dev/null)
|
|
108
68
|
|
|
109
69
|
[ -n "$result" ] && echo "$result"
|
|
70
|
+
fi
|