claude-code-cache-fix 3.0.3 → 3.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -89,6 +89,20 @@ curl http://127.0.0.1:9801/health
89
89
  # {"status":"ok"}
90
90
  ```
91
91
 
92
+ ### Proxy configuration
93
+
94
+ All proxy settings are controlled via environment variables. Set them before starting the proxy server.
95
+
96
+ | Variable | Default | Description |
97
+ |----------|---------|-------------|
98
+ | `CACHE_FIX_PROXY_PORT` | `9801` | Listen port |
99
+ | `CACHE_FIX_PROXY_BIND` | `127.0.0.1` | Bind address |
100
+ | `CACHE_FIX_PROXY_UPSTREAM` | `https://api.anthropic.com` | Upstream URL. Change to chain another proxy (e.g. `http://localhost:8080`) |
101
+ | `CACHE_FIX_PROXY_TIMEOUT` | `600000` | Request timeout in milliseconds |
102
+ | `CACHE_FIX_EXTENSIONS_DIR` | `proxy/extensions/` | Directory for extension `.mjs` files |
103
+ | `CACHE_FIX_EXTENSIONS_CONFIG` | `proxy/extensions.json` | Extension configuration file |
104
+ | `CACHE_FIX_DEBUG` | `0` | Enable debug logging |
105
+
92
106
  ### Corporate environments (proxies, custom CAs)
93
107
 
94
108
  The proxy honors the following environment variables when forwarding to `api.anthropic.com`. Behind Zscaler / Netskope / Forcepoint / Bluecoat / corporate squid, set these in the proxy's environment.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "claude-code-cache-fix",
3
- "version": "3.0.3",
3
+ "version": "3.0.5",
4
4
  "description": "Cache optimization proxy and interceptor for Claude Code. Fixes prompt cache bugs, stabilizes prefix, reduces quota burn.",
5
5
  "type": "module",
6
6
  "exports": "./preload.mjs",
@@ -1,8 +1,63 @@
1
+ import { writeFileSync, mkdirSync } from "node:fs";
2
+ import { join } from "node:path";
3
+ import { homedir } from "node:os";
4
+
5
+ const QUOTA_PATH = join(homedir(), ".claude", "quota-status.json");
6
+
7
/**
 * Extract quota / rate-limit state from a response-header map.
 *
 * Values are looked up with plain property access, so `headers` must be an
 * object keyed by the exact header names used below.
 *
 * @param {Object<string, string>} headers - Response headers.
 * @param {Date} [now=new Date()] - Clock used for the `peak_hour` flag.
 *   Optional; callers that pass only `headers` get the current time.
 * @returns {{
 *   five_hour: {utilization: number, pct: number, resets_at: number},
 *   seven_day: {utilization: number, pct: number, resets_at: number},
 *   status: string,
 *   overage_status: string,
 *   peak_hour: boolean,
 *   all_headers: Object<string, string>
 * } | null} Parsed quota state, or `null` when neither the 5h nor the 7d
 *   reset header yields a non-zero integer.
 */
function parseHeaders(headers, now = new Date()) {
  const PREFIX = "anthropic-ratelimit-unified-";

  // Missing or empty headers collapse to "" so the numeric parsers yield 0.
  const text = (name) => headers[PREFIX + name] || "";
  const float = (name) => Number.parseFloat(text(name)) || 0;
  // Explicit radix: never let a "0x…" prefix switch parsing to hexadecimal.
  const int = (name) => Number.parseInt(text(name), 10) || 0;

  const q5hReset = int("5h-reset");
  const q7dReset = int("7d-reset");

  // Without at least one reset value there is no quota information to report.
  if (!q5hReset && !q7dReset) return null;

  const q5hUtil = float("5h-utilization");
  const q7dUtil = float("7d-utilization");
  const status = text("status") || text("5h-status");
  const overageStatus = text("overage-status");

  // Peak flag: Monday–Friday, 13:00–18:59 UTC.
  const hour = now.getUTCHours();
  const day = now.getUTCDay();
  const peak = day >= 1 && day <= 5 && hour >= 13 && hour < 19;

  // Keep a verbatim copy of every anthropic-* header plus the request
  // identifiers, so fields not modelled above are still available.
  const allHeaders = {};
  for (const [key, value] of Object.entries(headers)) {
    if (key.startsWith("anthropic-") || key === "cf-ray" || key === "request-id") {
      allHeaders[key] = value;
    }
  }

  return {
    five_hour: { utilization: q5hUtil, pct: Math.round(q5hUtil * 100), resets_at: q5hReset },
    seven_day: { utilization: q7dUtil, pct: Math.round(q7dUtil * 100), resets_at: q7dReset },
    status: status || "unknown",
    overage_status: overageStatus || "unknown",
    peak_hour: peak,
    all_headers: allHeaders,
  };
}
46
+
1
47
  export default {
2
48
  name: "cache-telemetry",
3
- description: "Extract cache hit/miss stats from response stream for monitoring",
49
+ description: "Extract cache stats from response stream, persist quota state to ~/.claude/quota-status.json",
4
50
  order: 600,
5
51
 
52
+ async onResponseStart(ctx) {
53
+ if (!ctx.headers) return;
54
+
55
+ const quota = parseHeaders(ctx.headers);
56
+ if (!quota) return;
57
+
58
+ ctx.meta._quotaData = quota;
59
+ },
60
+
6
61
  async onStreamEvent(ctx) {
7
62
  const { event, telemetry } = ctx;
8
63
  if (!event || !telemetry) return;
@@ -19,6 +74,39 @@ export default {
19
74
  if (event.type === "message_delta" && event.usage) {
20
75
  if (!ctx.meta.cacheStats) ctx.meta.cacheStats = {};
21
76
  ctx.meta.cacheStats.outputTokens = event.usage.output_tokens || 0;
77
+
78
+ const stats = ctx.meta.cacheStats;
79
+ const quota = ctx.meta._quotaData;
80
+ if (!quota) return;
81
+
82
+ const cr = stats.cacheRead || 0;
83
+ const cc = stats.cacheCreation || 0;
84
+ const total = cr + cc;
85
+ const hitRate = total > 0 ? ((cr / total) * 100).toFixed(1) : "N/A";
86
+
87
+ const ephemeral1h = cc;
88
+ const ephemeral5m = 0;
89
+
90
+ const ttl = cr > 0 ? "1h" : (cc > 0 ? "5m" : "unknown");
91
+
92
+ const output = {
93
+ cache: {
94
+ ttl_tier: ttl,
95
+ cache_creation: cc,
96
+ cache_read: cr,
97
+ ephemeral_1h: ephemeral1h,
98
+ ephemeral_5m: ephemeral5m,
99
+ hit_rate: hitRate,
100
+ timestamp: new Date().toISOString(),
101
+ },
102
+ timestamp: new Date().toISOString(),
103
+ ...quota,
104
+ };
105
+
106
+ try {
107
+ mkdirSync(join(homedir(), ".claude"), { recursive: true });
108
+ writeFileSync(QUOTA_PATH, JSON.stringify(output, null, 2));
109
+ } catch {}
22
110
  }
23
111
  },
24
112
  };
@@ -1,68 +1,43 @@
1
1
  #!/bin/bash
2
- # Status line: show quota % and burn rate from claude-meter JSONL
3
- # Rate is calculated from window start (reset_time - window_size) to now
4
- # No prev file needed — each reading is self-contained
2
+ # Status line: show quota % and burn rate from quota-status.json
3
+ # Written by the cache-fix proxy's cache-telemetry extension at the end of each streamed API response that carried quota headers.
5
4
 
6
5
  input=$(cat)
7
6
 
8
- JSONL="$HOME/.claude/claude-meter.jsonl"
9
7
  QS="$HOME/.claude/quota-status.json"
10
8
 
11
- # Primary source: claude-meter.jsonl (requires claude-code-meter package)
12
- # Fallback: quota-status.json (written by claude-code-cache-fix interceptor)
13
- if [ -f "$JSONL" ]; then
14
- last=$(tail -1 "$JSONL" 2>/dev/null)
15
- elif [ -f "$QS" ]; then
16
- # Translate quota-status.json into the same shape the Python expects
17
- last=$(python3 -c "
18
- import json, pathlib
19
- qs = json.load(open(pathlib.Path.home() / '.claude' / 'quota-status.json'))
20
- fh = qs.get('five_hour', {})
21
- sd = qs.get('seven_day', {})
22
- print(json.dumps({
23
- 'q5h': fh.get('utilization', 0),
24
- 'q7d': sd.get('utilization', 0),
25
- 'q5h_reset': fh.get('resets_at', 0),
26
- 'q7d_reset': sd.get('resets_at', 0),
27
- 'qoverage': qs.get('overage_status', ''),
28
- 'ts': qs.get('timestamp', ''),
29
- }))
30
- " 2>/dev/null)
31
- else
32
- exit 0
33
- fi
34
-
35
- if [ -z "$last" ]; then exit 0; fi
9
+ if [ -f "$QS" ]; then
10
+ result=$(python3 -c "
11
+ import sys, json, os
12
+ from datetime import datetime, timezone, timedelta
36
13
 
37
- result=$(echo "$last" | python3 -c "
38
- import sys, json
39
- from datetime import datetime, timezone
14
+ qs = json.load(open(os.path.expanduser('~/.claude/quota-status.json')))
40
15
 
41
- r = json.load(sys.stdin)
42
- q5h = int(r['q5h'] * 100)
43
- q7d = int(r.get('q7d', 0) * 100)
44
- overage = r.get('qoverage', '')
45
- ts = r.get('ts', '')
46
- q5h_reset = r.get('q5h_reset', 0)
47
- q7d_reset = r.get('q7d_reset', 0)
16
+ q5h = qs.get('five_hour', {}).get('pct', 0)
17
+ q7d = qs.get('seven_day', {}).get('pct', 0)
18
+ q5h_reset = qs.get('five_hour', {}).get('resets_at', 0)
19
+ q7d_reset = qs.get('seven_day', {}).get('resets_at', 0)
20
+ status = qs.get('status', '')
21
+ overage = qs.get('overage_status', '')
22
+ ts = qs.get('timestamp', '')
48
23
 
49
- now = datetime.fromisoformat(ts.replace('Z', '+00:00'))
24
+ now = datetime.fromisoformat(ts.replace('Z', '+00:00')) if ts else datetime.now(timezone.utc)
50
25
 
51
- # Q5h: 5-hour window, rate = pct / minutes elapsed since window start
26
+ # Q5h burn rate
52
27
  rate5 = ''
53
- if q5h_reset > 0:
54
- window_start = datetime.fromtimestamp(q5h_reset, tz=timezone.utc) - __import__('datetime').timedelta(hours=5)
28
+ if q5h_reset > 0 and q5h > 0:
29
+ window_start = datetime.fromtimestamp(q5h_reset, tz=timezone.utc) - timedelta(hours=5)
55
30
  elapsed_min = (now - window_start).total_seconds() / 60
56
- if elapsed_min > 1 and q5h > 0:
31
+ if elapsed_min > 1:
57
32
  rate5 = '{:+.1f}'.format(q5h / elapsed_min)
58
33
 
59
- # Q7d: 7-day window
34
+ # Q7d burn rate
60
35
  rate7 = ''
61
- if q7d_reset > 0:
62
- window_start_7d = datetime.fromtimestamp(q7d_reset, tz=timezone.utc) - __import__('datetime').timedelta(days=7)
63
- elapsed_min_7d = (now - window_start_7d).total_seconds() / 60
64
- if elapsed_min_7d > 1 and q7d > 0:
65
- rate7 = '{:+.1f}'.format(q7d / (elapsed_min_7d / 60))
36
+ if q7d_reset > 0 and q7d > 0:
37
+ window_start_7d = datetime.fromtimestamp(q7d_reset, tz=timezone.utc) - timedelta(days=7)
38
+ elapsed_hr = (now - window_start_7d).total_seconds() / 3600
39
+ if elapsed_hr > 0.1:
40
+ rate7 = '{:+.1f}'.format(q7d / elapsed_hr)
66
41
 
67
42
  label = 'Q5h: {}%'.format(q5h)
68
43
  if rate5:
@@ -73,37 +48,23 @@ if rate7:
73
48
  if overage == 'active':
74
49
  label += ' | OVERAGE'
75
50
 
76
- # Add TTL tier from quota-status.json (written by interceptor)
77
- import os, pathlib
78
- qs_path = pathlib.Path.home() / '.claude' / 'quota-status.json'
79
- try:
80
- qs = json.load(open(qs_path))
81
- ttl = qs.get('cache', {}).get('ttl_tier', '')
82
- hit = qs.get('cache', {}).get('hit_rate', '')
83
- if ttl:
84
- if ttl == '5m':
85
- label += ' | \033[31mTTL:5m\033[0m' # red
86
- # When on 5m tier, show the cold-rebuild size so users know
87
- # the cost of idling past 5 minutes
88
- cache_cr = qs.get('cache', {}).get('cache_creation', 0)
89
- cache_rd = qs.get('cache', {}).get('cache_read', 0)
90
- prefix = cache_cr + cache_rd
91
- if prefix > 0:
92
- if prefix >= 1_000_000:
93
- label += ' \033[31m\u26A0 idle >5m = {:.1f}M rebuild\033[0m'.format(prefix / 1_000_000)
94
- else:
95
- label += ' \033[31m\u26A0 idle >5m = {:.0f}K rebuild\033[0m'.format(prefix / 1_000)
96
- else:
97
- label += ' | TTL:' + ttl
98
- if hit and hit != 'N/A':
99
- label += ' ' + hit + '%'
100
- peak = qs.get('peak_hour', False)
101
- if peak:
102
- label += ' | \033[33mPEAK\033[0m' # yellow
103
- except:
104
- pass
51
+ # TTL and cache stats
52
+ ttl = qs.get('cache', {}).get('ttl_tier', '')
53
+ hit = qs.get('cache', {}).get('hit_rate', '')
54
+ if ttl:
55
+ if ttl == '5m':
56
+ label += ' | \033[31mTTL:5m\033[0m'
57
+ else:
58
+ label += ' | TTL:' + ttl
59
+ if hit and hit != 'N/A':
60
+ label += ' ' + hit + '%'
61
+
62
+ peak = qs.get('peak_hour', False)
63
+ if peak:
64
+ label += ' | \033[33mPEAK\033[0m'
105
65
 
106
66
  print(label)
107
67
  " 2>/dev/null)
108
68
 
109
69
  [ -n "$result" ] && echo "$result"
70
+ fi