claude-code-cache-fix 3.1.0 → 3.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +61 -20
- package/bin/claude-via-proxy.mjs +57 -0
- package/bin/install-service.mjs +476 -0
- package/package.json +3 -2
- package/proxy/extensions/overage-warning.mjs +385 -0
- package/proxy/extensions/upstream-change-detection.mjs +533 -0
- package/proxy/extensions/usage-log.mjs +252 -23
- package/proxy/extensions.json +1 -0
- package/proxy/rates.mjs +16 -0
- package/templates/cache-fix-proxy-healthcheck.service.template +7 -0
- package/templates/cache-fix-proxy-healthcheck.timer.template +14 -0
- package/templates/cache-fix-proxy.service.template +17 -0
- package/templates/com.cnighswonger.cache-fix-proxy.plist.template +33 -0
|
@@ -1,46 +1,275 @@
|
|
|
1
|
+
// usage-log — append per-call usage record to ~/.claude/usage.jsonl.
|
|
2
|
+
//
|
|
3
|
+
// The emitted record matches `MeterRowSchema` v:1 from
|
|
4
|
+
// `claude-code-meter/src/log/schema.mjs` exactly. claude-meter validates each
|
|
5
|
+
// row through that schema; the wire format is the cross-repo contract.
|
|
6
|
+
//
|
|
7
|
+
// Schema (every row):
|
|
8
|
+
// v: 1
|
|
9
|
+
// ts: ISO datetime
|
|
10
|
+
// sid: 8-char lowercase hex (proxy session, sticky for proxy lifetime)
|
|
11
|
+
// model: string ≤64, /^[a-z0-9._-]+$/
|
|
12
|
+
// requested_model?: string ≤64, /^[a-z0-9._-]*$/ (optional)
|
|
13
|
+
// model_mismatch?: bool (optional)
|
|
14
|
+
// speed: "standard" | "fast" | ""
|
|
15
|
+
// service_tier: string ≤32, /^[a-z0-9_-]*$/
|
|
16
|
+
// input_tokens, output_tokens, cache_creation_input_tokens,
|
|
17
|
+
// cache_read_input_tokens, ephemeral_1h_input_tokens,
|
|
18
|
+
// ephemeral_5m_input_tokens, web_search_requests: int ≥ 0
|
|
19
|
+
// q5h, q7d: float 0–2
|
|
20
|
+
// q5h_reset, q7d_reset: int (unix sec)
|
|
21
|
+
// qstatus, qoverage, qclaim: string lowercase enums
|
|
22
|
+
// qfallback_pct: float 0–1
|
|
23
|
+
// qoverage_util?: float ≥ 0 (optional)
|
|
24
|
+
// qrepresentative_claim?: string ≤16 (optional)
|
|
25
|
+
// org_id?: 16-char hex (sha256(raw header).digest("hex").slice(0,16))
|
|
26
|
+
// overage_disabled_reason?: string ≤64 (optional)
|
|
27
|
+
// cache_hit_rate: float 0–1
|
|
28
|
+
// q5h_delta, q7d_delta: float (0 on first call after restart)
|
|
29
|
+
//
|
|
30
|
+
// `peak_hour` is NOT in the wire format. It can be derived from `ts` if any
|
|
31
|
+
// consumer needs it.
|
|
32
|
+
//
|
|
33
|
+
// Activation: enabled:false in the export default (existing usage-log
|
|
34
|
+
// pattern). Users opt in by adding an entry to proxy/extensions.json:
|
|
35
|
+
// "usage-log": { "enabled": true, "order": 650 }
|
|
36
|
+
// CACHE_FIX_USAGE_LOG=<path> overrides the destination path only — it is NOT
|
|
37
|
+
// an enable flag and never has been.
|
|
38
|
+
//
|
|
39
|
+
// See `docs/directives/proxy-claude-meter-compat.md` for full design.
|
|
40
|
+
|
|
1
41
|
import { appendFile, mkdir } from "node:fs/promises";
|
|
2
42
|
import { dirname, join } from "node:path";
|
|
3
43
|
import { homedir } from "node:os";
|
|
44
|
+
import { createHash } from "node:crypto";
|
|
4
45
|
|
|
5
46
|
const LOG_PATH = process.env.CACHE_FIX_USAGE_LOG || join(homedir(), ".claude", "usage.jsonl");
|
|
6
47
|
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
48
|
+
// --- Module-scope state ---
|
|
49
|
+
|
|
50
|
+
const _sid = generateSid();
|
|
51
|
+
let _lastQ5h = null;
|
|
52
|
+
let _lastQ7d = null;
|
|
53
|
+
|
|
54
|
+
// --- Pure helpers (test seam) ---
|
|
11
55
|
|
|
12
|
-
|
|
13
|
-
|
|
56
|
+
/**
 * Build a short session identifier.
 *
 * The id is the first 8 hex characters of a SHA-256 digest computed over the
 * process id, the current timestamp, and a `Math.random()` value. It is an
 * opaque correlation id, not a security token.
 *
 * @returns {string} 8-character lowercase hex string.
 */
export function generateSid() {
  const seed = `${process.pid}-${Date.now()}-${Math.random()}`;
  const digest = createHash("sha256").update(seed).digest("hex");
  return digest.slice(0, 8);
}
|
|
62
|
+
|
|
63
|
+
/**
 * Hash a raw organization id so the raw value is never written to the log.
 *
 * @param {unknown} rawOrgId - Raw organization id; anything other than a
 *   non-empty string is treated as absent.
 * @returns {string|undefined} First 16 hex characters of the SHA-256 digest,
 *   or `undefined` when there is no usable input.
 */
export function hashOrgId(rawOrgId) {
  const usable = typeof rawOrgId === "string" && rawOrgId.length > 0;
  if (!usable) return undefined;

  const hex = createHash("sha256").update(rawOrgId).digest("hex");
  return hex.slice(0, 16);
}
|
|
14
67
|
|
|
68
|
+
/**
 * Pull the usage-related fields out of a `message_start` stream event.
 *
 * @param {object|null|undefined} event - Parsed stream event.
 * @returns {object|null} Model, speed, service tier and input-side token
 *   counters (missing counters become 0, missing strings become ""), or
 *   `null` when the event is not a `message_start` carrying `message.usage`.
 */
export function extractMessageStartFields(event) {
  if (event?.type !== "message_start") return null;

  const message = event.message;
  const usage = message?.usage;
  if (!usage) return null;

  // Nested objects may be absent; optional chaining plus `|| 0` keeps the
  // counters numeric in that case.
  const cacheCreation = usage.cache_creation;
  const serverToolUse = usage.server_tool_use;

  let model = "";
  if (typeof message.model === "string") {
    model = message.model;
  }

  return {
    model,
    speed: usage.speed || "",
    service_tier: usage.service_tier || "",
    input_tokens: usage.input_tokens || 0,
    cache_creation_input_tokens: usage.cache_creation_input_tokens || 0,
    cache_read_input_tokens: usage.cache_read_input_tokens || 0,
    ephemeral_1h_input_tokens: cacheCreation?.ephemeral_1h_input_tokens || 0,
    ephemeral_5m_input_tokens: cacheCreation?.ephemeral_5m_input_tokens || 0,
    web_search_requests: serverToolUse?.web_search_requests || 0,
  };
}
|
|
87
|
+
|
|
88
|
+
/**
 * Pull the output-token count out of a `message_delta` stream event.
 *
 * @param {object|null|undefined} event - Parsed stream event.
 * @returns {{output_tokens: number}|null} The output token count (0 when the
 *   counter is missing), or `null` when the event is not a `message_delta`
 *   carrying `usage`.
 */
export function extractMessageDeltaFields(event) {
  const usage = event?.type === "message_delta" ? event.usage : null;
  if (!usage) return null;

  const outputTokens = usage.output_tokens || 0;
  return { output_tokens: outputTokens };
}
|
|
93
|
+
|
|
94
|
+
/**
 * Read a header as a finite float.
 *
 * @param {object|null|undefined} headers - Header bag indexed by name.
 * @param {string} key - Header name to look up.
 * @returns {number|null} Parsed value, or `null` when the header is missing,
 *   empty, or does not parse to a finite number.
 */
function num(headers, key) {
  const raw = headers?.[key];
  const missing = raw === undefined || raw === null || raw === "";
  if (missing) return null;

  const parsed = parseFloat(raw);
  if (!Number.isFinite(parsed)) return null;
  return parsed;
}
|
|
100
|
+
|
|
101
|
+
/**
 * Read a header as a base-10 integer, defaulting to 0.
 *
 * @param {object|null|undefined} headers - Header bag indexed by name.
 * @param {string} key - Header name to look up.
 * @returns {number} Parsed integer, or 0 when the header is missing, empty,
 *   or does not parse.
 */
function intOf(headers, key) {
  const raw = headers?.[key];
  const missing = raw === undefined || raw === null || raw === "";
  if (missing) return 0;

  const parsed = parseInt(raw, 10);
  if (!Number.isFinite(parsed)) return 0;
  return parsed;
}
|
|
107
|
+
|
|
108
|
+
/**
 * Read a header as a string, defaulting to "".
 *
 * @param {object|null|undefined} headers - Header bag indexed by name.
 * @param {string} key - Header name to look up.
 * @returns {string} The header value when it is a string, otherwise "".
 */
function strOf(headers, key) {
  const raw = headers?.[key];
  if (typeof raw === "string") {
    return raw;
  }
  return "";
}
|
|
112
|
+
|
|
113
|
+
/**
 * Translate the unified rate-limit response headers into quota fields.
 *
 * Utilization and fallback percentages default to 0, reset times default to
 * 0, and status strings default to "". `qoverage_util` stays `null` when the
 * header is missing; the remaining optional string fields become `undefined`.
 *
 * @param {object|null|undefined} headers - Response headers indexed by name.
 *   NOTE(review): lookups use lowercase names on a plain object — confirm the
 *   caller supplies headers in that shape.
 * @returns {object} Parsed quota fields, including the unhashed `org_id_raw`.
 */
export function parseQuotaHeaders(headers) {
  const source = headers || {};

  const floatOrZero = (key) => num(source, key) ?? 0;
  const optionalString = (key) => strOf(source, key) || undefined;

  return {
    q5h: floatOrZero("anthropic-ratelimit-unified-5h-utilization"),
    q7d: floatOrZero("anthropic-ratelimit-unified-7d-utilization"),
    q5h_reset: intOf(source, "anthropic-ratelimit-unified-5h-reset"),
    q7d_reset: intOf(source, "anthropic-ratelimit-unified-7d-reset"),
    qstatus: strOf(source, "anthropic-ratelimit-unified-status"),
    qoverage: strOf(source, "anthropic-ratelimit-unified-overage-status"),
    qclaim: strOf(source, "anthropic-ratelimit-unified-claim"),
    qfallback_pct: floatOrZero("anthropic-ratelimit-unified-fallback-percentage"),
    qoverage_util: num(source, "anthropic-ratelimit-unified-overage-utilization"),
    qrepresentative_claim: optionalString("anthropic-ratelimit-unified-representative-claim"),
    org_id_raw: optionalString("anthropic-organization-id"),
    overage_disabled_reason: optionalString("anthropic-ratelimit-unified-overage-disabled-reason"),
  };
}
|
|
130
|
+
|
|
131
|
+
/**
 * Difference between the current and previous utilization readings.
 *
 * @param {unknown} current - Latest reading.
 * @param {unknown} previous - Prior reading; `null`/`undefined` means none.
 * @returns {number} `current - previous` when both are numbers, otherwise 0.
 */
export function computeDelta(current, previous) {
  // A null/undefined `previous` is not of type "number", so the single type
  // check also covers the "no prior reading" case.
  const comparable = typeof current === "number" && typeof previous === "number";
  return comparable ? current - previous : 0;
}
|
|
136
|
+
|
|
137
|
+
/**
 * Combine per-response state into one usage row.
 *
 * Required fields are always present with a zero or empty-string default.
 * Optional fields (`requested_model`, `model_mismatch`, `qoverage_util`,
 * `qrepresentative_claim`, `org_id`, `overage_disabled_reason`) are omitted
 * entirely when their source is absent. Key insertion order is kept stable
 * because it determines the serialized column order.
 *
 * @param {object} args
 * @param {object} [args.start] - Fields captured from `message_start`.
 * @param {object} [args.delta] - Fields captured from `message_delta`.
 * @param {object} [args.quota] - Parsed quota header fields.
 * @param {string} [args.requestedModel] - Model named in the request, if known.
 * @param {string} args.sid - Session id.
 * @param {number|null} [args.prevQ5h] - Previous 5h utilization reading.
 * @param {number|null} [args.prevQ7d] - Previous 7d utilization reading.
 * @param {Date} [args.now] - Timestamp source; defaults to the current time.
 * @returns {object} The assembled record.
 */
export function assembleRecord({ start, delta, quota, requestedModel, sid, prevQ5h, prevQ7d, now = new Date() }) {
  const startFields = start || {};
  const deltaFields = delta || {};
  const quotaFields = quota || {};

  const model = startFields.model || "";
  const input = startFields.input_tokens || 0;
  const cacheWrite = startFields.cache_creation_input_tokens || 0;
  const cacheHit = startFields.cache_read_input_tokens || 0;
  const promptTotal = input + cacheWrite + cacheHit;

  const overageUtil = quotaFields.qoverage_util;
  const hasOverageUtil = overageUtil !== null && overageUtil !== undefined;
  const orgId = hashOrgId(quotaFields.org_id_raw);
  // Flag a mismatch only when both sides are known.
  const mismatch = Boolean(requestedModel && model && requestedModel !== model);

  return {
    v: 1,
    ts: now.toISOString(),
    sid,
    model,
    speed: startFields.speed || "",
    service_tier: startFields.service_tier || "",
    input_tokens: input,
    output_tokens: deltaFields.output_tokens || 0,
    cache_creation_input_tokens: cacheWrite,
    cache_read_input_tokens: cacheHit,
    ephemeral_1h_input_tokens: startFields.ephemeral_1h_input_tokens || 0,
    ephemeral_5m_input_tokens: startFields.ephemeral_5m_input_tokens || 0,
    web_search_requests: startFields.web_search_requests || 0,
    q5h: quotaFields.q5h ?? 0,
    q7d: quotaFields.q7d ?? 0,
    q5h_reset: quotaFields.q5h_reset || 0,
    q7d_reset: quotaFields.q7d_reset || 0,
    qstatus: quotaFields.qstatus || "",
    qoverage: quotaFields.qoverage || "",
    qclaim: quotaFields.qclaim || "",
    qfallback_pct: quotaFields.qfallback_pct ?? 0,
    cache_hit_rate: promptTotal > 0 ? cacheHit / promptTotal : 0,
    q5h_delta: computeDelta(quotaFields.q5h, prevQ5h),
    q7d_delta: computeDelta(quotaFields.q7d, prevQ7d),
    // Optional fields: spread an empty object so the key is omitted rather
    // than present with an undefined value.
    ...(requestedModel ? { requested_model: requestedModel } : {}),
    ...(mismatch ? { model_mismatch: true } : {}),
    ...(hasOverageUtil ? { qoverage_util: overageUtil } : {}),
    ...(quotaFields.qrepresentative_claim
      ? { qrepresentative_claim: quotaFields.qrepresentative_claim }
      : {}),
    ...(orgId ? { org_id: orgId } : {}),
    ...(quotaFields.overage_disabled_reason
      ? { overage_disabled_reason: quotaFields.overage_disabled_reason }
      : {}),
  };
}
|
|
27
199
|
|
|
28
|
-
|
|
200
|
+
// --- I/O ---
|
|
201
|
+
|
|
202
|
+
/**
 * Append one record as a JSON line to the usage log.
 *
 * The parent directory of `path` is created first. Previously `~/.claude` was
 * always created regardless of the destination, so an overridden log path in
 * a directory that did not exist yet failed on append.
 *
 * @param {object} record - JSON-serializable record to write.
 * @param {string} [path=LOG_PATH] - Destination file.
 * @returns {Promise<void>}
 * @throws Rejects when the directory cannot be created or the file cannot be
 *   appended to; the caller decides whether to swallow that.
 */
async function appendJsonl(record, path = LOG_PATH) {
  await mkdir(dirname(path), { recursive: true });
  await appendFile(path, `${JSON.stringify(record)}\n`);
}
|
|
206
|
+
|
|
207
|
+
// Test helper: write a record to a caller-supplied path. Bypasses env-var
|
|
208
|
+
// lookup so tests don't race on a shared env.
|
|
209
|
+
/**
 * Append one record as a JSON line to a caller-supplied path.
 *
 * The parent directory is resolved with `dirname` instead of slicing on "/".
 * The slice produced an empty directory name for a bare filename (making
 * `mkdir` reject) and ignored platform-specific separators.
 *
 * @param {object} record - JSON-serializable record to write.
 * @param {string} path - Destination file; its parent directory is created
 *   when missing.
 * @returns {Promise<void>}
 * @throws Rejects when the directory cannot be created or the file cannot be
 *   appended to.
 */
export async function writeRecord(record, path) {
  await mkdir(dirname(path), { recursive: true });
  await appendFile(path, `${JSON.stringify(record)}\n`);
}
|
|
213
|
+
|
|
214
|
+
// Test helper: reset module-scope delta state.
|
|
215
|
+
/**
 * Clear the module-level utilization baselines so the next assembled record
 * is treated as having no previous reading.
 *
 * @returns {void}
 */
export function _resetDeltaStateForTest() {
  _lastQ7d = null;
  _lastQ5h = null;
}
|
|
219
|
+
|
|
220
|
+
export { LOG_PATH };
|
|
221
|
+
|
|
222
|
+
// --- Extension contract ---
|
|
29
223
|
|
|
30
224
|
export default {
  name: "usage-log",
  description: "Append per-call usage record to ~/.claude/usage.jsonl (MeterRowSchema v:1)",
  enabled: false,
  order: 650,

  /**
   * Stream hook: capture `message_start` state, then emit one usage row when
   * the matching `message_delta` (with `usage`) arrives.
   *
   * Reads `ctx.event`, `ctx.meta`, `ctx.responseHeaders` and
   * `ctx.telemetry.requestedModel`; writes `ctx.meta._usageLog`.
   * NOTE(review): assumes `ctx.responseHeaders` is a plain object keyed by
   * lowercase header name — confirm against the proxy pipeline.
   *
   * Any failure is swallowed so logging can never break the pipeline.
   *
   * @param {object} ctx - Extension context for the current stream event.
   * @returns {Promise<void>}
   */
  async onStreamEvent(ctx) {
    if (!ctx || !ctx.event) return;

    try {
      // message_start: capture per-response state into ctx.meta._usageLog.
      if (ctx.event.type === "message_start") {
        const start = extractMessageStartFields(ctx.event);
        if (start) {
          ctx.meta = ctx.meta || {};
          ctx.meta._usageLog = { start };
        }
        return;
      }

      // message_delta: assemble and emit the final record.
      if (ctx.event.type !== "message_delta" || !ctx.event.usage) return;

      const start = ctx.meta?._usageLog?.start;
      if (!start) return; // no message_start was observed for this response

      const headers = ctx.responseHeaders || {};
      const delta = extractMessageDeltaFields(ctx.event);
      const quota = parseQuotaHeaders(headers);
      const requestedModel = ctx.telemetry?.requestedModel || undefined;

      // parseQuotaHeaders reports a missing utilization header as 0. Treating
      // that 0 as a real reading would emit a negative delta now and a bogus
      // jump on the next response, so track whether each header was present.
      const hasQ5h = num(headers, "anthropic-ratelimit-unified-5h-utilization") !== null;
      const hasQ7d = num(headers, "anthropic-ratelimit-unified-7d-utilization") !== null;

      const record = assembleRecord({
        start,
        delta,
        quota,
        requestedModel,
        sid: _sid,
        // No reading on this response means nothing to compare, so delta is 0.
        prevQ5h: hasQ5h ? _lastQ5h : null,
        prevQ7d: hasQ7d ? _lastQ7d : null,
        now: new Date(),
      });

      // Update delta tracking AFTER assembly so the first call's delta is 0
      // (per the directive contract: first call after restart → deltas zero).
      // Keep the previous baseline when this response carried no reading.
      if (hasQ5h) _lastQ5h = quota.q5h;
      if (hasQ7d) _lastQ7d = quota.q7d;

      await appendJsonl(record, process.env.CACHE_FIX_USAGE_LOG || LOG_PATH);
    } catch {
      // Fail-open: never throw to the pipeline.
    }
  },
};
|
package/proxy/extensions.json
CHANGED
|
@@ -9,5 +9,6 @@
|
|
|
9
9
|
"cache-control-normalize": { "enabled": true, "order": 400 },
|
|
10
10
|
"ttl-management": { "enabled": true, "order": 500 },
|
|
11
11
|
"cache-telemetry": { "enabled": true, "order": 600 },
|
|
12
|
+
"overage-warning": { "enabled": true, "order": 610 },
|
|
12
13
|
"request-log": { "enabled": false, "order": 700 }
|
|
13
14
|
}
|
package/proxy/rates.mjs
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
// Shared rate constants for cost projections.
|
|
2
|
+
//
|
|
3
|
+
// This is a deliberate over-simplification for v3.2.0. Anthropic's
|
|
4
|
+
// per-token rates vary by model, by cache tier (input vs cache_read vs
|
|
5
|
+
// cache_creation_5m vs cache_creation_1h), and by overage classification.
|
|
6
|
+
// Encoding all of that correctly is its own subproject — see the v3.3.0
|
|
7
|
+
// follow-up for a precise per-tier engine.
|
|
8
|
+
//
|
|
9
|
+
// For v3.2.0 we ship a single weighted blend constant suitable for
|
|
10
|
+
// a coarse "burn rate at API rates" indicator. Consumers MUST label the
|
|
11
|
+
// resulting number as `coarse` so users do not mistake it for a precise
|
|
12
|
+
// quote.
|
|
13
|
+
|
|
14
|
+
// Heuristic blend covering input + cache_read + cache_creation + output
|
|
15
|
+
// at a typical Opus 4.7 mix. Order of magnitude is right; precise it is not.
|
|
16
|
+
/**
 * Coarse blended cost per token. Presumably USD per single token, i.e.
 * 5 USD per million tokens — confirm the unit against consumers.
 */
export const WEIGHTED_TOKEN_COST_USD_COARSE = 0.000005;
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
[Unit]
|
|
2
|
+
Description=Claude Code Cache Fix Proxy — health check + auto-restart
|
|
3
|
+
After=network.target
|
|
4
|
+
|
|
5
|
+
[Service]
|
|
6
|
+
Type=oneshot
|
|
7
|
+
ExecStart=/bin/sh -c 'curl -fs --max-time 3 http://127.0.0.1:{{PORT}}/health > /dev/null || systemctl --user start cache-fix-proxy.service'
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
[Unit]
|
|
2
|
+
Description=Claude Code Cache Fix Proxy — health check timer (every 2 min)
|
|
3
|
+
Documentation=https://github.com/cnighswonger/claude-code-cache-fix
|
|
4
|
+
|
|
5
|
+
[Timer]
|
|
6
|
+
# Run 30s after boot, then every 2 minutes thereafter. Off-round minute
|
|
7
|
+
# so the firing distribution doesn't pile up on :00.
|
|
8
|
+
OnBootSec=30s
|
|
9
|
+
OnUnitActiveSec=2min
|
|
10
|
+
AccuracySec=15s
|
|
11
|
+
Unit=cache-fix-proxy-healthcheck.service
|
|
12
|
+
|
|
13
|
+
[Install]
|
|
14
|
+
WantedBy=timers.target
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
[Unit]
|
|
2
|
+
Description=Claude Code Cache Fix Proxy
|
|
3
|
+
After=network.target
|
|
4
|
+
{{REQUIRES_LINE}}
|
|
5
|
+
|
|
6
|
+
[Service]
|
|
7
|
+
Type=simple
|
|
8
|
+
ExecStart={{NODE}} {{SERVER_PATH}}
|
|
9
|
+
Restart=on-failure
|
|
10
|
+
RestartSec=5
|
|
11
|
+
Environment=CACHE_FIX_PROXY_PORT={{PORT}}
|
|
12
|
+
{{UPSTREAM_LINE}}
|
|
13
|
+
{{DEBUG_LINE}}
|
|
14
|
+
WorkingDirectory={{WORKING_DIR}}
|
|
15
|
+
|
|
16
|
+
[Install]
|
|
17
|
+
WantedBy=default.target
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
|
2
|
+
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
|
|
3
|
+
<plist version="1.0">
|
|
4
|
+
<dict>
|
|
5
|
+
<key>Label</key>
|
|
6
|
+
<string>com.cnighswonger.cache-fix-proxy</string>
|
|
7
|
+
<key>ProgramArguments</key>
|
|
8
|
+
<array>
|
|
9
|
+
<string>{{NODE}}</string>
|
|
10
|
+
<string>{{SERVER_PATH}}</string>
|
|
11
|
+
</array>
|
|
12
|
+
<key>EnvironmentVariables</key>
|
|
13
|
+
<dict>
|
|
14
|
+
<key>CACHE_FIX_PROXY_PORT</key>
|
|
15
|
+
<string>{{PORT}}</string>
|
|
16
|
+
{{UPSTREAM_PLIST}}
|
|
17
|
+
{{DEBUG_PLIST}}
|
|
18
|
+
</dict>
|
|
19
|
+
<key>WorkingDirectory</key>
|
|
20
|
+
<string>{{WORKING_DIR}}</string>
|
|
21
|
+
<key>RunAtLoad</key>
|
|
22
|
+
<true/>
|
|
23
|
+
<key>KeepAlive</key>
|
|
24
|
+
<dict>
|
|
25
|
+
<key>SuccessfulExit</key>
|
|
26
|
+
<false/>
|
|
27
|
+
</dict>
|
|
28
|
+
<key>StandardOutPath</key>
|
|
29
|
+
<string>{{LOG_DIR}}/cache-fix-proxy.log</string>
|
|
30
|
+
<key>StandardErrorPath</key>
|
|
31
|
+
<string>{{LOG_DIR}}/cache-fix-proxy.err</string>
|
|
32
|
+
</dict>
|
|
33
|
+
</plist>
|