claude-code-cache-fix 3.1.1 → 3.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -86,6 +86,43 @@ nohup cache-fix-proxy server > /tmp/cache-fix-proxy.log 2>&1 &
86
86
  echo 'export ANTHROPIC_BASE_URL=http://127.0.0.1:9801' >> ~/.bashrc
87
87
  ```
88
88
 
89
+ ### Docker
90
+
91
+ A multi-arch (amd64, arm64) container image is published to GitHub Container Registry on every release tag.
92
+
93
+ ```bash
94
+ docker run -d --name cache-fix-proxy \
95
+ --restart=always \
96
+ -p 9801:9801 \
97
+ ghcr.io/cnighswonger/claude-code-cache-fix:latest
98
+
99
+ # Then in your shell:
100
+ export ANTHROPIC_BASE_URL=http://127.0.0.1:9801
101
+ ```
102
+
103
+ Use `--restart=always` instead of the systemd healthcheck companion — Docker handles auto-recovery natively. Mount nothing; the container is stateless. Override the default port with `-e CACHE_FIX_PROXY_PORT=...`. Override the upstream (e.g. to chain through llm-relay) with `-e CACHE_FIX_PROXY_UPSTREAM=http://host.docker.internal:8080`. The image runs as the unprivileged `node` user (uid 1000) and exposes a `HEALTHCHECK` Docker can use for liveness.
104
+
105
+ For corporate environments behind an SSL-inspecting proxy, mount your CA bundle and set the env vars:
106
+
107
+ ```bash
108
+ docker run -d --name cache-fix-proxy --restart=always -p 9801:9801 \
109
+ -e HTTPS_PROXY=http://proxy.corp.example:8080 \
110
+ -e CACHE_FIX_PROXY_CA_FILE=/etc/ssl/corp-ca.pem \
111
+ -v /path/to/zscaler-root.pem:/etc/ssl/corp-ca.pem:ro \
112
+ ghcr.io/cnighswonger/claude-code-cache-fix:latest
113
+ ```
114
+
115
+ Image tags: `latest`, `3`, `3.2`, `3.2.0` (semver-ladder, so `3` always points to the newest 3.x). `latest` always tracks the newest tagged release.
116
+
117
+ **Linux note:** the `host.docker.internal` hostname used in the chained-upstream example resolves automatically on Docker Desktop (macOS / Windows). On plain Linux Docker Engine you usually need `--add-host=host.docker.internal:host-gateway` so the name resolves to the host bridge. Without it, the container's name lookup fails and the proxy can't reach the upstream service running on the host. Example chaining cache-fix proxy through `llm-relay` running on the host:
118
+
119
+ ```bash
120
+ docker run -d --name cache-fix-proxy --restart=always -p 9801:9801 \
121
+ --add-host=host.docker.internal:host-gateway \
122
+ -e CACHE_FIX_PROXY_UPSTREAM=http://host.docker.internal:8080 \
123
+ ghcr.io/cnighswonger/claude-code-cache-fix:latest
124
+ ```
125
+
89
126
  ### Health check
90
127
 
91
128
  ```bash
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "claude-code-cache-fix",
3
- "version": "3.1.1",
3
+ "version": "3.2.0",
4
4
  "description": "Cache optimization proxy and interceptor for Claude Code. Fixes prompt cache bugs, stabilizes prefix, reduces quota burn.",
5
5
  "type": "module",
6
6
  "exports": "./preload.mjs",
@@ -0,0 +1,385 @@
1
+ // overage-warning — emit a one-time warning per Q5h-window threshold
2
+ // crossing when Anthropic's response headers indicate the user is
3
+ // approaching or has crossed the overage threshold.
4
+ //
5
+ // Advisory only. No request mutation. Two outputs:
6
+ // 1. stderr line prefixed `[overage-warning]` for proxy journals/logs
7
+ // 2. structured JSON record appended to `~/.claude/overage-warnings.jsonl`
8
+ //
9
+ // Activation: `enabled: true` in extensions.json (this extension is
10
+ // always loaded), gated at runtime by `CACHE_FIX_OVERAGE_WARNING=1`.
11
+ // Matches the prefix-diff pattern (env-var-only opt-in).
12
+ //
13
+ // See `docs/directives/proxy-overage-cost-warning.md` for the full design.
14
+
15
+ import { appendFile, mkdir } from "node:fs/promises";
16
+ import { join, dirname } from "node:path";
17
+ import { homedir } from "node:os";
18
+
19
+ import { WEIGHTED_TOKEN_COST_USD_COARSE } from "../rates.mjs";
20
+
21
+ // Env-gated runtime flags read on each call. Reading at module load would
22
+ // freeze the values and make per-test isolation impossible. The check is
23
+ // cheap (one process.env lookup per invocation when disabled).
24
/**
 * Reports whether the overage-warning extension is switched on.
 * The environment is consulted on every call (never cached).
 * @returns {boolean} true only when CACHE_FIX_OVERAGE_WARNING is exactly "1".
 */
function isEnabled() {
  const flag = process.env.CACHE_FIX_OVERAGE_WARNING;
  return flag === "1";
}
27
/**
 * Reports whether stderr output of overage warnings is suppressed.
 * @returns {boolean} true only when CACHE_FIX_OVERAGE_WARNING_QUIET is exactly "1".
 */
function isQuiet() {
  const flag = process.env.CACHE_FIX_OVERAGE_WARNING_QUIET;
  return flag === "1";
}
30
/**
 * Reports whether debug tracing is requested.
 * @returns {boolean} true only when CACHE_FIX_DEBUG is exactly "1".
 */
function isDebug() {
  const flag = process.env.CACHE_FIX_DEBUG;
  return flag === "1";
}
33
+
34
/**
 * Writes a debug line to stderr, prefixed with `[overage-warning] DEBUG:`.
 * Does nothing unless debug mode is active.
 * @param {*} msg - Value interpolated into the line.
 */
function debug(msg) {
  if (!isDebug()) return;
  process.stderr.write(`[overage-warning] DEBUG: ${msg}\n`);
}
37
+
38
+ // --- Module-scope state ---
39
+ //
40
+ // Sliding window of (timestamp, q5h_util, input_tokens, cache_creation_tokens,
41
+ // cache_read_tokens, output_tokens) samples. Used to compute burn rate.
42
+ //
43
+ // Cross-response dedup: per Q5h window (keyed by q5h_resets_at), the set
44
+ // of thresholds we've already warned at. Window expires when q5h_resets_at
45
+ // changes (new window = new dedup state).
46
+
47
+ const WINDOW_MS = 15 * 60 * 1000;
48
+ const WINDOW_MAX_SAMPLES = 60;
49
+ const WARM_UP_MIN_SAMPLES = 3;
50
+
51
+ const _window = []; // { t, q5h, input, cache_creation, cache_read, output }
52
+ let _dedupWindowResetsAt = 0;
53
+ let _dedupThresholds = new Set();
54
+
55
/**
 * Clears all module-scope state: the dedup bookkeeping and the sample window.
 * The window array is emptied in place, so existing references to it stay valid.
 */
function resetState() {
  _dedupThresholds = new Set();
  _dedupWindowResetsAt = 0;
  _window.length = 0;
}
60
+
61
+ // --- Pure functions (test seam) ---
62
+
63
/**
 * Inspects rate-limit response headers and decides whether this response is
 * eligible to trigger an overage warning.
 *
 * Eligibility requires (a) a unified status (falling back to the 5h status) of
 * "allowed_warning" or "throttled", and (b) a numeric
 * `...-7d-surpassed-threshold` header.
 *
 * @param {object} headers - Header map looked up by exact (lowercase) key.
 * @returns {{eligible: false} | {eligible: true, trigger: object, snapshot: object, raw: object}}
 *   When eligible: `trigger` carries status/threshold/overage/upgrade paths,
 *   `snapshot` carries rounded percentages and the 5h reset value, and `raw`
 *   carries the unrounded 5h utilization and reset value.
 */
export function parseTriggerFromHeaders(headers) {
  if (!headers || typeof headers !== "object") return { eligible: false };

  // Missing or falsy header values are normalised to the empty string.
  const readText = (name) => headers[name] || "";
  const readFloat = (name) => {
    const text = readText(name);
    if (!text) return null;
    const parsed = parseFloat(text);
    return Number.isFinite(parsed) ? parsed : null;
  };
  const readInt = (name) => {
    const text = readText(name);
    const parsed = text ? parseInt(text, 10) : NaN;
    return Number.isFinite(parsed) ? parsed : 0;
  };
  const toPercent = (fraction) => (fraction === null ? null : Math.round(fraction * 100));

  const status =
    readText("anthropic-ratelimit-unified-status") ||
    readText("anthropic-ratelimit-unified-5h-status");
  if (status !== "allowed_warning" && status !== "throttled") return { eligible: false };

  const surpassed = readFloat("anthropic-ratelimit-unified-7d-surpassed-threshold");
  if (surpassed === null) return { eligible: false };

  const overageStatus = readText("anthropic-ratelimit-unified-overage-status") || "unknown";
  const upgradeRaw = readText("anthropic-ratelimit-unified-upgrade-paths");
  const upgradePaths = upgradeRaw
    ? upgradeRaw
        .split(",")
        .map((part) => part.trim())
        .filter(Boolean)
    : [];

  const q5hUtil = readFloat("anthropic-ratelimit-unified-5h-utilization");
  const q7dUtil = readFloat("anthropic-ratelimit-unified-7d-utilization");
  const q5hResetsAt = readInt("anthropic-ratelimit-unified-5h-reset");

  return {
    eligible: true,
    trigger: {
      status,
      surpassed_threshold: surpassed,
      overage_status: overageStatus,
      upgrade_paths: upgradePaths,
    },
    snapshot: {
      q5h_pct: toPercent(q5hUtil),
      q7d_pct: toPercent(q7dUtil),
      q5h_resets_at: q5hResetsAt,
    },
    raw: {
      q5h_util: q5hUtil,
      q5h_resets_at: q5hResetsAt,
    },
  };
}
118
+
119
/**
 * Builds the dedup-set key for a threshold within a given Q5h window.
 * @param {*} threshold - Surpassed-threshold value.
 * @param {*} q5h_resets_at - Reset value identifying the Q5h window.
 * @returns {string} `<threshold>@<q5h_resets_at>`.
 */
export function dedupKey(threshold, q5h_resets_at) {
  return "".concat(threshold, "@", q5h_resets_at);
}
122
+
123
/**
 * Computes a burn-rate projection from utilization samples.
 *
 * Samples older than WINDOW_MS relative to `now` are ignored. The first and
 * last remaining samples are treated as the oldest and newest, so the input is
 * expected to be in chronological order.
 *
 * Result shape:
 *   { min_to_100, tokens_per_min, cost_per_hr_usd_coarse, window_samples,
 *     window_minutes }
 *
 * - With fewer than WARM_UP_MIN_SAMPLES fresh samples, or a non-positive time
 *   span, all three projection fields are null and window_minutes is 0.
 * - When utilization is not increasing across the window, `min_to_100` and
 *   `cost_per_hr_usd_coarse` are null, but `tokens_per_min` is still reported.
 *
 * @param {Array<object>} samples - Entries with `t`, `q5h` and per-call token counts.
 * @param {number} [now=Date.now()] - Reference time in milliseconds.
 * @returns {object} The projection described above.
 */
export function computeProjection(samples, now = Date.now()) {
  // Defensive re-filter; the caller may already have evicted expired samples.
  const live = samples.filter((entry) => now - entry.t <= WINDOW_MS);
  const unavailable = () => ({
    min_to_100: null,
    tokens_per_min: null,
    cost_per_hr_usd_coarse: null,
    window_samples: live.length,
    window_minutes: 0,
  });

  if (live.length < WARM_UP_MIN_SAMPLES) return unavailable();

  const first = live[0];
  const last = live[live.length - 1];
  const spanMinutes = (last.t - first.t) / 60_000;
  if (spanMinutes <= 0) return unavailable();

  const utilPerMinute = (last.q5h - first.q5h) / spanMinutes;
  const rising = utilPerMinute > 0;

  // Every token category of every fresh sample counts toward the burn rate.
  let tokenSum = 0;
  for (const entry of live) {
    tokenSum =
      tokenSum +
      (entry.input || 0) +
      (entry.cache_creation || 0) +
      (entry.cache_read || 0) +
      (entry.output || 0);
  }
  const tokensPerMinute = tokenSum / spanMinutes;

  return {
    min_to_100: rising ? Math.max(0, Math.round((1 - last.q5h) / utilPerMinute)) : null,
    tokens_per_min: Math.round(tokensPerMinute),
    cost_per_hr_usd_coarse: rising
      ? Number((tokensPerMinute * 60 * WEIGHTED_TOKEN_COST_USD_COARSE).toFixed(2))
      : null,
    window_samples: live.length,
    window_minutes: Number(spanMinutes.toFixed(1)),
  };
}
185
+
186
/**
 * Renders the human-readable `[overage-warning]` stderr line.
 *
 * @param {object} args
 * @param {string} args.ts - Timestamp text placed after the prefix.
 * @param {object} args.trigger - Needs `upgrade_paths` (array) and `surpassed_threshold`.
 * @param {object} args.snapshot - Needs `q5h_pct` and `q7d_pct`.
 * @param {object|null} args.projection - Used only when both `min_to_100` and
 *   `cost_per_hr_usd_coarse` are non-null; otherwise the "warming up" form is produced.
 * @returns {string} The formatted line, without a trailing newline.
 */
export function formatStderrLine({ ts, trigger, snapshot, projection }) {
  const paths = trigger.upgrade_paths;
  const upgradeText = paths.length ? paths.join(", ") : "(none)";
  const prefix = `[overage-warning] ${ts} Q5h=${snapshot.q5h_pct}% Q7d=${snapshot.q7d_pct}% (surpassed ${trigger.surpassed_threshold})`;

  const usable =
    Boolean(projection) &&
    projection.min_to_100 !== null &&
    projection.cost_per_hr_usd_coarse !== null;
  if (!usable) {
    return `${prefix} — projection unavailable (warming up). Upgrade paths: ${upgradeText}.`;
  }

  const hourlyCost = projection.cost_per_hr_usd_coarse.toFixed(2);
  return `${prefix} — projected 100% in ~${projection.min_to_100} min, estimated continued burn ≈ $${hourlyCost}/hr at API rates (coarse). Upgrade paths: ${upgradeText}.`;
}
196
+
197
/**
 * Builds the structured record appended to the overage-warnings JSONL file.
 * Only the listed trigger/snapshot fields are copied, so extra properties on
 * the inputs never reach the record.
 *
 * @param {object} args
 * @param {string} args.ts - Timestamp for the record.
 * @param {object} args.trigger - Source of status, threshold, overage status, upgrade paths.
 * @param {object} args.snapshot - Source of q5h_pct, q7d_pct, q5h_resets_at.
 * @param {object|null} args.projection - Passed through as-is; a falsy value is
 *   replaced by an all-null projection with zero window counts.
 * @returns {object} The record.
 */
export function formatJsonlRecord({ ts, trigger, snapshot, projection }) {
  const { status, surpassed_threshold, overage_status, upgrade_paths } = trigger;
  const { q5h_pct, q7d_pct, q5h_resets_at } = snapshot;
  const emptyProjection = {
    min_to_100: null,
    tokens_per_min: null,
    cost_per_hr_usd_coarse: null,
    window_samples: 0,
    window_minutes: 0,
  };

  return {
    ts,
    trigger: { status, surpassed_threshold, overage_status, upgrade_paths },
    snapshot: { q5h_pct, q7d_pct, q5h_resets_at },
    projection: projection || emptyProjection,
  };
}
220
+
221
+ // --- Window management ---
222
+
223
/**
 * Appends a sample to `state.window`, then evicts from the front: first every
 * leading sample older than WINDOW_MS relative to the new sample, then
 * whatever is needed to keep at most WINDOW_MAX_SAMPLES entries.
 *
 * @param {{window: Array<object>}} state - Holder of the sample array (mutated in place).
 * @param {{t: number}} sample - New sample; `t` is its timestamp in milliseconds.
 */
export function recordSample(state, sample) {
  const buffer = state.window;
  buffer.push(sample);

  // Count the expired prefix, then remove it in one step.
  const oldestAllowed = sample.t - WINDOW_MS;
  let expired = 0;
  while (expired < buffer.length && buffer[expired].t < oldestAllowed) expired += 1;
  if (expired > 0) buffer.splice(0, expired);

  const overflow = buffer.length - WINDOW_MAX_SAMPLES;
  if (overflow > 0) buffer.splice(0, overflow);
}
229
+
230
+ // --- Dedup helpers operating on module state ---
231
+
232
/**
 * Decides whether a warning for (threshold, window) may be emitted and records
 * that it has been. A change in `q5h_resets_at` starts a fresh dedup set.
 *
 * @param {*} threshold - Surpassed-threshold value.
 * @param {*} q5h_resets_at - Reset value identifying the current Q5h window.
 * @returns {boolean} true the first time the pair is seen in this window, false afterwards.
 */
function checkAndMarkDedup(threshold, q5h_resets_at) {
  const sameWindow = q5h_resets_at === _dedupWindowResetsAt;
  if (!sameWindow) {
    _dedupWindowResetsAt = q5h_resets_at;
    _dedupThresholds = new Set();
  }

  const marker = dedupKey(threshold, q5h_resets_at);
  const firstSighting = !_dedupThresholds.has(marker);
  if (firstSighting) _dedupThresholds.add(marker);
  return firstSighting;
}
243
+
244
+ // --- I/O ---
245
+
246
/**
 * Appends one JSON line to `overage-warnings.jsonl`, creating the directory
 * first if needed.
 *
 * Directory precedence: explicit `dir` argument, then
 * CACHE_FIX_OVERAGE_WARNING_DIR, then `~/.claude`.
 *
 * @param {object} record - Value serialised with JSON.stringify.
 * @param {string} [dir] - Optional output directory override.
 * @returns {Promise<void>} Rejects if the directory or file cannot be written.
 */
async function appendJsonl(record, dir) {
  const targetDir =
    dir || process.env.CACHE_FIX_OVERAGE_WARNING_DIR || join(homedir(), ".claude");
  const targetFile = join(targetDir, "overage-warnings.jsonl");

  await mkdir(targetDir, { recursive: true });
  const line = JSON.stringify(record) + "\n";
  await appendFile(targetFile, line);
}
252
+
253
+ // Test helper: write a record using a caller-supplied directory. Bypasses
254
+ // env-var lookup so tests do not race on a shared env. Pure side effect.
255
/**
 * Test helper: appends `record` to the JSONL file inside `dir`.
 * @param {object} record - Record to serialise.
 * @param {string} dir - Output directory; when truthy it takes precedence over the env lookup.
 * @returns {Promise<void>}
 */
export async function writeRecord(record, dir) {
  const pendingWrite = appendJsonl(record, dir);
  await pendingWrite;
}
258
+
259
+ // Test helper: drain in-memory state. For deterministic tests.
260
/**
 * Test helper: wipes the module's in-memory sample window and dedup state.
 * @returns {void}
 */
export function _resetForTest() {
  return void resetState();
}
263
+
264
+ // --- Extension contract ---
265
+
266
/**
 * Extension contract for the overage-warning extension.
 *
 * - `onResponseStart` captures the 5h utilization header (for sampling) and
 *   latches trigger eligibility onto `ctx.meta`.
 * - `onStreamEvent` records one sample per response on `message_start`,
 *   folds output tokens into that same sample on `message_delta`, and emits
 *   the warning (stderr + JSONL) at most once per threshold per Q5h window.
 *
 * Both hooks are no-ops unless the extension is enabled via env, and both
 * swallow unexpected errors into a debug line so the proxy is never disturbed.
 */
export default {
  name: "overage-warning",
  description:
    "Emit one-time warning per Q5h-window threshold crossing when overage headers indicate trouble",
  enabled: true,
  order: 610,

  /**
   * @param {object} ctx - Hook context; reads `ctx.headers`, writes `ctx.meta`.
   */
  async onResponseStart(ctx) {
    if (!isEnabled()) return;
    if (!ctx || !ctx.headers) return;

    try {
      ctx.meta = ctx.meta || {};

      // Always capture quota state when present, regardless of whether THIS
      // response crosses a threshold: later responses need warm samples.
      const q5hRaw = ctx.headers["anthropic-ratelimit-unified-5h-utilization"];
      const q5hUtil = q5hRaw ? parseFloat(q5hRaw) : null;
      if (q5hUtil !== null && Number.isFinite(q5hUtil)) {
        ctx.meta._overageQuota = { q5h_util: q5hUtil };
      }

      // Eligibility latch, kept separate from sampling.
      const result = parseTriggerFromHeaders(ctx.headers);
      if (!result.eligible) return;
      ctx.meta._overageWarning = {
        eligible: true,
        emitted: false,
        trigger: result.trigger,
        snapshot: result.snapshot,
        raw: result.raw,
      };
    } catch (err) {
      debug(`onResponseStart unexpected: ${err?.message ?? err}`);
    }
  },

  /**
   * @param {object} ctx - Hook context; reads `ctx.event` and `ctx.meta`.
   */
  async onStreamEvent(ctx) {
    if (!isEnabled()) return;
    if (!ctx || !ctx.event) return;

    try {
      // Sample collection: every response with a quota reading contributes.
      if (ctx.event.type === "message_start" && ctx.event.message?.usage) {
        const u = ctx.event.message.usage;
        const q5hUtil = ctx.meta?._overageQuota?.q5h_util;
        if (q5hUtil !== undefined && q5hUtil !== null) {
          const sample = {
            t: Date.now(),
            q5h: q5hUtil,
            input: u.input_tokens || 0,
            cache_creation: u.cache_creation_input_tokens || 0,
            cache_read: u.cache_read_input_tokens || 0,
            output: 0,
          };
          recordSample({ window: _window }, sample);
          // Keep a response-local reference so message_delta updates THIS
          // sample, never another response's sample under interleaving.
          ctx.meta._overageSample = sample;
        }
      }

      if (ctx.event.type === "message_delta") {
        // message_delta usage counts are cumulative for the message, so the
        // sample keeps the largest value seen instead of summing. Summing
        // would over-count whenever a response emits several message_delta
        // events; with a single event the result is unchanged.
        const ownSample = ctx.meta?._overageSample;
        const reportedOutput = ctx.event.usage?.output_tokens;
        if (ownSample && Number.isFinite(reportedOutput) && reportedOutput > ownSample.output) {
          ownSample.output = reportedOutput;
        }

        // Emission gate.
        const w = ctx.meta?._overageWarning;
        if (!w || !w.eligible || w.emitted) return;

        const allowed = checkAndMarkDedup(
          w.trigger.surpassed_threshold,
          w.snapshot.q5h_resets_at,
        );
        if (!allowed) {
          w.emitted = true;
          return;
        }

        const ts = new Date().toISOString();
        const projection = computeProjection(_window, Date.now());
        // The stderr line only shows a projection that is warmed up and finite;
        // the JSONL record always carries the full projection object.
        const projectionForOutput =
          projection.window_samples >= WARM_UP_MIN_SAMPLES &&
          projection.min_to_100 !== null
            ? projection
            : null;

        const record = formatJsonlRecord({
          ts,
          trigger: w.trigger,
          snapshot: w.snapshot,
          projection: projectionForOutput || projection,
        });

        if (!isQuiet()) {
          process.stderr.write(formatStderrLine({
            ts,
            trigger: w.trigger,
            snapshot: w.snapshot,
            projection: projectionForOutput,
          }) + "\n");
        }

        await appendJsonl(record);
        w.emitted = true;
      }
    } catch (err) {
      debug(`onStreamEvent unexpected: ${err?.message ?? err}`);
    }
  },
};
@@ -0,0 +1,533 @@
1
+ // upstream-change-detection — read-only structural fingerprinter that
2
+ // detects when Anthropic ships CC updates that change the structural shape
3
+ // of /v1/messages requests. Per-namespace baseline persists across proxy
4
+ // restarts to prevent false-positive floods.
5
+ //
6
+ // Output:
7
+ // - stderr line prefixed [upstream-change] for proxy journals/logs
8
+ // - ~/.claude/upstream-changes.jsonl (event log: baseline_established,
9
+ // structural_change)
10
+ // - ~/.claude/upstream-baseline.json (per-namespace baseline, atomic
11
+ // full replace)
12
+ //
13
+ // Activation: `enabled: true` in extensions.json (always loaded), gated at
14
+ // runtime by `CACHE_FIX_UPSTREAM_DETECTION=1`. Prefix-diff pattern.
15
+ //
16
+ // Privacy: every persisted field is a count, position, boolean, bucket
17
+ // label, or hash of stable identifiers. NO prompt content, NO file paths,
18
+ // NO message text. Test #18 enforces this at unit level.
19
+ //
20
+ // See `docs/directives/proxy-upstream-change-detection.md` for full design.
21
+
22
+ import {
23
+ mkdir as _mkdir,
24
+ readFile as _readFile,
25
+ writeFile as _writeFile,
26
+ rename as _rename,
27
+ unlink as _unlink,
28
+ appendFile as _appendFile,
29
+ } from "node:fs/promises";
30
+ import { join } from "node:path";
31
+ import { homedir } from "node:os";
32
+ import { createHash, randomBytes } from "node:crypto";
33
+
34
+ // --- Allowlists ---
35
+ //
36
+ // New entries are signal: when a request's text contains a marker / tag we
37
+ // haven't seen before, the boolean unknown-detector flips. Add entries here
38
+ // only when investigation has confirmed the new item is legitimate
39
+ // (genuine CC change, not an exploit attempt).
40
+
41
+ const KNOWN_SECTION_MARKERS = [
42
+ "# Environment",
43
+ "# System",
44
+ "# Tools",
45
+ "# Personality",
46
+ "# Settings",
47
+ "# Memory",
48
+ "# Output efficiency",
49
+ "# auto memory",
50
+ "# Doing tasks",
51
+ "# Tone and style",
52
+ "# Using your tools",
53
+ "# Text output",
54
+ "# Session-specific guidance",
55
+ "# Code references",
56
+ "# Executing actions with care",
57
+ ];
58
+
59
+ const KNOWN_REMINDER_PATTERNS = [
60
+ "<system-reminder>",
61
+ "<command-name>",
62
+ "<command-message>",
63
+ "<command-args>",
64
+ "<git-status>",
65
+ "<local-command-stdout>",
66
+ "<local-command-stderr>",
67
+ "<command-stdout>",
68
+ "<command-stderr>",
69
+ "<file-attachment>",
70
+ ];
71
+
72
+ const SECTION_MARKER_SHAPE = /^# [A-Z][a-zA-Z ]{1,30}$/m;
73
+ const REMINDER_TAG_SHAPE = /<[a-z][a-z-]{1,30}>/;
74
+
75
+ // --- Env gates (per-call to ease test isolation) ---
76
+
77
/**
 * Reports whether upstream-change detection is switched on.
 * The environment is consulted on every call.
 * @returns {boolean} true only when CACHE_FIX_UPSTREAM_DETECTION is exactly "1".
 */
function isEnabled() {
  const flag = process.env.CACHE_FIX_UPSTREAM_DETECTION;
  return flag === "1";
}
80
/**
 * Reports whether the `[upstream-change]` stderr line is suppressed.
 * @returns {boolean} true only when CACHE_FIX_UPSTREAM_QUIET is exactly "1".
 */
function isQuiet() {
  const flag = process.env.CACHE_FIX_UPSTREAM_QUIET;
  return flag === "1";
}
83
/**
 * Reports whether debug tracing is requested.
 * @returns {boolean} true only when CACHE_FIX_DEBUG is exactly "1".
 */
function isDebug() {
  const flag = process.env.CACHE_FIX_DEBUG;
  return flag === "1";
}
86
+
87
/**
 * Writes a debug line to stderr, prefixed with `[upstream-change] DEBUG:`.
 * Does nothing unless debug mode is active.
 * @param {*} msg - Value interpolated into the line.
 */
function debug(msg) {
  if (!isDebug()) return;
  process.stderr.write(`[upstream-change] DEBUG: ${msg}\n`);
}
90
+
91
+ // --- Default fs (overridable for tests) ---
92
+
93
+ const DEFAULT_FS = {
94
+ mkdir: _mkdir,
95
+ readFile: _readFile,
96
+ writeFile: _writeFile,
97
+ rename: _rename,
98
+ unlink: _unlink,
99
+ appendFile: _appendFile,
100
+ };
101
+
102
+ // --- Module-scope state ---
103
+
104
+ let _namespaceMap = new Map();
105
+ let _baselineLoadedFrom = null;
106
+
107
/**
 * Test helper: discards the in-memory namespace map and marks the baseline as
 * not yet loaded, so the next request loads it again.
 */
export function _resetForTest() {
  _baselineLoadedFrom = null;
  _namespaceMap = new Map();
}
111
+
112
/**
 * Resolves the directory that holds the baseline and event-log files.
 * @returns {string} CACHE_FIX_UPSTREAM_DIR when set and non-empty, otherwise `~/.claude`.
 */
function getOutputDir() {
  const override = process.env.CACHE_FIX_UPSTREAM_DIR;
  if (override) return override;
  return join(homedir(), ".claude");
}
115
+
116
/**
 * Path of the persisted per-namespace baseline file.
 * @param {string} [dir] - Directory override; a falsy value uses the default output directory.
 * @returns {string} `<dir>/upstream-baseline.json`.
 */
function getBaselinePath(dir) {
  const base = dir || getOutputDir();
  return join(base, "upstream-baseline.json");
}
119
+
120
/**
 * Path of the append-only change event log.
 * @param {string} [dir] - Directory override; a falsy value uses the default output directory.
 * @returns {string} `<dir>/upstream-changes.jsonl`.
 */
function getJsonlPath(dir) {
  const base = dir || getOutputDir();
  return join(base, "upstream-changes.jsonl");
}
123
+
124
+ // --- Pure helpers ---
125
+
126
/**
 * Short stable identifier: the first 16 hex characters of SHA-256 of `s`.
 * @param {string} s - Input to hash.
 * @returns {string} 16 lowercase hex characters.
 */
function sha16(s) {
  const hasher = createHash("sha256");
  hasher.update(s);
  const fullHex = hasher.digest("hex");
  return fullHex.slice(0, 16);
}
129
+
130
/**
 * Maps a text length to a coarse size label so raw sizes are never persisted.
 * @param {number} size - Character count.
 * @returns {"tiny"|"small"|"medium"|"large"} <200, <2000, <20000, otherwise "large".
 */
export function bucketBlockSize(size) {
  const tiers = [
    [200, "tiny"],
    [2000, "small"],
    [20000, "medium"],
  ];
  for (const [upperBound, label] of tiers) {
    if (size < upperBound) return label;
  }
  return "large";
}
136
+
137
/**
 * Maps a `max_tokens` value to a coarse label.
 * @param {*} n - Requested max tokens.
 * @returns {string} "unset" for non-finite-number or non-positive input;
 *   otherwise "tiny" (<1024), "1k-8k", "8k-32k", "32k-100k", or "huge" (>=100000).
 */
export function bucketMaxTokens(n) {
  const usable = Number.isFinite(n) && n > 0;
  if (!usable) return "unset";

  const tiers = [
    [1024, "tiny"],
    [8192, "1k-8k"],
    [32768, "8k-32k"],
    [100000, "32k-100k"],
  ];
  const hit = tiers.find(([upperBound]) => n < upperBound);
  return hit ? hit[1] : "huge";
}
145
+
146
/**
 * Finds which allowlisted section markers occur in `text` as a whole line.
 * A marker must equal the entire line (split on "\n"), so "# Environment
 * Details" does not count as "# Environment".
 *
 * @param {*} text - Text to scan; non-strings and "" yield [].
 * @returns {number[]} Ascending indices into KNOWN_SECTION_MARKERS.
 */
export function matchKnownSectionMarkers(text) {
  if (typeof text !== "string" || text === "") return [];

  const wholeLines = new Set(text.split("\n"));
  return KNOWN_SECTION_MARKERS.reduce((found, marker, position) => {
    if (wholeLines.has(marker)) found.push(position);
    return found;
  }, []);
}
159
+
160
/**
 * Detects a line that has the shape of a section marker but is not on the
 * allowlist.
 *
 * @param {*} text - Text to scan; non-strings and "" yield false.
 * @returns {boolean} true if any "\n"-delimited line matches SECTION_MARKER_SHAPE
 *   and is absent from KNOWN_SECTION_MARKERS.
 */
export function hasUnknownSectionMarker(text) {
  if (typeof text !== "string" || text === "") return false;

  return text
    .split("\n")
    .some((line) => SECTION_MARKER_SHAPE.test(line) && !KNOWN_SECTION_MARKERS.includes(line));
}
171
+
172
/**
 * Finds which allowlisted reminder tags occur anywhere in `text` (substring match).
 *
 * @param {*} text - Text to scan; non-strings and "" yield [].
 * @returns {number[]} Ascending indices into KNOWN_REMINDER_PATTERNS.
 */
export function matchKnownReminderPatterns(text) {
  if (typeof text !== "string" || text === "") return [];

  return KNOWN_REMINDER_PATTERNS.reduce((found, tag, position) => {
    if (text.includes(tag)) found.push(position);
    return found;
  }, []);
}
180
+
181
/**
 * Detects a lowercase, hyphenated opening tag (e.g. `<some-tag>`) that is not
 * on the reminder allowlist.
 *
 * @param {*} text - Text to scan; non-strings and "" yield false.
 * @returns {boolean} true if at least one tag-shaped token is unknown.
 */
export function hasUnknownReminderPattern(text) {
  if (typeof text !== "string" || text === "") return false;

  const tagsFound = text.match(/<[a-z][a-z-]{1,30}>/g) ?? [];
  return tagsFound.some((tag) => !KNOWN_REMINDER_PATTERNS.includes(tag));
}
189
+
190
/**
 * Derives the baseline map key for a (model, beta set) pair. Beta names are
 * sorted on a copy, so header order does not affect the key and the caller's
 * array is not mutated.
 *
 * @param {string} model - Model name; falsy values are treated as "".
 * @param {string[]} betaHeadersArr - Beta names; non-arrays are treated as empty.
 * @returns {string} 16-hex-character key.
 */
export function namespaceKey(model, betaHeadersArr) {
  const betas = Array.isArray(betaHeadersArr) ? betaHeadersArr.slice().sort() : [];
  const joinedBetas = betas.join(",");
  return sha16(`${model || ""}|${joinedBetas}`);
}
194
+
195
+ // Beta features arrive on the `anthropic-beta` REQUEST HEADER (Node http
196
+ // header keys are lowercased). The proxy surfaces request headers on
197
+ // `ctx.headers` to onRequest hooks. The function accepts the headers map
198
+ // (case-insensitive lookup) and falls back to body.anthropic_beta only as
199
+ // a defensive fallback for edge cases where a caller pre-merged it.
200
/**
 * Extracts the list of beta feature names for a request.
 *
 * Source precedence:
 *   1. the `anthropic-beta` request header, matched case-insensitively;
 *   2. `body.anthropic_beta`, as a defensive fallback.
 *
 * The three spellings probed historically ("anthropic-beta",
 * "Anthropic-Beta", "ANTHROPIC-BETA") are still checked first, in that order,
 * so existing callers get the same answer. Any other casing (for example
 * "Anthropic-beta") is then found by scanning the header keys.
 *
 * @param {object|null|undefined} headers - Request headers map.
 * @param {object|null|undefined} body - Parsed request body.
 * @returns {string[]} Trimmed, non-empty beta names in source order. A string
 *   value is split on commas; an array value is stringified element-wise;
 *   any other type yields [].
 */
export function extractBetaHeaders(headers, body) {
  const TARGET = "anthropic-beta";
  let raw;

  if (headers && typeof headers === "object") {
    raw = headers[TARGET] || headers["Anthropic-Beta"] || headers["ANTHROPIC-BETA"];
    if (!raw) {
      // Genuine case-insensitive fallback for any remaining spelling.
      for (const key of Object.keys(headers)) {
        if (key.toLowerCase() === TARGET && headers[key]) {
          raw = headers[key];
          break;
        }
      }
    }
  }

  if (!raw) raw = body?.anthropic_beta;
  if (!raw) return [];

  let parts;
  if (Array.isArray(raw)) {
    parts = raw.map(String);
  } else if (typeof raw === "string") {
    parts = raw.split(",");
  } else {
    return [];
  }
  return parts.map((name) => name.trim()).filter(Boolean);
}
209
+
210
/**
 * Length of a content block's text: `block.text` if it is a string, else
 * `block.content` if it is a string, else 0.
 * @param {*} block - Content block; non-objects yield 0.
 * @returns {number} String length, or 0 when no string payload exists.
 */
function blockTextLength(block) {
  if (!block || typeof block !== "object") return 0;
  for (const candidate of [block.text, block.content]) {
    if (typeof candidate === "string") return candidate.length;
  }
  return 0;
}
216
+
217
/**
 * Text payload of a content block: `block.text` if it is a string, else
 * `block.content` if it is a string, else "".
 * @param {*} block - Content block; non-objects yield "".
 * @returns {string} The text, or "" when no string payload exists.
 */
function blockText(block) {
  if (!block || typeof block !== "object") return "";
  for (const candidate of [block.text, block.content]) {
    if (typeof candidate === "string") return candidate;
  }
  return "";
}
223
+
224
/**
 * Locates the array entries that carry a truthy `cache_control` property.
 * @param {*} arr - Array of blocks; non-arrays yield an empty result.
 * @returns {{count: number, positions: number[]}} Indices in ascending order and their count.
 */
function countCacheControlInArray(arr) {
  if (!Array.isArray(arr)) return { count: 0, positions: [] };

  const positions = [];
  arr.forEach((entry, index) => {
    const tagged = Boolean(entry) && typeof entry === "object" && Boolean(entry.cache_control);
    if (tagged) positions.push(index);
  });
  return { count: positions.length, positions };
}
235
+
236
/**
 * Structural fingerprint of the `system` field. Only counts, type names, size
 * buckets, positions, booleans and a hash of allowlist indices are returned —
 * never the text itself.
 *
 * @param {*} system - Expected to be an array of blocks; anything else is treated as [].
 * @returns {object} Fingerprint with block counts/types/size buckets, known
 *   section-marker hash and count, the unknown-marker flag, and cache_control
 *   count/positions.
 */
function fingerprintSystem(system) {
  const blocks = Array.isArray(system) ? system : [];
  const typeOf = (b) =>
    b && typeof b === "object" && typeof b.type === "string" ? b.type : "unknown";

  const markerIndices = new Set();
  let sawUnknownMarker = false;
  for (const block of blocks) {
    const text = blockText(block);
    matchKnownSectionMarkers(text).forEach((i) => markerIndices.add(i));
    // Once an unknown marker is seen, later blocks are not re-checked.
    sawUnknownMarker = sawUnknownMarker || hasUnknownSectionMarker(text);
  }
  const sortedIndices = Array.from(markerIndices).sort((x, y) => x - y);
  const cacheControl = countCacheControlInArray(blocks);

  return {
    block_count: blocks.length,
    block_types_in_order: blocks.map(typeOf),
    block_size_buckets: blocks.map((b) => bucketBlockSize(blockTextLength(b))),
    known_section_marker_set_hash: sha16(sortedIndices.join(",")),
    known_section_marker_count: sortedIndices.length,
    unknown_section_marker_present: sawUnknownMarker,
    cache_control_count: cacheControl.count,
    cache_control_positions: cacheControl.positions,
  };
}
262
+
263
/**
 * Structural fingerprint of the `tools` array: the tool count, a hash of the
 * sorted tool names, and a hash of each tool's sorted top-level
 * `input_schema.properties` keys.
 *
 * The name → keys maps are null-prototype objects. On a plain `{}` a tool
 * named "__proto__" would rewire the prototype instead of creating a key,
 * silently dropping that tool from `schema_shape_hash`. For every other name
 * the serialised form, and therefore the hash, is unchanged.
 *
 * @param {*} tools - Expected to be an array of tool definitions.
 * @returns {{count: number, names_sorted_hash: string, schema_shape_hash: string}}
 */
function fingerprintTools(tools) {
  if (!Array.isArray(tools) || tools.length === 0) {
    return {
      count: 0,
      names_sorted_hash: sha16(""),
      schema_shape_hash: sha16(""),
    };
  }

  // Tools without a string name contribute "" to the name list and are
  // skipped for the shape map.
  const names = tools.map((t) => (t && typeof t.name === "string" ? t.name : "")).sort();

  const shape = Object.create(null);
  for (const t of tools) {
    if (!t || typeof t.name !== "string") continue;
    const props = t.input_schema?.properties;
    shape[t.name] = props && typeof props === "object" ? Object.keys(props).sort() : [];
  }

  // Re-insert in sorted-name order so the serialisation is deterministic.
  const shapeOrdered = Object.create(null);
  for (const k of Object.keys(shape).sort()) shapeOrdered[k] = shape[k];

  return {
    count: tools.length,
    names_sorted_hash: sha16(JSON.stringify(names)),
    schema_shape_hash: sha16(JSON.stringify(shapeOrdered)),
  };
}
289
+
290
/**
 * Structural fingerprint of the `messages` array: message count, first role,
 * number of content blocks carrying `cache_control`, and which allowlisted or
 * unknown reminder tags appear in string content. No message text is returned.
 *
 * @param {*} messages - Expected to be an array; anything else is treated as [].
 * @returns {object} Fingerprint with count, first_role, cache_control count,
 *   known reminder-pattern hash and count, and the unknown-pattern flag.
 */
function fingerprintMessages(messages) {
  const list = Array.isArray(messages) ? messages : [];
  const reminderIndices = new Set();
  let cacheControlTotal = 0;
  let sawUnknownTag = false;

  // Shared scanner for both string content and per-block text.
  const scan = (text) => {
    for (const i of matchKnownReminderPatterns(text)) reminderIndices.add(i);
    if (!sawUnknownTag && hasUnknownReminderPattern(text)) sawUnknownTag = true;
  };

  for (const msg of list) {
    if (!msg || typeof msg !== "object") continue;
    const { content } = msg;
    if (Array.isArray(content)) {
      for (const block of content) {
        if (block && typeof block === "object" && block.cache_control) cacheControlTotal += 1;
        scan(blockText(block));
      }
    } else if (typeof content === "string") {
      scan(content);
    }
  }

  const sortedIndices = Array.from(reminderIndices).sort((x, y) => x - y);
  return {
    count: list.length,
    first_role: list[0]?.role || null,
    cache_control_count_in_messages: cacheControlTotal,
    known_reminder_pattern_set_hash: sha16(sortedIndices.join(",")),
    known_reminder_pattern_count: sortedIndices.length,
    unknown_reminder_pattern_present: sawUnknownTag,
  };
}
321
+
322
/**
 * Fingerprint of miscellaneous top-level request options.
 * @param {object|null|undefined} body - Parsed request body.
 * @returns {{has_thinking: boolean, has_metadata: boolean, stream: boolean, max_tokens_bucket: string}}
 *   Presence flags for `thinking`/`metadata`, whether `stream` is strictly
 *   true, and the bucket label for `max_tokens`.
 */
function fingerprintRequestExtras(body) {
  const hasThinking = Boolean(body?.thinking);
  const hasMetadata = Boolean(body?.metadata);
  const isStreaming = body?.stream === true;
  const maxTokensBucket = bucketMaxTokens(body?.max_tokens);
  return {
    has_thinking: hasThinking,
    has_metadata: hasMetadata,
    stream: isStreaming,
    max_tokens_bucket: maxTokensBucket,
  };
}
330
+
331
+ // Compute a structural fingerprint. `headers` is the request headers map
332
+ // (case-insensitive lookup is internal); pass `{}` if not available.
333
/**
 * Computes the full structural fingerprint of a request.
 *
 * @param {*} body - Parsed request body; non-objects are treated as `{}`.
 * @param {object} [headers={}] - Request headers map, used to find beta features.
 * @returns {object} `{ version: 1, namespace, system, tools, messages, request_extras }`
 *   where `namespace` holds the model name plus the hash and count of beta features.
 */
export function computeFingerprint(body, headers = {}) {
  const safeBody = body && typeof body === "object" ? body : {};
  const beta = extractBetaHeaders(headers, safeBody);

  const namespace = {
    model: typeof safeBody.model === "string" ? safeBody.model : "",
    beta_headers_sorted_hash: sha16(beta.slice().sort().join(",")),
    beta_headers_count: beta.length,
  };
  const system = fingerprintSystem(safeBody.system);
  const tools = fingerprintTools(safeBody.tools);
  const messages = fingerprintMessages(safeBody.messages);
  const request_extras = fingerprintRequestExtras(safeBody);

  return { version: 1, namespace, system, tools, messages, request_extras };
}
349
+
350
+ // Diff two fingerprints. Returns array of { path, from, to } entries. Equality
351
+ // is structural (deep). Arrays are compared by JSON stringification.
352
/**
 * Lists the differences between two fingerprints.
 * @param {object} prev - Earlier fingerprint.
 * @param {object} current - Newer fingerprint.
 * @returns {Array<{path: string, from: *, to: *}>} One entry per differing
 *   path (dot-separated); empty when either argument is falsy or nothing differs.
 */
export function diffFingerprints(prev, current) {
  const changes = [];
  if (prev && current) walk("", prev, current, changes);
  return changes;
}
358
+
359
/**
 * Recursive worker for fingerprint diffing. Appends `{ path, from, to }` to
 * `out` for each difference found beneath `prefix`.
 *
 * - Identical values (===) produce nothing.
 * - Values of different `typeof` are reported as-is.
 * - Arrays, primitives and null are compared via JSON.stringify.
 * - Two plain objects are compared key by key over the union of their keys.
 *
 * @param {string} prefix - Dot-separated path of the current position ("" at the root).
 * @param {*} a - Value on the "from" side.
 * @param {*} b - Value on the "to" side.
 * @param {Array<object>} out - Accumulator, mutated in place.
 */
function walk(prefix, a, b, out) {
  if (a === b) return;

  const report = () => {
    out.push({ path: prefix, from: a, to: b });
  };
  if (typeof a !== typeof b) {
    report();
    return;
  }

  const isPlainObject = (v) => typeof v === "object" && v !== null && !Array.isArray(v);
  if (!(isPlainObject(a) && isPlainObject(b))) {
    if (JSON.stringify(a) !== JSON.stringify(b)) report();
    return;
  }

  const unionKeys = new Set(Object.keys(a).concat(Object.keys(b)));
  unionKeys.forEach((key) => {
    walk(prefix ? `${prefix}.${key}` : key, a[key], b[key], out);
  });
}
377
+
378
+ // --- Persistence ---
379
+
380
/**
 * Loads the persisted baseline into module state.
 *
 * On success the namespace map is replaced by the file's `namespaces` entries
 * and the loaded path is remembered. On any failure (missing file, invalid
 * JSON, unexpected shape) the map is reset to empty; read/parse errors are
 * only reported through the debug channel.
 *
 * @param {object} [fs=DEFAULT_FS] - fs-like object providing `readFile`.
 * @param {string} [dir=getOutputDir()] - Directory containing the baseline file.
 * @returns {Promise<Map<string, object>>} The (possibly empty) namespace map now in effect.
 */
async function loadBaseline(fs = DEFAULT_FS, dir = getOutputDir()) {
  const path = getBaselinePath(dir);
  let storedNamespaces = null;

  try {
    const doc = JSON.parse(await fs.readFile(path, "utf8"));
    if (doc && doc.namespaces && typeof doc.namespaces === "object") {
      storedNamespaces = doc.namespaces;
    }
  } catch (err) {
    debug(`baseline load failed (${path}): ${err?.message ?? err}`);
  }

  if (storedNamespaces === null) {
    _namespaceMap = new Map();
    return _namespaceMap;
  }
  _namespaceMap = new Map(Object.entries(storedNamespaces));
  _baselineLoadedFrom = path;
  return _namespaceMap;
}
396
+
397
/**
 * Writes the in-memory namespace map to the baseline file as a full replace:
 * the document is written to a uniquely named temp file and then renamed over
 * the final path.
 *
 * The temp file is always unlinked afterwards. After a successful rename that
 * unlink fails and is ignored; after a failed write or rename it removes the
 * leftover temp file.
 *
 * @param {object} [fs=DEFAULT_FS] - fs-like object with mkdir/writeFile/rename/unlink.
 * @param {string} [dir=getOutputDir()] - Target directory (created if missing).
 * @returns {Promise<void>} Rejects if mkdir, writeFile or rename fails.
 */
async function persistBaseline(fs = DEFAULT_FS, dir = getOutputDir()) {
  const target = getBaselinePath(dir);
  // pid + timestamp + random suffix keeps concurrent writers on distinct temp files.
  const uniqueTag = `${process.pid}.${Date.now()}.${randomBytes(2).toString("hex")}`;
  const scratch = `${target}.tmp.${uniqueTag}`;
  const payload = {
    version: 1,
    namespaces: Object.fromEntries(_namespaceMap),
  };

  try {
    await fs.mkdir(dir, { recursive: true });
    await fs.writeFile(scratch, JSON.stringify(payload));
    await fs.rename(scratch, target);
  } finally {
    try {
      await fs.unlink(scratch);
    } catch {}
  }
}
413
+
414
/**
 * Appends one JSON line to the change event log, creating the directory first.
 *
 * @param {object} record - Event to serialise.
 * @param {object} [fs=DEFAULT_FS] - fs-like object with mkdir/appendFile.
 * @param {string} [dir=getOutputDir()] - Target directory.
 * @returns {Promise<void>} Rejects if the directory or file cannot be written.
 */
async function appendEvent(record, fs = DEFAULT_FS, dir = getOutputDir()) {
  const logFile = getJsonlPath(dir);
  await fs.mkdir(dir, { recursive: true });
  const line = JSON.stringify(record) + "\n";
  await fs.appendFile(logFile, line);
}
419
+
420
+ // Test seam: bypass module-scope state and operate on a caller-supplied map.
421
/**
 * Test seam: runs request processing against caller-supplied collaborators.
 *
 * @param {*} body - Parsed request body.
 * @param {object} [options]
 * @param {string} [options.dir] - Output directory for the event log.
 * @param {Map} [options.map] - Namespace map to use (defaults to the module's map).
 * @param {object} [options.fs] - fs-like object (defaults to DEFAULT_FS).
 * @param {object} [options.headers={}] - Request headers map.
 * @returns {Promise<object>} The result of `_processRequest`.
 */
export async function processRequestForTest(body, options = {}) {
  const { dir, map = _namespaceMap, fs = DEFAULT_FS, headers = {} } = options;
  return _processRequest(body, headers, { dir, map, fs });
}
424
+
425
/**
 * Fingerprints a request and reconciles it with the namespace's baseline.
 *
 * - No baseline yet: store one, log `baseline_established`.
 * - Fingerprint serialises identically to the baseline: nothing is written.
 * - Otherwise: replace the baseline (keeping `established_at`, incrementing
 *   `update_count`) and log `structural_change` with the diff.
 *
 * This function only updates `map` and appends to the event log; it does not
 * write the baseline file.
 *
 * @param {*} body - Parsed request body.
 * @param {object} headers - Request headers map.
 * @param {{dir: string, map: Map, fs: object}} deps - Output dir, namespace map, fs-like object.
 * @returns {Promise<{event: string, nsKey: string, diff?: Array<object>}>}
 */
async function _processRequest(body, headers, { dir, map, fs }) {
  const fingerprint = computeFingerprint(body, headers);
  const { namespace } = fingerprint;
  const nsKey = namespaceKey(namespace.model, extractBetaHeaders(headers, body));
  const ts = new Date().toISOString();
  const known = map.get(nsKey);

  if (!known) {
    map.set(nsKey, {
      namespace,
      fingerprint,
      established_at: ts,
      last_updated_at: ts,
      update_count: 0,
    });
    await appendEvent({ ts, event: "baseline_established", namespace, fingerprint }, fs, dir);
    return { event: "baseline_established", nsKey };
  }

  const previous = known.fingerprint;
  const unchanged = JSON.stringify(previous) === JSON.stringify(fingerprint);
  if (unchanged) return { event: "noop", nsKey };

  const diff = diffFingerprints(previous, fingerprint);
  map.set(nsKey, {
    namespace,
    fingerprint,
    established_at: known.established_at,
    last_updated_at: ts,
    update_count: (known.update_count || 0) + 1,
  });

  await appendEvent(
    { ts, event: "structural_change", namespace, diff, previous, current: fingerprint },
    fs,
    dir,
  );
  return { event: "structural_change", nsKey, diff };
}
478
+
479
/**
 * Renders the one-line `[upstream-change]` summary for stderr. At most six
 * diff entries are shown; the remainder is summarised as "(+N more)".
 *
 * @param {object} args
 * @param {string} args.ts - Timestamp text.
 * @param {object} args.namespace - Needs `model` (falsy renders as "?") and `beta_headers_count`.
 * @param {Array<{path: string, from: *, to: *}>} args.diff - Differences to summarise.
 * @returns {string} The formatted line, without a trailing newline.
 */
function formatStderrLine({ ts, namespace, diff }) {
  const SHOWN_LIMIT = 6;
  const prefix = `[upstream-change] ${ts} model=${namespace.model || "?"} beta=${namespace.beta_headers_count}`;

  const shown = diff
    .slice(0, SHOWN_LIMIT)
    .map((entry) => `${entry.path}: ${JSON.stringify(entry.from)} → ${JSON.stringify(entry.to)}`);
  const hiddenCount = diff.length - SHOWN_LIMIT;
  const tail = hiddenCount > 0 ? ` (+${hiddenCount} more)` : "";

  return `${prefix} :: ${shown.join("; ")}${tail}`;
}
488
+
489
+ // --- Extension contract ---
490
+
491
/**
 * Extension contract for upstream-change-detection.
 *
 * `onRequest` is read-only with respect to the request: it fingerprints
 * `ctx.body`, reconciles it with the per-namespace baseline, persists the
 * baseline when it changed, and prints a summary line on structural change
 * unless quiet mode is on. Unexpected errors are reported only via debug.
 */
export default {
  name: "upstream-change-detection",
  description:
    "Detect structural changes in CC-originated /v1/messages requests via per-namespace fingerprint",
  enabled: true,
  order: 50,

  /**
   * @param {object} ctx - Hook context; reads `ctx.body` and `ctx.headers`.
   */
  async onRequest(ctx) {
    if (!isEnabled() || !ctx || !ctx.body) return;

    try {
      const dir = getOutputDir();
      const fs = DEFAULT_FS;

      // Lazy-load the baseline on the first call after module load.
      // NOTE(review): the "loaded" marker is only set after the await, so two
      // requests arriving together may both load — confirm whether the proxy
      // can invoke this hook concurrently.
      if (_baselineLoadedFrom === null) {
        await loadBaseline(fs, dir);
        _baselineLoadedFrom = getBaselinePath(dir);
      }

      const result = await _processRequest(ctx.body, ctx.headers || {}, {
        dir,
        map: _namespaceMap,
        fs,
      });

      // Persist only when the in-memory baseline actually changed.
      const baselineChanged = ["baseline_established", "structural_change"].includes(result.event);
      if (baselineChanged) await persistBaseline(fs, dir);

      if (result.event !== "structural_change" || isQuiet()) return;

      const ts = new Date().toISOString();
      const namespace = _namespaceMap.get(result.nsKey)?.namespace;
      const line = formatStderrLine({ ts, namespace: namespace || {}, diff: result.diff });
      process.stderr.write(line + "\n");
    } catch (err) {
      debug(`onRequest unexpected: ${err?.message ?? err}`);
    }
  },
};
525
+
526
+ // Expose internals for testing.
527
+ export {
528
+ loadBaseline,
529
+ persistBaseline,
530
+ appendEvent,
531
+ formatStderrLine,
532
+ _namespaceMap as __testNamespaceMap,
533
+ };
@@ -1,46 +1,275 @@
1
+ // usage-log — append per-call usage record to ~/.claude/usage.jsonl.
2
+ //
3
+ // The emitted record matches `MeterRowSchema` v:1 from
4
+ // `claude-code-meter/src/log/schema.mjs` exactly. claude-meter validates each
5
+ // row through that schema; the wire format is the cross-repo contract.
6
+ //
7
+ // Schema (every row):
8
+ // v: 1
9
+ // ts: ISO datetime
10
+ // sid: 8-char lowercase hex (proxy session, sticky for proxy lifetime)
11
+ // model: string ≤64, /^[a-z0-9._-]+$/
12
+ // requested_model?: string ≤64, /^[a-z0-9._-]*$/ (optional)
13
+ // model_mismatch?: bool (optional)
14
+ // speed: "standard" | "fast" | ""
15
+ // service_tier: string ≤32, /^[a-z0-9_-]*$/
16
+ // input_tokens, output_tokens, cache_creation_input_tokens,
17
+ // cache_read_input_tokens, ephemeral_1h_input_tokens,
18
+ // ephemeral_5m_input_tokens, web_search_requests: int ≥ 0
19
+ // q5h, q7d: float 0–2
20
+ // q5h_reset, q7d_reset: int (unix sec)
21
+ // qstatus, qoverage, qclaim: string lowercase enums
22
+ // qfallback_pct: float 0–1
23
+ // qoverage_util?: float ≥ 0 (optional)
24
+ // qrepresentative_claim?: string ≤16 (optional)
25
+ // org_id?: 16-char hex (sha256(raw header).digest("hex").slice(0,16))
26
+ // overage_disabled_reason?: string ≤64 (optional)
27
+ // cache_hit_rate: float 0–1
28
+ // q5h_delta, q7d_delta: float (0 on first call after restart)
29
+ //
30
+ // `peak_hour` is NOT in the wire format. It can be derived from `ts` if any
31
+ // consumer needs it.
32
+ //
33
+ // Activation: enabled:false in the export default (existing usage-log
34
+ // pattern). Users opt in by adding an entry to proxy/extensions.json:
35
+ // "usage-log": { "enabled": true, "order": 650 }
36
+ // CACHE_FIX_USAGE_LOG=<path> overrides the destination path only — it is NOT
37
+ // an enable flag and never has been.
38
+ //
39
+ // See `docs/directives/proxy-claude-meter-compat.md` for full design.
40
+
1
41
  import { appendFile, mkdir } from "node:fs/promises";
2
42
  import { join } from "node:path";
3
43
  import { homedir } from "node:os";
44
+ import { createHash } from "node:crypto";
4
45
 
5
46
  const LOG_PATH = process.env.CACHE_FIX_USAGE_LOG || join(homedir(), ".claude", "usage.jsonl");
6
47
 
7
- function buildRecord(meta, telemetry, responseHeaders) {
8
- const now = new Date();
9
- const utcHour = now.getUTCHours();
10
- const utcDay = now.getUTCDay();
48
+ // --- Module-scope state ---
49
+
50
+ const _sid = generateSid();
51
+ let _lastQ5h = null;
52
+ let _lastQ7d = null;
53
+
54
+ // --- Pure helpers (test seam) ---
11
55
 
12
- const stats = meta.cacheStats || {};
13
- const quota = meta._quotaData || {};
56
/**
 * Produce an 8-character lowercase hex id: the first 8 hex digits of the
 * SHA-256 of "<pid>-<epoch ms>-<random>".
 *
 * @returns {string} 8 hex characters.
 */
export function generateSid() {
  const seed = [process.pid, Date.now(), Math.random()].join("-");
  const hex = createHash("sha256").update(seed).digest("hex");
  return hex.substring(0, 8);
}
62
+
63
/**
 * Hash a raw organization id to the first 16 hex digits of its SHA-256.
 *
 * @param {*} rawOrgId - Raw id; anything other than a non-empty string is rejected.
 * @returns {string|undefined} 16 hex characters, or `undefined` for unusable input.
 */
export function hashOrgId(rawOrgId) {
  const usable = typeof rawOrgId === "string" && rawOrgId.length > 0;
  if (!usable) return undefined;
  const digest = createHash("sha256").update(rawOrgId).digest("hex");
  return digest.substring(0, 16);
}
14
67
 
68
/**
 * Pull the per-response fields out of a `message_start` stream event.
 *
 * @param {object} event - Stream event.
 * @returns {object|null} Extracted fields, or `null` when the event is not a
 *   `message_start` or carries no `message.usage`.
 */
export function extractMessageStartFields(event) {
  if (event?.type !== "message_start") return null;

  const message = event.message;
  const usage = message?.usage;
  if (!usage) return null;

  const cacheBreakdown = usage.cache_creation || {};
  const serverTools = usage.server_tool_use || {};
  const modelName = typeof message.model === "string" ? message.model : "";

  return {
    model: modelName,
    speed: usage.speed || "",
    service_tier: usage.service_tier || "",
    input_tokens: usage.input_tokens || 0,
    cache_creation_input_tokens: usage.cache_creation_input_tokens || 0,
    cache_read_input_tokens: usage.cache_read_input_tokens || 0,
    ephemeral_1h_input_tokens: cacheBreakdown.ephemeral_1h_input_tokens || 0,
    ephemeral_5m_input_tokens: cacheBreakdown.ephemeral_5m_input_tokens || 0,
    web_search_requests: serverTools.web_search_requests || 0,
  };
}
87
+
88
/**
 * Pull the output token count out of a `message_delta` stream event.
 *
 * @param {object} event - Stream event.
 * @returns {{output_tokens: number}|null} `null` when the event is not a
 *   `message_delta` or has no `usage`.
 */
export function extractMessageDeltaFields(event) {
  const usage = event?.type === "message_delta" ? event.usage : undefined;
  if (!usage) return null;
  return { output_tokens: usage.output_tokens || 0 };
}
93
+
94
/**
 * Read a header as a finite float.
 *
 * @param {object} headers - Header map (may be null/undefined).
 * @param {string} key - Header name to look up.
 * @returns {number|null} Parsed value, or `null` when absent, empty, or not finite.
 */
function num(headers, key) {
  const raw = headers?.[key];
  const absent = [undefined, null, ""].includes(raw);
  if (absent) return null;
  const parsed = parseFloat(raw);
  if (!Number.isFinite(parsed)) return null;
  return parsed;
}
100
+
101
/**
 * Read a header as a base-10 integer.
 *
 * @param {object} headers - Header map (may be null/undefined).
 * @param {string} key - Header name to look up.
 * @returns {number} Parsed integer, or 0 when absent, empty, or unparsable.
 */
function intOf(headers, key) {
  const raw = headers?.[key];
  const absent = [undefined, null, ""].includes(raw);
  if (absent) return 0;
  const parsed = parseInt(raw, 10);
  if (!Number.isFinite(parsed)) return 0;
  return parsed;
}
107
+
108
/**
 * Read a header as a string.
 *
 * @param {object} headers - Header map (may be null/undefined).
 * @param {string} key - Header name to look up.
 * @returns {string} The value when it is a string, otherwise "".
 */
function strOf(headers, key) {
  const raw = headers?.[key];
  if (typeof raw !== "string") return "";
  return raw;
}
112
+
113
/**
 * Map the unified rate-limit response headers onto quota fields.
 *
 * Numeric utilisation/percentage fields default to 0 and reset fields to 0
 * when the header is missing; `qoverage_util` stays `null` when missing, and
 * the optional string fields become `undefined` when missing or empty.
 *
 * @param {object} headers - Response header map (may be null/undefined).
 * @returns {object} Parsed quota fields, including the raw (unhashed) `org_id_raw`.
 */
export function parseQuotaHeaders(headers) {
  const source = headers || {};
  const floatOrZero = (name) => num(source, name) ?? 0;
  const optionalText = (name) => strOf(source, name) || undefined;

  return {
    q5h: floatOrZero("anthropic-ratelimit-unified-5h-utilization"),
    q7d: floatOrZero("anthropic-ratelimit-unified-7d-utilization"),
    q5h_reset: intOf(source, "anthropic-ratelimit-unified-5h-reset"),
    q7d_reset: intOf(source, "anthropic-ratelimit-unified-7d-reset"),
    qstatus: strOf(source, "anthropic-ratelimit-unified-status"),
    qoverage: strOf(source, "anthropic-ratelimit-unified-overage-status"),
    qclaim: strOf(source, "anthropic-ratelimit-unified-claim"),
    qfallback_pct: floatOrZero("anthropic-ratelimit-unified-fallback-percentage"),
    qoverage_util: num(source, "anthropic-ratelimit-unified-overage-utilization"),
    qrepresentative_claim: optionalText("anthropic-ratelimit-unified-representative-claim"),
    org_id_raw: optionalText("anthropic-organization-id"),
    overage_disabled_reason: optionalText("anthropic-ratelimit-unified-overage-disabled-reason"),
  };
}
130
+
131
/**
 * Difference between the current and previous reading.
 *
 * @param {*} current - Current reading.
 * @param {*} previous - Previous reading; null/undefined means "no previous".
 * @returns {number} `current - previous` when both are numbers, otherwise 0.
 */
export function computeDelta(current, previous) {
  // `typeof null` is "object", so a missing previous value also lands on 0.
  const bothNumeric = typeof current === "number" && typeof previous === "number";
  return bothNumeric ? current - previous : 0;
}
136
+
137
/**
 * Assemble one usage row from the captured stream fields and quota headers.
 *
 * Required fields are always present (falling back to 0 / ""); optional
 * fields (`requested_model`, `model_mismatch`, `qoverage_util`,
 * `qrepresentative_claim`, `org_id`, `overage_disabled_reason`) are omitted
 * entirely when their source is absent.
 *
 * @param {object} args
 * @param {object} [args.start] - Fields captured from `message_start`.
 * @param {object} [args.delta] - Fields captured from `message_delta`.
 * @param {object} [args.quota] - Parsed quota header fields.
 * @param {string} [args.requestedModel] - Model named in the request, if known.
 * @param {string} args.sid - Session id written to the row.
 * @param {number|null} [args.prevQ5h] - Previous q5h reading for the delta.
 * @param {number|null} [args.prevQ7d] - Previous q7d reading for the delta.
 * @param {Date} [args.now] - Timestamp source; defaults to the current time.
 * @returns {object} The assembled row.
 */
export function assembleRecord({ start, delta, quota, requestedModel, sid, prevQ5h, prevQ7d, now = new Date() }) {
  const startFields = start || {};
  const deltaFields = delta || {};
  const quotaFields = quota || {};

  const freshInput = startFields.input_tokens || 0;
  const cacheWrites = startFields.cache_creation_input_tokens || 0;
  const cacheReads = startFields.cache_read_input_tokens || 0;
  const promptTotal = freshInput + cacheWrites + cacheReads;

  const model = startFields.model || "";
  const mismatched = Boolean(requestedModel) && Boolean(model) && requestedModel !== model;
  const overageUtil = quotaFields.qoverage_util;
  const hashedOrg = hashOrgId(quotaFields.org_id_raw);

  // Key order matters: it is the order the row is serialized in.
  return {
    v: 1,
    ts: now.toISOString(),
    sid,
    model,
    speed: startFields.speed || "",
    service_tier: startFields.service_tier || "",
    input_tokens: freshInput,
    output_tokens: deltaFields.output_tokens || 0,
    cache_creation_input_tokens: cacheWrites,
    cache_read_input_tokens: cacheReads,
    ephemeral_1h_input_tokens: startFields.ephemeral_1h_input_tokens || 0,
    ephemeral_5m_input_tokens: startFields.ephemeral_5m_input_tokens || 0,
    web_search_requests: startFields.web_search_requests || 0,
    q5h: quotaFields.q5h ?? 0,
    q7d: quotaFields.q7d ?? 0,
    q5h_reset: quotaFields.q5h_reset || 0,
    q7d_reset: quotaFields.q7d_reset || 0,
    qstatus: quotaFields.qstatus || "",
    qoverage: quotaFields.qoverage || "",
    qclaim: quotaFields.qclaim || "",
    qfallback_pct: quotaFields.qfallback_pct ?? 0,
    cache_hit_rate: promptTotal > 0 ? cacheReads / promptTotal : 0,
    q5h_delta: computeDelta(quotaFields.q5h, prevQ5h),
    q7d_delta: computeDelta(quotaFields.q7d, prevQ7d),
    // Optional fields: spread an empty object so the key is absent, not undefined.
    ...(requestedModel ? { requested_model: requestedModel } : {}),
    ...(mismatched ? { model_mismatch: true } : {}),
    ...(overageUtil != null ? { qoverage_util: overageUtil } : {}),
    ...(quotaFields.qrepresentative_claim
      ? { qrepresentative_claim: quotaFields.qrepresentative_claim }
      : {}),
    ...(hashedOrg ? { org_id: hashedOrg } : {}),
    ...(quotaFields.overage_disabled_reason
      ? { overage_disabled_reason: quotaFields.overage_disabled_reason }
      : {}),
  };
}
27
199
 
28
- export { buildRecord, LOG_PATH };
200
+ // --- I/O ---
201
+
202
/**
 * Append one record to a JSONL file as a single line.
 *
 * @param {object} record - Value to serialize with `JSON.stringify`.
 * @param {string} [path=LOG_PATH] - Destination file.
 * @returns {Promise<void>}
 * @throws Rejects with the underlying fs error if the directory cannot be
 *   created or the file cannot be appended to.
 */
async function appendJsonl(record, path = LOG_PATH) {
  // Create the directory that actually contains `path`, not a fixed
  // ~/.claude: the destination can be overridden to live anywhere.
  // `join(path, "..")` normalizes to the parent directory using the
  // platform's separators.
  await mkdir(join(path, ".."), { recursive: true });
  await appendFile(path, `${JSON.stringify(record)}\n`);
}
206
+
207
/**
 * Test helper: append a record to a caller-supplied path. Takes the path as
 * an argument instead of reading the environment, so tests don't race on a
 * shared env.
 *
 * @param {object} record - Value to serialize with `JSON.stringify`.
 * @param {string} path - Destination file; its parent directory is created if needed.
 * @returns {Promise<void>}
 * @throws Rejects with the underlying fs error on failure.
 */
export async function writeRecord(record, path) {
  // Derive the parent with `join(path, "..")` rather than slicing at the last
  // "/": a bare filename has no "/" (slicing yields "" and mkdir rejects), and
  // Windows paths use "\".
  await mkdir(join(path, ".."), { recursive: true });
  await appendFile(path, `${JSON.stringify(record)}\n`);
}
213
+
214
/**
 * Test helper: clear the module-scope previous-quota readings so the next
 * record is computed as if no earlier call had been seen.
 */
export function _resetDeltaStateForTest() {
  _lastQ7d = null;
  _lastQ5h = null;
}
219
+
220
+ export { LOG_PATH };
221
+
222
+ // --- Extension contract ---
29
223
 
30
224
export default {
  name: "usage-log",
  description: "Append per-call usage record to ~/.claude/usage.jsonl (MeterRowSchema v:1)",
  enabled: false,
  order: 650,

  /**
   * Stream hook. On `message_start` it stashes the extracted fields on
   * `ctx.meta._usageLog`; on a `message_delta` carrying `usage` it assembles
   * the row and appends it to the log. Every error is swallowed so the hook
   * never throws into the pipeline.
   */
  async onStreamEvent(ctx) {
    const event = ctx?.event;
    if (!event) return;

    try {
      switch (event.type) {
        case "message_start": {
          // Capture per-response state for the later message_delta.
          const captured = extractMessageStartFields(event);
          if (captured) {
            ctx.meta = ctx.meta || {};
            ctx.meta._usageLog = { start: captured };
          }
          return;
        }
        case "message_delta":
          break;
        default:
          return;
      }

      if (!event.usage) return;

      const start = ctx.meta?._usageLog?.start;
      if (!start) return; // no message_start was observed for this response

      const quota = parseQuotaHeaders(ctx.responseHeaders || {});
      const record = assembleRecord({
        start,
        delta: extractMessageDeltaFields(event),
        quota,
        requestedModel: ctx.telemetry?.requestedModel || undefined,
        sid: _sid,
        prevQ5h: _lastQ5h,
        prevQ7d: _lastQ7d,
        now: new Date(),
      });

      // Tracking is updated only AFTER assembly, so the first call after a
      // restart sees no previous reading and reports zero deltas.
      _lastQ5h = quota.q5h;
      _lastQ7d = quota.q7d;

      const destination = process.env.CACHE_FIX_USAGE_LOG || LOG_PATH;
      await appendJsonl(record, destination);
    } catch {
      // Fail-open: never throw to the pipeline.
    }
  },
};
@@ -9,5 +9,6 @@
9
9
  "cache-control-normalize": { "enabled": true, "order": 400 },
10
10
  "ttl-management": { "enabled": true, "order": 500 },
11
11
  "cache-telemetry": { "enabled": true, "order": 600 },
12
+ "overage-warning": { "enabled": true, "order": 610 },
12
13
  "request-log": { "enabled": false, "order": 700 }
13
14
  }
@@ -0,0 +1,16 @@
1
+ // Shared rate constants for cost projections.
2
+ //
3
+ // This is a deliberate over-simplification for v3.2.0. Anthropic's
4
+ // per-token rates vary by model, by cache tier (input vs cache_read vs
5
+ // cache_creation_5m vs cache_creation_1h), and by overage classification.
6
+ // Encoding all of that correctly is its own subproject — see the v3.3.0
7
+ // follow-up for a precise per-tier engine.
8
+ //
9
+ // For v3.2.0 we ship a single weighted blend constant suitable for
10
+ // a coarse "burn rate at API rates" indicator. Consumers MUST label the
11
+ // resulting number as `coarse` so users do not mistake it for a precise
12
+ // quote.
13
+
14
+ // Heuristic blend covering input + cache_read + cache_creation + output
15
+ // at a typical Opus 4.7 mix. Order of magnitude is right; precise it is not.
16
+ export const WEIGHTED_TOKEN_COST_USD_COARSE = 0.000005;