claude-code-cache-fix 1.4.0 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -114,6 +114,23 @@ On the first API call, the interceptor reads `~/.claude.json` and logs the curre
114
114
 
115
115
  Response headers are parsed for `anthropic-ratelimit-unified-5h-utilization` and `7d-utilization`, saved to `~/.claude/quota-status.json` for consumption by status line hooks or other tools.
116
116
 
117
+ ### Peak hour detection
118
+
119
+ Anthropic applies elevated quota drain rates during weekday peak hours (13:00–19:00 UTC, Mon–Fri). The interceptor detects peak windows and writes `peak_hour: true/false` to `quota-status.json`. See `docs/peak-hours-reference.md` for sources and details.
120
+
121
+ ### Usage telemetry and cost reporting
122
+
123
+ The interceptor logs per-call usage data to `~/.claude/usage.jsonl` — one JSON line per API call with model, token counts, and cache breakdown. Use the bundled cost report tool to analyze costs:
124
+
125
+ ```bash
126
+ node tools/cost-report.mjs # today's costs from interceptor log
127
+ node tools/cost-report.mjs --date 2026-04-08 # specific date
128
+ node tools/cost-report.mjs --since 2h # last 2 hours
129
+ node tools/cost-report.mjs --admin-key <key> # cross-reference with Admin API
130
+ ```
131
+
132
+ Also works with any JSONL containing Anthropic usage fields (`--file`, stdin) — useful for SDK users and proxy setups. See `docs/cost-report.md` for full documentation.
133
+
117
134
  ## Debug mode
118
135
 
119
136
  Enable debug logging to verify the fix is working:
@@ -133,6 +150,7 @@ Logs are written to `~/.claude/cache-fix-debug.log`. Look for:
133
150
  - `GROWTHBOOK FLAGS: {...}` — server-controlled feature flags on first call
134
151
  - `PROMPT SIZE: system=N tools=N injected=N (skills=N mcp=N ...)` — per-call prompt size breakdown
135
152
  - `CACHE TTL: tier=1h create=N read=N hit=N% (1h=N 5m=N)` — TTL tier and cache hit rate per call
153
+ - `PEAK HOUR: weekday 13:00-19:00 UTC` — Anthropic peak hour throttling active
136
154
  - `SKIPPED: resume relocation (not a resume or already correct)` — no fix needed
137
155
 
138
156
  ### Prefix diff mode
@@ -152,6 +170,7 @@ Snapshots are saved to `~/.claude/cache-fix-snapshots/` and diff reports are gen
152
170
  | `CACHE_FIX_DEBUG` | `0` | Enable debug logging to `~/.claude/cache-fix-debug.log` |
153
171
  | `CACHE_FIX_PREFIXDIFF` | `0` | Enable prefix snapshot diffing |
154
172
  | `CACHE_FIX_IMAGE_KEEP_LAST` | `0` | Keep images in last N user messages (0 = disabled) |
173
+ | `CACHE_FIX_USAGE_LOG` | `~/.claude/usage.jsonl` | Path for per-call usage telemetry log |
155
174
 
156
175
  ## Limitations
157
176
 
package/package.json CHANGED
@@ -1,12 +1,13 @@
1
1
  {
2
2
  "name": "claude-code-cache-fix",
3
- "version": "1.4.0",
3
+ "version": "1.5.0",
4
4
  "description": "Fixes prompt cache regression in Claude Code that causes up to 20x cost increase on resumed sessions",
5
5
  "type": "module",
6
6
  "exports": "./preload.mjs",
7
7
  "main": "./preload.mjs",
8
8
  "files": [
9
- "preload.mjs"
9
+ "preload.mjs",
10
+ "tools/"
10
11
  ],
11
12
  "engines": {
12
13
  "node": ">=18"
package/preload.mjs CHANGED
@@ -399,6 +399,7 @@ const DEBUG = process.env.CACHE_FIX_DEBUG === "1";
399
399
  const PREFIXDIFF = process.env.CACHE_FIX_PREFIXDIFF === "1";
400
400
  const LOG_PATH = join(homedir(), ".claude", "cache-fix-debug.log");
401
401
  const SNAPSHOT_DIR = join(homedir(), ".claude", "cache-fix-snapshots");
402
+ const USAGE_JSONL = process.env.CACHE_FIX_USAGE_LOG || join(homedir(), ".claude", "usage.jsonl");
402
403
 
403
404
  function debugLog(...args) {
404
405
  if (!DEBUG) return;
@@ -792,16 +793,34 @@ globalThis.fetch = async function (url, options) {
792
793
  quota.seven_day = h7d ? { utilization: parseFloat(h7d), pct: Math.round(parseFloat(h7d) * 100), resets_at: reset7d ? parseInt(reset7d) : null } : quota.seven_day;
793
794
  quota.status = status || null;
794
795
  quota.overage_status = overage || null;
796
+
797
+ // Peak hour detection — Anthropic applies higher quota drain rate during
798
+ // weekday peak hours: 13:00–19:00 UTC (Mon–Fri).
799
+ // Source: Thariq (Anthropic) via X, 2026-03-26; confirmed by The Register,
800
+ // PCWorld, Piunikaweb. No specific multiplier disclosed.
801
+ const now = new Date();
802
+ const utcHour = now.getUTCHours();
803
+ const utcDay = now.getUTCDay(); // 0=Sun, 6=Sat
804
+ const isPeak = utcDay >= 1 && utcDay <= 5 && utcHour >= 13 && utcHour < 19;
805
+ quota.peak_hour = isPeak;
806
+
795
807
  writeFileSync(quotaFile, JSON.stringify(quota, null, 2));
808
+
809
+ if (DEBUG && isPeak) {
810
+ debugLog("PEAK HOUR: weekday 13:00-19:00 UTC — quota drains at elevated rate");
811
+ }
796
812
  }
797
813
  } catch {
798
814
  // Non-critical — don't break the response
799
815
  }
800
816
 
801
- // Clone response to extract TTL tier from usage (SSE stream)
817
+ // Clone response to extract TTL tier and usage telemetry from SSE stream.
818
+ // Pass the model from the request so we can log a complete usage record.
802
819
  try {
820
+ let reqModel = "unknown";
821
+ try { reqModel = JSON.parse(options?.body)?.model || "unknown"; } catch {}
803
822
  const clone = response.clone();
804
- drainTTLFromClone(clone).catch(() => {});
823
+ drainTTLFromClone(clone, reqModel).catch(() => {});
805
824
  } catch {
806
825
  // clone() failure is non-fatal
807
826
  }
@@ -822,13 +841,18 @@ globalThis.fetch = async function (url, options) {
822
841
  * Writes TTL tier to ~/.claude/quota-status.json (merges with existing data)
823
842
  * and logs to debug log.
824
843
  */
825
- async function drainTTLFromClone(clone) {
844
+ async function drainTTLFromClone(clone, model) {
826
845
  if (!clone.body) return;
827
846
 
828
847
  const reader = clone.body.getReader();
829
848
  const decoder = new TextDecoder();
830
849
  let buffer = "";
831
850
 
851
+ // Accumulate usage across message_start (input/cache) and message_delta (output)
852
+ let startUsage = null;
853
+ let deltaUsage = null;
854
+ let ttlTier = "unknown";
855
+
832
856
  try {
833
857
  while (true) {
834
858
  const { done, value } = await reader.read();
@@ -847,6 +871,7 @@ async function drainTTLFromClone(clone) {
847
871
 
848
872
  if (event.type === "message_start" && event.message?.usage) {
849
873
  const u = event.message.usage;
874
+ startUsage = u;
850
875
  const cc = u.cache_creation || {};
851
876
  const e1h = cc.ephemeral_1h_input_tokens ?? 0;
852
877
  const e5m = cc.ephemeral_5m_input_tokens ?? 0;
@@ -854,8 +879,6 @@ async function drainTTLFromClone(clone) {
854
879
  const cacheRead = u.cache_read_input_tokens ?? 0;
855
880
 
856
881
  // Determine TTL tier from which ephemeral bucket got tokens
857
- // When cache is fully warm (no creation), infer tier from previous
858
- let ttlTier = "unknown";
859
882
  if (e1h > 0 && e5m === 0) ttlTier = "1h";
860
883
  else if (e5m > 0 && e1h === 0) ttlTier = "5m";
861
884
  else if (e1h === 0 && e5m === 0 && cacheCreate === 0) {
@@ -893,10 +916,11 @@ async function drainTTLFromClone(clone) {
893
916
  };
894
917
  writeFileSync(quotaFile, JSON.stringify(quota, null, 2));
895
918
  } catch {}
919
+ }
896
920
 
897
- // Got what we need stop reading
898
- reader.cancel();
899
- return;
921
+ // Capture final usage from message_delta (has output_tokens)
922
+ if (event.type === "message_delta" && event.usage) {
923
+ deltaUsage = event.usage;
900
924
  }
901
925
  } catch {
902
926
  // Skip malformed SSE lines
@@ -906,4 +930,25 @@ async function drainTTLFromClone(clone) {
906
930
  } finally {
907
931
  try { reader.releaseLock(); } catch {}
908
932
  }
933
+
934
+ // Write usage record to JSONL after stream completes
935
+ if (startUsage) {
936
+ try {
937
+ const cc = startUsage.cache_creation || {};
938
+ const record = {
939
+ timestamp: new Date().toISOString(),
940
+ model: model || "unknown",
941
+ input_tokens: startUsage.input_tokens ?? 0,
942
+ output_tokens: deltaUsage?.output_tokens ?? 0,
943
+ cache_read_input_tokens: startUsage.cache_read_input_tokens ?? 0,
944
+ cache_creation_input_tokens: startUsage.cache_creation_input_tokens ?? 0,
945
+ ephemeral_1h_input_tokens: cc.ephemeral_1h_input_tokens ?? 0,
946
+ ephemeral_5m_input_tokens: cc.ephemeral_5m_input_tokens ?? 0,
947
+ ttl_tier: ttlTier,
948
+ };
949
+ appendFileSync(USAGE_JSONL, JSON.stringify(record) + "\n");
950
+ } catch {
951
+ // Non-critical — don't break anything
952
+ }
953
+ }
909
954
  }
@@ -0,0 +1,880 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * cost-report — Calculate Claude API costs from usage telemetry.
4
+ *
5
+ * Input sources (in priority order):
6
+ * 1. Default: reads interceptor usage log at ~/.claude/usage.jsonl
7
+ * 2. --file / -f: any JSONL file (SDK output, proxy captures, etc.)
8
+ * 3. --sim-log: extract from simulation logs (Token telemetry: {...} lines)
9
+ * 4. stdin: pipe JSON-lines from any source
10
+ *
11
+ * Pricing sources (best → fallback):
12
+ * 1. Admin API actual billed usage (--admin-key)
13
+ * 2. Live rates from Anthropic docs (--live-rates)
14
+ * 3. Bundled rates.json (default)
15
+ *
16
+ * Part of claude-code-cache-fix. Works standalone or with the interceptor.
17
+ * https://github.com/cnighswonger/claude-code-cache-fix
18
+ */
19
+
20
+ import { readFileSync, writeFileSync, existsSync } from 'node:fs';
21
+ import { createInterface } from 'node:readline';
22
+ import { fileURLToPath } from 'node:url';
23
+ import { dirname, join } from 'node:path';
24
+ import { homedir } from 'node:os';
25
+
26
+ const __dirname = dirname(fileURLToPath(import.meta.url));
27
+ const RATES_PATH = join(__dirname, 'rates.json');
28
+ const PRICING_URL = 'https://platform.claude.com/docs/en/about-claude/pricing';
29
+ const ADMIN_API_BASE = 'https://api.anthropic.com/v1/organizations/usage_report/messages';
30
+ const DEFAULT_USAGE_LOG = join(homedir(), '.claude', 'usage.jsonl');
31
+
32
+ // ─── CLI parsing ────────────────────────────────────────────────────────────
33
+
34
/**
 * Parse the process command line into an options object.
 *
 * Flags that take a value consume the following argument (which is
 * `undefined` when the flag is last). The first bare argument is used as the
 * input file unless a file or sim log was already chosen. Unknown flags are
 * ignored. The admin key falls back to the ANTHROPIC_ADMIN_KEY environment
 * variable.
 *
 * @returns {object} Parsed options (simLog, file, adminKey, liveRates,
 *   updateRates, help, date, since, format).
 */
function parseArgs() {
  const argv = process.argv.slice(2);
  const opts = {
    simLog: null, file: null, adminKey: null,
    liveRates: false, updateRates: false, help: false,
    date: null, since: null, format: 'text',
  };

  // Flags whose value is the next argument.
  const valueFlags = new Map([
    ['--sim-log', 'simLog'],
    ['--file', 'file'],
    ['-f', 'file'],
    ['--admin-key', 'adminKey'],
    ['--date', 'date'],
    ['--since', 'since'],
    ['--format', 'format'],
  ]);

  // Flags that assign a fixed value to an option.
  const presetFlags = new Map([
    ['--live-rates', ['liveRates', true]],
    ['--update-rates', ['updateRates', true]],
    ['--json', ['format', 'json']],
    ['--md', ['format', 'md']],
    ['--markdown', ['format', 'md']],
    ['--help', ['help', true]],
    ['-h', ['help', true]],
  ]);

  let idx = 0;
  while (idx < argv.length) {
    const token = argv[idx];
    if (valueFlags.has(token)) {
      idx += 1;
      opts[valueFlags.get(token)] = argv[idx];
    } else if (presetFlags.has(token)) {
      const [optName, optValue] = presetFlags.get(token);
      opts[optName] = optValue;
    } else if (!token.startsWith('-') && !opts.file && !opts.simLog) {
      opts.file = token;
    }
    idx += 1;
  }

  opts.adminKey = opts.adminKey || process.env.ANTHROPIC_ADMIN_KEY;
  return opts;
}
68
+
69
+ function printUsage() {
70
+ console.log(`
71
+ cost-report — Calculate Claude API costs from usage telemetry.
72
+
73
+ Usage:
74
+ node cost-report.mjs From interceptor log (~/.claude/usage.jsonl)
75
+ node cost-report.mjs --date 2026-04-08 Filter to a specific date
76
+ node cost-report.mjs --since 2h Filter to last N hours/minutes
77
+ node cost-report.mjs --file <path> From any JSONL file
78
+ node cost-report.mjs --sim-log <path> From a simulation log
79
+ node cost-report.mjs --admin-key <key> Cross-reference with Admin API
80
+ cat telemetry.jsonl | node cost-report.mjs From JSON-lines on stdin
81
+ node cost-report.mjs --update-rates Refresh bundled rates
82
+
83
+ Input sources (checked in order):
84
+ Default Reads ~/.claude/usage.jsonl (written by the interceptor)
85
+ --file, -f <path> Any JSONL file (SDK output, proxy captures, etc.)
86
+ --sim-log <path> Extract from simulation logs (Token telemetry lines)
87
+ stdin Pipe JSON-lines from any source
88
+
89
+ Filtering:
90
+ --date <YYYY-MM-DD> Show only entries from this date
91
+ --since <duration> Show entries from last Nh, Nm, or Nd (e.g. 2h, 30m, 1d)
92
+
93
+ Output:
94
+ --format <fmt> Output format: text (default), json, md
95
+ --json Shorthand for --format json
96
+ --md, --markdown Shorthand for --format md
97
+
98
+ Pricing:
99
+ --admin-key <key> Anthropic Admin API key for actual billed usage
100
+ (or set ANTHROPIC_ADMIN_KEY env var)
101
+ --live-rates Fetch current rates from Anthropic docs
102
+ --update-rates Fetch and save current rates to rates.json
103
+
104
+ Input JSON format (one object per line):
105
+ Required: model, input_tokens, output_tokens
106
+ Optional: cache_read_input_tokens, cache_creation_input_tokens,
107
+ ephemeral_1h_input_tokens, ephemeral_5m_input_tokens,
108
+ timestamp, preflight_input_tokens, degradation_steps
109
+
110
+ Example JSONL (as written by the interceptor):
111
+ {"timestamp":"2026-04-09T01:23:45Z","model":"claude-sonnet-4-5-20250929","input_tokens":50000,"output_tokens":1200,"cache_read_input_tokens":13000,"cache_creation_input_tokens":0,"ephemeral_1h_input_tokens":0,"ephemeral_5m_input_tokens":0}
112
+
113
+ For SDK users — log usage from API responses:
114
+ const msg = await anthropic.messages.create({...});
115
+ fs.appendFileSync('usage.jsonl', JSON.stringify({
116
+ timestamp: new Date().toISOString(),
117
+ model: msg.model,
118
+ ...msg.usage
119
+ }) + '\\n');
120
+ `);
121
+ }
122
+
123
+ // ─── Rates ──────────────────────────────────────────────────────────────────
124
+
125
/**
 * Load the bundled rates.json that sits next to this script.
 *
 * Prints a warning to stderr when the file is missing, unreadable, not valid
 * JSON, or more than 30 days old. A missing or unusable file yields `null`
 * instead of an exception so the caller can fall back to another pricing
 * source.
 *
 * @returns {object|null} Parsed rates data, or null when unavailable.
 */
function loadBundledRates() {
  if (!existsSync(RATES_PATH)) {
    console.error('WARNING: No bundled rates.json found. Use --update-rates to create one.');
    return null;
  }

  let data;
  try {
    data = JSON.parse(readFileSync(RATES_PATH, 'utf8'));
  } catch (err) {
    // A truncated or hand-edited file must not crash the whole report.
    console.error(`WARNING: Could not read bundled rates.json: ${err.message}`);
    console.error(' Use --update-rates to regenerate it.');
    return null;
  }
  if (data === null || typeof data !== 'object') {
    console.error('WARNING: Bundled rates.json does not contain a JSON object. Use --update-rates to regenerate it.');
    return null;
  }

  // Check staleness (an unparseable last_updated gives NaN and never warns).
  const lastUpdated = new Date(data.last_updated);
  const daysSince = (Date.now() - lastUpdated.getTime()) / (1000 * 60 * 60 * 24);
  if (daysSince > 30) {
    console.error(`WARNING: Bundled rates are ${Math.floor(daysSince)} days old (last updated ${data.last_updated}).`);
    console.error(' Run with --update-rates to refresh, or --live-rates to fetch once.');
  }
  return data;
}
141
+
142
/**
 * Download the pricing page and parse it into a rates object.
 *
 * Any failure (network error, non-2xx status, parser exception) is reported
 * on stderr and turned into `null` so the caller can use bundled rates.
 *
 * @returns {Promise<object|null>} Result of parsePricingPage, or null on failure.
 */
async function fetchLiveRates() {
  try {
    const response = await fetch(PRICING_URL);
    if (response.ok) {
      const pageHtml = await response.text();
      return parsePricingPage(pageHtml);
    }
    throw new Error(`HTTP ${response.status}`);
  } catch (failure) {
    console.error(`WARNING: Failed to fetch live rates: ${failure.message}`);
    console.error(' Falling back to bundled rates.');
    return null;
  }
}
154
+
155
/**
 * Extract per-model pricing from the HTML of the pricing docs page.
 *
 * Looks for table rows of the form
 *   ModelName</td><td ...>$X / MTok</td> ... (five rate cells)
 * where the cells are, in order: base input, 5m cache write, 1h cache write,
 * cache read, output. A trailing parenthetical such as "(deprecated)" is
 * stripped from the model name. Rows in which any rate fails to parse as a
 * number are skipped.
 *
 * @param {string} html - Raw HTML of the pricing page.
 * @returns {object|null} `{ last_updated, source, notes, models }`, or null
 *   (after a warning on stderr) when no row could be parsed.
 */
function parsePricingPage(html) {
  // Model name, optionally followed by a parenthetical note.
  const NAME = String.raw`((?:Opus|Sonnet|Haiku)\s+[\d.]+(?:\s*\([^)]*\))?)`;
  // End of the previous cell followed by one "$X / MTok" rate cell.
  const RATE_CELL = String.raw`\s*<\/td>\s*<td[^>]*>\s*\$([\d.]+)\s*\/\s*MTok`;
  const rowPattern = new RegExp(NAME + RATE_CELL.repeat(5), 'g');

  const models = {};
  for (const row of html.matchAll(rowPattern)) {
    // Strip "(deprecated)" etc.
    const name = row[1].trim().replace(/\s*\([^)]*\)\s*$/, '').trim();

    const [input, write5m, write1h, cacheRead, output] = row.slice(2, 7).map(Number.parseFloat);
    // "[\d.]+" also matches strings like "." that are not numbers.
    if ([input, write5m, write1h, cacheRead, output].some(Number.isNaN)) continue;

    for (const id of resolveModelId(name)) {
      models[id] = {
        input, output,
        cache_read: cacheRead,
        cache_write_5m: write5m,
        cache_write_1h: write1h,
      };
    }
  }

  if (Object.keys(models).length === 0) {
    console.error('WARNING: Could not parse any model pricing from docs page.');
    console.error(' The page format may have changed. Falling back to bundled rates.');
    return null;
  }

  return {
    last_updated: new Date().toISOString().slice(0, 10),
    source: PRICING_URL,
    notes: 'Auto-fetched from Anthropic docs.',
    models,
  };
}
213
+
214
/**
 * Map a pricing-page display name (e.g. "Opus 4.6") to API model IDs.
 *
 * Claude 3.x models use the "claude-<version>-<family>" ID layout, while
 * Claude 4 and later use "claude-<family>-<version>". For Sonnet 3.7 and
 * Haiku 3.5 the previously emitted "claude-<family>-<version>" spelling is
 * kept as a second alias, so rate files written by older versions still
 * match.
 *
 * Unknown names fall back to a single derived ID in which whitespace and dots
 * become dashes ("Opus 4.7" -> "claude-opus-4-7").
 *
 * @param {string} displayName - Model name as shown on the pricing page.
 * @returns {string[]} One or more model IDs that should share the same rates.
 */
function resolveModelId(displayName) {
  const map = {
    'Opus 4.6': ['claude-opus-4-6'],
    'Opus 4.5': ['claude-opus-4-5-20251101'],
    'Opus 4.1': ['claude-opus-4-1-20250805'],
    'Opus 4': ['claude-opus-4-20250514'],
    'Opus 3': ['claude-3-opus-20240229'],
    'Sonnet 4.6': ['claude-sonnet-4-6'],
    'Sonnet 4.5': ['claude-sonnet-4-5-20250929'],
    'Sonnet 4': ['claude-sonnet-4-20250514'],
    'Sonnet 3.7': ['claude-3-7-sonnet-20250219', 'claude-sonnet-3-7-20250219'],
    'Haiku 4.5': ['claude-haiku-4-5-20251001'],
    'Haiku 3.5': ['claude-3-5-haiku-20241022', 'claude-haiku-3-5-20241022'],
    'Haiku 3': ['claude-3-haiku-20240307'],
  };
  if (Object.hasOwn(map, displayName)) return map[displayName];

  // Model IDs never contain dots, so "4.7" must become "4-7".
  return [`claude-${displayName.toLowerCase().replace(/[\s.]+/g, '-')}`];
}
232
+
233
/**
 * Find the pricing entry for a model ID.
 *
 * Resolution order, most specific first:
 *   1. exact key match;
 *   2. the ID with its "-YYYYMMDD" suffix removed is a key;
 *   3. a key that names the same model once date suffixes are ignored
 *      (undated alias vs. dated key, or two snapshots of one model);
 *   4. the longest key that is a prefix of the ID;
 *   5. the first key that the ID is a prefix of.
 *
 * Preferring the most specific candidate keeps "claude-sonnet-4-5-..." from
 * being priced with a shorter "claude-sonnet-4" entry merely because that
 * entry happens to be listed first.
 *
 * @param {object|null} ratesData - Rates object with a `models` map.
 * @param {string} modelId - Model ID from a usage record.
 * @returns {object|null} Rates for the model, or null when nothing matches
 *   (including an empty or non-string ID).
 */
function lookupRates(ratesData, modelId) {
  const models = ratesData?.models;
  if (!models) return null;
  // An empty ID is a prefix of every key and would match arbitrary rates.
  if (typeof modelId !== 'string' || modelId === '') return null;

  const ratesFor = (key) => (Object.hasOwn(models, key) && models[key]) || null;

  const exact = ratesFor(modelId);
  if (exact) return exact;

  const DATE_SUFFIX = /-\d{8}$/;
  const family = modelId.replace(DATE_SUFFIX, '');
  const familyExact = ratesFor(family);
  if (familyExact) return familyExact;

  let sameFamilyKey = null;
  let longestPrefixKey = null;
  let firstExtensionKey = null;
  for (const key of Object.keys(models)) {
    if (!models[key]) continue;
    if (sameFamilyKey === null && key.replace(DATE_SUFFIX, '') === family) {
      sameFamilyKey = key;
    }
    if (modelId.startsWith(key) &&
        (longestPrefixKey === null || key.length > longestPrefixKey.length)) {
      longestPrefixKey = key;
    }
    if (firstExtensionKey === null && key.startsWith(modelId)) {
      firstExtensionKey = key;
    }
  }

  const winner = sameFamilyKey ?? longestPrefixKey ?? firstExtensionKey;
  return winner === null ? null : models[winner];
}
251
+
252
+ // ─── Input parsing ──────────────────────────────────────────────────────────
253
+
254
/**
 * Collect telemetry objects from a simulation log file.
 *
 * Every line containing `Token telemetry: {...}` contributes the parsed JSON
 * object. When the line also has a `[...]` section, its contents are stored
 * on the object as `_timestamp`. Lines whose JSON cannot be parsed are
 * skipped. A warning is printed to stderr when nothing was found.
 *
 * @param {string} filePath - Path of the simulation log.
 * @returns {object[]} Telemetry objects in file order.
 */
function extractFromSimLog(filePath) {
  const TELEMETRY = /Token telemetry:\s*(\{.+\})/;
  const BRACKETED = /\[([^\]]+)\]/;

  const entries = readFileSync(filePath, 'utf8')
    .split('\n')
    .flatMap((logLine) => {
      const hit = TELEMETRY.exec(logLine);
      if (hit === null) return [];
      try {
        const payload = JSON.parse(hit[1]);
        // Extract timestamp from log line
        const stamp = BRACKETED.exec(logLine);
        if (stamp) payload._timestamp = stamp[1];
        return [payload];
      } catch {
        return []; // skip malformed
      }
    });

  if (entries.length === 0) {
    console.error('WARNING: No "Token telemetry" entries found in sim log.');
    console.error(' This log may use an older format without structured telemetry.');
  }

  return entries;
}
279
+
280
/**
 * Parse newline-delimited JSON.
 *
 * Blank lines, lines that are not valid JSON, and lines whose parsed value is
 * falsy (null, 0, false, "") are dropped.
 *
 * @param {string} text - JSON-lines text.
 * @returns {Array} Parsed values in input order.
 */
function parseJsonLines(text) {
  const parsed = [];
  for (const rawLine of text.split('\n')) {
    if (!rawLine.trim()) continue;
    let value;
    try {
      value = JSON.parse(rawLine);
    } catch {
      continue;
    }
    if (value) parsed.push(value);
  }
  return parsed;
}
286
+
287
/**
 * Read all of standard input line by line.
 *
 * @returns {Promise<string>} The lines joined with "\n".
 */
async function readStdin() {
  const lineReader = createInterface({ input: process.stdin, terminal: false });
  let text = '';
  let isFirst = true;
  for await (const line of lineReader) {
    text = isFirst ? line : `${text}\n${line}`;
    isFirst = false;
  }
  return text;
}
293
+
294
/**
 * Convert a raw usage record into the internal entry shape.
 *
 * Accepted naming conventions:
 *   - Interceptor: input_tokens, output_tokens, timestamp, flat
 *     ephemeral_1h_input_tokens / ephemeral_5m_input_tokens
 *   - Sim telemetry: actual_input_tokens, actual_output_tokens, _timestamp
 *   - SDK usage object spread into the record: input_tokens, output_tokens,
 *     and the TTL split nested under `cache_creation`
 *
 * Flat ephemeral fields win over the nested ones. Reading the nested form
 * matters for cost accuracy: without it an SDK record's 1h cache writes would
 * be priced at the 5m rate.
 *
 * @param {object} raw - One parsed JSON-lines record.
 * @returns {object} Entry with model, timestamp, token counts (defaulting to
 *   0), preflight / sys_prompt_est (null when absent), degradation (always an
 *   array) and would_have_exceeded.
 */
function normalizeEntry(raw) {
  const nestedCreation = raw.cache_creation ?? {};
  const steps = raw.degradation_steps;
  return {
    model: raw.model || 'unknown',
    timestamp: raw.timestamp || raw._timestamp || null,
    input_tokens: raw.actual_input_tokens ?? raw.input_tokens ?? 0,
    output_tokens: raw.actual_output_tokens ?? raw.output_tokens ?? 0,
    cache_read: raw.cache_read_input_tokens ?? 0,
    cache_create: raw.cache_creation_input_tokens ?? 0,
    eph_1h: raw.ephemeral_1h_input_tokens ?? nestedCreation.ephemeral_1h_input_tokens ?? 0,
    eph_5m: raw.ephemeral_5m_input_tokens ?? nestedCreation.ephemeral_5m_input_tokens ?? 0,
    preflight: raw.preflight_input_tokens ?? null,
    // Downstream code reads .length and iterates; anything but an array is ignored.
    degradation: Array.isArray(steps) ? steps : [],
    would_have_exceeded: raw.would_have_exceeded ?? false,
    sys_prompt_est: raw.system_prompt_tokens_est ?? null,
  };
}
314
+
315
+ // ─── Admin API ──────────────────────────────────────────────────────────────
316
+
317
/**
 * Query the Admin API usage report for a time window, grouped by model.
 *
 * The window is widened to whole hours: the start is rounded down to the
 * hour and the end is moved to the top of the following hour. Rounding is
 * done in UTC because the request uses ISO (UTC) timestamps with
 * `bucket_width=1h`. Rounding in local time would shift the window by 30 or
 * 45 minutes in time zones whose UTC offset is not a whole hour.
 *
 * Failures (invalid dates, network errors, non-2xx responses) are reported on
 * stderr and yield `null`.
 *
 * @param {string} adminKey - Admin API key sent as `x-api-key`.
 * @param {string|number|Date} startTime - Start of the window.
 * @param {string|number|Date} endTime - End of the window.
 * @returns {Promise<object|null>} Parsed JSON response, or null on failure.
 */
async function fetchAdminUsage(adminKey, startTime, endTime) {
  try {
    // Round start down and end up to (UTC) hour boundaries.
    const start = new Date(startTime);
    start.setUTCMinutes(0, 0, 0);
    const end = new Date(endTime);
    end.setUTCHours(end.getUTCHours() + 1, 0, 0, 0);

    // toISOString() throws RangeError for an invalid date; the catch below
    // turns that into a warning instead of an unhandled rejection.
    const url = `${ADMIN_API_BASE}?bucket_width=1h` +
      `&starting_at=${start.toISOString()}` +
      `&ending_at=${end.toISOString()}` +
      `&group_by[]=model`;

    const resp = await fetch(url, {
      headers: {
        'x-api-key': adminKey,
        'anthropic-version': '2023-06-01',
      },
    });
    if (!resp.ok) throw new Error(`HTTP ${resp.status}: ${await resp.text()}`);
    return await resp.json();
  } catch (err) {
    console.error(`WARNING: Admin API query failed: ${err.message}`);
    return null;
  }
}
343
+
344
/**
 * Aggregate an Admin API usage report per model and price it.
 *
 * Token counts from every result in every bucket are summed per model, then
 * priced with the rates from `lookupRates` (rates are per million tokens).
 * A model without known rates keeps cost 0 and triggers a warning on stderr,
 * because the reported total then understates real spend. A missing payload
 * is treated as an empty report.
 *
 * @param {object|null} apiData - Response of the usage report endpoint
 *   (`data[].results[]`).
 * @param {object|null} ratesData - Rates object passed to lookupRates.
 * @returns {{byModel: object, totalCost: number}} Per-model token sums and
 *   cost, plus the total cost over all models.
 */
function summarizeAdminData(apiData, ratesData) {
  const byModel = {};

  for (const bucket of (apiData?.data || [])) {
    for (const r of (bucket?.results || [])) {
      const model = r.model || 'unknown';
      if (!byModel[model]) {
        byModel[model] = { uncached: 0, cache_read: 0, cache_1h: 0, cache_5m: 0, output: 0, cost: 0 };
      }
      const m = byModel[model];
      m.uncached += r.uncached_input_tokens || 0;
      m.cache_read += r.cache_read_input_tokens || 0;
      const cc = r.cache_creation || {};
      m.cache_1h += cc.ephemeral_1h_input_tokens || 0;
      m.cache_5m += cc.ephemeral_5m_input_tokens || 0;
      m.output += r.output_tokens || 0;
    }
  }

  // Calculate costs per model
  let totalCost = 0;
  for (const [model, m] of Object.entries(byModel)) {
    const rates = lookupRates(ratesData, model);
    if (rates) {
      m.cost = (m.uncached * rates.input + m.cache_read * rates.cache_read +
        m.cache_1h * rates.cache_write_1h + m.cache_5m * rates.cache_write_5m +
        m.output * rates.output) / 1_000_000;
    } else {
      console.error(`WARNING: No rates found for model "${model}" in Admin API data. Its usage is not included in the API cost total.`);
    }
    totalCost += m.cost;
  }

  return { byModel, totalCost };
}
377
+
378
+ // ─── Cost calculation ───────────────────────────────────────────────────────
379
+
380
/**
 * Price a list of normalized usage entries and build summary statistics.
 *
 * Each entry is priced with the rates returned by `lookupRates` (rates are
 * per million tokens). Entries whose model has no rates are kept in `results`
 * with `cost: null` and `rateSource: 'missing'` but are excluded from the
 * summary. The warning for a missing model is printed once per model rather
 * than once per entry, so a large log of unknown-model calls does not flood
 * stderr.
 *
 * @param {object[]} entries - Entries in the shape produced by normalizeEntry.
 * @param {object|null} ratesData - Rates object passed to lookupRates.
 * @returns {{results: object[], summary: object}} Per-entry results (entry
 *   fields plus cost, cw1h, cw5m) and aggregate totals.
 */
function calculateCosts(entries, ratesData) {
  const results = [];
  const summary = {
    calls: 0,
    byModel: {},
    totals: { input: 0, output: 0, cache_read: 0, cache_1h: 0, cache_5m: 0, preflight: 0 },
    totalCost: 0,
    degradedCalls: 0,
    exceededCalls: 0,
    degradationSteps: {},
  };
  const warnedModels = new Set();

  for (const entry of entries) {
    const rates = lookupRates(ratesData, entry.model);
    if (!rates) {
      if (!warnedModels.has(entry.model)) {
        warnedModels.add(entry.model);
        console.error(`WARNING: No rates found for model "${entry.model}". Skipping cost calculation.`);
      }
      results.push({ ...entry, cost: null, rateSource: 'missing' });
      continue;
    }

    // Determine cache write tier breakdown.
    // If telemetry has eph_1h/eph_5m, use those. Otherwise attribute all
    // cache creation to the 5m tier, which is the cheaper write rate, so the
    // estimate is a lower bound.
    let cw1h = entry.eph_1h;
    let cw5m = entry.eph_5m;
    if (cw1h === 0 && cw5m === 0 && entry.cache_create > 0) {
      cw5m = entry.cache_create;
    }

    const cost = (
      entry.input_tokens * rates.input +
      entry.output_tokens * rates.output +
      entry.cache_read * rates.cache_read +
      cw1h * rates.cache_write_1h +
      cw5m * rates.cache_write_5m
    ) / 1_000_000;

    results.push({ ...entry, cost, cw1h, cw5m });

    // Accumulate summary
    summary.calls++;
    summary.totals.input += entry.input_tokens;
    summary.totals.output += entry.output_tokens;
    summary.totals.cache_read += entry.cache_read;
    summary.totals.cache_1h += cw1h;
    summary.totals.cache_5m += cw5m;
    if (entry.preflight != null) summary.totals.preflight += entry.preflight;

    if (!summary.byModel[entry.model]) {
      summary.byModel[entry.model] = { calls: 0, cost: 0 };
    }
    summary.byModel[entry.model].calls++;
    summary.byModel[entry.model].cost += cost;

    summary.totalCost += cost;

    if (entry.degradation.length > 0) {
      summary.degradedCalls++;
      for (const step of entry.degradation) {
        summary.degradationSteps[step] = (summary.degradationSteps[step] || 0) + 1;
      }
    }
    if (entry.would_have_exceeded) summary.exceededCalls++;
  }

  return { results, summary };
}
447
+
448
+ // ─── Report formatting ──────────────────────────────────────────────────────
449
+
450
/**
 * Render a number with en-US grouping separators (1234567 -> "1,234,567").
 *
 * @param {number} n - Value to format.
 * @returns {string} Formatted value.
 */
function fmt(n) {
  const grouped = n.toLocaleString('en-US');
  return grouped;
}
453
+
454
/**
 * Format a dollar amount with four decimals.
 *
 * Values that are not a finite number (null, undefined, NaN, Infinity) are
 * shown as "N/A". This covers averages computed over zero priced calls, which
 * would otherwise be printed as "$NaN".
 *
 * @param {number|null|undefined} n - Cost in dollars.
 * @returns {string} "$X.XXXX", or the N/A placeholder.
 */
function fmtCost(n) {
  if (n == null || !Number.isFinite(n)) return ' N/A';
  return `$${n.toFixed(4)}`;
}
458
+
459
/**
 * Print the report in the requested output format.
 *
 * @param {object[]} results - Per-call results.
 * @param {object} summary - Aggregate summary.
 * @param {object|null} ratesData - Rates used for pricing.
 * @param {object|null} adminSummary - Admin API summary, if available.
 * @param {string} format - 'json', 'md', or anything else for plain text.
 * @returns {*} Whatever the selected printer returns.
 */
function printReport(results, summary, ratesData, adminSummary, format) {
  switch (format) {
    case 'json':
      return printJsonReport(results, summary, ratesData, adminSummary);
    case 'md':
      return printMarkdownReport(results, summary, ratesData, adminSummary);
    default:
      return printTextReport(results, summary, ratesData, adminSummary);
  }
}
464
+
465
+ // ─── JSON output ────────────────────────────────────────────────────────────
466
+
467
/**
 * Print the report to stdout as pretty-printed JSON.
 *
 * The document contains the generation time, pricing source, one record per
 * call, an aggregate summary, and an `admin_api` section when an Admin API
 * summary is supplied. Degradation details are included only when present.
 *
 * @param {object[]} results - Per-call results from calculateCosts.
 * @param {object} summary - Aggregate summary from calculateCosts.
 * @param {object|null} ratesData - Rates used for pricing.
 * @param {object|null} adminSummary - Admin API summary, if available.
 */
function printJsonReport(results, summary, ratesData, adminSummary) {
  const toCallRecord = (row) => {
    const hasSteps = row.degradation.length > 0;
    return {
      timestamp: row.timestamp,
      model: row.model,
      input_tokens: row.input_tokens,
      output_tokens: row.output_tokens,
      cache_read: row.cache_read,
      cache_write_1h: row.cw1h || 0,
      cache_write_5m: row.cw5m || 0,
      cost: row.cost,
      degradation_steps: hasSteps ? row.degradation : undefined,
    };
  };

  let degradation;
  if (summary.degradedCalls > 0) {
    degradation = {
      degraded_calls: summary.degradedCalls,
      exceeded_calls: summary.exceededCalls,
      steps: summary.degradationSteps,
    };
  }

  const report = {
    generated: new Date().toISOString(),
    pricing: { source: ratesData?.source || 'bundled', last_updated: ratesData?.last_updated },
    calls: results.map((row) => toCallRecord(row)),
    summary: {
      total_calls: summary.calls,
      total_cost: summary.totalCost,
      avg_cost_per_call: summary.totalCost / summary.calls,
      tokens: summary.totals,
      by_model: summary.byModel,
      degradation,
    },
  };

  if (adminSummary) {
    const { totalCost: billedCost, byModel: billedByModel } = adminSummary;
    report.admin_api = {
      total_cost: billedCost,
      delta: billedCost - summary.totalCost,
      by_model: billedByModel,
    };
  }

  console.log(JSON.stringify(report, null, 2));
}
504
+
505
+ // ─── Markdown output ────────────────────────────────────────────────────────
506
+
507
/**
 * Print the report to stdout as Markdown.
 *
 * Sections: pricing source, a per-call table (only for 50 results or fewer),
 * a summary table, a per-model table, a degradation table when any call was
 * degraded, and an Admin API comparison when a summary is supplied.
 *
 * The average cost is passed to fmtCost as null when no call could be priced
 * (summary.calls is 0, e.g. every model lacked rates), instead of dividing by
 * zero. The Admin API delta is labelled "no difference" when both totals
 * match.
 *
 * @param {object[]} results - Per-call results from calculateCosts.
 * @param {object} summary - Aggregate summary from calculateCosts.
 * @param {object|null} ratesData - Rates used for pricing.
 * @param {object|null} adminSummary - Admin API summary, if available.
 */
function printMarkdownReport(results, summary, ratesData, adminSummary) {
  const rateSource = ratesData?.last_updated ? `rates from ${ratesData.last_updated}` : 'unknown rates';
  const avgCost = summary.calls > 0 ? summary.totalCost / summary.calls : null;
  const lines = [];

  lines.push('# Claude API Cost Report');
  lines.push('');
  lines.push(`Pricing: ${rateSource} (${ratesData?.source || 'bundled'})`);
  lines.push('');

  // Per-call table
  if (results.length <= 50) {
    lines.push('## Per-Call Breakdown');
    lines.push('');
    lines.push('| # | Timestamp | Model | Input | Output | Cache Rd | Cache Wr | Cost | Degradation |');
    lines.push('|---|-----------|-------|------:|-------:|---------:|---------:|-----:|-------------|');

    for (let i = 0; i < results.length; i++) {
      const r = results[i];
      const ts = r.timestamp ? r.timestamp.slice(0, 19) : '—';
      const modelShort = r.model.replace('claude-', '').replace(/-\d{8}$/, '');
      const cacheWr = (r.cw1h || 0) + (r.cw5m || 0);
      const deg = r.degradation.length > 0 ? r.degradation.length + ' steps' : '';
      lines.push(`| ${i + 1} | ${ts} | ${modelShort} | ${fmt(r.input_tokens)} | ${fmt(r.output_tokens)} | ${fmt(r.cache_read)} | ${fmt(cacheWr)} | ${fmtCost(r.cost)} | ${deg} |`);
    }
    lines.push('');
  }

  // Summary
  lines.push('## Summary');
  lines.push('');
  lines.push(`| Metric | Value |`);
  lines.push(`|--------|------:|`);
  lines.push(`| Total API calls | ${summary.calls} |`);
  lines.push(`| Total input tokens | ${fmt(summary.totals.input)} |`);
  lines.push(`| Total output tokens | ${fmt(summary.totals.output)} |`);
  lines.push(`| Total cache read | ${fmt(summary.totals.cache_read)} |`);
  lines.push(`| Total cache write 1h | ${fmt(summary.totals.cache_1h)} |`);
  lines.push(`| Total cache write 5m | ${fmt(summary.totals.cache_5m)} |`);
  lines.push(`| **Total cost** | **${fmtCost(summary.totalCost)}** |`);
  lines.push(`| Avg cost per call | ${fmtCost(avgCost)} |`);
  lines.push('');

  // By model
  lines.push('## By Model');
  lines.push('');
  lines.push('| Model | Calls | Cost |');
  lines.push('|-------|------:|-----:|');
  for (const [model, info] of Object.entries(summary.byModel)) {
    lines.push(`| ${model} | ${info.calls} | ${fmtCost(info.cost)} |`);
  }
  lines.push('');

  // Degradation
  if (summary.degradedCalls > 0) {
    lines.push('## Degradation');
    lines.push('');
    lines.push(`Calls with degradation: ${summary.degradedCalls}/${summary.calls}`);
    lines.push('');
    lines.push('| Step | Count |');
    lines.push('|------|------:|');
    for (const [step, count] of Object.entries(summary.degradationSteps).sort((a, b) => b[1] - a[1])) {
      lines.push(`| ${step} | ${count}/${summary.calls} |`);
    }
    lines.push('');
  }

  // Admin API
  if (adminSummary) {
    const delta = adminSummary.totalCost - summary.totalCost;
    let direction = 'no difference';
    if (delta > 0) direction = 'API higher';
    else if (delta < 0) direction = 'telemetry higher';
    lines.push('## Admin API (Actual Billed)');
    lines.push('');
    lines.push(`| Source | Cost |`);
    lines.push(`|--------|-----:|`);
    lines.push(`| API-reported | ${fmtCost(adminSummary.totalCost)} |`);
    lines.push(`| Telemetry | ${fmtCost(summary.totalCost)} |`);
    lines.push(`| Delta | ${fmtCost(Math.abs(delta))} (${direction}) |`);
    lines.push('');
    lines.push('> Note: Admin API reports all usage for the time window, which may include other concurrent API activity.');
    lines.push('');
  }

  console.log(lines.join('\n'));
}
590
+
591
+ // ─── Text output ────────────────────────────────────────────────────────────
592
+
593
/**
 * Print the plain-text cost report to stdout.
 *
 * Sections, in order: banner with the pricing source, per-call table (only
 * when there are at most 50 results), token summary, per-model totals, cost
 * figures with an optional cache-read savings estimate, degradation counts
 * (only when at least one call degraded), and the Admin API comparison (only
 * when `adminSummary` is truthy).
 *
 * @param {Array<object>} results - Per-call rows; reads timestamp, model,
 *   input_tokens, output_tokens, cache_read, cw1h, cw5m, cost, degradation.
 * @param {object} summary - Aggregates; reads calls, totals, byModel,
 *   totalCost, degradedCalls, exceededCalls, degradationSteps.
 * @param {object} ratesData - Pricing data; `last_updated` and `source` are
 *   shown in the banner and the object is passed to `lookupRates`.
 * @param {object} [adminSummary] - Admin API totals (totalCost, byModel).
 */
function printTextReport(results, summary, ratesData, adminSummary) {
  const say = (text) => console.log(text);
  const banner = '='.repeat(80);
  const { totals, calls, totalCost, byModel } = summary;

  let rateSource = 'unknown rates';
  if (ratesData?.last_updated) {
    rateSource = `rates from ${ratesData.last_updated}`;
  }

  say('');
  say(banner);
  say(' CLAUDE API COST REPORT');
  say(banner);
  say(` Pricing: ${rateSource} (${ratesData?.source || 'bundled'})`);
  say('');

  // ── Per-call table (suppressed for long runs to keep the report readable) ──
  if (results.length <= 50) {
    say('─── Per-Call Breakdown ─────────────────────────────────────────────────────────');
    const headerCells = [
      ' #'.padEnd(5),
      'Timestamp'.padEnd(28),
      'Model'.padEnd(10),
      'Input'.padStart(10),
      'Output'.padStart(9),
      'CacheRd'.padStart(9),
      'CacheWr'.padStart(9),
      'Cost'.padStart(10),
      ' Degradation',
    ];
    say(headerCells.join(''));
    say(' ' + '─'.repeat(78));

    for (const [index, row] of results.entries()) {
      const stamp = row.timestamp ? row.timestamp.slice(0, 19) : '—';
      // Drop the vendor prefix and the trailing date stamp, then clip to fit the column.
      const shortModel = row.model.replace('claude-', '').replace(/-\d{8}$/, '').slice(0, 8);
      const cacheWritten = (row.cw1h || 0) + (row.cw5m || 0);
      const degNote = row.degradation.length > 0 ? `${row.degradation.length} steps` : '';

      const cells = [
        ` ${String(index + 1).padStart(2)} `,
        stamp.padEnd(28),
        shortModel.padEnd(10),
        fmt(row.input_tokens).padStart(10),
        fmt(row.output_tokens).padStart(9),
        fmt(row.cache_read).padStart(9),
        fmt(cacheWritten).padStart(9),
        fmtCost(row.cost).padStart(10),
        ` ${degNote}`,
      ];
      say(cells.join(''));
    }
    say('');
  }

  // ── Summary ──
  say('─── Summary ────────────────────────────────────────────────────────────────────');
  say(` Total API calls: ${calls}`);
  say(` Total input tokens: ${fmt(totals.input)}`);
  say(` Total output tokens: ${fmt(totals.output)}`);
  say(` Total cache read: ${fmt(totals.cache_read)}`);
  say(` Total cache write 1h: ${fmt(totals.cache_1h)}`);
  say(` Total cache write 5m: ${fmt(totals.cache_5m)}`);
  if (totals.preflight > 0) {
    const tokensSaved = totals.preflight - totals.input;
    const savedPct = (tokensSaved / totals.preflight * 100).toFixed(1);
    say(` Preflight estimate: ${fmt(totals.preflight)} (degradation saved ${fmt(tokensSaved)} tokens, ${savedPct}%)`);
  }
  say('');

  // ── By model ──
  say(' By model:');
  for (const [modelName, stats] of Object.entries(byModel)) {
    say(` ${modelName.replace('claude-', '')}: ${stats.calls} calls, ${fmtCost(stats.cost)}`);
  }
  say('');

  // ── Cost ──
  say('─── Cost ───────────────────────────────────────────────────────────────────────');
  say(` Telemetry-calculated: ${fmtCost(totalCost)}`);
  say(` Avg cost per call: ${fmtCost(totalCost / calls)}`);

  // Cache savings estimate: what the cache reads would have cost at the full
  // input rate. Only attempted when exactly one model appears in the summary.
  const modelNames = totals.cache_read > 0 ? Object.keys(byModel) : [];
  const rates = modelNames.length === 1 ? lookupRates(ratesData, modelNames[0]) : null;
  if (rates) {
    const atInputRate = totals.cache_read * rates.input / 1_000_000;
    const atCacheRate = totals.cache_read * rates.cache_read / 1_000_000;
    const saved = atInputRate - atCacheRate;
    say(` Cache read savings: ${fmtCost(saved)} (${(saved / totalCost * 100).toFixed(1)}% of total)`);
  }
  say('');

  // ── Degradation ──
  if (summary.degradedCalls > 0) {
    say('─── Degradation ────────────────────────────────────────────────────────────────');
    say(` Calls with degradation: ${summary.degradedCalls}/${calls}`);
    say(` Budget exceeded: ${summary.exceededCalls}/${calls}`);
    // Most frequent steps first.
    const stepsByCount = Object.entries(summary.degradationSteps).sort(([, a], [, b]) => b - a);
    for (const [step, count] of stepsByCount) {
      say(` ${step}: ${count}/${calls}`);
    }
    say('');
  }

  // ── Admin API comparison ──
  if (adminSummary) {
    const delta = adminSummary.totalCost - totalCost;
    const higherSide = delta > 0 ? 'API higher' : 'telemetry higher';

    say('─── Admin API (Actual Billed) ──────────────────────────────────────────────────');
    say(` API-reported total: ${fmtCost(adminSummary.totalCost)}`);
    say(` Telemetry total: ${fmtCost(totalCost)}`);
    say(` Delta: ${fmtCost(Math.abs(delta))} (${higherSide})`);
    say('');
    say(' API breakdown by model:');
    for (const [modelName, usage] of Object.entries(adminSummary.byModel)) {
      say(` ${modelName.replace('claude-', '')}:`);
      say(` Uncached input: ${fmt(usage.uncached)}`);
      say(` Cache read: ${fmt(usage.cache_read)}`);
      say(` Cache write (1h): ${fmt(usage.cache_1h)}`);
      say(` Cache write (5m): ${fmt(usage.cache_5m)}`);
      say(` Output: ${fmt(usage.output)}`);
      say(` Cost: ${fmtCost(usage.cost)}`);
    }
    say('');
    say(' NOTE: Admin API reports all usage for the sim\'s time window,');
    say(' which may include other concurrent API activity.');
  }

  say(banner);
  say('');
}
723
+
724
+ // ─── Time window extraction ─────────────────────────────────────────────────
725
+
726
/**
 * Compute the earliest and latest timestamps across telemetry entries.
 *
 * Entries with a missing/falsy `timestamp`, or one that `Date` cannot parse,
 * are ignored so that a single malformed line cannot turn the whole window
 * into Invalid Dates (which would make `toISOString()` throw in the caller).
 *
 * The scan is a single pass rather than `Math.min(...values)`: spreading one
 * argument per entry can exceed the engine's argument limit on large logs.
 *
 * @param {Array<{timestamp?: string|number|Date}>} entries - Telemetry entries.
 * @returns {{start: Date, end: Date}|null} The covering window, or `null`
 *   when no entry carries a usable timestamp.
 */
function getTimeWindow(entries) {
  let earliest = Infinity;
  let latest = -Infinity;

  for (const entry of entries) {
    if (!entry.timestamp) continue;

    const ms = new Date(entry.timestamp).getTime();
    if (Number.isNaN(ms)) continue; // unparseable timestamp

    if (ms < earliest) earliest = ms;
    if (ms > latest) latest = ms;
  }

  // Still at the sentinel: nothing usable was seen.
  if (earliest === Infinity) return null;

  return {
    start: new Date(earliest),
    end: new Date(latest),
  };
}
738
+
739
+ // ─── Time filtering ─────────────────────────────────────────────────────────
740
+
741
/**
 * Turn a relative duration such as "2h", "30m" or "1d" into an absolute
 * cutoff: the current time minus that duration.
 *
 * Accepted form: digits, optional whitespace, then one of h/m/d
 * (case-insensitive) for hours, minutes or days.
 *
 * @param {string} since - Duration expression.
 * @returns {Date|null} The cutoff instant, or `null` when the input does not
 *   match the accepted form.
 */
function parseSinceDuration(since) {
  const MS_PER_UNIT = { h: 3600000, m: 60000, d: 86400000 };

  const parsed = since.match(/^(\d+)\s*(h|m|d)$/i);
  if (parsed === null) {
    return null;
  }

  const [, digits, unitLetter] = parsed;
  const offsetMs = Number.parseInt(digits, 10) * MS_PER_UNIT[unitLetter.toLowerCase()];
  return new Date(Date.now() - offsetMs);
}
749
+
750
/**
 * Restrict telemetry entries to a calendar date or a trailing time window.
 *
 * `opts.date` (YYYY-MM-DD) takes precedence over `opts.since` (e.g. "2h").
 * The date bounds are built from date-time strings without a UTC offset, so
 * they are interpreted in the local time zone. Entries without a timestamp
 * are always kept.
 *
 * When the filter value cannot be parsed, a warning is written to stderr and
 * the entries are returned unfiltered. Previously an unparseable `--date`
 * produced Invalid Date bounds, every comparison evaluated to false, and the
 * filter was silently disabled.
 *
 * @param {Array<{timestamp?: string}>} entries - Telemetry entries.
 * @param {{date?: string, since?: string}} opts - Parsed CLI options.
 * @returns {Array<object>} The matching entries (the input array itself when
 *   no filter is requested or the filter value is unparseable).
 */
function filterByTime(entries, opts) {
  if (!opts.date && !opts.since) return entries;

  let cutoff = null;
  let dateEnd = null;

  if (opts.date) {
    // Filter to a specific date (YYYY-MM-DD), local time.
    cutoff = new Date(opts.date + 'T00:00:00');
    dateEnd = new Date(opts.date + 'T23:59:59.999');
    if (Number.isNaN(cutoff.getTime()) || Number.isNaN(dateEnd.getTime())) {
      console.error(`WARNING: Could not parse --date "${opts.date}". Use format YYYY-MM-DD.`);
      return entries;
    }
  } else if (opts.since) {
    cutoff = parseSinceDuration(opts.since);
    if (!cutoff) {
      console.error(`WARNING: Could not parse --since "${opts.since}". Use format like 2h, 30m, 1d.`);
      return entries;
    }
  }

  const before = entries.length;
  const filtered = entries.filter((entry) => {
    if (!entry.timestamp) return true; // keep entries without timestamps
    const ts = new Date(entry.timestamp);
    if (cutoff && ts < cutoff) return false;
    if (dateEnd && ts > dateEnd) return false;
    return true;
  });

  if (filtered.length < before) {
    console.error(`Filtered: ${before} → ${filtered.length} entries (${opts.date ? 'date ' + opts.date : 'since ' + opts.since}).`);
  }

  return filtered;
}
783
+
784
+ // ─── Main ───────────────────────────────────────────────────────────────────
785
+
786
/**
 * CLI entry point.
 *
 * Flow: parse arguments → handle `--help` / update-rates mode → load pricing
 * (live when requested, otherwise bundled) → load telemetry from the first
 * available source (sim log, file, piped stdin, default usage log) → apply
 * the time filter → normalize and price the entries → optionally
 * cross-reference the Admin API → print the report.
 *
 * Every failure path writes to stderr and terminates the process with exit
 * status 1; help and a successful rates update exit with status 0.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const opts = parseArgs();

  if (opts.help) {
    printUsage();
    process.exit(0);
  }

  // ── Update rates mode ──
  if (opts.updateRates) {
    console.log(`Fetching rates from ${PRICING_URL}...`);
    const fetched = await fetchLiveRates();
    if (!fetched) {
      console.error('Failed to fetch rates. Bundled rates unchanged.');
      process.exit(1);
    } else {
      writeFileSync(RATES_PATH, JSON.stringify(fetched, null, 2) + '\n');
      console.log(`Updated ${RATES_PATH} with ${Object.keys(fetched.models).length} models (${fetched.last_updated}).`);
    }
    process.exit(0);
  }

  // ── Load rates: live if requested and available, bundled otherwise ──
  const liveRates = opts.liveRates ? await fetchLiveRates() : undefined;
  const ratesData = liveRates || loadBundledRates();
  if (!ratesData) {
    console.error('ERROR: No rate data available. Run with --update-rates first.');
    process.exit(1);
  }

  // ── Load telemetry from the first source that applies ──
  let telemetry;
  if (opts.simLog) {
    telemetry = extractFromSimLog(opts.simLog);
  } else if (opts.file) {
    telemetry = parseJsonLines(readFileSync(opts.file, 'utf8'));
  } else if (!process.stdin.isTTY) {
    telemetry = parseJsonLines(await readStdin());
  } else if (existsSync(DEFAULT_USAGE_LOG)) {
    // Default: read interceptor usage log
    telemetry = parseJsonLines(readFileSync(DEFAULT_USAGE_LOG, 'utf8'));
    if (telemetry.length > 0) {
      console.error(`Reading from ${DEFAULT_USAGE_LOG}`);
    }
  } else {
    console.error(`ERROR: No input found. Expected interceptor log at ${DEFAULT_USAGE_LOG}`);
    console.error(' Use --file, --sim-log, or pipe JSON-lines to stdin.');
    printUsage();
    process.exit(1);
  }

  if (!telemetry || telemetry.length === 0) {
    console.error('ERROR: No telemetry entries found.');
    process.exit(1);
  }

  // ── Apply time filters ──
  telemetry = filterByTime(telemetry, opts);
  if (telemetry.length === 0) {
    console.error('ERROR: No entries match the time filter.');
    process.exit(1);
  }

  console.error(`Loaded ${telemetry.length} telemetry entries.`);

  // ── Normalize and calculate ──
  const entries = telemetry.map(normalizeEntry);
  const { results, summary } = calculateCosts(entries, ratesData);

  // ── Admin API cross-reference (needs a time window from the telemetry) ──
  let adminSummary = null;
  if (opts.adminKey) {
    const span = getTimeWindow(entries);
    if (!span) {
      console.error('WARNING: No timestamps in telemetry; cannot query Admin API.');
    } else {
      console.error(`Querying Admin API for ${span.start.toISOString()} → ${span.end.toISOString()}...`);
      const apiData = await fetchAdminUsage(opts.adminKey, span.start, span.end);
      if (apiData) {
        adminSummary = summarizeAdminData(apiData, ratesData);
      }
    }
  }

  // ── Output ──
  printReport(results, summary, ratesData, adminSummary, opts.format);
}
876
+
877
// Run the CLI; any rejection from main() is reported on stderr and the
// process exits with status 1.
main().catch((error) => {
  const reason = error.message;
  console.error(`FATAL: ${reason}`);
  process.exit(1);
});
@@ -0,0 +1,91 @@
1
+ {
2
+ "last_updated": "2026-04-09",
3
+ "source": "https://platform.claude.com/docs/en/about-claude/pricing",
4
+ "notes": "Auto-fetched from Anthropic docs.",
5
+ "models": {
6
+ "claude-opus-4-6": {
7
+ "input": 5,
8
+ "output": 25,
9
+ "cache_read": 0.5,
10
+ "cache_write_5m": 6.25,
11
+ "cache_write_1h": 10
12
+ },
13
+ "claude-opus-4-5-20251101": {
14
+ "input": 5,
15
+ "output": 25,
16
+ "cache_read": 0.5,
17
+ "cache_write_5m": 6.25,
18
+ "cache_write_1h": 10
19
+ },
20
+ "claude-opus-4-1-20250805": {
21
+ "input": 15,
22
+ "output": 75,
23
+ "cache_read": 1.5,
24
+ "cache_write_5m": 18.75,
25
+ "cache_write_1h": 30
26
+ },
27
+ "claude-opus-4-20250514": {
28
+ "input": 15,
29
+ "output": 75,
30
+ "cache_read": 1.5,
31
+ "cache_write_5m": 18.75,
32
+ "cache_write_1h": 30
33
+ },
34
+ "claude-sonnet-4-6": {
35
+ "input": 3,
36
+ "output": 15,
37
+ "cache_read": 0.3,
38
+ "cache_write_5m": 3.75,
39
+ "cache_write_1h": 6
40
+ },
41
+ "claude-sonnet-4-5-20250929": {
42
+ "input": 3,
43
+ "output": 15,
44
+ "cache_read": 0.3,
45
+ "cache_write_5m": 3.75,
46
+ "cache_write_1h": 6
47
+ },
48
+ "claude-sonnet-4-20250514": {
49
+ "input": 3,
50
+ "output": 15,
51
+ "cache_read": 0.3,
52
+ "cache_write_5m": 3.75,
53
+ "cache_write_1h": 6
54
+ },
55
+ "claude-sonnet-3-7-20250219": {
56
+ "input": 3,
57
+ "output": 15,
58
+ "cache_read": 0.3,
59
+ "cache_write_5m": 3.75,
60
+ "cache_write_1h": 6
61
+ },
62
+ "claude-haiku-4-5-20251001": {
63
+ "input": 1,
64
+ "output": 5,
65
+ "cache_read": 0.1,
66
+ "cache_write_5m": 1.25,
67
+ "cache_write_1h": 2
68
+ },
69
+ "claude-haiku-3-5-20241022": {
70
+ "input": 0.8,
71
+ "output": 4,
72
+ "cache_read": 0.08,
73
+ "cache_write_5m": 1,
74
+ "cache_write_1h": 1.6
75
+ },
76
+ "claude-3-opus-20240229": {
77
+ "input": 15,
78
+ "output": 75,
79
+ "cache_read": 1.5,
80
+ "cache_write_5m": 18.75,
81
+ "cache_write_1h": 30
82
+ },
83
+ "claude-3-haiku-20240307": {
84
+ "input": 0.25,
85
+ "output": 1.25,
86
+ "cache_read": 0.03,
87
+ "cache_write_5m": 0.3,
88
+ "cache_write_1h": 0.5
89
+ }
90
+ }
91
+ }