claude-code-cache-fix 1.4.1 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -118,6 +118,19 @@ Response headers are parsed for `anthropic-ratelimit-unified-5h-utilization` and
118
118
 
119
119
  Anthropic applies elevated quota drain rates during weekday peak hours (13:00–19:00 UTC, Mon–Fri). The interceptor detects peak windows and writes `peak_hour: true/false` to `quota-status.json`. See `docs/peak-hours-reference.md` for sources and details.
120
120
 
121
+ ### Usage telemetry and cost reporting
122
+
123
+ The interceptor logs per-call usage data to `~/.claude/usage.jsonl` — one JSON line per API call with model, token counts, and cache breakdown. Use the bundled cost report tool to analyze costs:
124
+
125
+ ```bash
126
+ node tools/cost-report.mjs # today's costs from interceptor log
127
+ node tools/cost-report.mjs --date 2026-04-08 # specific date
128
+ node tools/cost-report.mjs --since 2h # last 2 hours
129
+ node tools/cost-report.mjs --admin-key <key> # cross-reference with Admin API
130
+ ```
131
+
132
+ The cost report tool also accepts any JSONL input that contains Anthropic usage fields (via `--file` or stdin), which is useful for SDK users and proxy setups. See `docs/cost-report.md` for full documentation.
133
+
121
134
  ## Debug mode
122
135
 
123
136
  Enable debug logging to verify the fix is working:
@@ -157,6 +170,7 @@ Snapshots are saved to `~/.claude/cache-fix-snapshots/` and diff reports are gen
157
170
  | `CACHE_FIX_DEBUG` | `0` | Enable debug logging to `~/.claude/cache-fix-debug.log` |
158
171
  | `CACHE_FIX_PREFIXDIFF` | `0` | Enable prefix snapshot diffing |
159
172
  | `CACHE_FIX_IMAGE_KEEP_LAST` | `0` | Keep images in last N user messages (0 = disabled) |
173
+ | `CACHE_FIX_USAGE_LOG` | `~/.claude/usage.jsonl` | Path for per-call usage telemetry log |
160
174
 
161
175
  ## Limitations
162
176
 
package/package.json CHANGED
@@ -1,12 +1,13 @@
1
1
  {
2
2
  "name": "claude-code-cache-fix",
3
- "version": "1.4.1",
3
+ "version": "1.5.0",
4
4
  "description": "Fixes prompt cache regression in Claude Code that causes up to 20x cost increase on resumed sessions",
5
5
  "type": "module",
6
6
  "exports": "./preload.mjs",
7
7
  "main": "./preload.mjs",
8
8
  "files": [
9
- "preload.mjs"
9
+ "preload.mjs",
10
+ "tools/"
10
11
  ],
11
12
  "engines": {
12
13
  "node": ">=18"
package/preload.mjs CHANGED
@@ -399,6 +399,7 @@ const DEBUG = process.env.CACHE_FIX_DEBUG === "1";
399
399
  const PREFIXDIFF = process.env.CACHE_FIX_PREFIXDIFF === "1";
400
400
  const LOG_PATH = join(homedir(), ".claude", "cache-fix-debug.log");
401
401
  const SNAPSHOT_DIR = join(homedir(), ".claude", "cache-fix-snapshots");
402
+ const USAGE_JSONL = process.env.CACHE_FIX_USAGE_LOG || join(homedir(), ".claude", "usage.jsonl");
402
403
 
403
404
  function debugLog(...args) {
404
405
  if (!DEBUG) return;
@@ -813,10 +814,13 @@ globalThis.fetch = async function (url, options) {
813
814
  // Non-critical — don't break the response
814
815
  }
815
816
 
816
- // Clone response to extract TTL tier from usage (SSE stream)
817
+ // Clone response to extract TTL tier and usage telemetry from SSE stream.
818
+ // Pass the model from the request so we can log a complete usage record.
817
819
  try {
820
+ let reqModel = "unknown";
821
+ try { reqModel = JSON.parse(options?.body)?.model || "unknown"; } catch {}
818
822
  const clone = response.clone();
819
- drainTTLFromClone(clone).catch(() => {});
823
+ drainTTLFromClone(clone, reqModel).catch(() => {});
820
824
  } catch {
821
825
  // clone() failure is non-fatal
822
826
  }
@@ -837,13 +841,18 @@ globalThis.fetch = async function (url, options) {
837
841
  * Writes TTL tier to ~/.claude/quota-status.json (merges with existing data)
838
842
  * and logs to debug log.
839
843
  */
840
- async function drainTTLFromClone(clone) {
844
+ async function drainTTLFromClone(clone, model) {
841
845
  if (!clone.body) return;
842
846
 
843
847
  const reader = clone.body.getReader();
844
848
  const decoder = new TextDecoder();
845
849
  let buffer = "";
846
850
 
851
+ // Accumulate usage across message_start (input/cache) and message_delta (output)
852
+ let startUsage = null;
853
+ let deltaUsage = null;
854
+ let ttlTier = "unknown";
855
+
847
856
  try {
848
857
  while (true) {
849
858
  const { done, value } = await reader.read();
@@ -862,6 +871,7 @@ async function drainTTLFromClone(clone) {
862
871
 
863
872
  if (event.type === "message_start" && event.message?.usage) {
864
873
  const u = event.message.usage;
874
+ startUsage = u;
865
875
  const cc = u.cache_creation || {};
866
876
  const e1h = cc.ephemeral_1h_input_tokens ?? 0;
867
877
  const e5m = cc.ephemeral_5m_input_tokens ?? 0;
@@ -869,8 +879,6 @@ async function drainTTLFromClone(clone) {
869
879
  const cacheRead = u.cache_read_input_tokens ?? 0;
870
880
 
871
881
  // Determine TTL tier from which ephemeral bucket got tokens
872
- // When cache is fully warm (no creation), infer tier from previous
873
- let ttlTier = "unknown";
874
882
  if (e1h > 0 && e5m === 0) ttlTier = "1h";
875
883
  else if (e5m > 0 && e1h === 0) ttlTier = "5m";
876
884
  else if (e1h === 0 && e5m === 0 && cacheCreate === 0) {
@@ -908,10 +916,11 @@ async function drainTTLFromClone(clone) {
908
916
  };
909
917
  writeFileSync(quotaFile, JSON.stringify(quota, null, 2));
910
918
  } catch {}
919
+ }
911
920
 
912
- // Got what we need stop reading
913
- reader.cancel();
914
- return;
921
+ // Capture final usage from message_delta (has output_tokens)
922
+ if (event.type === "message_delta" && event.usage) {
923
+ deltaUsage = event.usage;
915
924
  }
916
925
  } catch {
917
926
  // Skip malformed SSE lines
@@ -921,4 +930,25 @@ async function drainTTLFromClone(clone) {
921
930
  } finally {
922
931
  try { reader.releaseLock(); } catch {}
923
932
  }
933
+
934
+ // Write usage record to JSONL after stream completes
935
+ if (startUsage) {
936
+ try {
937
+ const cc = startUsage.cache_creation || {};
938
+ const record = {
939
+ timestamp: new Date().toISOString(),
940
+ model: model || "unknown",
941
+ input_tokens: startUsage.input_tokens ?? 0,
942
+ output_tokens: deltaUsage?.output_tokens ?? 0,
943
+ cache_read_input_tokens: startUsage.cache_read_input_tokens ?? 0,
944
+ cache_creation_input_tokens: startUsage.cache_creation_input_tokens ?? 0,
945
+ ephemeral_1h_input_tokens: cc.ephemeral_1h_input_tokens ?? 0,
946
+ ephemeral_5m_input_tokens: cc.ephemeral_5m_input_tokens ?? 0,
947
+ ttl_tier: ttlTier,
948
+ };
949
+ appendFileSync(USAGE_JSONL, JSON.stringify(record) + "\n");
950
+ } catch {
951
+ // Non-critical — don't break anything
952
+ }
953
+ }
924
954
  }
@@ -0,0 +1,880 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * cost-report — Calculate Claude API costs from usage telemetry.
4
+ *
5
+ * Input sources (in priority order):
6
+ * 1. Default: reads interceptor usage log at ~/.claude/usage.jsonl
7
+ * 2. --file / -f: any JSONL file (SDK output, proxy captures, etc.)
8
+ * 3. --sim-log: extract from simulation logs (Token telemetry: {...} lines)
9
+ * 4. stdin: pipe JSON-lines from any source
10
+ *
11
+ * Pricing sources (best → fallback):
12
+ * 1. Admin API actual billed usage (--admin-key)
13
+ * 2. Live rates from Anthropic docs (--live-rates)
14
+ * 3. Bundled rates.json (default)
15
+ *
16
+ * Part of claude-code-cache-fix. Works standalone or with the interceptor.
17
+ * https://github.com/cnighswonger/claude-code-cache-fix
18
+ */
19
+
20
+ import { readFileSync, writeFileSync, existsSync } from 'node:fs';
21
+ import { createInterface } from 'node:readline';
22
+ import { fileURLToPath } from 'node:url';
23
+ import { dirname, join } from 'node:path';
24
+ import { homedir } from 'node:os';
25
+
26
+ const __dirname = dirname(fileURLToPath(import.meta.url));
27
+ const RATES_PATH = join(__dirname, 'rates.json');
28
+ const PRICING_URL = 'https://platform.claude.com/docs/en/about-claude/pricing';
29
+ const ADMIN_API_BASE = 'https://api.anthropic.com/v1/organizations/usage_report/messages';
30
+ const DEFAULT_USAGE_LOG = join(homedir(), '.claude', 'usage.jsonl');
31
+
32
+ // ─── CLI parsing ────────────────────────────────────────────────────────────
33
+
34
/**
 * Parse the command-line arguments (everything after the script path in
 * `process.argv`) into an options object.
 *
 * Value-taking flags consume the argument that follows them. If such a flag
 * is the last argument, the option keeps its current value instead of being
 * overwritten with `undefined`. `--format` values are lower-cased and
 * `markdown` is normalized to `md`, so `--format markdown` behaves like the
 * `--markdown` shorthand.
 *
 * The first bare (non-dash) argument is used as the input file when neither
 * `--file` nor `--sim-log` has been seen yet. Unknown flags are ignored.
 *
 * @returns {{simLog: ?string, file: ?string, adminKey: (string|null|undefined),
 *   liveRates: boolean, updateRates: boolean, help: boolean,
 *   date: ?string, since: ?string, format: string}}
 */
function parseArgs() {
  const args = process.argv.slice(2);
  const opts = {
    simLog: null, file: null, adminKey: null,
    liveRates: false, updateRates: false, help: false,
    date: null, since: null, format: 'text',
  };

  let i = 0;
  // Advance to the flag's value; fall back when the flag was the last argument.
  const takeValue = (fallback) => {
    i += 1;
    return i < args.length ? args[i] : fallback;
  };

  for (; i < args.length; i++) {
    switch (args[i]) {
      case '--sim-log': opts.simLog = takeValue(opts.simLog); break;
      case '--file':
      case '-f': opts.file = takeValue(opts.file); break;
      case '--admin-key': opts.adminKey = takeValue(opts.adminKey); break;
      case '--live-rates': opts.liveRates = true; break;
      case '--update-rates': opts.updateRates = true; break;
      case '--date': opts.date = takeValue(opts.date); break;
      case '--since': opts.since = takeValue(opts.since); break;
      case '--format': {
        const requested = String(takeValue(opts.format)).toLowerCase();
        opts.format = requested === 'markdown' ? 'md' : requested;
        break;
      }
      case '--json': opts.format = 'json'; break;
      case '--md':
      case '--markdown': opts.format = 'md'; break;
      case '--help':
      case '-h': opts.help = true; break;
      default:
        if (!args[i].startsWith('-') && !opts.file && !opts.simLog) {
          opts.file = args[i];
        }
    }
  }

  // An explicit --admin-key wins; otherwise fall back to the environment.
  opts.adminKey = opts.adminKey || process.env.ANTHROPIC_ADMIN_KEY;
  return opts;
}
68
+
69
+ function printUsage() {
70
+ console.log(`
71
+ cost-report — Calculate Claude API costs from usage telemetry.
72
+
73
+ Usage:
74
+ node cost-report.mjs From interceptor log (~/.claude/usage.jsonl)
75
+ node cost-report.mjs --date 2026-04-08 Filter to a specific date
76
+ node cost-report.mjs --since 2h Filter to last N hours/minutes
77
+ node cost-report.mjs --file <path> From any JSONL file
78
+ node cost-report.mjs --sim-log <path> From a simulation log
79
+ node cost-report.mjs --admin-key <key> Cross-reference with Admin API
80
+ cat telemetry.jsonl | node cost-report.mjs From JSON-lines on stdin
81
+ node cost-report.mjs --update-rates Refresh bundled rates
82
+
83
+ Input sources (checked in order):
84
+ Default Reads ~/.claude/usage.jsonl (written by the interceptor)
85
+ --file, -f <path> Any JSONL file (SDK output, proxy captures, etc.)
86
+ --sim-log <path> Extract from simulation logs (Token telemetry lines)
87
+ stdin Pipe JSON-lines from any source
88
+
89
+ Filtering:
90
+ --date <YYYY-MM-DD> Show only entries from this date
91
+ --since <duration> Show entries from last Nh, Nm, or Nd (e.g. 2h, 30m, 1d)
92
+
93
+ Output:
94
+ --format <fmt> Output format: text (default), json, md
95
+ --json Shorthand for --format json
96
+ --md, --markdown Shorthand for --format md
97
+
98
+ Pricing:
99
+ --admin-key <key> Anthropic Admin API key for actual billed usage
100
+ (or set ANTHROPIC_ADMIN_KEY env var)
101
+ --live-rates Fetch current rates from Anthropic docs
102
+ --update-rates Fetch and save current rates to rates.json
103
+
104
+ Input JSON format (one object per line):
105
+ Required: model, input_tokens, output_tokens
106
+ Optional: cache_read_input_tokens, cache_creation_input_tokens,
107
+ ephemeral_1h_input_tokens, ephemeral_5m_input_tokens,
108
+ timestamp, preflight_input_tokens, degradation_steps
109
+
110
+ Example JSONL (as written by the interceptor):
111
+ {"timestamp":"2026-04-09T01:23:45Z","model":"claude-sonnet-4-5-20250929","input_tokens":50000,"output_tokens":1200,"cache_read_input_tokens":13000,"cache_creation_input_tokens":0,"ephemeral_1h_input_tokens":0,"ephemeral_5m_input_tokens":0}
112
+
113
+ For SDK users — log usage from API responses:
114
+ const msg = await anthropic.messages.create({...});
115
+ fs.appendFileSync('usage.jsonl', JSON.stringify({
116
+ timestamp: new Date().toISOString(),
117
+ model: msg.model,
118
+ ...msg.usage
119
+ }) + '\\n');
120
+ `);
121
+ }
122
+
123
+ // ─── Rates ──────────────────────────────────────────────────────────────────
124
+
125
/**
 * Load the rates file at RATES_PATH and warn on stderr when it looks stale.
 *
 * Returns null (after a warning) when the file is missing, unreadable, or
 * does not contain a JSON object, so callers get the same "no rates" signal
 * in every failure case instead of an uncaught exception.
 *
 * @returns {?object} The parsed rates data, or null when unavailable.
 */
function loadBundledRates() {
  if (!existsSync(RATES_PATH)) {
    console.error('WARNING: No bundled rates.json found. Use --update-rates to create one.');
    return null;
  }

  let data;
  try {
    data = JSON.parse(readFileSync(RATES_PATH, 'utf8'));
  } catch (err) {
    console.error(`WARNING: Could not read bundled rates.json: ${err.message}`);
    return null;
  }
  if (data === null || typeof data !== 'object') {
    console.error('WARNING: Bundled rates.json does not contain a JSON object.');
    return null;
  }

  // Check staleness
  const msPerDay = 1000 * 60 * 60 * 24;
  const daysSince = (Date.now() - new Date(data.last_updated).getTime()) / msPerDay;
  if (Number.isNaN(daysSince)) {
    // An absent or unparseable date would otherwise pass the `> 30` check silently.
    console.error('WARNING: Bundled rates have no valid last_updated date; they may be stale.');
  } else if (daysSince > 30) {
    console.error(`WARNING: Bundled rates are ${Math.floor(daysSince)} days old (last updated ${data.last_updated}).`);
    console.error(' Run with --update-rates to refresh, or --live-rates to fetch once.');
  }
  return data;
}
141
+
142
/**
 * Download the pricing page at PRICING_URL and hand its HTML to
 * parsePricingPage.
 *
 * Any failure (network error, non-2xx status, parser exception) is reported
 * on stderr and turned into a null result.
 *
 * @returns {Promise<?object>} Whatever parsePricingPage returns, or null on failure.
 */
async function fetchLiveRates() {
  try {
    const response = await fetch(PRICING_URL);
    if (response.ok) {
      const pageHtml = await response.text();
      return parsePricingPage(pageHtml);
    }
    throw new Error(`HTTP ${response.status}`);
  } catch (failure) {
    console.error(`WARNING: Failed to fetch live rates: ${failure.message}`);
    console.error(' Falling back to bundled rates.');
    return null;
  }
}
154
+
155
/**
 * Extract per-model rates from the pricing page HTML.
 *
 * A row is recognized as a model display name ("Opus 4.6", optionally followed
 * by a parenthesized note) closing a <td>, followed by five "$X / MTok" cells
 * read in this order: base input, 5m cache write, 1h cache write, cache read,
 * output. Each display name is mapped to one or more API model IDs through
 * resolveModelId, and every ID receives the same rate record.
 *
 * Rows in which any of the five rates is not a finite number are skipped, so
 * a malformed cell can never store NaN in the rate table.
 *
 * @param {string} html - Raw HTML of the pricing page.
 * @returns {?{last_updated: string, source: string, notes: string, models: object}}
 *   Rates data, or null (after a warning on stderr) when no row could be parsed.
 */
function parsePricingPage(html) {
  // Match: "ModelName</td><td...>$X / MTok</td>..." with five rate cells.
  const rowPattern = /((?:Opus|Sonnet|Haiku)\s+[\d.]+(?:\s*\([^)]*\))?)\s*<\/td>\s*<td[^>]*>\s*\$([\d.]+)\s*\/\s*MTok\s*<\/td>\s*<td[^>]*>\s*\$([\d.]+)\s*\/\s*MTok\s*<\/td>\s*<td[^>]*>\s*\$([\d.]+)\s*\/\s*MTok\s*<\/td>\s*<td[^>]*>\s*\$([\d.]+)\s*\/\s*MTok\s*<\/td>\s*<td[^>]*>\s*\$([\d.]+)\s*\/\s*MTok/g;
  const models = {};

  for (const match of html.matchAll(rowPattern)) {
    // Strip "(deprecated)" etc.
    const name = match[1].trim().replace(/\s*\([^)]*\)\s*$/, '').trim();
    // Skip if it contains HTML
    if (name.includes('<')) continue;

    const [input, write5m, write1h, cacheRead, output] = match.slice(2, 7).map(Number.parseFloat);
    if (![input, write5m, write1h, cacheRead, output].every(Number.isFinite)) continue;

    for (const id of resolveModelId(name)) {
      models[id] = {
        input, output,
        cache_read: cacheRead,
        cache_write_5m: write5m,
        cache_write_1h: write1h,
      };
    }
  }

  if (Object.keys(models).length === 0) {
    console.error('WARNING: Could not parse any model pricing from docs page.');
    console.error(' The page format may have changed. Falling back to bundled rates.');
    return null;
  }

  return {
    last_updated: new Date().toISOString().slice(0, 10),
    source: PRICING_URL,
    notes: 'Auto-fetched from Anthropic docs.',
    models,
  };
}
213
+
214
/**
 * Map a pricing-page display name such as "Opus 4.6" to the API model IDs
 * that should receive its rates.
 *
 * Claude 3.x models use the `claude-3-<minor>-<family>-<date>` ID layout, so
 * Sonnet 3.7 and Haiku 3.5 list that ID first. The reversed spellings emitted
 * by earlier versions are kept as aliases so existing rate files keyed by
 * them still resolve.
 *
 * Unknown names fall back to a derived ID: lower-cased, with whitespace and
 * dots replaced by dashes ("Opus 4.7" becomes "claude-opus-4-7").
 *
 * @param {string} displayName - Model name as shown in the pricing table.
 * @returns {string[]} One or more API model IDs.
 */
function resolveModelId(displayName) {
  const map = {
    'Opus 4.6': ['claude-opus-4-6'],
    'Opus 4.5': ['claude-opus-4-5-20251101'],
    'Opus 4.1': ['claude-opus-4-1-20250805'],
    'Opus 4': ['claude-opus-4-20250514'],
    'Opus 3': ['claude-3-opus-20240229'],
    'Sonnet 4.6': ['claude-sonnet-4-6'],
    'Sonnet 4.5': ['claude-sonnet-4-5-20250929'],
    'Sonnet 4': ['claude-sonnet-4-20250514'],
    'Sonnet 3.7': ['claude-3-7-sonnet-20250219', 'claude-sonnet-3-7-20250219'],
    'Haiku 4.5': ['claude-haiku-4-5-20251001'],
    'Haiku 3.5': ['claude-3-5-haiku-20241022', 'claude-haiku-3-5-20241022'],
    'Haiku 3': ['claude-3-haiku-20240307'],
  };
  // Own-property check so names like "constructor" cannot hit Object.prototype.
  if (Object.hasOwn(map, displayName)) return map[displayName];
  return [`claude-${displayName.trim().toLowerCase().replace(/[\s.]+/g, '-')}`];
}
232
+
233
/**
 * Find the rate record for a model ID.
 *
 * Matching goes from most to least specific, so the result does not depend
 * on the order of keys in the rate table:
 *   1. exact key;
 *   2. the model ID with its trailing -YYYYMMDD date removed;
 *   3. a key that equals the model ID once both have their date removed
 *      (e.g. "claude-opus-4" or "claude-opus-4-<other date>" resolves to
 *      "claude-opus-4-20250514", never to "claude-opus-4-6");
 *   4. the longest key that is a prefix of the model ID;
 *   5. the first key that the model ID is a prefix of.
 *
 * @param {?{models: ?object}} ratesData - Rates data with a `models` map.
 * @param {string} modelId - API model ID from a usage record.
 * @returns {?object} The matching rate record, or null when nothing matches,
 *   when the model ID is empty or not a string, or when no rates are loaded.
 */
function lookupRates(ratesData, modelId) {
  if (!ratesData || !ratesData.models) return null;
  // An empty ID would otherwise be a "prefix" of every key.
  if (typeof modelId !== 'string' || modelId === '') return null;

  const { models } = ratesData;
  const stripDate = (id) => id.replace(/-\d{8}$/, '');

  // Direct match
  if (Object.hasOwn(models, modelId)) return models[modelId];

  // Same family once the date suffix is removed from either side
  const base = stripDate(modelId);
  if (Object.hasOwn(models, base)) return models[base];
  const keys = Object.keys(models).filter((key) => key !== '');
  const sameFamily = keys.find((key) => stripDate(key) === base);
  if (sameFamily !== undefined) return models[sameFamily];

  // Prefix match: prefer the most specific (longest) key that the ID extends
  let longestPrefix = null;
  for (const key of keys) {
    if (modelId.startsWith(key) && (longestPrefix === null || key.length > longestPrefix.length)) {
      longestPrefix = key;
    }
  }
  if (longestPrefix !== null) return models[longestPrefix];

  const extension = keys.find((key) => key.startsWith(modelId));
  return extension !== undefined ? models[extension] : null;
}
251
+
252
+ // ─── Input parsing ──────────────────────────────────────────────────────────
253
+
254
/**
 * Collect the JSON payloads of "Token telemetry: {...}" lines from a
 * simulation log.
 *
 * When the text before the "Token telemetry:" marker contains a bracketed
 * token (e.g. "[2026-04-08T10:00:00Z]"), that token is stored on the entry as
 * `_timestamp`. Only the part of the line before the marker is searched, so a
 * JSON array inside the payload is never mistaken for a timestamp. Lines
 * whose payload is not valid JSON are skipped.
 *
 * @param {string} filePath - Path of the log file to read.
 * @returns {object[]} Parsed telemetry objects in file order (possibly empty,
 *   in which case a warning is printed on stderr).
 */
function extractFromSimLog(filePath) {
  const telemetryPattern = /Token telemetry:\s*(\{.+\})/;
  const bracketPattern = /\[([^\]]+)\]/;
  const entries = [];

  for (const line of readFileSync(filePath, 'utf8').split('\n')) {
    const match = telemetryPattern.exec(line);
    if (!match) continue;
    try {
      const obj = JSON.parse(match[1]);
      // Extract timestamp from the log prefix only, not from the JSON payload
      const tsMatch = bracketPattern.exec(line.slice(0, match.index));
      if (tsMatch) obj._timestamp = tsMatch[1];
      entries.push(obj);
    } catch { /* skip malformed */ }
  }

  if (entries.length === 0) {
    console.error('WARNING: No "Token telemetry" entries found in sim log.');
    console.error(' This log may use an older format without structured telemetry.');
  }

  return entries;
}
279
+
280
/**
 * Parse JSON-lines text into an array of objects.
 *
 * Blank lines and lines that are not valid JSON are skipped. Lines holding a
 * JSON value that is not an object (numbers, strings, booleans, null, arrays)
 * are skipped too, since a usage record is always an object.
 *
 * @param {string} text - Newline-separated JSON documents.
 * @returns {object[]} The parsed objects, in input order.
 */
function parseJsonLines(text) {
  const records = [];
  for (const line of text.split('\n')) {
    if (!line.trim()) continue;
    let value;
    try {
      value = JSON.parse(line);
    } catch {
      continue; // malformed line
    }
    if (value !== null && typeof value === 'object' && !Array.isArray(value)) {
      records.push(value);
    }
  }
  return records;
}
286
+
287
/**
 * Read standard input to the end, line by line.
 *
 * @returns {Promise<string>} All lines read from stdin, joined with "\n".
 */
async function readStdin() {
  const lineReader = createInterface({ input: process.stdin, terminal: false });
  const collected = [];
  for await (const text of lineReader) {
    collected.push(text);
  }
  return collected.join('\n');
}
293
+
294
/**
 * Convert a raw usage record into the field names used by the cost
 * calculation.
 *
 * Accepted naming conventions:
 *   - Interceptor: input_tokens, output_tokens, timestamp, and flat
 *     ephemeral_1h_input_tokens / ephemeral_5m_input_tokens
 *   - Sim telemetry: actual_input_tokens, actual_output_tokens, _timestamp
 *   - SDK (`...msg.usage`): input_tokens, output_tokens, and the cache-write
 *     tier split nested under `cache_creation`
 *
 * Flat tier fields win over the nested ones. Reading the nested form keeps an
 * SDK record's 1h cache writes from being reported as having no tier split.
 *
 * @param {object} raw - One parsed usage record.
 * @returns {object} Normalized entry; missing counts are 0, missing optional
 *   values are null, and `degradation` is always an array.
 */
function normalizeEntry(raw) {
  const tierSplit = raw.cache_creation ?? {};
  const steps = raw.degradation_steps;
  return {
    model: raw.model || 'unknown',
    timestamp: raw.timestamp || raw._timestamp || null,
    input_tokens: raw.actual_input_tokens ?? raw.input_tokens ?? 0,
    output_tokens: raw.actual_output_tokens ?? raw.output_tokens ?? 0,
    cache_read: raw.cache_read_input_tokens ?? 0,
    cache_create: raw.cache_creation_input_tokens ?? 0,
    eph_1h: raw.ephemeral_1h_input_tokens ?? tierSplit.ephemeral_1h_input_tokens ?? 0,
    eph_5m: raw.ephemeral_5m_input_tokens ?? tierSplit.ephemeral_5m_input_tokens ?? 0,
    preflight: raw.preflight_input_tokens ?? null,
    // Later code reads .length and iterates this value, so it must be an array.
    degradation: Array.isArray(steps) ? steps : [],
    would_have_exceeded: raw.would_have_exceeded ?? false,
    sys_prompt_est: raw.system_prompt_tokens_est ?? null,
  };
}
314
+
315
+ // ─── Admin API ──────────────────────────────────────────────────────────────
316
+
317
/**
 * Query the Admin API usage report (ADMIN_API_BASE) for hourly buckets,
 * grouped by model, covering the given time window.
 *
 * The window is widened to whole hours: the start is rounded down and the end
 * is moved to the next hour boundary. Rounding uses UTC, so the request is the
 * same in every local time zone, including those whose offset is not a whole
 * number of hours.
 *
 * Every failure — invalid dates, network errors, non-2xx responses — is
 * reported on stderr and turned into a null result.
 *
 * @param {string} adminKey - Admin API key, sent in the x-api-key header.
 * @param {string|number|Date} startTime - Start of the window (any Date input).
 * @param {string|number|Date} endTime - End of the window (any Date input).
 * @returns {Promise<?object>} Parsed JSON response, or null on failure.
 */
async function fetchAdminUsage(adminKey, startTime, endTime) {
  try {
    // Round start down and end up to hour boundaries (UTC)
    const start = new Date(startTime);
    start.setUTCMinutes(0, 0, 0);
    const end = new Date(endTime);
    end.setUTCHours(end.getUTCHours() + 1, 0, 0, 0);

    // toISOString() throws a RangeError for invalid dates; it is caught below.
    const url = `${ADMIN_API_BASE}?bucket_width=1h` +
      `&starting_at=${start.toISOString()}` +
      `&ending_at=${end.toISOString()}` +
      `&group_by[]=model`;

    const resp = await fetch(url, {
      headers: {
        'x-api-key': adminKey,
        'anthropic-version': '2023-06-01',
      },
    });
    if (!resp.ok) throw new Error(`HTTP ${resp.status}: ${await resp.text()}`);
    return await resp.json();
  } catch (err) {
    console.error(`WARNING: Admin API query failed: ${err.message}`);
    return null;
  }
}
343
+
344
/**
 * Aggregate an Admin API usage response per model and price it.
 *
 * Token counts from every `results` row of every bucket in `apiData.data` are
 * summed per model. Each model's cost is then computed with the rates found
 * by lookupRates; models without rates keep a cost of 0.
 *
 * A missing response, bucket, or results list is treated as empty. A rate
 * field that is absent or not numeric counts as 0, so one incomplete rate
 * record cannot turn the totals into NaN.
 *
 * @param {?object} apiData - Parsed Admin API response.
 * @param {?object} ratesData - Rates data passed through to lookupRates.
 * @returns {{byModel: object, totalCost: number}} Per-model token counts and
 *   cost (USD), plus the overall cost.
 */
function summarizeAdminData(apiData, ratesData) {
  const byModel = {};
  let totalCost = 0;

  for (const bucket of (apiData?.data || [])) {
    for (const r of (bucket?.results || [])) {
      const model = r.model || 'unknown';
      byModel[model] ??= { uncached: 0, cache_read: 0, cache_1h: 0, cache_5m: 0, output: 0, cost: 0 };
      const m = byModel[model];
      const cc = r.cache_creation || {};
      m.uncached += r.uncached_input_tokens || 0;
      m.cache_read += r.cache_read_input_tokens || 0;
      m.cache_1h += cc.ephemeral_1h_input_tokens || 0;
      m.cache_5m += cc.ephemeral_5m_input_tokens || 0;
      m.output += r.output_tokens || 0;
    }
  }

  // Calculate costs per model (rates are USD per million tokens)
  for (const [model, m] of Object.entries(byModel)) {
    const rates = lookupRates(ratesData, model);
    if (rates) {
      const rate = (field) => Number(rates[field]) || 0;
      m.cost = (m.uncached * rate('input') + m.cache_read * rate('cache_read') +
        m.cache_1h * rate('cache_write_1h') + m.cache_5m * rate('cache_write_5m') +
        m.output * rate('output')) / 1_000_000;
    }
    totalCost += m.cost;
  }

  return { byModel, totalCost };
}
377
+
378
+ // ─── Cost calculation ───────────────────────────────────────────────────────
379
+
380
/**
 * Price every normalized usage entry and build the report summary.
 *
 * Rates are resolved once per distinct model through lookupRates. Entries
 * whose model has no rates are returned with `cost: null` and
 * `rateSource: 'missing'` and are left out of the summary; the warning for
 * such a model is printed once rather than once per entry.
 *
 * Cache writes are priced by tier. When an entry has cache-creation tokens
 * but no 1h/5m split, all of them are priced at the 5m rate.
 *
 * @param {object[]} entries - Entries with the field names this function
 *   reads: model, input_tokens, output_tokens, cache_read, cache_create,
 *   eph_1h, eph_5m, preflight, degradation, would_have_exceeded.
 * @param {?object} ratesData - Rates data passed through to lookupRates.
 * @returns {{results: object[], summary: object}} Per-entry results (each with
 *   `cost`, plus `cw1h`/`cw5m` when priced) and the aggregated summary.
 */
function calculateCosts(entries, ratesData) {
  const results = [];
  const summary = {
    calls: 0,
    byModel: {},
    totals: { input: 0, output: 0, cache_read: 0, cache_1h: 0, cache_5m: 0, preflight: 0 },
    totalCost: 0,
    degradedCalls: 0,
    exceededCalls: 0,
    degradationSteps: {},
  };
  const ratesByModel = new Map();

  for (const entry of entries) {
    if (!ratesByModel.has(entry.model)) {
      const found = lookupRates(ratesData, entry.model) || null;
      if (!found) {
        console.error(`WARNING: No rates found for model "${entry.model}". Skipping cost calculation.`);
      }
      ratesByModel.set(entry.model, found);
    }
    const rates = ratesByModel.get(entry.model);
    if (!rates) {
      results.push({ ...entry, cost: null, rateSource: 'missing' });
      continue;
    }

    // Determine cache write tier breakdown
    const cw1h = entry.eph_1h;
    // No tier breakdown available: assume 5m (conservative — lower rate)
    const untiered = cw1h === 0 && entry.eph_5m === 0 && entry.cache_create > 0;
    const cw5m = untiered ? entry.cache_create : entry.eph_5m;

    const cost = (
      entry.input_tokens * rates.input +
      entry.output_tokens * rates.output +
      entry.cache_read * rates.cache_read +
      cw1h * rates.cache_write_1h +
      cw5m * rates.cache_write_5m
    ) / 1_000_000;

    results.push({ ...entry, cost, cw1h, cw5m });

    // Accumulate summary
    summary.calls++;
    summary.totals.input += entry.input_tokens;
    summary.totals.output += entry.output_tokens;
    summary.totals.cache_read += entry.cache_read;
    summary.totals.cache_1h += cw1h;
    summary.totals.cache_5m += cw5m;
    if (entry.preflight != null) summary.totals.preflight += entry.preflight;

    summary.byModel[entry.model] ??= { calls: 0, cost: 0 };
    summary.byModel[entry.model].calls++;
    summary.byModel[entry.model].cost += cost;

    summary.totalCost += cost;

    const steps = Array.isArray(entry.degradation) ? entry.degradation : [];
    if (steps.length > 0) {
      summary.degradedCalls++;
      for (const step of steps) {
        summary.degradationSteps[step] = (summary.degradationSteps[step] || 0) + 1;
      }
    }
    if (entry.would_have_exceeded) summary.exceededCalls++;
  }

  return { results, summary };
}
447
+
448
+ // ─── Report formatting ──────────────────────────────────────────────────────
449
+
450
/**
 * Format a value with en-US digit grouping (1234567 -> "1,234,567").
 *
 * @param {number} n - Value to format.
 * @returns {string} The result of `n.toLocaleString('en-US')`.
 */
function fmt(n) {
  const locale = 'en-US';
  return n.toLocaleString(locale);
}
453
+
454
/**
 * Format a dollar amount with four decimals.
 *
 * Values that cannot be shown as an amount — null, undefined, NaN, ±Infinity,
 * or anything that is not a number — render as the " N/A" placeholder. This
 * covers averages computed over zero calls (0 / 0).
 *
 * @param {?number} n - Cost in USD.
 * @returns {string} e.g. "$1.5000", or " N/A".
 */
function fmtCost(n) {
  if (!Number.isFinite(n)) return ' N/A';
  return `$${n.toFixed(4)}`;
}
458
+
459
/**
 * Dispatch to the report printer for the requested output format.
 *
 * 'json' and 'md' select the JSON and Markdown printers; any other value
 * selects the plain-text printer.
 *
 * @param {object[]} results - Per-call results.
 * @param {object} summary - Aggregated summary.
 * @param {?object} ratesData - Rates data used for the report.
 * @param {?object} adminSummary - Admin API summary, if available.
 * @param {string} format - Requested output format.
 * @returns {*} Whatever the selected printer returns.
 */
function printReport(results, summary, ratesData, adminSummary, format) {
  switch (format) {
    case 'json':
      return printJsonReport(results, summary, ratesData, adminSummary);
    case 'md':
      return printMarkdownReport(results, summary, ratesData, adminSummary);
    default:
      return printTextReport(results, summary, ratesData, adminSummary);
  }
}
464
+
465
+ // ─── JSON output ────────────────────────────────────────────────────────────
466
+
467
/**
 * Print the report as a single pretty-printed JSON document on stdout.
 *
 * The document has `generated`, `pricing`, `calls` and `summary` sections,
 * plus `admin_api` when an Admin API summary is supplied. Degradation details
 * are included only for calls, and summaries, that have any.
 *
 * @param {object[]} results - Per-call results.
 * @param {object} summary - Aggregated summary.
 * @param {?object} ratesData - Rates data (source and last_updated are reported).
 * @param {?object} adminSummary - Admin API summary, if available.
 */
function printJsonReport(results, summary, ratesData, adminSummary) {
  const toCallRecord = (call) => ({
    timestamp: call.timestamp,
    model: call.model,
    input_tokens: call.input_tokens,
    output_tokens: call.output_tokens,
    cache_read: call.cache_read,
    cache_write_1h: call.cw1h || 0,
    cache_write_5m: call.cw5m || 0,
    cost: call.cost,
    degradation_steps: call.degradation.length > 0 ? call.degradation : undefined,
  });

  let degradation;
  if (summary.degradedCalls > 0) {
    degradation = {
      degraded_calls: summary.degradedCalls,
      exceeded_calls: summary.exceededCalls,
      steps: summary.degradationSteps,
    };
  }

  const report = {
    generated: new Date().toISOString(),
    pricing: { source: ratesData?.source || 'bundled', last_updated: ratesData?.last_updated },
    calls: results.map(toCallRecord),
    summary: {
      total_calls: summary.calls,
      total_cost: summary.totalCost,
      avg_cost_per_call: summary.totalCost / summary.calls,
      tokens: summary.totals,
      by_model: summary.byModel,
      degradation,
    },
  };

  if (adminSummary) {
    const { totalCost: billedCost, byModel: billedByModel } = adminSummary;
    report.admin_api = {
      total_cost: billedCost,
      delta: billedCost - summary.totalCost,
      by_model: billedByModel,
    };
  }

  console.log(JSON.stringify(report, null, 2));
}
504
+
505
+ // ─── Markdown output ────────────────────────────────────────────────────────
506
+
507
/**
 * Print the report as Markdown on stdout.
 *
 * Sections: pricing line, per-call table (only when there are at most 50
 * calls), summary, per-model costs, degradation (only when some call was
 * degraded) and the Admin API comparison (only when an Admin summary is
 * supplied).
 *
 * The average cost is passed to fmtCost as null when there are no priced
 * calls, avoiding a 0/0 division. The Admin API delta is labelled
 * "no difference" when both totals are equal instead of naming a side.
 *
 * @param {object[]} results - Per-call results.
 * @param {object} summary - Aggregated summary.
 * @param {?object} ratesData - Rates data (source and last_updated are shown).
 * @param {?object} adminSummary - Admin API summary, if available.
 */
function printMarkdownReport(results, summary, ratesData, adminSummary) {
  const rateSource = ratesData?.last_updated ? `rates from ${ratesData.last_updated}` : 'unknown rates';
  const avgCost = summary.calls > 0 ? summary.totalCost / summary.calls : null;
  const lines = [];

  lines.push('# Claude API Cost Report');
  lines.push('');
  lines.push(`Pricing: ${rateSource} (${ratesData?.source || 'bundled'})`);
  lines.push('');

  // Per-call table
  if (results.length <= 50) {
    lines.push('## Per-Call Breakdown');
    lines.push('');
    lines.push('| # | Timestamp | Model | Input | Output | Cache Rd | Cache Wr | Cost | Degradation |');
    lines.push('|---|-----------|-------|------:|-------:|---------:|---------:|-----:|-------------|');

    results.forEach((r, index) => {
      const ts = r.timestamp ? r.timestamp.slice(0, 19) : '—';
      const modelShort = r.model.replace('claude-', '').replace(/-\d{8}$/, '');
      const cacheWr = (r.cw1h || 0) + (r.cw5m || 0);
      const deg = r.degradation.length > 0 ? r.degradation.length + ' steps' : '';
      lines.push(`| ${index + 1} | ${ts} | ${modelShort} | ${fmt(r.input_tokens)} | ${fmt(r.output_tokens)} | ${fmt(r.cache_read)} | ${fmt(cacheWr)} | ${fmtCost(r.cost)} | ${deg} |`);
    });
    lines.push('');
  }

  // Summary
  lines.push('## Summary');
  lines.push('');
  lines.push(`| Metric | Value |`);
  lines.push(`|--------|------:|`);
  lines.push(`| Total API calls | ${summary.calls} |`);
  lines.push(`| Total input tokens | ${fmt(summary.totals.input)} |`);
  lines.push(`| Total output tokens | ${fmt(summary.totals.output)} |`);
  lines.push(`| Total cache read | ${fmt(summary.totals.cache_read)} |`);
  lines.push(`| Total cache write 1h | ${fmt(summary.totals.cache_1h)} |`);
  lines.push(`| Total cache write 5m | ${fmt(summary.totals.cache_5m)} |`);
  lines.push(`| **Total cost** | **${fmtCost(summary.totalCost)}** |`);
  lines.push(`| Avg cost per call | ${fmtCost(avgCost)} |`);
  lines.push('');

  // By model
  lines.push('## By Model');
  lines.push('');
  lines.push('| Model | Calls | Cost |');
  lines.push('|-------|------:|-----:|');
  for (const [model, info] of Object.entries(summary.byModel)) {
    lines.push(`| ${model} | ${info.calls} | ${fmtCost(info.cost)} |`);
  }
  lines.push('');

  // Degradation
  if (summary.degradedCalls > 0) {
    lines.push('## Degradation');
    lines.push('');
    lines.push(`Calls with degradation: ${summary.degradedCalls}/${summary.calls}`);
    lines.push('');
    lines.push('| Step | Count |');
    lines.push('|------|------:|');
    // Most frequent steps first
    const stepsByCount = Object.entries(summary.degradationSteps).sort((a, b) => b[1] - a[1]);
    for (const [step, count] of stepsByCount) {
      lines.push(`| ${step} | ${count}/${summary.calls} |`);
    }
    lines.push('');
  }

  // Admin API
  if (adminSummary) {
    const delta = adminSummary.totalCost - summary.totalCost;
    let deltaLabel = 'no difference';
    if (delta > 0) deltaLabel = 'API higher';
    else if (delta < 0) deltaLabel = 'telemetry higher';

    lines.push('## Admin API (Actual Billed)');
    lines.push('');
    lines.push(`| Source | Cost |`);
    lines.push(`|--------|-----:|`);
    lines.push(`| API-reported | ${fmtCost(adminSummary.totalCost)} |`);
    lines.push(`| Telemetry | ${fmtCost(summary.totalCost)} |`);
    lines.push(`| Delta | ${fmtCost(Math.abs(delta))} (${deltaLabel}) |`);
    lines.push('');
    lines.push('> Note: Admin API reports all usage for the time window, which may include other concurrent API activity.');
    lines.push('');
  }

  console.log(lines.join('\n'));
}
590
+
591
+ // ─── Text output ────────────────────────────────────────────────────────────
592
+
593
/**
 * Print the human-readable cost report to stdout.
 *
 * Sections, in order: header, per-call table (only when there are at most 50
 * results), token summary, per-model totals, cost, degradation (only when at
 * least one call was degraded) and the Admin API comparison (only when
 * `adminSummary` is provided).
 *
 * @param {Array<object>} results - Per-call rows; reads `timestamp`, `model`,
 *   `input_tokens`, `output_tokens`, `cache_read`, `cw1h`, `cw5m`, `cost` and
 *   `degradation` (an array) from each row.
 * @param {object} summary - Aggregates; reads `calls`, `totals`, `byModel`,
 *   `totalCost`, `degradedCalls`, `exceededCalls` and `degradationSteps`.
 * @param {object} ratesData - Pricing data; reads `last_updated` and `source`,
 *   and is passed to `lookupRates`.
 * @param {object|null} adminSummary - Admin API totals (`totalCost`, `byModel`),
 *   or a falsy value to skip that section.
 * @returns {void}
 */
function printTextReport(results, summary, ratesData, adminSummary) {
  const rateSource = ratesData?.last_updated ? `rates from ${ratesData.last_updated}` : 'unknown rates';

  console.log('');
  console.log('='.repeat(80));
  console.log(' CLAUDE API COST REPORT');
  console.log('='.repeat(80));
  console.log(` Pricing: ${rateSource} (${ratesData?.source || 'bundled'})`);
  console.log('');

  // ── Per-call table ──
  // Skipped for large inputs so the summary is not buried under hundreds of rows.
  if (results.length <= 50) {
    console.log('─── Per-Call Breakdown ─────────────────────────────────────────────────────────');
    console.log(
      ' #'.padEnd(5) +
      'Timestamp'.padEnd(28) +
      'Model'.padEnd(10) +
      'Input'.padStart(10) +
      'Output'.padStart(9) +
      'CacheRd'.padStart(9) +
      'CacheWr'.padStart(9) +
      'Cost'.padStart(10) +
      ' Degradation'
    );
    console.log(' ' + '─'.repeat(78));

    for (let i = 0; i < results.length; i++) {
      const r = results[i];
      // Coerce before slicing so a numeric (epoch) timestamp cannot crash the report.
      const ts = r.timestamp ? String(r.timestamp).slice(0, 19) : '—';
      // Strip the vendor prefix and trailing YYYYMMDD stamp, then clip to the column width.
      const modelShort = r.model.replace('claude-', '').replace(/-\d{8}$/, '').slice(0, 8);
      const cacheWr = (r.cw1h || 0) + (r.cw5m || 0);
      const deg = r.degradation.length > 0 ? r.degradation.length + ' steps' : '';

      console.log(
        ` ${String(i + 1).padStart(2)} ` +
        ts.padEnd(28) +
        modelShort.padEnd(10) +
        fmt(r.input_tokens).padStart(10) +
        fmt(r.output_tokens).padStart(9) +
        fmt(r.cache_read).padStart(9) +
        fmt(cacheWr).padStart(9) +
        fmtCost(r.cost).padStart(10) +
        ' ' + deg
      );
    }
    console.log('');
  }

  // ── Summary ──
  console.log('─── Summary ────────────────────────────────────────────────────────────────────');
  console.log(` Total API calls: ${summary.calls}`);
  console.log(` Total input tokens: ${fmt(summary.totals.input)}`);
  console.log(` Total output tokens: ${fmt(summary.totals.output)}`);
  console.log(` Total cache read: ${fmt(summary.totals.cache_read)}`);
  console.log(` Total cache write 1h: ${fmt(summary.totals.cache_1h)}`);
  console.log(` Total cache write 5m: ${fmt(summary.totals.cache_5m)}`);
  if (summary.totals.preflight > 0) {
    const saved = summary.totals.preflight - summary.totals.input;
    const pct = (saved / summary.totals.preflight * 100).toFixed(1);
    console.log(` Preflight estimate: ${fmt(summary.totals.preflight)} (degradation saved ${fmt(saved)} tokens, ${pct}%)`);
  }
  console.log('');

  // ── By model ──
  console.log(' By model:');
  for (const [model, info] of Object.entries(summary.byModel)) {
    const modelShort = model.replace('claude-', '');
    console.log(` ${modelShort}: ${info.calls} calls, ${fmtCost(info.cost)}`);
  }
  console.log('');

  // ── Cost ──
  console.log('─── Cost ───────────────────────────────────────────────────────────────────────');
  console.log(` Telemetry-calculated: ${fmtCost(summary.totalCost)}`);
  // Guard the division: an empty summary must not print NaN.
  const avgCost = summary.calls > 0 ? summary.totalCost / summary.calls : 0;
  console.log(` Avg cost per call: ${fmtCost(avgCost)}`);

  // Cache savings estimate: what the cached tokens would have cost at the full
  // input rate, minus what they cost at the cache-read rate. Only computed for
  // single-model runs because the totals are not broken down per model here.
  if (summary.totals.cache_read > 0) {
    const models = Object.keys(summary.byModel);
    if (models.length === 1) {
      const rates = lookupRates(ratesData, models[0]);
      if (rates) {
        // Rates are divided by 1,000,000, i.e. they are prices per million tokens.
        const fullCost = summary.totals.cache_read * rates.input / 1_000_000;
        const cacheCost = summary.totals.cache_read * rates.cache_read / 1_000_000;
        const saved = fullCost - cacheCost;
        if (summary.totalCost > 0) {
          console.log(` Cache read savings: ${fmtCost(saved)} (${(saved / summary.totalCost * 100).toFixed(1)}% of total)`);
        } else {
          // No meaningful percentage exists against a zero total.
          console.log(` Cache read savings: ${fmtCost(saved)}`);
        }
      }
    }
  }
  console.log('');

  // ── Degradation ──
  if (summary.degradedCalls > 0) {
    console.log('─── Degradation ────────────────────────────────────────────────────────────────');
    console.log(` Calls with degradation: ${summary.degradedCalls}/${summary.calls}`);
    console.log(` Budget exceeded: ${summary.exceededCalls}/${summary.calls}`);
    // Most frequent steps first.
    for (const [step, count] of Object.entries(summary.degradationSteps).sort((a, b) => b[1] - a[1])) {
      console.log(` ${step}: ${count}/${summary.calls}`);
    }
    console.log('');
  }

  // ── Admin API comparison ──
  if (adminSummary) {
    console.log('─── Admin API (Actual Billed) ──────────────────────────────────────────────────');
    console.log(` API-reported total: ${fmtCost(adminSummary.totalCost)}`);
    console.log(` Telemetry total: ${fmtCost(summary.totalCost)}`);
    const delta = adminSummary.totalCost - summary.totalCost;
    // Three-way label: an exact match must not be reported as "telemetry higher".
    let deltaLabel = 'totals match';
    if (delta > 0) {
      deltaLabel = 'API higher';
    } else if (delta < 0) {
      deltaLabel = 'telemetry higher';
    }
    console.log(` Delta: ${fmtCost(Math.abs(delta))} (${deltaLabel})`);
    console.log('');
    console.log(' API breakdown by model:');
    for (const [model, m] of Object.entries(adminSummary.byModel)) {
      const modelShort = model.replace('claude-', '');
      console.log(` ${modelShort}:`);
      console.log(` Uncached input: ${fmt(m.uncached)}`);
      console.log(` Cache read: ${fmt(m.cache_read)}`);
      console.log(` Cache write (1h): ${fmt(m.cache_1h)}`);
      console.log(` Cache write (5m): ${fmt(m.cache_5m)}`);
      console.log(` Output: ${fmt(m.output)}`);
      console.log(` Cost: ${fmtCost(m.cost)}`);
    }
    console.log('');
    console.log(' NOTE: Admin API reports all usage for the sim\'s time window,');
    console.log(' which may include other concurrent API activity.');
  }

  console.log('='.repeat(80));
  console.log('');
}
723
+
724
+ // ─── Time window extraction ─────────────────────────────────────────────────
725
+
726
/**
 * Compute the earliest and latest timestamps found in a list of entries.
 *
 * Entries whose `timestamp` is falsy or cannot be parsed by `Date` are ignored.
 * The bounds are found in a single pass rather than via
 * `Math.min(...array)`, because spreading a very large log into call
 * arguments can exceed the engine's argument limit and throw a RangeError.
 *
 * @param {Array<{timestamp?: string|number|Date}>} entries - Entries to scan.
 * @returns {{start: Date, end: Date}|null} The covered window, or `null` when
 *   no entry carries a usable timestamp.
 */
function getTimeWindow(entries) {
  let earliest = Infinity;
  let latest = -Infinity;

  for (const entry of entries) {
    if (!entry.timestamp) continue;
    const ms = new Date(entry.timestamp).getTime();
    // An unparseable timestamp yields NaN, which would poison both bounds.
    if (Number.isNaN(ms)) continue;
    if (ms < earliest) earliest = ms;
    if (ms > latest) latest = ms;
  }

  if (earliest === Infinity) return null;

  return {
    start: new Date(earliest),
    end: new Date(latest),
  };
}
738
+
739
+ // ─── Time filtering ─────────────────────────────────────────────────────────
740
+
741
/**
 * Convert a relative duration such as `2h`, `30m` or `1d` into the absolute
 * point in time that lies that far before now.
 *
 * The unit is case-insensitive and may be separated from the number by
 * whitespace.
 *
 * @param {string} since - Duration: a non-negative integer followed by
 *   `m` (minutes), `h` (hours) or `d` (days).
 * @returns {Date|null} The cutoff instant, or `null` when the input does not
 *   match the format or is so large that the cutoff is not a representable Date.
 */
function parseSinceDuration(since) {
  const match = String(since).match(/^(\d+)\s*(h|m|d)$/i);
  if (!match) return null;

  const msPerUnit = { m: 60000, h: 3600000, d: 86400000 };
  const amount = Number.parseInt(match[1], 10);
  const unit = match[2].toLowerCase();

  const cutoff = new Date(Date.now() - amount * msPerUnit[unit]);
  // An out-of-range duration yields an Invalid Date, which compares false
  // against everything and would silently disable filtering in the caller.
  return Number.isNaN(cutoff.getTime()) ? null : cutoff;
}
749
+
750
/**
 * Restrict entries to a calendar date (`opts.date`) or to a trailing window
 * (`opts.since`). When both are given, `opts.date` takes precedence.
 *
 * `opts.date` is interpreted in the local time zone: the window runs from
 * 00:00:00.000 to 23:59:59.999 of that day. Entries without a timestamp are
 * always kept. If either option cannot be parsed, a warning is written to
 * stderr and the entries are returned unfiltered.
 *
 * @param {Array<{timestamp?: string|number|Date}>} entries - Entries to filter.
 * @param {{date?: string, since?: string}} opts - `date` as `YYYY-MM-DD`,
 *   `since` as a duration accepted by `parseSinceDuration`.
 * @returns {Array<object>} The original array when nothing is filtered,
 *   otherwise a new array with the matching entries.
 */
function filterByTime(entries, opts) {
  if (!opts.date && !opts.since) return entries;

  let cutoff = null;
  let dateEnd = null;

  if (opts.date) {
    // Filter to a specific date (YYYY-MM-DD), local time.
    cutoff = new Date(`${opts.date}T00:00:00`);
    dateEnd = new Date(`${opts.date}T23:59:59.999`);
    // Invalid Date bounds compare false against everything, so a typo in
    // --date would otherwise silently return every entry.
    if (Number.isNaN(cutoff.getTime()) || Number.isNaN(dateEnd.getTime())) {
      console.error(`WARNING: Could not parse --date "${opts.date}". Use format YYYY-MM-DD.`);
      return entries;
    }
  } else {
    cutoff = parseSinceDuration(opts.since);
    if (!cutoff) {
      console.error(`WARNING: Could not parse --since "${opts.since}". Use format like 2h, 30m, 1d.`);
      return entries;
    }
  }

  const before = entries.length;
  const filtered = entries.filter((e) => {
    if (!e.timestamp) return true; // keep entries without timestamps
    const ts = new Date(e.timestamp);
    if (cutoff && ts < cutoff) return false;
    if (dateEnd && ts > dateEnd) return false;
    return true;
  });

  if (filtered.length < before) {
    console.error(`Filtered: ${before} → ${filtered.length} entries (${opts.date ? 'date ' + opts.date : 'since ' + opts.since}).`);
  }

  return filtered;
}
783
+
784
+ // ─── Main ───────────────────────────────────────────────────────────────────
785
+
786
/**
 * CLI entry point.
 *
 * Flow: parse arguments → handle `--help` and the update-rates mode (both
 * terminate the process) → pick pricing data (live when requested, bundled
 * otherwise) → load telemetry from the sim log, an explicit file, piped stdin
 * or the default usage log, in that order → apply the time filter →
 * normalize and price the entries → optionally query the Admin API for the
 * same time window → print the report.
 *
 * Exits with status 1 when rates cannot be obtained, no input source exists,
 * no entries are found, or the time filter leaves nothing.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const opts = parseArgs();

  if (opts.help) {
    printUsage();
    process.exit(0);
  }

  // ── Update rates mode ──
  if (opts.updateRates) {
    console.log(`Fetching rates from ${PRICING_URL}...`);
    const fetched = await fetchLiveRates();
    if (!fetched) {
      console.error('Failed to fetch rates. Bundled rates unchanged.');
      process.exit(1);
    } else {
      writeFileSync(RATES_PATH, JSON.stringify(fetched, null, 2) + '\n');
      console.log(`Updated ${RATES_PATH} with ${Object.keys(fetched.models).length} models (${fetched.last_updated}).`);
    }
    process.exit(0);
  }

  // ── Load rates ──
  // Live rates are only attempted on request; bundled rates are the fallback.
  const liveRates = opts.liveRates ? await fetchLiveRates() : undefined;
  const ratesData = liveRates || loadBundledRates();
  if (!ratesData) {
    console.error('ERROR: No rate data available. Run with --update-rates first.');
    process.exit(1);
  }

  // ── Load telemetry ──
  // Source precedence: --sim-log, --file, piped stdin, default interceptor log.
  let loaded;
  if (opts.simLog) {
    loaded = extractFromSimLog(opts.simLog);
  } else if (opts.file) {
    const text = readFileSync(opts.file, 'utf8');
    loaded = parseJsonLines(text);
  } else if (!process.stdin.isTTY) {
    const piped = await readStdin();
    loaded = parseJsonLines(piped);
  } else if (existsSync(DEFAULT_USAGE_LOG)) {
    // Default: read interceptor usage log
    const text = readFileSync(DEFAULT_USAGE_LOG, 'utf8');
    loaded = parseJsonLines(text);
    if (loaded.length > 0) {
      console.error(`Reading from ${DEFAULT_USAGE_LOG}`);
    }
  } else {
    console.error(`ERROR: No input found. Expected interceptor log at ${DEFAULT_USAGE_LOG}`);
    console.error(' Use --file, --sim-log, or pipe JSON-lines to stdin.');
    printUsage();
    process.exit(1);
  }

  if (!(loaded && loaded.length !== 0)) {
    console.error('ERROR: No telemetry entries found.');
    process.exit(1);
  }

  // ── Apply time filters ──
  const rawEntries = filterByTime(loaded, opts);

  if (rawEntries.length === 0) {
    console.error('ERROR: No entries match the time filter.');
    process.exit(1);
  }

  console.error(`Loaded ${rawEntries.length} telemetry entries.`);

  // ── Normalize and calculate ──
  const entries = rawEntries.map(normalizeEntry);
  const { results, summary } = calculateCosts(entries, ratesData);

  // ── Admin API cross-reference ──
  let adminSummary = null;
  if (opts.adminKey) {
    const span = getTimeWindow(entries);
    if (!span) {
      console.error('WARNING: No timestamps in telemetry; cannot query Admin API.');
    } else {
      console.error(`Querying Admin API for ${span.start.toISOString()} → ${span.end.toISOString()}...`);
      const apiData = await fetchAdminUsage(opts.adminKey, span.start, span.end);
      if (apiData) {
        adminSummary = summarizeAdminData(apiData, ratesData);
      }
    }
  }

  // ── Output ──
  printReport(results, summary, ratesData, adminSummary, opts.format);
}
876
+
877
// Run the CLI; report any uncaught failure on stderr and exit non-zero.
main().catch((err) => {
  // A rejection reason is not guaranteed to be an Error (or even non-null).
  // Reading `.message` blindly would print "undefined" or throw in this handler.
  const reason = err?.message ?? String(err);
  console.error(`FATAL: ${reason}`);
  process.exit(1);
});
@@ -0,0 +1,91 @@
1
+ {
2
+ "last_updated": "2026-04-09",
3
+ "source": "https://platform.claude.com/docs/en/about-claude/pricing",
4
+ "notes": "Auto-fetched from Anthropic docs.",
5
+ "models": {
6
+ "claude-opus-4-6": {
7
+ "input": 5,
8
+ "output": 25,
9
+ "cache_read": 0.5,
10
+ "cache_write_5m": 6.25,
11
+ "cache_write_1h": 10
12
+ },
13
+ "claude-opus-4-5-20251101": {
14
+ "input": 5,
15
+ "output": 25,
16
+ "cache_read": 0.5,
17
+ "cache_write_5m": 6.25,
18
+ "cache_write_1h": 10
19
+ },
20
+ "claude-opus-4-1-20250805": {
21
+ "input": 15,
22
+ "output": 75,
23
+ "cache_read": 1.5,
24
+ "cache_write_5m": 18.75,
25
+ "cache_write_1h": 30
26
+ },
27
+ "claude-opus-4-20250514": {
28
+ "input": 15,
29
+ "output": 75,
30
+ "cache_read": 1.5,
31
+ "cache_write_5m": 18.75,
32
+ "cache_write_1h": 30
33
+ },
34
+ "claude-sonnet-4-6": {
35
+ "input": 3,
36
+ "output": 15,
37
+ "cache_read": 0.3,
38
+ "cache_write_5m": 3.75,
39
+ "cache_write_1h": 6
40
+ },
41
+ "claude-sonnet-4-5-20250929": {
42
+ "input": 3,
43
+ "output": 15,
44
+ "cache_read": 0.3,
45
+ "cache_write_5m": 3.75,
46
+ "cache_write_1h": 6
47
+ },
48
+ "claude-sonnet-4-20250514": {
49
+ "input": 3,
50
+ "output": 15,
51
+ "cache_read": 0.3,
52
+ "cache_write_5m": 3.75,
53
+ "cache_write_1h": 6
54
+ },
55
+ "claude-sonnet-3-7-20250219": {
56
+ "input": 3,
57
+ "output": 15,
58
+ "cache_read": 0.3,
59
+ "cache_write_5m": 3.75,
60
+ "cache_write_1h": 6
61
+ },
62
+ "claude-haiku-4-5-20251001": {
63
+ "input": 1,
64
+ "output": 5,
65
+ "cache_read": 0.1,
66
+ "cache_write_5m": 1.25,
67
+ "cache_write_1h": 2
68
+ },
69
+ "claude-haiku-3-5-20241022": {
70
+ "input": 0.8,
71
+ "output": 4,
72
+ "cache_read": 0.08,
73
+ "cache_write_5m": 1,
74
+ "cache_write_1h": 1.6
75
+ },
76
+ "claude-3-opus-20240229": {
77
+ "input": 15,
78
+ "output": 75,
79
+ "cache_read": 1.5,
80
+ "cache_write_5m": 18.75,
81
+ "cache_write_1h": 30
82
+ },
83
+ "claude-3-haiku-20240307": {
84
+ "input": 0.25,
85
+ "output": 1.25,
86
+ "cache_read": 0.03,
87
+ "cache_write_5m": 0.3,
88
+ "cache_write_1h": 0.5
89
+ }
90
+ }
91
+ }