claude-code-cache-fix 1.4.1 → 1.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +14 -0
- package/package.json +3 -2
- package/preload.mjs +38 -8
- package/tools/cost-report.mjs +880 -0
- package/tools/rates.json +91 -0
package/README.md
CHANGED
|
@@ -118,6 +118,19 @@ Response headers are parsed for `anthropic-ratelimit-unified-5h-utilization` and
|
|
|
118
118
|
|
|
119
119
|
Anthropic applies elevated quota drain rates during weekday peak hours (13:00–19:00 UTC, Mon–Fri). The interceptor detects peak windows and writes `peak_hour: true/false` to `quota-status.json`. See `docs/peak-hours-reference.md` for sources and details.
|
|
120
120
|
|
|
121
|
+
### Usage telemetry and cost reporting
|
|
122
|
+
|
|
123
|
+
The interceptor logs per-call usage data to `~/.claude/usage.jsonl` — one JSON line per API call with model, token counts, and cache breakdown. Use the bundled cost report tool to analyze costs:
|
|
124
|
+
|
|
125
|
+
```bash
|
|
126
|
+
node tools/cost-report.mjs # today's costs from interceptor log
|
|
127
|
+
node tools/cost-report.mjs --date 2026-04-08 # specific date
|
|
128
|
+
node tools/cost-report.mjs --since 2h # last 2 hours
|
|
129
|
+
node tools/cost-report.mjs --admin-key <key> # cross-reference with Admin API
|
|
130
|
+
```
|
|
131
|
+
|
|
132
|
+
Also works with any JSONL containing Anthropic usage fields (`--file`, stdin) — useful for SDK users and proxy setups. See `docs/cost-report.md` for full documentation.
|
|
133
|
+
|
|
121
134
|
## Debug mode
|
|
122
135
|
|
|
123
136
|
Enable debug logging to verify the fix is working:
|
|
@@ -157,6 +170,7 @@ Snapshots are saved to `~/.claude/cache-fix-snapshots/` and diff reports are gen
|
|
|
157
170
|
| `CACHE_FIX_DEBUG` | `0` | Enable debug logging to `~/.claude/cache-fix-debug.log` |
|
|
158
171
|
| `CACHE_FIX_PREFIXDIFF` | `0` | Enable prefix snapshot diffing |
|
|
159
172
|
| `CACHE_FIX_IMAGE_KEEP_LAST` | `0` | Keep images in last N user messages (0 = disabled) |
|
|
173
|
+
| `CACHE_FIX_USAGE_LOG` | `~/.claude/usage.jsonl` | Path for per-call usage telemetry log |
|
|
160
174
|
|
|
161
175
|
## Limitations
|
|
162
176
|
|
package/package.json
CHANGED
|
@@ -1,12 +1,13 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "claude-code-cache-fix",
|
|
3
|
-
"version": "1.4.1",
|
|
3
|
+
"version": "1.5.0",
|
|
4
4
|
"description": "Fixes prompt cache regression in Claude Code that causes up to 20x cost increase on resumed sessions",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"exports": "./preload.mjs",
|
|
7
7
|
"main": "./preload.mjs",
|
|
8
8
|
"files": [
|
|
9
|
-
"preload.mjs"
|
|
9
|
+
"preload.mjs",
|
|
10
|
+
"tools/"
|
|
10
11
|
],
|
|
11
12
|
"engines": {
|
|
12
13
|
"node": ">=18"
|
package/preload.mjs
CHANGED
|
@@ -399,6 +399,7 @@ const DEBUG = process.env.CACHE_FIX_DEBUG === "1";
|
|
|
399
399
|
const PREFIXDIFF = process.env.CACHE_FIX_PREFIXDIFF === "1";
|
|
400
400
|
const LOG_PATH = join(homedir(), ".claude", "cache-fix-debug.log");
|
|
401
401
|
const SNAPSHOT_DIR = join(homedir(), ".claude", "cache-fix-snapshots");
|
|
402
|
+
const USAGE_JSONL = process.env.CACHE_FIX_USAGE_LOG || join(homedir(), ".claude", "usage.jsonl");
|
|
402
403
|
|
|
403
404
|
function debugLog(...args) {
|
|
404
405
|
if (!DEBUG) return;
|
|
@@ -813,10 +814,13 @@ globalThis.fetch = async function (url, options) {
|
|
|
813
814
|
// Non-critical — don't break the response
|
|
814
815
|
}
|
|
815
816
|
|
|
816
|
-
// Clone response to extract TTL tier
|
|
817
|
+
// Clone response to extract TTL tier and usage telemetry from SSE stream.
|
|
818
|
+
// Pass the model from the request so we can log a complete usage record.
|
|
817
819
|
try {
|
|
820
|
+
let reqModel = "unknown";
|
|
821
|
+
try { reqModel = JSON.parse(options?.body)?.model || "unknown"; } catch {}
|
|
818
822
|
const clone = response.clone();
|
|
819
|
-
drainTTLFromClone(clone).catch(() => {});
|
|
823
|
+
drainTTLFromClone(clone, reqModel).catch(() => {});
|
|
820
824
|
} catch {
|
|
821
825
|
// clone() failure is non-fatal
|
|
822
826
|
}
|
|
@@ -837,13 +841,18 @@ globalThis.fetch = async function (url, options) {
|
|
|
837
841
|
* Writes TTL tier to ~/.claude/quota-status.json (merges with existing data)
|
|
838
842
|
* and logs to debug log.
|
|
839
843
|
*/
|
|
840
|
-
async function drainTTLFromClone(clone) {
|
|
844
|
+
async function drainTTLFromClone(clone, model) {
|
|
841
845
|
if (!clone.body) return;
|
|
842
846
|
|
|
843
847
|
const reader = clone.body.getReader();
|
|
844
848
|
const decoder = new TextDecoder();
|
|
845
849
|
let buffer = "";
|
|
846
850
|
|
|
851
|
+
// Accumulate usage across message_start (input/cache) and message_delta (output)
|
|
852
|
+
let startUsage = null;
|
|
853
|
+
let deltaUsage = null;
|
|
854
|
+
let ttlTier = "unknown";
|
|
855
|
+
|
|
847
856
|
try {
|
|
848
857
|
while (true) {
|
|
849
858
|
const { done, value } = await reader.read();
|
|
@@ -862,6 +871,7 @@ async function drainTTLFromClone(clone) {
|
|
|
862
871
|
|
|
863
872
|
if (event.type === "message_start" && event.message?.usage) {
|
|
864
873
|
const u = event.message.usage;
|
|
874
|
+
startUsage = u;
|
|
865
875
|
const cc = u.cache_creation || {};
|
|
866
876
|
const e1h = cc.ephemeral_1h_input_tokens ?? 0;
|
|
867
877
|
const e5m = cc.ephemeral_5m_input_tokens ?? 0;
|
|
@@ -869,8 +879,6 @@ async function drainTTLFromClone(clone) {
|
|
|
869
879
|
const cacheRead = u.cache_read_input_tokens ?? 0;
|
|
870
880
|
|
|
871
881
|
// Determine TTL tier from which ephemeral bucket got tokens
|
|
872
|
-
// When cache is fully warm (no creation), infer tier from previous
|
|
873
|
-
let ttlTier = "unknown";
|
|
874
882
|
if (e1h > 0 && e5m === 0) ttlTier = "1h";
|
|
875
883
|
else if (e5m > 0 && e1h === 0) ttlTier = "5m";
|
|
876
884
|
else if (e1h === 0 && e5m === 0 && cacheCreate === 0) {
|
|
@@ -908,10 +916,11 @@ async function drainTTLFromClone(clone) {
|
|
|
908
916
|
};
|
|
909
917
|
writeFileSync(quotaFile, JSON.stringify(quota, null, 2));
|
|
910
918
|
} catch {}
|
|
919
|
+
}
|
|
911
920
|
|
|
912
|
-
|
|
913
|
-
|
|
914
|
-
|
|
921
|
+
// Capture final usage from message_delta (has output_tokens)
|
|
922
|
+
if (event.type === "message_delta" && event.usage) {
|
|
923
|
+
deltaUsage = event.usage;
|
|
915
924
|
}
|
|
916
925
|
} catch {
|
|
917
926
|
// Skip malformed SSE lines
|
|
@@ -921,4 +930,25 @@ async function drainTTLFromClone(clone) {
|
|
|
921
930
|
} finally {
|
|
922
931
|
try { reader.releaseLock(); } catch {}
|
|
923
932
|
}
|
|
933
|
+
|
|
934
|
+
// Write usage record to JSONL after stream completes
|
|
935
|
+
if (startUsage) {
|
|
936
|
+
try {
|
|
937
|
+
const cc = startUsage.cache_creation || {};
|
|
938
|
+
const record = {
|
|
939
|
+
timestamp: new Date().toISOString(),
|
|
940
|
+
model: model || "unknown",
|
|
941
|
+
input_tokens: startUsage.input_tokens ?? 0,
|
|
942
|
+
output_tokens: deltaUsage?.output_tokens ?? 0,
|
|
943
|
+
cache_read_input_tokens: startUsage.cache_read_input_tokens ?? 0,
|
|
944
|
+
cache_creation_input_tokens: startUsage.cache_creation_input_tokens ?? 0,
|
|
945
|
+
ephemeral_1h_input_tokens: cc.ephemeral_1h_input_tokens ?? 0,
|
|
946
|
+
ephemeral_5m_input_tokens: cc.ephemeral_5m_input_tokens ?? 0,
|
|
947
|
+
ttl_tier: ttlTier,
|
|
948
|
+
};
|
|
949
|
+
appendFileSync(USAGE_JSONL, JSON.stringify(record) + "\n");
|
|
950
|
+
} catch {
|
|
951
|
+
// Non-critical — don't break anything
|
|
952
|
+
}
|
|
953
|
+
}
|
|
924
954
|
}
|
|
@@ -0,0 +1,880 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* cost-report — Calculate Claude API costs from usage telemetry.
|
|
4
|
+
*
|
|
5
|
+
* Input sources (in priority order):
|
|
6
|
+
* 1. Default: reads interceptor usage log at ~/.claude/usage.jsonl
|
|
7
|
+
* 2. --file / -f: any JSONL file (SDK output, proxy captures, etc.)
|
|
8
|
+
* 3. --sim-log: extract from simulation logs (Token telemetry: {...} lines)
|
|
9
|
+
* 4. stdin: pipe JSON-lines from any source
|
|
10
|
+
*
|
|
11
|
+
* Pricing sources (best → fallback):
|
|
12
|
+
* 1. Admin API actual billed usage (--admin-key)
|
|
13
|
+
* 2. Live rates from Anthropic docs (--live-rates)
|
|
14
|
+
* 3. Bundled rates.json (default)
|
|
15
|
+
*
|
|
16
|
+
* Part of claude-code-cache-fix. Works standalone or with the interceptor.
|
|
17
|
+
* https://github.com/cnighswonger/claude-code-cache-fix
|
|
18
|
+
*/
|
|
19
|
+
|
|
20
|
+
import { readFileSync, writeFileSync, existsSync } from 'node:fs';
|
|
21
|
+
import { createInterface } from 'node:readline';
|
|
22
|
+
import { fileURLToPath } from 'node:url';
|
|
23
|
+
import { dirname, join } from 'node:path';
|
|
24
|
+
import { homedir } from 'node:os';
|
|
25
|
+
|
|
26
|
+
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
27
|
+
const RATES_PATH = join(__dirname, 'rates.json');
|
|
28
|
+
const PRICING_URL = 'https://platform.claude.com/docs/en/about-claude/pricing';
|
|
29
|
+
const ADMIN_API_BASE = 'https://api.anthropic.com/v1/organizations/usage_report/messages';
|
|
30
|
+
const DEFAULT_USAGE_LOG = join(homedir(), '.claude', 'usage.jsonl');
|
|
31
|
+
|
|
32
|
+
// ─── CLI parsing ────────────────────────────────────────────────────────────
|
|
33
|
+
|
|
34
|
+
/**
 * Parse the command line (process.argv, minus the node/script entries)
 * into an options object.
 *
 * Flags that take a value consume the following argument verbatim. The
 * first bare (non-dash) argument is treated as the input file, but only
 * while neither a file nor a sim log has been chosen yet. Unknown flags
 * are ignored.
 *
 * @returns {{simLog: ?string, file: ?string, adminKey: (string|undefined|null),
 *   liveRates: boolean, updateRates: boolean, help: boolean,
 *   date: ?string, since: ?string, format: string}}
 */
function parseArgs() {
  const argv = process.argv.slice(2);
  const opts = {
    simLog: null, file: null, adminKey: null,
    liveRates: false, updateRates: false, help: false,
    date: null, since: null, format: 'text',
  };

  // Flags whose value is the next argument -> option key to store it under.
  const valueFlags = new Map([
    ['--sim-log', 'simLog'],
    ['--file', 'file'],
    ['-f', 'file'],
    ['--admin-key', 'adminKey'],
    ['--date', 'date'],
    ['--since', 'since'],
    ['--format', 'format'],
  ]);

  // Flags that simply set an option to a fixed value.
  const presetFlags = new Map([
    ['--live-rates', ['liveRates', true]],
    ['--update-rates', ['updateRates', true]],
    ['--json', ['format', 'json']],
    ['--md', ['format', 'md']],
    ['--markdown', ['format', 'md']],
    ['--help', ['help', true]],
    ['-h', ['help', true]],
  ]);

  let idx = 0;
  while (idx < argv.length) {
    const token = argv[idx];
    if (valueFlags.has(token)) {
      idx += 1;
      opts[valueFlags.get(token)] = argv[idx];
    } else if (presetFlags.has(token)) {
      const [key, value] = presetFlags.get(token);
      opts[key] = value;
    } else if (!token.startsWith('-') && !opts.file && !opts.simLog) {
      // First positional argument doubles as the input file.
      opts.file = token;
    }
    idx += 1;
  }

  // The environment variable is only a fallback for a missing --admin-key.
  if (!opts.adminKey) {
    opts.adminKey = process.env.ANTHROPIC_ADMIN_KEY;
  }
  return opts;
}
|
|
68
|
+
|
|
69
|
+
/**
 * Print the command-line help text to stdout.
 *
 * The text covers invocation examples, input sources, filtering, output
 * and pricing flags, the expected JSONL record shape, and a snippet for
 * SDK users who want to produce a compatible log.
 */
function printUsage() {
  const helpText = `
cost-report — Calculate Claude API costs from usage telemetry.

Usage:
node cost-report.mjs From interceptor log (~/.claude/usage.jsonl)
node cost-report.mjs --date 2026-04-08 Filter to a specific date
node cost-report.mjs --since 2h Filter to last N hours/minutes
node cost-report.mjs --file <path> From any JSONL file
node cost-report.mjs --sim-log <path> From a simulation log
node cost-report.mjs --admin-key <key> Cross-reference with Admin API
cat telemetry.jsonl | node cost-report.mjs From JSON-lines on stdin
node cost-report.mjs --update-rates Refresh bundled rates

Input sources (checked in order):
Default Reads ~/.claude/usage.jsonl (written by the interceptor)
--file, -f <path> Any JSONL file (SDK output, proxy captures, etc.)
--sim-log <path> Extract from simulation logs (Token telemetry lines)
stdin Pipe JSON-lines from any source

Filtering:
--date <YYYY-MM-DD> Show only entries from this date
--since <duration> Show entries from last Nh, Nm, or Nd (e.g. 2h, 30m, 1d)

Output:
--format <fmt> Output format: text (default), json, md
--json Shorthand for --format json
--md, --markdown Shorthand for --format md

Pricing:
--admin-key <key> Anthropic Admin API key for actual billed usage
(or set ANTHROPIC_ADMIN_KEY env var)
--live-rates Fetch current rates from Anthropic docs
--update-rates Fetch and save current rates to rates.json

Input JSON format (one object per line):
Required: model, input_tokens, output_tokens
Optional: cache_read_input_tokens, cache_creation_input_tokens,
ephemeral_1h_input_tokens, ephemeral_5m_input_tokens,
timestamp, preflight_input_tokens, degradation_steps

Example JSONL (as written by the interceptor):
{"timestamp":"2026-04-09T01:23:45Z","model":"claude-sonnet-4-5-20250929","input_tokens":50000,"output_tokens":1200,"cache_read_input_tokens":13000,"cache_creation_input_tokens":0,"ephemeral_1h_input_tokens":0,"ephemeral_5m_input_tokens":0}

For SDK users — log usage from API responses:
const msg = await anthropic.messages.create({...});
fs.appendFileSync('usage.jsonl', JSON.stringify({
timestamp: new Date().toISOString(),
model: msg.model,
...msg.usage
}) + '\\n');
`;
  console.log(helpText);
}
|
|
122
|
+
|
|
123
|
+
// ─── Rates ──────────────────────────────────────────────────────────────────
|
|
124
|
+
|
|
125
|
+
/**
 * Load the rates table bundled next to this script (RATES_PATH).
 *
 * Emits a warning on stderr when the file is missing, unreadable, not a
 * JSON object, carries no usable `last_updated` date, or is more than 30
 * days old. A missing or unusable file yields `null` so the caller can
 * fall back to another pricing source instead of crashing.
 *
 * @returns {?object} Parsed rates data, or null when no usable file exists.
 */
function loadBundledRates() {
  if (!existsSync(RATES_PATH)) {
    console.error('WARNING: No bundled rates.json found. Use --update-rates to create one.');
    return null;
  }

  let data;
  try {
    data = JSON.parse(readFileSync(RATES_PATH, 'utf8'));
  } catch (err) {
    // A truncated or hand-edited file must not take the whole report down.
    console.error(`WARNING: Could not read bundled rates.json: ${err.message}`);
    console.error(' Use --update-rates to recreate it.');
    return null;
  }
  if (data === null || typeof data !== 'object') {
    console.error('WARNING: Bundled rates.json does not contain a JSON object. Use --update-rates to recreate it.');
    return null;
  }

  // Check staleness
  const MS_PER_DAY = 1000 * 60 * 60 * 24;
  const daysSince = (Date.now() - new Date(data.last_updated).getTime()) / MS_PER_DAY;
  if (Number.isNaN(daysSince)) {
    // Without this branch an absent/garbled date would silently pass the age check.
    console.error('WARNING: Bundled rates have no valid last_updated date; they may be out of date.');
    console.error(' Run with --update-rates to refresh, or --live-rates to fetch once.');
  } else if (daysSince > 30) {
    console.error(`WARNING: Bundled rates are ${Math.floor(daysSince)} days old (last updated ${data.last_updated}).`);
    console.error(' Run with --update-rates to refresh, or --live-rates to fetch once.');
  }
  return data;
}
|
|
141
|
+
|
|
142
|
+
/**
 * Download the pricing page (PRICING_URL) and parse it into a rates table.
 *
 * Any failure (network error, non-2xx status, error while parsing) is
 * reported on stderr and turned into a `null` result so the caller can
 * fall back to the bundled rates.
 *
 * @returns {Promise<?object>} Whatever parsePricingPage returns, or null on failure.
 */
async function fetchLiveRates() {
  const warnAndGiveUp = (reason) => {
    console.error(`WARNING: Failed to fetch live rates: ${reason}`);
    console.error(' Falling back to bundled rates.');
    return null;
  };

  try {
    const response = await fetch(PRICING_URL);
    if (response.ok) {
      return parsePricingPage(await response.text());
    }
    throw new Error(`HTTP ${response.status}`);
  } catch (failure) {
    return warnAndGiveUp(failure.message);
  }
}
|
|
154
|
+
|
|
155
|
+
/**
 * Extract per-model rates from the HTML of the pricing docs page.
 *
 * The page is expected to contain table rows of the form
 *   Opus 4.6</td><td ...>$5 / MTok</td><td ...>$6.25 / MTok</td>...
 * i.e. a model display name followed by five "$X / MTok" cells in the
 * order: base input, 5m cache write, 1h cache write, cache read, output.
 *
 * Rows in which any of the five rates fails to parse as a number are
 * skipped, so a malformed cell cannot put NaN into later cost arithmetic.
 *
 * @param {string} html - Raw HTML of the pricing page.
 * @returns {?{last_updated: string, source: string, notes: string, models: object}}
 *   Rates data keyed by API model ID, or null (with a warning on stderr)
 *   when no row could be parsed.
 */
function parsePricingPage(html) {
  // Model name cell followed by five rate cells.
  const rowPattern = /((?:Opus|Sonnet|Haiku)\s+[\d.]+(?:\s*\([^)]*\))?)\s*<\/td>\s*<td[^>]*>\s*\$([\d.]+)\s*\/\s*MTok\s*<\/td>\s*<td[^>]*>\s*\$([\d.]+)\s*\/\s*MTok\s*<\/td>\s*<td[^>]*>\s*\$([\d.]+)\s*\/\s*MTok\s*<\/td>\s*<td[^>]*>\s*\$([\d.]+)\s*\/\s*MTok\s*<\/td>\s*<td[^>]*>\s*\$([\d.]+)\s*\/\s*MTok/g;

  const models = {};

  for (const match of html.matchAll(rowPattern)) {
    // Strip a trailing parenthetical such as "(deprecated)".
    const name = match[1].trim().replace(/\s*\([^)]*\)\s*$/, '').trim();

    const rates = match.slice(2, 7).map((cell) => Number.parseFloat(cell));
    // "[\d.]+" also matches strings like "." or "1.2.3"; reject anything non-numeric.
    if (rates.some((rate) => Number.isNaN(rate))) continue;
    const [input, write5m, write1h, cacheRead, output] = rates;

    // One display name may correspond to several API model IDs.
    for (const id of resolveModelId(name)) {
      models[id] = {
        input,
        output,
        cache_read: cacheRead,
        cache_write_5m: write5m,
        cache_write_1h: write1h,
      };
    }
  }

  if (Object.keys(models).length === 0) {
    console.error('WARNING: Could not parse any model pricing from docs page.');
    console.error(' The page format may have changed. Falling back to bundled rates.');
    return null;
  }

  return {
    last_updated: new Date().toISOString().slice(0, 10),
    source: PRICING_URL,
    notes: 'Auto-fetched from Anthropic docs.',
    models,
  };
}
|
|
213
|
+
|
|
214
|
+
/**
 * Map a pricing-page display name (e.g. "Opus 4.6") to the API model
 * ID(s) that should receive its rates.
 *
 * Claude 3.x models put the version before the family in their API IDs
 * ("claude-3-7-sonnet-…", "claude-3-5-haiku-…"). For those entries the
 * real ID is listed first and the family-first spelling used by earlier
 * versions of this table is kept as a second alias, so rates files keyed
 * either way keep working.
 *
 * Unknown names fall back to a derived ID: lower-cased, with whitespace
 * and dots turned into dashes ("Sonnet 4.7" -> "claude-sonnet-4-7"),
 * since API model IDs use dashes rather than dots.
 *
 * @param {string} displayName - Model name as shown on the pricing page.
 * @returns {string[]} One or more API model IDs.
 */
function resolveModelId(displayName) {
  const map = {
    'Opus 4.6': ['claude-opus-4-6'],
    'Opus 4.5': ['claude-opus-4-5-20251101'],
    'Opus 4.1': ['claude-opus-4-1-20250805'],
    'Opus 4': ['claude-opus-4-20250514'],
    'Opus 3': ['claude-3-opus-20240229'],
    'Sonnet 4.6': ['claude-sonnet-4-6'],
    'Sonnet 4.5': ['claude-sonnet-4-5-20250929'],
    'Sonnet 4': ['claude-sonnet-4-20250514'],
    'Sonnet 3.7': ['claude-3-7-sonnet-20250219', 'claude-sonnet-3-7-20250219'],
    'Haiku 4.5': ['claude-haiku-4-5-20251001'],
    'Haiku 3.5': ['claude-3-5-haiku-20241022', 'claude-haiku-3-5-20241022'],
    'Haiku 3': ['claude-3-haiku-20240307'],
  };

  // Own-property check so names like "constructor" never hit Object.prototype.
  if (Object.hasOwn(map, displayName)) return map[displayName];

  return [`claude-${displayName.toLowerCase().replace(/[\s.]+/g, '-')}`];
}
|
|
232
|
+
|
|
233
|
+
/**
 * Find the rates entry for an API model ID.
 *
 * Candidates are tried from most to least specific, so a short key such
 * as "claude-sonnet-4" can no longer shadow "claude-sonnet-4-5" merely
 * because it appears earlier in the table:
 *   1. exact key;
 *   2. the ID with its trailing -YYYYMMDD snapshot date removed;
 *   3. a stored key of the same family (equal once both dates are removed);
 *   4. the longest stored key that is a prefix of the ID;
 *   5. the first stored key that the ID is a prefix of.
 *
 * @param {?{models: ?Object<string, object>}} ratesData - Rates table.
 * @param {string} modelId - Model ID from a usage record.
 * @returns {?object} Matching rates, or null when nothing matches or the
 *   ID is not a non-empty string.
 */
function lookupRates(ratesData, modelId) {
  if (!ratesData || !ratesData.models) return null;
  // A JSONL record may carry a non-string model; startsWith() would throw on it.
  if (typeof modelId !== 'string' || modelId === '') return null;

  const { models } = ratesData;
  const stripDate = (id) => id.replace(/-\d{8}$/, '');
  const owned = (key) => Object.hasOwn(models, key) && Boolean(models[key]);

  // 1. Direct match
  if (owned(modelId)) return models[modelId];

  // 2. Family match (strip date suffix)
  const family = stripDate(modelId);
  if (family !== modelId && owned(family)) return models[family];

  const keys = Object.keys(models).filter((key) => key !== '' && owned(key));

  // 3. Same family under a different (or missing) snapshot date
  const sibling = keys.find((key) => stripDate(key) === family);
  if (sibling !== undefined) return models[sibling];

  // 4. Longest stored key that prefixes the ID wins over shorter ones
  let longest = null;
  for (const key of keys) {
    if (modelId.startsWith(key) && (longest === null || key.length > longest.length)) {
      longest = key;
    }
  }
  if (longest !== null) return models[longest];

  // 5. Last resort: first stored key that extends the ID
  const extended = keys.find((key) => key.startsWith(modelId));
  return extended !== undefined ? models[extended] : null;
}
|
|
251
|
+
|
|
252
|
+
// ─── Input parsing ──────────────────────────────────────────────────────────
|
|
253
|
+
|
|
254
|
+
/**
 * Pull telemetry objects out of a simulation log.
 *
 * Each line containing "Token telemetry: {...}" contributes the parsed
 * JSON object. When the same line also contains a "[...]" group, the
 * text inside the first such group is stored on the object as
 * `_timestamp`. Lines whose JSON does not parse are skipped. A warning
 * is written to stderr when nothing was found.
 *
 * @param {string} filePath - Path of the log file to read.
 * @returns {object[]} Telemetry objects in file order.
 */
function extractFromSimLog(filePath) {
  const telemetryPattern = /Token telemetry:\s*(\{.+\})/;
  const bracketPattern = /\[([^\]]+)\]/;
  const entries = [];

  readFileSync(filePath, 'utf8').split('\n').forEach((line) => {
    const payload = telemetryPattern.exec(line);
    if (payload === null) return;

    let parsed;
    try {
      parsed = JSON.parse(payload[1]);
    } catch {
      return; // skip malformed
    }

    // Extract timestamp from log line
    const stamp = bracketPattern.exec(line);
    if (stamp !== null) parsed._timestamp = stamp[1];
    entries.push(parsed);
  });

  if (entries.length === 0) {
    console.error('WARNING: No "Token telemetry" entries found in sim log.');
    console.error(' This log may use an older format without structured telemetry.');
  }

  return entries;
}
|
|
279
|
+
|
|
280
|
+
/**
 * Parse JSON-lines text into an array of values.
 *
 * Blank (whitespace-only) lines and lines that are not valid JSON are
 * dropped, as are lines that parse to a falsy value (null, 0, false, "").
 *
 * @param {string} text - Newline-separated JSON documents.
 * @returns {Array} Parsed values in input order.
 */
function parseJsonLines(text) {
  const records = [];
  for (const line of text.split('\n')) {
    if (!line.trim()) continue;

    let value;
    try {
      value = JSON.parse(line);
    } catch {
      continue;
    }
    if (value) records.push(value);
  }
  return records;
}
|
|
286
|
+
|
|
287
|
+
/**
 * Read all of standard input, line by line, until it closes.
 *
 * @returns {Promise<string>} The collected lines joined with "\n".
 */
async function readStdin() {
  const lineReader = createInterface({ input: process.stdin, terminal: false });
  const collected = [];
  for await (const text of lineReader) {
    collected.push(text);
  }
  return collected.join('\n');
}
|
|
293
|
+
|
|
294
|
+
/**
 * Convert one raw usage record into the internal entry shape.
 *
 * Several field spellings are accepted:
 *   - interceptor / SDK records: input_tokens, output_tokens, timestamp
 *   - sim telemetry records: actual_input_tokens, actual_output_tokens,
 *     _timestamp
 * The "actual_" token counts take precedence when both are present.
 * Absent counts become 0; absent optional fields become null, [] or false.
 *
 * @param {object} raw - Parsed JSONL record.
 * @returns {object} Entry with model, timestamp, input_tokens,
 *   output_tokens, cache_read, cache_create, eph_1h, eph_5m, preflight,
 *   degradation, would_have_exceeded and sys_prompt_est.
 */
function normalizeEntry(raw) {
  const model = raw.model || 'unknown';
  const timestamp = raw.timestamp || raw._timestamp || null;

  // Prefer the sim-telemetry "actual_" counts, then the plain counts.
  const inputTokens = raw.actual_input_tokens ?? raw.input_tokens ?? 0;
  const outputTokens = raw.actual_output_tokens ?? raw.output_tokens ?? 0;

  const entry = {
    model,
    timestamp,
    input_tokens: inputTokens,
    output_tokens: outputTokens,
  };
  entry.cache_read = raw.cache_read_input_tokens ?? 0;
  entry.cache_create = raw.cache_creation_input_tokens ?? 0;
  entry.eph_1h = raw.ephemeral_1h_input_tokens ?? 0;
  entry.eph_5m = raw.ephemeral_5m_input_tokens ?? 0;
  entry.preflight = raw.preflight_input_tokens ?? null;
  entry.degradation = raw.degradation_steps ?? [];
  entry.would_have_exceeded = raw.would_have_exceeded ?? false;
  entry.sys_prompt_est = raw.system_prompt_tokens_est ?? null;
  return entry;
}
|
|
314
|
+
|
|
315
|
+
// ─── Admin API ──────────────────────────────────────────────────────────────
|
|
316
|
+
|
|
317
|
+
/**
 * Query the Admin API usage report for a time window, grouped by model
 * in one-hour buckets.
 *
 * The window is widened to whole hours: the start is rounded down and
 * the end is moved to the top of the following hour. Rounding is done in
 * UTC so the boundaries are whole UTC hours regardless of the local time
 * zone (local-time rounding lands on :30 or :45 UTC in zones with a
 * fractional-hour offset).
 *
 * All failures — an unparseable time, a network error, a non-2xx
 * response — are reported on stderr and produce `null`.
 *
 * @param {string} adminKey - Admin API key, sent as the x-api-key header.
 * @param {string|number|Date} startTime - Start of the window (any Date input).
 * @param {string|number|Date} endTime - End of the window (any Date input).
 * @returns {Promise<?object>} Parsed JSON response, or null on failure.
 */
async function fetchAdminUsage(adminKey, startTime, endTime) {
  try {
    // Round start down and end up to hour boundaries (UTC).
    const start = new Date(startTime);
    const end = new Date(endTime);
    if (Number.isNaN(start.getTime()) || Number.isNaN(end.getTime())) {
      // toISOString() would throw a bare RangeError on an invalid date.
      throw new Error('invalid start or end time');
    }
    start.setUTCMinutes(0, 0, 0);
    end.setUTCHours(end.getUTCHours() + 1, 0, 0, 0);

    const url = `${ADMIN_API_BASE}?bucket_width=1h` +
      `&starting_at=${start.toISOString()}` +
      `&ending_at=${end.toISOString()}` +
      `&group_by[]=model`;

    const resp = await fetch(url, {
      headers: {
        'x-api-key': adminKey,
        'anthropic-version': '2023-06-01',
      },
    });
    if (!resp.ok) throw new Error(`HTTP ${resp.status}: ${await resp.text()}`);
    return await resp.json();
  } catch (err) {
    console.error(`WARNING: Admin API query failed: ${err.message}`);
    return null;
  }
}
|
|
343
|
+
|
|
344
|
+
/**
 * Aggregate an Admin API usage report into per-model token totals and
 * price them with the given rates table.
 *
 * Every result row of every bucket is added to its model's totals (rows
 * without a model are filed under "unknown"). A model for which
 * lookupRates finds no rates keeps a cost of 0.
 *
 * @param {{data?: Array<{results?: object[]}>}} apiData - Admin API response.
 * @param {?object} ratesData - Rates table passed through to lookupRates.
 * @returns {{byModel: Object<string, {uncached: number, cache_read: number,
 *   cache_1h: number, cache_5m: number, output: number, cost: number}>,
 *   totalCost: number}}
 */
function summarizeAdminData(apiData, ratesData) {
  const byModel = {};

  // Pass 1: sum token counts per model.
  for (const { results } of (apiData.data || [])) {
    for (const row of (results || [])) {
      const key = row.model || 'unknown';
      byModel[key] ||= { uncached: 0, cache_read: 0, cache_1h: 0, cache_5m: 0, output: 0, cost: 0 };
      const bucketTotals = byModel[key];
      const tiers = row.cache_creation || {};

      bucketTotals.uncached += row.uncached_input_tokens || 0;
      bucketTotals.cache_read += row.cache_read_input_tokens || 0;
      bucketTotals.cache_1h += tiers.ephemeral_1h_input_tokens || 0;
      bucketTotals.cache_5m += tiers.ephemeral_5m_input_tokens || 0;
      bucketTotals.output += row.output_tokens || 0;
    }
  }

  // Pass 2: price each model (rates are per million tokens).
  let totalCost = 0;
  for (const [model, totals] of Object.entries(byModel)) {
    const rates = lookupRates(ratesData, model);
    if (rates) {
      const weighted =
        totals.uncached * rates.input +
        totals.cache_read * rates.cache_read +
        totals.cache_1h * rates.cache_write_1h +
        totals.cache_5m * rates.cache_write_5m +
        totals.output * rates.output;
      totals.cost = weighted / 1_000_000;
    }
    totalCost += totals.cost;
  }

  return { byModel, totalCost };
}
|
|
377
|
+
|
|
378
|
+
// ─── Cost calculation ───────────────────────────────────────────────────────
|
|
379
|
+
|
|
380
|
+
/**
 * Price every normalized usage entry and build an overall summary.
 *
 * Entries whose model has no rates are kept in `results` with
 * `cost: null` and `rateSource: 'missing'`, but are left out of every
 * summary figure. The warning for an unpriced model is printed once per
 * model rather than once per entry, so a long log of such calls does not
 * flood stderr.
 *
 * Cache-write tokens are priced per TTL tier. When an entry reports
 * cache creation but no 1h/5m breakdown, all of it is priced at the 5m
 * rate.
 *
 * A token category with a count of 0 contributes exactly 0 even when the
 * rates entry lacks that rate, so a rates table without, say,
 * `cache_write_1h` cannot turn the whole cost into NaN for calls that
 * wrote nothing to the 1h cache.
 *
 * @param {object[]} entries - Entries as produced by normalizeEntry.
 * @param {?object} ratesData - Rates table passed through to lookupRates.
 * @returns {{results: object[], summary: object}} Per-call results (input
 *   order) and aggregated totals.
 */
function calculateCosts(entries, ratesData) {
  const results = [];
  const summary = {
    calls: 0,
    byModel: {},
    totals: { input: 0, output: 0, cache_read: 0, cache_1h: 0, cache_5m: 0, preflight: 0 },
    totalCost: 0,
    degradedCalls: 0,
    exceededCalls: 0,
    degradationSteps: {},
  };

  const warnedModels = new Set();
  // Rates are per million tokens; skip the multiply when nothing was used.
  const price = (tokens, rate) => (tokens === 0 ? 0 : tokens * rate);

  for (const entry of entries) {
    const rates = lookupRates(ratesData, entry.model);
    if (!rates) {
      if (!warnedModels.has(entry.model)) {
        warnedModels.add(entry.model);
        console.error(`WARNING: No rates found for model "${entry.model}". Skipping cost calculation.`);
      }
      results.push({ ...entry, cost: null, rateSource: 'missing' });
      continue;
    }

    // Determine cache write tier breakdown
    // If telemetry has eph_1h/eph_5m, use those; otherwise assume all cache_create is 5m
    let cw1h = entry.eph_1h;
    let cw5m = entry.eph_5m;
    if (cw1h === 0 && cw5m === 0 && entry.cache_create > 0) {
      // No tier breakdown available; assume 5m (the lower of the two write rates)
      cw5m = entry.cache_create;
    }

    const cost = (
      price(entry.input_tokens, rates.input) +
      price(entry.output_tokens, rates.output) +
      price(entry.cache_read, rates.cache_read) +
      price(cw1h, rates.cache_write_1h) +
      price(cw5m, rates.cache_write_5m)
    ) / 1_000_000;

    results.push({ ...entry, cost, cw1h, cw5m });

    // Accumulate summary
    summary.calls++;
    summary.totals.input += entry.input_tokens;
    summary.totals.output += entry.output_tokens;
    summary.totals.cache_read += entry.cache_read;
    summary.totals.cache_1h += cw1h;
    summary.totals.cache_5m += cw5m;
    if (entry.preflight != null) summary.totals.preflight += entry.preflight;

    if (!summary.byModel[entry.model]) {
      summary.byModel[entry.model] = { calls: 0, cost: 0 };
    }
    summary.byModel[entry.model].calls++;
    summary.byModel[entry.model].cost += cost;

    summary.totalCost += cost;

    if (entry.degradation.length > 0) {
      summary.degradedCalls++;
      for (const step of entry.degradation) {
        summary.degradationSteps[step] = (summary.degradationSteps[step] || 0) + 1;
      }
    }
    if (entry.would_have_exceeded) summary.exceededCalls++;
  }

  return { results, summary };
}
|
|
447
|
+
|
|
448
|
+
// ─── Report formatting ──────────────────────────────────────────────────────
|
|
449
|
+
|
|
450
|
+
/**
 * Format a number for display using en-US conventions (thousands
 * separators).
 *
 * @param {number} n - Value to format.
 * @returns {string} Locale-formatted text.
 */
function fmt(n) {
  const formatted = n.toLocaleString('en-US');
  return formatted;
}
|
|
453
|
+
|
|
454
|
+
/**
 * Format a dollar amount with four decimal places.
 *
 * @param {?number} n - Cost in dollars; null/undefined means "not available".
 * @returns {string} "$X.XXXX", or an N/A placeholder for a missing value.
 */
function fmtCost(n) {
  const missing = n == null;
  return missing ? ' N/A' : `$${n.toFixed(4)}`;
}
|
|
458
|
+
|
|
459
|
+
/**
 * Print the cost report in the requested output format.
 *
 * @param {object[]} results - Per-call results from calculateCosts.
 * @param {object} summary - Aggregated summary from calculateCosts.
 * @param {?object} ratesData - Rates table used for pricing.
 * @param {?object} adminSummary - Admin API summary, if one was fetched.
 * @param {string} format - 'json' or 'md'; anything else prints plain text.
 * @returns {*} Whatever the selected printer returns.
 */
function printReport(results, summary, ratesData, adminSummary, format) {
  switch (format) {
    case 'json':
      return printJsonReport(results, summary, ratesData, adminSummary);
    case 'md':
      return printMarkdownReport(results, summary, ratesData, adminSummary);
    default:
      return printTextReport(results, summary, ratesData, adminSummary);
  }
}
|
|
464
|
+
|
|
465
|
+
// ─── JSON output ────────────────────────────────────────────────────────────
|
|
466
|
+
|
|
467
|
+
/**
 * Print the report to stdout as pretty-printed JSON (2-space indent).
 *
 * The document has `generated`, `pricing`, `calls` and `summary`
 * sections, plus `admin_api` when an Admin API summary is supplied.
 * `degradation_steps` (per call) and `summary.degradation` only appear
 * when there is something to report.
 *
 * @param {object[]} results - Per-call results from calculateCosts.
 * @param {object} summary - Aggregated summary from calculateCosts.
 * @param {?object} ratesData - Rates table used for pricing.
 * @param {?{totalCost: number, byModel: object}} adminSummary - Optional
 *   Admin API summary to compare against.
 */
function printJsonReport(results, summary, ratesData, adminSummary) {
  const describeCall = (call) => {
    const item = {
      timestamp: call.timestamp,
      model: call.model,
      input_tokens: call.input_tokens,
      output_tokens: call.output_tokens,
      cache_read: call.cache_read,
      cache_write_1h: call.cw1h || 0,
      cache_write_5m: call.cw5m || 0,
      cost: call.cost,
    };
    if (call.degradation.length > 0) {
      item.degradation_steps = call.degradation;
    }
    return item;
  };

  const summarySection = {
    total_calls: summary.calls,
    total_cost: summary.totalCost,
    avg_cost_per_call: summary.totalCost / summary.calls,
    tokens: summary.totals,
    by_model: summary.byModel,
  };
  if (summary.degradedCalls > 0) {
    summarySection.degradation = {
      degraded_calls: summary.degradedCalls,
      exceeded_calls: summary.exceededCalls,
      steps: summary.degradationSteps,
    };
  }

  const report = {
    generated: new Date().toISOString(),
    pricing: {
      source: ratesData?.source || 'bundled',
      last_updated: ratesData?.last_updated,
    },
    calls: results.map(describeCall),
    summary: summarySection,
  };

  if (adminSummary) {
    const { totalCost: billed, byModel: billedByModel } = adminSummary;
    report.admin_api = {
      total_cost: billed,
      delta: billed - summary.totalCost,
      by_model: billedByModel,
    };
  }

  console.log(JSON.stringify(report, null, 2));
}
|
|
504
|
+
|
|
505
|
+
// ─── Markdown output ────────────────────────────────────────────────────────
|
|
506
|
+
|
|
507
|
+
/**
 * Print the report to stdout as Markdown.
 *
 * Sections: pricing source, a per-call table (only when there are at
 * most 50 calls), overall summary, per-model breakdown, degradation
 * statistics (only when some call was degraded) and an Admin API
 * comparison (only when `adminSummary` is given).
 *
 * The average cost per call is shown as N/A when no call was priced,
 * instead of "$NaN" from a division by zero. Timestamps and model names
 * are converted to strings before being shortened, so a record with a
 * numeric timestamp or model cannot abort the report.
 *
 * @param {object[]} results - Per-call results from calculateCosts.
 * @param {object} summary - Aggregated summary from calculateCosts.
 * @param {?object} ratesData - Rates table used for pricing.
 * @param {?{totalCost: number, byModel: object}} adminSummary - Optional
 *   Admin API summary to compare against.
 */
function printMarkdownReport(results, summary, ratesData, adminSummary) {
  const rateSource = ratesData?.last_updated ? `rates from ${ratesData.last_updated}` : 'unknown rates';
  const lines = [];

  lines.push('# Claude API Cost Report');
  lines.push('');
  lines.push(`Pricing: ${rateSource} (${ratesData?.source || 'bundled'})`);
  lines.push('');

  // Per-call table (skipped for long logs to keep the report readable)
  if (results.length <= 50) {
    lines.push('## Per-Call Breakdown');
    lines.push('');
    lines.push('| # | Timestamp | Model | Input | Output | Cache Rd | Cache Wr | Cost | Degradation |');
    lines.push('|---|-----------|-------|------:|-------:|---------:|---------:|-----:|-------------|');

    for (let i = 0; i < results.length; i++) {
      const r = results[i];
      // Keep "YYYY-MM-DDTHH:MM:SS" of an ISO timestamp.
      const ts = r.timestamp ? String(r.timestamp).slice(0, 19) : '—';
      // Drop the "claude-" prefix and the trailing snapshot date.
      const modelShort = String(r.model).replace('claude-', '').replace(/-\d{8}$/, '');
      const cacheWr = (r.cw1h || 0) + (r.cw5m || 0);
      const deg = r.degradation.length > 0 ? r.degradation.length + ' steps' : '';
      lines.push(`| ${i + 1} | ${ts} | ${modelShort} | ${fmt(r.input_tokens)} | ${fmt(r.output_tokens)} | ${fmt(r.cache_read)} | ${fmt(cacheWr)} | ${fmtCost(r.cost)} | ${deg} |`);
    }
    lines.push('');
  }

  // Summary
  const avgCost = summary.calls > 0 ? summary.totalCost / summary.calls : null;
  lines.push('## Summary');
  lines.push('');
  lines.push(`| Metric | Value |`);
  lines.push(`|--------|------:|`);
  lines.push(`| Total API calls | ${summary.calls} |`);
  lines.push(`| Total input tokens | ${fmt(summary.totals.input)} |`);
  lines.push(`| Total output tokens | ${fmt(summary.totals.output)} |`);
  lines.push(`| Total cache read | ${fmt(summary.totals.cache_read)} |`);
  lines.push(`| Total cache write 1h | ${fmt(summary.totals.cache_1h)} |`);
  lines.push(`| Total cache write 5m | ${fmt(summary.totals.cache_5m)} |`);
  lines.push(`| **Total cost** | **${fmtCost(summary.totalCost)}** |`);
  lines.push(`| Avg cost per call | ${fmtCost(avgCost)} |`);
  lines.push('');

  // By model
  lines.push('## By Model');
  lines.push('');
  lines.push('| Model | Calls | Cost |');
  lines.push('|-------|------:|-----:|');
  for (const [model, info] of Object.entries(summary.byModel)) {
    lines.push(`| ${model} | ${info.calls} | ${fmtCost(info.cost)} |`);
  }
  lines.push('');

  // Degradation
  if (summary.degradedCalls > 0) {
    lines.push('## Degradation');
    lines.push('');
    lines.push(`Calls with degradation: ${summary.degradedCalls}/${summary.calls}`);
    lines.push('');
    lines.push('| Step | Count |');
    lines.push('|------|------:|');
    // Most frequent step first.
    for (const [step, count] of Object.entries(summary.degradationSteps).sort((a, b) => b[1] - a[1])) {
      lines.push(`| ${step} | ${count}/${summary.calls} |`);
    }
    lines.push('');
  }

  // Admin API
  if (adminSummary) {
    const delta = adminSummary.totalCost - summary.totalCost;
    lines.push('## Admin API (Actual Billed)');
    lines.push('');
    lines.push(`| Source | Cost |`);
    lines.push(`|--------|-----:|`);
    lines.push(`| API-reported | ${fmtCost(adminSummary.totalCost)} |`);
    lines.push(`| Telemetry | ${fmtCost(summary.totalCost)} |`);
    lines.push(`| Delta | ${fmtCost(Math.abs(delta))} (${delta > 0 ? 'API higher' : 'telemetry higher'}) |`);
    lines.push('');
    lines.push('> Note: Admin API reports all usage for the time window, which may include other concurrent API activity.');
    lines.push('');
  }

  console.log(lines.join('\n'));
}
|
|
590
|
+
|
|
591
|
+
// ─── Text output ────────────────────────────────────────────────────────────
|
|
592
|
+
|
|
593
|
+
/**
 * Print the cost report to stdout as fixed-width plain text.
 *
 * Sections, in order: banner with the pricing source, per-call table (only
 * when there are at most 50 results), token summary, per-model totals, cost,
 * degradation (only when `summary.degradedCalls > 0`) and the Admin API
 * comparison (only when `adminSummary` is truthy).
 *
 * @param {Array<object>} results - Per-call rows; reads `timestamp`, `model`,
 *   `input_tokens`, `output_tokens`, `cache_read`, `cw1h`, `cw5m`, `cost`
 *   and `degradation`.
 * @param {object} summary - Aggregates; reads `calls`, `totals`, `totalCost`,
 *   `byModel`, `degradedCalls`, `exceededCalls` and `degradationSteps`.
 * @param {object|null|undefined} ratesData - Rate table; `last_updated` and
 *   `source` label the banner and the object is handed to `lookupRates`.
 * @param {object|null|undefined} adminSummary - Admin API totals
 *   (`totalCost`, `byModel`); falsy to omit that section.
 * @returns {void}
 */
function printTextReport(results, summary, ratesData, adminSummary) {
  const banner = '='.repeat(80);
  const rateSource = ratesData?.last_updated ? `rates from ${ratesData.last_updated}` : 'unknown rates';
  const { totals } = summary;
  // A summary with zero calls would otherwise divide 0 by 0 and print NaN.
  const avgCost = summary.calls > 0 ? summary.totalCost / summary.calls : 0;

  console.log('');
  console.log(banner);
  console.log(' CLAUDE API COST REPORT');
  console.log(banner);
  console.log(` Pricing: ${rateSource} (${ratesData?.source || 'bundled'})`);
  console.log('');

  // ── Per-call table ── (only for runs of at most 50 calls)
  if (results.length <= 50) {
    console.log('─── Per-Call Breakdown ─────────────────────────────────────────────────────────');
    console.log(
      ' #'.padEnd(5) +
      'Timestamp'.padEnd(28) +
      'Model'.padEnd(10) +
      'Input'.padStart(10) +
      'Output'.padStart(9) +
      'CacheRd'.padStart(9) +
      'CacheWr'.padStart(9) +
      'Cost'.padStart(10) +
      ' Degradation'
    );
    console.log(' ' + '─'.repeat(78));

    results.forEach((r, i) => {
      const ts = r.timestamp ? r.timestamp.slice(0, 19) : '—';
      // Tolerate rows without a model name instead of throwing mid-report.
      const modelShort = String(r.model ?? 'unknown')
        .replace('claude-', '')
        .replace(/-\d{8}$/, '')
        .slice(0, 8);
      const cacheWr = (r.cw1h || 0) + (r.cw5m || 0);
      const stepCount = r.degradation?.length ?? 0;
      const deg = stepCount > 0 ? `${stepCount} steps` : '';

      console.log(
        // Pad the index cell to the same width as the ' #' header cell so
        // every following column lines up with its heading.
        ` ${String(i + 1).padStart(2)}`.padEnd(5) +
        ts.padEnd(28) +
        modelShort.padEnd(10) +
        fmt(r.input_tokens).padStart(10) +
        fmt(r.output_tokens).padStart(9) +
        fmt(r.cache_read).padStart(9) +
        fmt(cacheWr).padStart(9) +
        fmtCost(r.cost).padStart(10) +
        ' ' + deg
      );
    });
    console.log('');
  }

  // ── Summary ──
  console.log('─── Summary ────────────────────────────────────────────────────────────────────');
  console.log(` Total API calls: ${summary.calls}`);
  console.log(` Total input tokens: ${fmt(totals.input)}`);
  console.log(` Total output tokens: ${fmt(totals.output)}`);
  console.log(` Total cache read: ${fmt(totals.cache_read)}`);
  console.log(` Total cache write 1h: ${fmt(totals.cache_1h)}`);
  console.log(` Total cache write 5m: ${fmt(totals.cache_5m)}`);
  if (totals.preflight > 0) {
    const saved = totals.preflight - totals.input;
    const pct = (saved / totals.preflight * 100).toFixed(1);
    console.log(` Preflight estimate: ${fmt(totals.preflight)} (degradation saved ${fmt(saved)} tokens, ${pct}%)`);
  }
  console.log('');

  // ── By model ──
  console.log(' By model:');
  for (const [model, info] of Object.entries(summary.byModel)) {
    const modelShort = model.replace('claude-', '');
    console.log(` ${modelShort}: ${info.calls} calls, ${fmtCost(info.cost)}`);
  }
  console.log('');

  // ── Cost ──
  console.log('─── Cost ───────────────────────────────────────────────────────────────────────');
  console.log(` Telemetry-calculated: ${fmtCost(summary.totalCost)}`);
  console.log(` Avg cost per call: ${fmtCost(avgCost)}`);

  // Cache savings estimate: what the cache reads would have cost at the full
  // input rate. Only computed when a single model was used, because the
  // totals are not broken down per model here.
  if (totals.cache_read > 0) {
    const models = Object.keys(summary.byModel);
    if (models.length === 1) {
      const rates = lookupRates(ratesData, models[0]);
      if (rates) {
        const fullCost = totals.cache_read * rates.input / 1_000_000;
        const cacheCost = totals.cache_read * rates.cache_read / 1_000_000;
        const saved = fullCost - cacheCost;
        console.log(` Cache read savings: ${fmtCost(saved)} (${(saved / summary.totalCost * 100).toFixed(1)}% of total)`);
      }
    }
  }
  console.log('');

  // ── Degradation ──
  if (summary.degradedCalls > 0) {
    console.log('─── Degradation ────────────────────────────────────────────────────────────────');
    console.log(` Calls with degradation: ${summary.degradedCalls}/${summary.calls}`);
    console.log(` Budget exceeded: ${summary.exceededCalls}/${summary.calls}`);
    // Most frequent step first.
    const steps = Object.entries(summary.degradationSteps).sort((a, b) => b[1] - a[1]);
    for (const [step, count] of steps) {
      console.log(` ${step}: ${count}/${summary.calls}`);
    }
    console.log('');
  }

  // ── Admin API comparison ──
  if (adminSummary) {
    const delta = adminSummary.totalCost - summary.totalCost;
    // An exact match is neither side being "higher".
    const verdict = delta === 0 ? 'no difference' : delta > 0 ? 'API higher' : 'telemetry higher';

    console.log('─── Admin API (Actual Billed) ──────────────────────────────────────────────────');
    console.log(` API-reported total: ${fmtCost(adminSummary.totalCost)}`);
    console.log(` Telemetry total: ${fmtCost(summary.totalCost)}`);
    console.log(` Delta: ${fmtCost(Math.abs(delta))} (${verdict})`);
    console.log('');
    console.log(' API breakdown by model:');
    for (const [model, m] of Object.entries(adminSummary.byModel)) {
      const modelShort = model.replace('claude-', '');
      console.log(` ${modelShort}:`);
      console.log(` Uncached input: ${fmt(m.uncached)}`);
      console.log(` Cache read: ${fmt(m.cache_read)}`);
      console.log(` Cache write (1h): ${fmt(m.cache_1h)}`);
      console.log(` Cache write (5m): ${fmt(m.cache_5m)}`);
      console.log(` Output: ${fmt(m.output)}`);
      console.log(` Cost: ${fmtCost(m.cost)}`);
    }
    console.log('');
    // Wording matches the markdown report's note.
    console.log(' NOTE: Admin API reports all usage for the time window,');
    console.log(' which may include other concurrent API activity.');
  }

  console.log(banner);
  console.log('');
}
|
|
723
|
+
|
|
724
|
+
// ─── Time window extraction ─────────────────────────────────────────────────
|
|
725
|
+
|
|
726
|
+
/**
 * Find the earliest and latest timestamps across a list of entries.
 *
 * Entries with a falsy `timestamp`, or one that `Date` cannot parse, are
 * ignored so a single malformed line cannot turn the whole window into an
 * Invalid Date.
 *
 * @param {Array<{timestamp?: string|number|Date}>} entries - Entries to scan.
 * @returns {{start: Date, end: Date}|null} The covering window, or `null`
 *   when no entry carries a usable timestamp.
 */
function getTimeWindow(entries) {
  let earliest = Infinity;
  let latest = -Infinity;

  // A single pass instead of Math.min(...all)/Math.max(...all): spreading
  // every entry as a call argument throws RangeError on very large logs.
  for (const entry of entries) {
    if (!entry.timestamp) continue;
    const ms = new Date(entry.timestamp).getTime();
    if (Number.isNaN(ms)) continue;
    if (ms < earliest) earliest = ms;
    if (ms > latest) latest = ms;
  }

  if (earliest === Infinity) return null;

  return {
    start: new Date(earliest),
    end: new Date(latest),
  };
}
|
|
738
|
+
|
|
739
|
+
// ─── Time filtering ─────────────────────────────────────────────────────────
|
|
740
|
+
|
|
741
|
+
/**
 * Convert a relative duration such as `2h`, `30m` or `1d` into the absolute
 * point in time that far before now.
 *
 * Accepted form: a non-negative integer followed by `h` (hours), `m`
 * (minutes) or `d` (days), case-insensitive, with optional whitespace around
 * the value and between number and unit.
 *
 * @param {string} since - Duration text, e.g. the value given to `--since`.
 * @returns {Date|null} The cutoff instant, or `null` when the text is not a
 *   string, does not match the accepted form, or is so large that the
 *   resulting date is out of range.
 */
function parseSinceDuration(since) {
  if (typeof since !== 'string') return null;

  const match = since.trim().match(/^(\d+)\s*(h|m|d)$/i);
  if (!match) return null;

  const MS_PER_UNIT = { h: 3_600_000, m: 60_000, d: 86_400_000 };
  const count = Number.parseInt(match[1], 10);
  const cutoff = new Date(Date.now() - count * MS_PER_UNIT[match[2].toLowerCase()]);

  // An Invalid Date compares false against everything, which would silently
  // disable filtering; report it as unparseable instead.
  return Number.isNaN(cutoff.getTime()) ? null : cutoff;
}
|
|
749
|
+
|
|
750
|
+
/**
 * Restrict entries to a calendar day (`opts.date`) or to a trailing window
 * (`opts.since`). `opts.date` wins when both are set.
 *
 * The day bounds are built from `YYYY-MM-DDT00:00:00` and
 * `YYYY-MM-DDT23:59:59.999` without a zone suffix, so `Date` interprets them
 * in the local time zone. Entries without a timestamp, or with one that
 * cannot be parsed, are always kept.
 *
 * A one-line note is written to stderr when any entry is dropped, and a
 * warning is written when the filter value cannot be parsed (in which case
 * the entries are returned unfiltered).
 *
 * @param {Array<object>} entries - Telemetry entries; reads `timestamp`.
 * @param {{date?: string, since?: string}} opts - Filter options.
 * @returns {Array<object>} The matching entries; the input array itself when
 *   no filter applies or the filter is unparseable.
 */
function filterByTime(entries, opts) {
  if (!opts.date && !opts.since) return entries;

  const isValid = (d) => d instanceof Date && !Number.isNaN(d.getTime());

  let cutoff = null;
  let dateEnd = null;

  if (opts.date) {
    // Filter to a specific date (YYYY-MM-DD)
    cutoff = new Date(`${opts.date}T00:00:00`);
    dateEnd = new Date(`${opts.date}T23:59:59.999`);
    if (!isValid(cutoff) || !isValid(dateEnd)) {
      // Invalid bounds compare false against every timestamp, which would
      // keep everything without telling the user the filter was ignored.
      console.error(`WARNING: Could not parse --date "${opts.date}". Use format YYYY-MM-DD.`);
      return entries;
    }
  } else {
    cutoff = parseSinceDuration(opts.since);
    if (!isValid(cutoff)) {
      console.error(`WARNING: Could not parse --since "${opts.since}". Use format like 2h, 30m, 1d.`);
      return entries;
    }
  }

  const before = entries.length;
  const filtered = entries.filter((e) => {
    if (!e.timestamp) return true; // keep entries without timestamps
    const ts = new Date(e.timestamp);
    // Unparseable timestamps fail both comparisons and are therefore kept.
    if (ts < cutoff) return false;
    if (dateEnd && ts > dateEnd) return false;
    return true;
  });

  if (filtered.length < before) {
    const label = opts.date ? `date ${opts.date}` : `since ${opts.since}`;
    console.error(`Filtered: ${before} → ${filtered.length} entries (${label}).`);
  }

  return filtered;
}
|
|
783
|
+
|
|
784
|
+
// ─── Main ───────────────────────────────────────────────────────────────────
|
|
785
|
+
|
|
786
|
+
/**
 * CLI entry point.
 *
 * Flow: parse arguments; handle `--help` and the update-rates mode (both
 * exit the process); pick a rate table (live when requested and available,
 * otherwise bundled); load telemetry from, in priority order, a sim log, an
 * explicit file, piped stdin, or the default usage log; apply the time
 * filter; normalize and price the entries; optionally cross-reference the
 * Admin API; print the report.
 *
 * Progress and error messages go to stderr. Fatal conditions call
 * `process.exit(1)`.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const cli = parseArgs();

  if (cli.help) {
    printUsage();
    process.exit(0);
  }

  // ── Update rates mode ──
  if (cli.updateRates) {
    console.log(`Fetching rates from ${PRICING_URL}...`);
    const fetched = await fetchLiveRates();
    if (!fetched) {
      console.error('Failed to fetch rates. Bundled rates unchanged.');
      process.exit(1);
    } else {
      writeFileSync(RATES_PATH, JSON.stringify(fetched, null, 2) + '\n');
      console.log(`Updated ${RATES_PATH} with ${Object.keys(fetched.models).length} models (${fetched.last_updated}).`);
    }
    process.exit(0);
  }

  // ── Load rates ── (live rates only on request; bundled as the fallback)
  const ratesData = (cli.liveRates ? await fetchLiveRates() : undefined) || loadBundledRates();
  if (!ratesData) {
    console.error('ERROR: No rate data available. Run with --update-rates first.');
    process.exit(1);
  }

  // ── Load telemetry ──
  const readJsonLinesFile = (path) => parseJsonLines(readFileSync(path, 'utf8'));

  let rawEntries;
  if (cli.simLog) {
    rawEntries = extractFromSimLog(cli.simLog);
  } else if (cli.file) {
    rawEntries = readJsonLinesFile(cli.file);
  } else if (!process.stdin.isTTY) {
    rawEntries = parseJsonLines(await readStdin());
  } else if (existsSync(DEFAULT_USAGE_LOG)) {
    // Default: read interceptor usage log
    rawEntries = readJsonLinesFile(DEFAULT_USAGE_LOG);
    if (rawEntries.length > 0) {
      console.error(`Reading from ${DEFAULT_USAGE_LOG}`);
    }
  } else {
    console.error(`ERROR: No input found. Expected interceptor log at ${DEFAULT_USAGE_LOG}`);
    console.error(' Use --file, --sim-log, or pipe JSON-lines to stdin.');
    printUsage();
    process.exit(1);
  }

  if (!rawEntries || rawEntries.length === 0) {
    console.error('ERROR: No telemetry entries found.');
    process.exit(1);
  }

  // ── Apply time filters ──
  rawEntries = filterByTime(rawEntries, cli);
  if (rawEntries.length === 0) {
    console.error('ERROR: No entries match the time filter.');
    process.exit(1);
  }

  console.error(`Loaded ${rawEntries.length} telemetry entries.`);

  // ── Normalize and calculate ──
  const entries = rawEntries.map(normalizeEntry);
  const { results, summary } = calculateCosts(entries, ratesData);

  // ── Admin API cross-reference ──
  let adminSummary = null;
  if (cli.adminKey) {
    const span = getTimeWindow(entries);
    if (!span) {
      console.error('WARNING: No timestamps in telemetry; cannot query Admin API.');
    } else {
      console.error(`Querying Admin API for ${span.start.toISOString()} → ${span.end.toISOString()}...`);
      const apiData = await fetchAdminUsage(cli.adminKey, span.start, span.end);
      if (apiData) {
        adminSummary = summarizeAdminData(apiData, ratesData);
      }
    }
  }

  // ── Output ──
  printReport(results, summary, ratesData, adminSummary, cli.format);
}
|
|
876
|
+
|
|
877
|
+
// Run the CLI. Any rejection that escapes main() is reported on stderr as a
// single FATAL line and the process exits with status 1.
main().catch((err) => {
  // A rejection is not guaranteed to be an Error: fall back to the value
  // itself so the line never reads "FATAL: undefined", and so a null or
  // undefined rejection cannot throw inside this handler.
  const reason = err?.message ?? String(err);
  console.error(`FATAL: ${reason}`);
  process.exit(1);
});
|
package/tools/rates.json
ADDED
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
{
|
|
2
|
+
"last_updated": "2026-04-09",
|
|
3
|
+
"source": "https://platform.claude.com/docs/en/about-claude/pricing",
|
|
4
|
+
"notes": "Auto-fetched from Anthropic docs.",
|
|
5
|
+
"models": {
|
|
6
|
+
"claude-opus-4-6": {
|
|
7
|
+
"input": 5,
|
|
8
|
+
"output": 25,
|
|
9
|
+
"cache_read": 0.5,
|
|
10
|
+
"cache_write_5m": 6.25,
|
|
11
|
+
"cache_write_1h": 10
|
|
12
|
+
},
|
|
13
|
+
"claude-opus-4-5-20251101": {
|
|
14
|
+
"input": 5,
|
|
15
|
+
"output": 25,
|
|
16
|
+
"cache_read": 0.5,
|
|
17
|
+
"cache_write_5m": 6.25,
|
|
18
|
+
"cache_write_1h": 10
|
|
19
|
+
},
|
|
20
|
+
"claude-opus-4-1-20250805": {
|
|
21
|
+
"input": 15,
|
|
22
|
+
"output": 75,
|
|
23
|
+
"cache_read": 1.5,
|
|
24
|
+
"cache_write_5m": 18.75,
|
|
25
|
+
"cache_write_1h": 30
|
|
26
|
+
},
|
|
27
|
+
"claude-opus-4-20250514": {
|
|
28
|
+
"input": 15,
|
|
29
|
+
"output": 75,
|
|
30
|
+
"cache_read": 1.5,
|
|
31
|
+
"cache_write_5m": 18.75,
|
|
32
|
+
"cache_write_1h": 30
|
|
33
|
+
},
|
|
34
|
+
"claude-sonnet-4-6": {
|
|
35
|
+
"input": 3,
|
|
36
|
+
"output": 15,
|
|
37
|
+
"cache_read": 0.3,
|
|
38
|
+
"cache_write_5m": 3.75,
|
|
39
|
+
"cache_write_1h": 6
|
|
40
|
+
},
|
|
41
|
+
"claude-sonnet-4-5-20250929": {
|
|
42
|
+
"input": 3,
|
|
43
|
+
"output": 15,
|
|
44
|
+
"cache_read": 0.3,
|
|
45
|
+
"cache_write_5m": 3.75,
|
|
46
|
+
"cache_write_1h": 6
|
|
47
|
+
},
|
|
48
|
+
"claude-sonnet-4-20250514": {
|
|
49
|
+
"input": 3,
|
|
50
|
+
"output": 15,
|
|
51
|
+
"cache_read": 0.3,
|
|
52
|
+
"cache_write_5m": 3.75,
|
|
53
|
+
"cache_write_1h": 6
|
|
54
|
+
},
|
|
55
|
+
"claude-sonnet-3-7-20250219": {
|
|
56
|
+
"input": 3,
|
|
57
|
+
"output": 15,
|
|
58
|
+
"cache_read": 0.3,
|
|
59
|
+
"cache_write_5m": 3.75,
|
|
60
|
+
"cache_write_1h": 6
|
|
61
|
+
},
|
|
62
|
+
"claude-haiku-4-5-20251001": {
|
|
63
|
+
"input": 1,
|
|
64
|
+
"output": 5,
|
|
65
|
+
"cache_read": 0.1,
|
|
66
|
+
"cache_write_5m": 1.25,
|
|
67
|
+
"cache_write_1h": 2
|
|
68
|
+
},
|
|
69
|
+
"claude-haiku-3-5-20241022": {
|
|
70
|
+
"input": 0.8,
|
|
71
|
+
"output": 4,
|
|
72
|
+
"cache_read": 0.08,
|
|
73
|
+
"cache_write_5m": 1,
|
|
74
|
+
"cache_write_1h": 1.6
|
|
75
|
+
},
|
|
76
|
+
"claude-3-opus-20240229": {
|
|
77
|
+
"input": 15,
|
|
78
|
+
"output": 75,
|
|
79
|
+
"cache_read": 1.5,
|
|
80
|
+
"cache_write_5m": 18.75,
|
|
81
|
+
"cache_write_1h": 30
|
|
82
|
+
},
|
|
83
|
+
"claude-3-haiku-20240307": {
|
|
84
|
+
"input": 0.25,
|
|
85
|
+
"output": 1.25,
|
|
86
|
+
"cache_read": 0.03,
|
|
87
|
+
"cache_write_5m": 0.3,
|
|
88
|
+
"cache_write_1h": 0.5
|
|
89
|
+
}
|
|
90
|
+
}
|
|
91
|
+
}
|