claude-code-cache-fix 1.4.0 → 1.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +19 -0
- package/package.json +3 -2
- package/preload.mjs +53 -8
- package/tools/cost-report.mjs +880 -0
- package/tools/rates.json +91 -0
package/README.md
CHANGED
|
@@ -114,6 +114,23 @@ On the first API call, the interceptor reads `~/.claude.json` and logs the curre
|
|
|
114
114
|
|
|
115
115
|
Response headers are parsed for `anthropic-ratelimit-unified-5h-utilization` and `7d-utilization`, saved to `~/.claude/quota-status.json` for consumption by status line hooks or other tools.
|
|
116
116
|
|
|
117
|
+
### Peak hour detection
|
|
118
|
+
|
|
119
|
+
Anthropic applies elevated quota drain rates during weekday peak hours (13:00–19:00 UTC, Mon–Fri). The interceptor detects peak windows and writes `peak_hour: true/false` to `quota-status.json`. See `docs/peak-hours-reference.md` for sources and details.
|
|
120
|
+
|
|
121
|
+
### Usage telemetry and cost reporting
|
|
122
|
+
|
|
123
|
+
The interceptor logs per-call usage data to `~/.claude/usage.jsonl` — one JSON line per API call with model, token counts, and cache breakdown. Use the bundled cost report tool to analyze costs:
|
|
124
|
+
|
|
125
|
+
```bash
|
|
126
|
+
node tools/cost-report.mjs # today's costs from interceptor log
|
|
127
|
+
node tools/cost-report.mjs --date 2026-04-08 # specific date
|
|
128
|
+
node tools/cost-report.mjs --since 2h # last 2 hours
|
|
129
|
+
node tools/cost-report.mjs --admin-key <key> # cross-reference with Admin API
|
|
130
|
+
```
|
|
131
|
+
|
|
132
|
+
Also works with any JSONL containing Anthropic usage fields (`--file`, stdin) — useful for SDK users and proxy setups. See `docs/cost-report.md` for full documentation.
|
|
133
|
+
|
|
117
134
|
## Debug mode
|
|
118
135
|
|
|
119
136
|
Enable debug logging to verify the fix is working:
|
|
@@ -133,6 +150,7 @@ Logs are written to `~/.claude/cache-fix-debug.log`. Look for:
|
|
|
133
150
|
- `GROWTHBOOK FLAGS: {...}` — server-controlled feature flags on first call
|
|
134
151
|
- `PROMPT SIZE: system=N tools=N injected=N (skills=N mcp=N ...)` — per-call prompt size breakdown
|
|
135
152
|
- `CACHE TTL: tier=1h create=N read=N hit=N% (1h=N 5m=N)` — TTL tier and cache hit rate per call
|
|
153
|
+
- `PEAK HOUR: weekday 13:00-19:00 UTC` — Anthropic peak hour throttling active
|
|
136
154
|
- `SKIPPED: resume relocation (not a resume or already correct)` — no fix needed
|
|
137
155
|
|
|
138
156
|
### Prefix diff mode
|
|
@@ -152,6 +170,7 @@ Snapshots are saved to `~/.claude/cache-fix-snapshots/` and diff reports are gen
|
|
|
152
170
|
| `CACHE_FIX_DEBUG` | `0` | Enable debug logging to `~/.claude/cache-fix-debug.log` |
|
|
153
171
|
| `CACHE_FIX_PREFIXDIFF` | `0` | Enable prefix snapshot diffing |
|
|
154
172
|
| `CACHE_FIX_IMAGE_KEEP_LAST` | `0` | Keep images in last N user messages (0 = disabled) |
|
|
173
|
+
| `CACHE_FIX_USAGE_LOG` | `~/.claude/usage.jsonl` | Path for per-call usage telemetry log |
|
|
155
174
|
|
|
156
175
|
## Limitations
|
|
157
176
|
|
package/package.json
CHANGED
|
@@ -1,12 +1,13 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "claude-code-cache-fix",
|
|
3
|
-
"version": "1.4.0",
|
|
3
|
+
"version": "1.5.0",
|
|
4
4
|
"description": "Fixes prompt cache regression in Claude Code that causes up to 20x cost increase on resumed sessions",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"exports": "./preload.mjs",
|
|
7
7
|
"main": "./preload.mjs",
|
|
8
8
|
"files": [
|
|
9
|
-
"preload.mjs"
|
|
9
|
+
"preload.mjs",
|
|
10
|
+
"tools/"
|
|
10
11
|
],
|
|
11
12
|
"engines": {
|
|
12
13
|
"node": ">=18"
|
package/preload.mjs
CHANGED
|
@@ -399,6 +399,7 @@ const DEBUG = process.env.CACHE_FIX_DEBUG === "1";
|
|
|
399
399
|
const PREFIXDIFF = process.env.CACHE_FIX_PREFIXDIFF === "1";
|
|
400
400
|
const LOG_PATH = join(homedir(), ".claude", "cache-fix-debug.log");
|
|
401
401
|
const SNAPSHOT_DIR = join(homedir(), ".claude", "cache-fix-snapshots");
|
|
402
|
+
const USAGE_JSONL = process.env.CACHE_FIX_USAGE_LOG || join(homedir(), ".claude", "usage.jsonl");
|
|
402
403
|
|
|
403
404
|
function debugLog(...args) {
|
|
404
405
|
if (!DEBUG) return;
|
|
@@ -792,16 +793,34 @@ globalThis.fetch = async function (url, options) {
|
|
|
792
793
|
quota.seven_day = h7d ? { utilization: parseFloat(h7d), pct: Math.round(parseFloat(h7d) * 100), resets_at: reset7d ? parseInt(reset7d) : null } : quota.seven_day;
|
|
793
794
|
quota.status = status || null;
|
|
794
795
|
quota.overage_status = overage || null;
|
|
796
|
+
|
|
797
|
+
// Peak hour detection — Anthropic applies higher quota drain rate during
|
|
798
|
+
// weekday peak hours: 13:00–19:00 UTC (Mon–Fri).
|
|
799
|
+
// Source: Thariq (Anthropic) via X, 2026-03-26; confirmed by The Register,
|
|
800
|
+
// PCWorld, Piunikaweb. No specific multiplier disclosed.
|
|
801
|
+
const now = new Date();
|
|
802
|
+
const utcHour = now.getUTCHours();
|
|
803
|
+
const utcDay = now.getUTCDay(); // 0=Sun, 6=Sat
|
|
804
|
+
const isPeak = utcDay >= 1 && utcDay <= 5 && utcHour >= 13 && utcHour < 19;
|
|
805
|
+
quota.peak_hour = isPeak;
|
|
806
|
+
|
|
795
807
|
writeFileSync(quotaFile, JSON.stringify(quota, null, 2));
|
|
808
|
+
|
|
809
|
+
if (DEBUG && isPeak) {
|
|
810
|
+
debugLog("PEAK HOUR: weekday 13:00-19:00 UTC — quota drains at elevated rate");
|
|
811
|
+
}
|
|
796
812
|
}
|
|
797
813
|
} catch {
|
|
798
814
|
// Non-critical — don't break the response
|
|
799
815
|
}
|
|
800
816
|
|
|
801
|
-
// Clone response to extract TTL tier
|
|
817
|
+
// Clone response to extract TTL tier and usage telemetry from SSE stream.
|
|
818
|
+
// Pass the model from the request so we can log a complete usage record.
|
|
802
819
|
try {
|
|
820
|
+
let reqModel = "unknown";
|
|
821
|
+
try { reqModel = JSON.parse(options?.body)?.model || "unknown"; } catch {}
|
|
803
822
|
const clone = response.clone();
|
|
804
|
-
drainTTLFromClone(clone).catch(() => {});
|
|
823
|
+
drainTTLFromClone(clone, reqModel).catch(() => {});
|
|
805
824
|
} catch {
|
|
806
825
|
// clone() failure is non-fatal
|
|
807
826
|
}
|
|
@@ -822,13 +841,18 @@ globalThis.fetch = async function (url, options) {
|
|
|
822
841
|
* Writes TTL tier to ~/.claude/quota-status.json (merges with existing data)
|
|
823
842
|
* and logs to debug log.
|
|
824
843
|
*/
|
|
825
|
-
async function drainTTLFromClone(clone) {
|
|
844
|
+
async function drainTTLFromClone(clone, model) {
|
|
826
845
|
if (!clone.body) return;
|
|
827
846
|
|
|
828
847
|
const reader = clone.body.getReader();
|
|
829
848
|
const decoder = new TextDecoder();
|
|
830
849
|
let buffer = "";
|
|
831
850
|
|
|
851
|
+
// Accumulate usage across message_start (input/cache) and message_delta (output)
|
|
852
|
+
let startUsage = null;
|
|
853
|
+
let deltaUsage = null;
|
|
854
|
+
let ttlTier = "unknown";
|
|
855
|
+
|
|
832
856
|
try {
|
|
833
857
|
while (true) {
|
|
834
858
|
const { done, value } = await reader.read();
|
|
@@ -847,6 +871,7 @@ async function drainTTLFromClone(clone) {
|
|
|
847
871
|
|
|
848
872
|
if (event.type === "message_start" && event.message?.usage) {
|
|
849
873
|
const u = event.message.usage;
|
|
874
|
+
startUsage = u;
|
|
850
875
|
const cc = u.cache_creation || {};
|
|
851
876
|
const e1h = cc.ephemeral_1h_input_tokens ?? 0;
|
|
852
877
|
const e5m = cc.ephemeral_5m_input_tokens ?? 0;
|
|
@@ -854,8 +879,6 @@ async function drainTTLFromClone(clone) {
|
|
|
854
879
|
const cacheRead = u.cache_read_input_tokens ?? 0;
|
|
855
880
|
|
|
856
881
|
// Determine TTL tier from which ephemeral bucket got tokens
|
|
857
|
-
// When cache is fully warm (no creation), infer tier from previous
|
|
858
|
-
let ttlTier = "unknown";
|
|
859
882
|
if (e1h > 0 && e5m === 0) ttlTier = "1h";
|
|
860
883
|
else if (e5m > 0 && e1h === 0) ttlTier = "5m";
|
|
861
884
|
else if (e1h === 0 && e5m === 0 && cacheCreate === 0) {
|
|
@@ -893,10 +916,11 @@ async function drainTTLFromClone(clone) {
|
|
|
893
916
|
};
|
|
894
917
|
writeFileSync(quotaFile, JSON.stringify(quota, null, 2));
|
|
895
918
|
} catch {}
|
|
919
|
+
}
|
|
896
920
|
|
|
897
|
-
|
|
898
|
-
|
|
899
|
-
|
|
921
|
+
// Capture final usage from message_delta (has output_tokens)
|
|
922
|
+
if (event.type === "message_delta" && event.usage) {
|
|
923
|
+
deltaUsage = event.usage;
|
|
900
924
|
}
|
|
901
925
|
} catch {
|
|
902
926
|
// Skip malformed SSE lines
|
|
@@ -906,4 +930,25 @@ async function drainTTLFromClone(clone) {
|
|
|
906
930
|
} finally {
|
|
907
931
|
try { reader.releaseLock(); } catch {}
|
|
908
932
|
}
|
|
933
|
+
|
|
934
|
+
// Write usage record to JSONL after stream completes
|
|
935
|
+
if (startUsage) {
|
|
936
|
+
try {
|
|
937
|
+
const cc = startUsage.cache_creation || {};
|
|
938
|
+
const record = {
|
|
939
|
+
timestamp: new Date().toISOString(),
|
|
940
|
+
model: model || "unknown",
|
|
941
|
+
input_tokens: startUsage.input_tokens ?? 0,
|
|
942
|
+
output_tokens: deltaUsage?.output_tokens ?? 0,
|
|
943
|
+
cache_read_input_tokens: startUsage.cache_read_input_tokens ?? 0,
|
|
944
|
+
cache_creation_input_tokens: startUsage.cache_creation_input_tokens ?? 0,
|
|
945
|
+
ephemeral_1h_input_tokens: cc.ephemeral_1h_input_tokens ?? 0,
|
|
946
|
+
ephemeral_5m_input_tokens: cc.ephemeral_5m_input_tokens ?? 0,
|
|
947
|
+
ttl_tier: ttlTier,
|
|
948
|
+
};
|
|
949
|
+
appendFileSync(USAGE_JSONL, JSON.stringify(record) + "\n");
|
|
950
|
+
} catch {
|
|
951
|
+
// Non-critical — don't break anything
|
|
952
|
+
}
|
|
953
|
+
}
|
|
909
954
|
}
|
|
@@ -0,0 +1,880 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* cost-report — Calculate Claude API costs from usage telemetry.
|
|
4
|
+
*
|
|
5
|
+
* Input sources (in priority order):
|
|
6
|
+
* 1. Default: reads interceptor usage log at ~/.claude/usage.jsonl
|
|
7
|
+
* 2. --file / -f: any JSONL file (SDK output, proxy captures, etc.)
|
|
8
|
+
* 3. --sim-log: extract from simulation logs (Token telemetry: {...} lines)
|
|
9
|
+
* 4. stdin: pipe JSON-lines from any source
|
|
10
|
+
*
|
|
11
|
+
* Pricing sources (best → fallback):
|
|
12
|
+
* 1. Admin API actual billed usage (--admin-key)
|
|
13
|
+
* 2. Live rates from Anthropic docs (--live-rates)
|
|
14
|
+
* 3. Bundled rates.json (default)
|
|
15
|
+
*
|
|
16
|
+
* Part of claude-code-cache-fix. Works standalone or with the interceptor.
|
|
17
|
+
* https://github.com/cnighswonger/claude-code-cache-fix
|
|
18
|
+
*/
|
|
19
|
+
|
|
20
|
+
import { readFileSync, writeFileSync, existsSync } from 'node:fs';
|
|
21
|
+
import { createInterface } from 'node:readline';
|
|
22
|
+
import { fileURLToPath } from 'node:url';
|
|
23
|
+
import { dirname, join } from 'node:path';
|
|
24
|
+
import { homedir } from 'node:os';
|
|
25
|
+
|
|
26
|
+
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
27
|
+
const RATES_PATH = join(__dirname, 'rates.json');
|
|
28
|
+
const PRICING_URL = 'https://platform.claude.com/docs/en/about-claude/pricing';
|
|
29
|
+
const ADMIN_API_BASE = 'https://api.anthropic.com/v1/organizations/usage_report/messages';
|
|
30
|
+
const DEFAULT_USAGE_LOG = join(homedir(), '.claude', 'usage.jsonl');
|
|
31
|
+
|
|
32
|
+
// ─── CLI parsing ────────────────────────────────────────────────────────────
|
|
33
|
+
|
|
34
|
+
/**
 * Parse the command line (process.argv) into an options object.
 *
 * Flags that take a value consume the following argument as-is (it is
 * `undefined` when the flag is the last argument). Unknown flags are ignored.
 * The first bare (non-dash) argument is used as the input file, but only when
 * neither a file nor a sim log has been set yet.
 *
 * @returns {{simLog: ?string, file: ?string, adminKey: (string|null|undefined),
 *   liveRates: boolean, updateRates: boolean, help: boolean,
 *   date: ?string, since: ?string, format: string}}
 */
function parseArgs() {
  const argv = process.argv.slice(2);
  const opts = {
    simLog: null, file: null, adminKey: null,
    liveRates: false, updateRates: false, help: false,
    date: null, since: null, format: 'text',
  };

  // Flags whose value is the next argument.
  const valueFlags = new Map([
    ['--sim-log', 'simLog'],
    ['--file', 'file'],
    ['-f', 'file'],
    ['--admin-key', 'adminKey'],
    ['--date', 'date'],
    ['--since', 'since'],
    ['--format', 'format'],
  ]);

  // Flags that set a fixed value on their own.
  const presetFlags = new Map([
    ['--live-rates', ['liveRates', true]],
    ['--update-rates', ['updateRates', true]],
    ['--json', ['format', 'json']],
    ['--md', ['format', 'md']],
    ['--markdown', ['format', 'md']],
    ['--help', ['help', true]],
    ['-h', ['help', true]],
  ]);

  let idx = 0;
  while (idx < argv.length) {
    const token = argv[idx];
    if (valueFlags.has(token)) {
      idx += 1;
      opts[valueFlags.get(token)] = argv[idx];
    } else if (presetFlags.has(token)) {
      const [key, value] = presetFlags.get(token);
      opts[key] = value;
    } else if (!token.startsWith('-') && !opts.file && !opts.simLog) {
      opts.file = token;
    }
    idx += 1;
  }

  // An explicit --admin-key wins; otherwise fall back to the environment.
  opts.adminKey = opts.adminKey || process.env.ANTHROPIC_ADMIN_KEY;
  return opts;
}
|
|
68
|
+
|
|
69
|
+
/**
 * Print the command-line help text to stdout as a single console.log call.
 * The text starts and ends with a newline.
 */
function printUsage() {
  const helpLines = [
    '',
    'cost-report — Calculate Claude API costs from usage telemetry.',
    '',
    'Usage:',
    'node cost-report.mjs From interceptor log (~/.claude/usage.jsonl)',
    'node cost-report.mjs --date 2026-04-08 Filter to a specific date',
    'node cost-report.mjs --since 2h Filter to last N hours/minutes',
    'node cost-report.mjs --file <path> From any JSONL file',
    'node cost-report.mjs --sim-log <path> From a simulation log',
    'node cost-report.mjs --admin-key <key> Cross-reference with Admin API',
    'cat telemetry.jsonl | node cost-report.mjs From JSON-lines on stdin',
    'node cost-report.mjs --update-rates Refresh bundled rates',
    '',
    'Input sources (checked in order):',
    'Default Reads ~/.claude/usage.jsonl (written by the interceptor)',
    '--file, -f <path> Any JSONL file (SDK output, proxy captures, etc.)',
    '--sim-log <path> Extract from simulation logs (Token telemetry lines)',
    'stdin Pipe JSON-lines from any source',
    '',
    'Filtering:',
    '--date <YYYY-MM-DD> Show only entries from this date',
    '--since <duration> Show entries from last Nh, Nm, or Nd (e.g. 2h, 30m, 1d)',
    '',
    'Output:',
    '--format <fmt> Output format: text (default), json, md',
    '--json Shorthand for --format json',
    '--md, --markdown Shorthand for --format md',
    '',
    'Pricing:',
    '--admin-key <key> Anthropic Admin API key for actual billed usage',
    '(or set ANTHROPIC_ADMIN_KEY env var)',
    '--live-rates Fetch current rates from Anthropic docs',
    '--update-rates Fetch and save current rates to rates.json',
    '',
    'Input JSON format (one object per line):',
    'Required: model, input_tokens, output_tokens',
    'Optional: cache_read_input_tokens, cache_creation_input_tokens,',
    'ephemeral_1h_input_tokens, ephemeral_5m_input_tokens,',
    'timestamp, preflight_input_tokens, degradation_steps',
    '',
    'Example JSONL (as written by the interceptor):',
    '{"timestamp":"2026-04-09T01:23:45Z","model":"claude-sonnet-4-5-20250929","input_tokens":50000,"output_tokens":1200,"cache_read_input_tokens":13000,"cache_creation_input_tokens":0,"ephemeral_1h_input_tokens":0,"ephemeral_5m_input_tokens":0}',
    '',
    'For SDK users — log usage from API responses:',
    'const msg = await anthropic.messages.create({...});',
    "fs.appendFileSync('usage.jsonl', JSON.stringify({",
    'timestamp: new Date().toISOString(),',
    'model: msg.model,',
    '...msg.usage',
    "}) + '\\n');",
    '',
  ];
  console.log(helpLines.join('\n'));
}
|
|
122
|
+
|
|
123
|
+
// ─── Rates ──────────────────────────────────────────────────────────────────
|
|
124
|
+
|
|
125
|
+
/**
 * Load the rates file bundled next to this script (RATES_PATH).
 *
 * Prints a warning to stderr when the file is missing, unreadable, not a JSON
 * object, has no parseable `last_updated` date, or is more than 30 days old.
 *
 * @returns {?object} The parsed rates data, or null when no usable rates file
 *   could be loaded.
 */
function loadBundledRates() {
  if (!existsSync(RATES_PATH)) {
    console.error('WARNING: No bundled rates.json found. Use --update-rates to create one.');
    return null;
  }

  let data;
  try {
    data = JSON.parse(readFileSync(RATES_PATH, 'utf8'));
  } catch (err) {
    // A corrupt or unreadable file is treated like a missing one so callers
    // can fall back instead of crashing on a SyntaxError.
    console.error(`WARNING: Could not load bundled rates.json: ${err.message}`);
    console.error(' Run with --update-rates to recreate it.');
    return null;
  }
  if (data === null || typeof data !== 'object') {
    console.error('WARNING: Bundled rates.json does not contain a JSON object.');
    return null;
  }

  // Check staleness. An unparseable date yields NaN, which would otherwise
  // compare false against the threshold and hide the problem.
  const lastUpdatedMs = new Date(data.last_updated).getTime();
  if (Number.isNaN(lastUpdatedMs)) {
    console.error('WARNING: Bundled rates have no valid last_updated date; they may be stale.');
    console.error(' Run with --update-rates to refresh, or --live-rates to fetch once.');
  } else {
    const daysSince = (Date.now() - lastUpdatedMs) / (1000 * 60 * 60 * 24);
    if (daysSince > 30) {
      console.error(`WARNING: Bundled rates are ${Math.floor(daysSince)} days old (last updated ${data.last_updated}).`);
      console.error(' Run with --update-rates to refresh, or --live-rates to fetch once.');
    }
  }
  return data;
}
|
|
141
|
+
|
|
142
|
+
/**
 * Download the pricing page at PRICING_URL and parse it with parsePricingPage.
 *
 * Any failure (network error, non-2xx status, exception while parsing) is
 * reported on stderr and turned into a null result.
 *
 * @returns {Promise<?object>} Whatever parsePricingPage returns, or null on failure.
 */
async function fetchLiveRates() {
  try {
    const response = await fetch(PRICING_URL);
    if (!response.ok) {
      throw new Error(`HTTP ${response.status}`);
    }
    const pageHtml = await response.text();
    return parsePricingPage(pageHtml);
  } catch (failure) {
    const warnings = [
      `WARNING: Failed to fetch live rates: ${failure.message}`,
      ' Falling back to bundled rates.',
    ];
    for (const warning of warnings) console.error(warning);
    return null;
  }
}
|
|
154
|
+
|
|
155
|
+
/**
 * Extract per-model pricing from the HTML of the pricing docs page.
 *
 * Looks for table rows of the form
 *   Opus 4.6</td><td ...>$5 / MTok</td><td ...>$6.25 / MTok</td>...
 * i.e. a model display name followed by five "$X / MTok" cells, read in the
 * order: base input, 5m cache write, 1h cache write, cache read, output.
 * A trailing parenthesised qualifier on the name (e.g. "(deprecated)") is
 * stripped before the name is mapped to API model IDs via resolveModelId.
 *
 * Rows where any of the five rates is not a finite number are skipped, so a
 * malformed cell can never put NaN into the rates table.
 *
 * @param {string} html - Raw HTML of the pricing page.
 * @returns {?{last_updated: string, source: string, notes: string, models: object}}
 *   Rates data keyed by model ID, or null (with a warning on stderr) when no
 *   row could be parsed.
 */
function parsePricingPage(html) {
  const rowPattern = /((?:Opus|Sonnet|Haiku)\s+[\d.]+(?:\s*\([^)]*\))?)\s*<\/td>\s*<td[^>]*>\s*\$([\d.]+)\s*\/\s*MTok\s*<\/td>\s*<td[^>]*>\s*\$([\d.]+)\s*\/\s*MTok\s*<\/td>\s*<td[^>]*>\s*\$([\d.]+)\s*\/\s*MTok\s*<\/td>\s*<td[^>]*>\s*\$([\d.]+)\s*\/\s*MTok\s*<\/td>\s*<td[^>]*>\s*\$([\d.]+)\s*\/\s*MTok/g;
  const models = {};

  for (const match of html.matchAll(rowPattern)) {
    // Strip "(deprecated)" etc.
    const name = match[1].trim().replace(/\s*\([^)]*\)\s*$/, '').trim();

    const cells = match.slice(2, 7).map((cell) => Number.parseFloat(cell));
    // "[\d.]+" also matches text such as "." that does not parse to a number.
    if (!cells.every((rate) => Number.isFinite(rate))) continue;
    const [input, write5m, write1h, cacheRead, output] = cells;

    for (const id of resolveModelId(name)) {
      models[id] = {
        input, output,
        cache_read: cacheRead,
        cache_write_5m: write5m,
        cache_write_1h: write1h,
      };
    }
  }

  if (Object.keys(models).length === 0) {
    console.error('WARNING: Could not parse any model pricing from docs page.');
    console.error(' The page format may have changed. Falling back to bundled rates.');
    return null;
  }

  return {
    last_updated: new Date().toISOString().slice(0, 10),
    source: PRICING_URL,
    notes: 'Auto-fetched from Anthropic docs.',
    models,
  };
}
|
|
213
|
+
|
|
214
|
+
/**
 * Map a pricing-page display name such as "Opus 4.6" to API model IDs.
 *
 * Claude 3.x models use the "claude-<version>-<family>-<date>" ID layout
 * (e.g. "claude-3-7-sonnet-20250219"), while 4.x models use
 * "claude-<family>-<version>[-<date>]". For Sonnet 3.7 and Haiku 3.5 the
 * previously emitted family-first spelling is kept as a second alias so that
 * data keyed by the old spelling still resolves.
 *
 * Unknown names fall back to a best-guess ID: lower-cased, with whitespace and
 * dots turned into hyphens ("Opus 9.9" -> "claude-opus-9-9").
 *
 * @param {string} displayName - Model name as shown on the pricing page.
 * @returns {string[]} One or more model IDs (never empty).
 */
function resolveModelId(displayName) {
  const map = {
    'Opus 4.6': ['claude-opus-4-6'],
    'Opus 4.5': ['claude-opus-4-5-20251101'],
    'Opus 4.1': ['claude-opus-4-1-20250805'],
    'Opus 4': ['claude-opus-4-20250514'],
    'Opus 3': ['claude-3-opus-20240229'],
    'Sonnet 4.6': ['claude-sonnet-4-6'],
    'Sonnet 4.5': ['claude-sonnet-4-5-20250929'],
    'Sonnet 4': ['claude-sonnet-4-20250514'],
    'Sonnet 3.7': ['claude-3-7-sonnet-20250219', 'claude-sonnet-3-7-20250219'],
    'Haiku 4.5': ['claude-haiku-4-5-20251001'],
    'Haiku 3.5': ['claude-3-5-haiku-20241022', 'claude-haiku-3-5-20241022'],
    'Haiku 3': ['claude-3-haiku-20240307'],
  };
  if (Object.hasOwn(map, displayName)) return map[displayName];

  // Model IDs never contain dots, so "4.7" must become "4-7" to be matchable.
  return [`claude-${displayName.toLowerCase().replace(/[\s.]+/g, '-')}`];
}
|
|
232
|
+
|
|
233
|
+
/**
 * Find the rate entry for a model ID in a rates table.
 *
 * Resolution order:
 *   1. Exact key match.
 *   2. Prefix match in either direction (the key is a prefix of the model ID,
 *      or the model ID is a prefix of the key). When several keys qualify, the
 *      one sharing the longest prefix with the model ID wins, so that e.g.
 *      "claude-opus-4-1-20250805" prefers a "claude-opus-4-1" key over a
 *      "claude-opus-4" key regardless of key order. This also covers the
 *      "same family, date suffix stripped" case, because the stripped ID is
 *      itself a prefix of the model ID.
 *
 * @param {?{models: ?Object<string, object>}} ratesData - Rates table.
 * @param {string} modelId - API model ID from a usage record.
 * @returns {?object} The matching rate entry, or null when there is no rates
 *   table, the model ID is not a non-empty string, or nothing matches.
 */
function lookupRates(ratesData, modelId) {
  const models = ratesData?.models;
  if (!models) return null;
  // Usage records come from arbitrary JSONL; a non-string model cannot match.
  if (typeof modelId !== 'string' || modelId === '') return null;

  // Direct match (own keys only, so "constructor" etc. never resolve).
  if (Object.hasOwn(models, modelId)) return models[modelId];

  let bestRates = null;
  let bestOverlap = -1;
  for (const [key, rates] of Object.entries(models)) {
    if (!modelId.startsWith(key) && !key.startsWith(modelId)) continue;
    const overlap = Math.min(key.length, modelId.length);
    if (overlap > bestOverlap) {
      bestRates = rates;
      bestOverlap = overlap;
    }
  }
  return bestRates;
}
|
|
251
|
+
|
|
252
|
+
// ─── Input parsing ──────────────────────────────────────────────────────────
|
|
253
|
+
|
|
254
|
+
/**
 * Pull structured telemetry objects out of a simulation log file.
 *
 * Every line containing `Token telemetry: {...}` contributes the parsed JSON
 * object. When the line also contains a `[...]` group, the text inside the
 * first such group is stored on the object as `_timestamp`. Lines whose JSON
 * does not parse are skipped. A warning is written to stderr when the log
 * yields no entries.
 *
 * @param {string} filePath - Path of the log file (read synchronously as UTF-8).
 * @returns {object[]} Telemetry objects in file order.
 */
function extractFromSimLog(filePath) {
  const telemetryRe = /Token telemetry:\s*(\{.+\})/;
  const bracketRe = /\[([^\]]+)\]/;

  const entries = readFileSync(filePath, 'utf8')
    .split('\n')
    .flatMap((logLine) => {
      const payload = telemetryRe.exec(logLine);
      if (payload === null) return [];
      try {
        const parsed = JSON.parse(payload[1]);
        // Extract timestamp from log line
        const stamp = bracketRe.exec(logLine);
        if (stamp) parsed._timestamp = stamp[1];
        return [parsed];
      } catch {
        // Skip malformed telemetry payloads.
        return [];
      }
    });

  if (entries.length === 0) {
    console.error('WARNING: No "Token telemetry" entries found in sim log.');
    console.error(' This log may use an older format without structured telemetry.');
  }

  return entries;
}
|
|
279
|
+
|
|
280
|
+
/**
 * Parse newline-delimited JSON.
 *
 * Blank lines and lines that are not valid JSON are skipped. Parsed values
 * that are falsy (null, 0, false, "") are dropped as well.
 *
 * @param {string} text - JSON-lines text.
 * @returns {Array} Parsed values in input order.
 */
function parseJsonLines(text) {
  const parsedValues = [];
  for (const rawLine of text.split('\n')) {
    if (!rawLine.trim()) continue;
    let value;
    try {
      value = JSON.parse(rawLine);
    } catch {
      continue; // not JSON — ignore this line
    }
    if (value) parsedValues.push(value);
  }
  return parsedValues;
}
|
|
286
|
+
|
|
287
|
+
/**
 * Read all of standard input line by line.
 *
 * @returns {Promise<string>} The lines read from stdin, joined with "\n".
 */
async function readStdin() {
  const lineReader = createInterface({ input: process.stdin, terminal: false });
  const collected = [];
  for await (const row of lineReader) {
    collected.push(row);
  }
  return collected.join('\n');
}
|
|
293
|
+
|
|
294
|
+
/**
 * Convert one raw usage record into the internal entry shape.
 *
 * Accepted naming conventions:
 *   - Interceptor: input_tokens, output_tokens, timestamp, flat
 *     ephemeral_1h_input_tokens / ephemeral_5m_input_tokens
 *   - Sim telemetry: actual_input_tokens, actual_output_tokens, _timestamp
 *   - SDK usage object: input_tokens, output_tokens, and the cache tier split
 *     nested under `cache_creation` (the same shape the interceptor reads
 *     from the API's message_start usage)
 *
 * Flat ephemeral_* fields take precedence over the nested ones. Without the
 * nested fallback, SDK records that spread `msg.usage` would report no tier
 * split and all cache writes would be priced at the 5m rate.
 *
 * @param {object} raw - One parsed JSONL record.
 * @returns {{model: string, timestamp: ?string, input_tokens: number,
 *   output_tokens: number, cache_read: number, cache_create: number,
 *   eph_1h: number, eph_5m: number, preflight: ?number, degradation: Array,
 *   would_have_exceeded: boolean, sys_prompt_est: ?number}}
 */
function normalizeEntry(raw) {
  const nestedTiers = raw.cache_creation ?? {};
  return {
    model: raw.model || 'unknown',
    timestamp: raw.timestamp || raw._timestamp || null,
    input_tokens: raw.actual_input_tokens ?? raw.input_tokens ?? 0,
    output_tokens: raw.actual_output_tokens ?? raw.output_tokens ?? 0,
    cache_read: raw.cache_read_input_tokens ?? 0,
    cache_create: raw.cache_creation_input_tokens ?? 0,
    eph_1h: raw.ephemeral_1h_input_tokens ?? nestedTiers.ephemeral_1h_input_tokens ?? 0,
    eph_5m: raw.ephemeral_5m_input_tokens ?? nestedTiers.ephemeral_5m_input_tokens ?? 0,
    preflight: raw.preflight_input_tokens ?? null,
    // Downstream code reads .length and iterates this, so it must be an array.
    degradation: Array.isArray(raw.degradation_steps) ? raw.degradation_steps : [],
    would_have_exceeded: raw.would_have_exceeded ?? false,
    sys_prompt_est: raw.system_prompt_tokens_est ?? null,
  };
}
|
|
314
|
+
|
|
315
|
+
// ─── Admin API ──────────────────────────────────────────────────────────────
|
|
316
|
+
|
|
317
|
+
/**
 * Query the Admin API usage report (hourly buckets, grouped by model) for a
 * time window.
 *
 * The window is widened to whole hours: the start is rounded down to the hour
 * and the end is moved to the start of the following hour. Rounding is done
 * in UTC because the request timestamps are sent as UTC ISO strings; with
 * local-time rounding, a time zone with a half-hour offset would produce
 * boundaries that are not on a UTC hour.
 *
 * @param {string} adminKey - Admin API key, sent as the x-api-key header.
 * @param {string|number|Date} startTime - Start of the window (anything the
 *   Date constructor accepts).
 * @param {string|number|Date} endTime - End of the window.
 * @returns {Promise<?object>} Parsed JSON response, or null (with a warning on
 *   stderr) when the dates are invalid or the request fails.
 */
async function fetchAdminUsage(adminKey, startTime, endTime) {
  const start = new Date(startTime);
  const end = new Date(endTime);
  if (Number.isNaN(start.getTime()) || Number.isNaN(end.getTime())) {
    // toISOString() throws a RangeError on an invalid date; report it instead.
    console.error('WARNING: Admin API query failed: invalid start or end time');
    return null;
  }

  // Round start down and end up to hour boundaries
  start.setUTCMinutes(0, 0, 0);
  end.setUTCHours(end.getUTCHours() + 1, 0, 0, 0);

  const url = `${ADMIN_API_BASE}?bucket_width=1h` +
    `&starting_at=${start.toISOString()}` +
    `&ending_at=${end.toISOString()}` +
    `&group_by[]=model`;

  try {
    const resp = await fetch(url, {
      headers: {
        'x-api-key': adminKey,
        'anthropic-version': '2023-06-01',
      },
    });
    if (!resp.ok) throw new Error(`HTTP ${resp.status}: ${await resp.text()}`);
    return await resp.json();
  } catch (err) {
    console.error(`WARNING: Admin API query failed: ${err.message}`);
    return null;
  }
}
|
|
343
|
+
|
|
344
|
+
/**
 * Aggregate an Admin API usage response into per-model token tallies and
 * estimated costs.
 *
 * Token counts from every result row in every bucket are summed per model
 * (rows without a model are grouped under "unknown"). Each model's cost is
 * then computed from the rates found by lookupRates; models without rates
 * keep a cost of 0.
 *
 * @param {{data: ?Array<{results: ?Array<object>}>}} apiData - Admin API response.
 * @param {?object} ratesData - Rates table passed to lookupRates.
 * @returns {{byModel: Object<string, {uncached: number, cache_read: number,
 *   cache_1h: number, cache_5m: number, output: number, cost: number}>,
 *   totalCost: number}}
 */
function summarizeAdminData(apiData, ratesData) {
  const byModel = {};

  // Pass 1: sum token counts per model.
  for (const { results: rows } of (apiData.data || [])) {
    for (const row of (rows || [])) {
      const modelName = row.model || 'unknown';
      byModel[modelName] ||= { uncached: 0, cache_read: 0, cache_1h: 0, cache_5m: 0, output: 0, cost: 0 };
      const tally = byModel[modelName];
      const tiers = row.cache_creation || {};
      tally.uncached += row.uncached_input_tokens || 0;
      tally.cache_read += row.cache_read_input_tokens || 0;
      tally.cache_1h += tiers.ephemeral_1h_input_tokens || 0;
      tally.cache_5m += tiers.ephemeral_5m_input_tokens || 0;
      tally.output += row.output_tokens || 0;
    }
  }

  // Pass 2: price each model's tallies (rates are per million tokens).
  let totalCost = 0;
  for (const [modelName, tally] of Object.entries(byModel)) {
    const rates = lookupRates(ratesData, modelName);
    if (rates) {
      const perMillion =
        tally.uncached * rates.input + tally.cache_read * rates.cache_read +
        tally.cache_1h * rates.cache_write_1h + tally.cache_5m * rates.cache_write_5m +
        tally.output * rates.output;
      tally.cost = perMillion / 1_000_000;
    }
    totalCost += tally.cost;
  }

  return { byModel, totalCost };
}
|
|
377
|
+
|
|
378
|
+
// ─── Cost calculation ───────────────────────────────────────────────────────
|
|
379
|
+
|
|
380
|
+
/**
 * Price a list of normalized usage entries and build an aggregate summary.
 *
 * For each entry the rates are resolved with lookupRates. Entries without
 * rates are reported on stderr, pushed to the results with `cost: null` and
 * `rateSource: 'missing'`, and left out of the summary. Priced entries get
 * `cost`, `cw1h` and `cw5m` added, and contribute to the summary's call
 * count, token totals, per-model totals and degradation counters.
 *
 * Cache-write tiers: the entry's eph_1h / eph_5m values are used as given;
 * when both are 0 but cache_create is positive, all of cache_create is
 * treated as 5m writes.
 *
 * @param {Iterable<object>} entries - Entries shaped like normalizeEntry output.
 * @param {?object} ratesData - Rates table passed to lookupRates (per-million-token rates).
 * @returns {{results: object[], summary: object}}
 */
function calculateCosts(entries, ratesData) {
  const results = [];
  const totals = { input: 0, output: 0, cache_read: 0, cache_1h: 0, cache_5m: 0, preflight: 0 };
  const byModel = {};
  const degradationSteps = {};
  const summary = {
    calls: 0,
    byModel,
    totals,
    totalCost: 0,
    degradedCalls: 0,
    exceededCalls: 0,
    degradationSteps,
  };

  for (const entry of entries) {
    const rates = lookupRates(ratesData, entry.model);
    if (!rates) {
      console.error(`WARNING: No rates found for model "${entry.model}". Skipping cost calculation.`);
      results.push({ ...entry, cost: null, rateSource: 'missing' });
      continue;
    }

    // Determine cache write tier breakdown. With no tier breakdown available,
    // assume 5m (conservative — lower rate).
    const cw1h = entry.eph_1h;
    const lacksTierSplit = entry.eph_1h === 0 && entry.eph_5m === 0;
    const cw5m = lacksTierSplit && entry.cache_create > 0 ? entry.cache_create : entry.eph_5m;

    const perMillion =
      entry.input_tokens * rates.input +
      entry.output_tokens * rates.output +
      entry.cache_read * rates.cache_read +
      cw1h * rates.cache_write_1h +
      cw5m * rates.cache_write_5m;
    const cost = perMillion / 1_000_000;

    results.push({ ...entry, cost, cw1h, cw5m });

    // Accumulate summary
    summary.calls += 1;
    totals.input += entry.input_tokens;
    totals.output += entry.output_tokens;
    totals.cache_read += entry.cache_read;
    totals.cache_1h += cw1h;
    totals.cache_5m += cw5m;
    if (entry.preflight != null) totals.preflight += entry.preflight;

    byModel[entry.model] ||= { calls: 0, cost: 0 };
    const modelTally = byModel[entry.model];
    modelTally.calls += 1;
    modelTally.cost += cost;

    summary.totalCost += cost;

    if (entry.degradation.length > 0) {
      summary.degradedCalls += 1;
      for (const step of entry.degradation) {
        degradationSteps[step] = (degradationSteps[step] || 0) + 1;
      }
    }
    if (entry.would_have_exceeded) summary.exceededCalls += 1;
  }

  return { results, summary };
}
|
|
447
|
+
|
|
448
|
+
// ─── Report formatting ──────────────────────────────────────────────────────
|
|
449
|
+
|
|
450
|
+
/**
 * Format a number for display using en-US conventions (thousands separators).
 *
 * @param {number} n - Value to format.
 * @returns {string} e.g. 1234567 -> "1,234,567".
 */
function fmt(n) {
  const grouped = n.toLocaleString('en-US');
  return grouped;
}
|
|
453
|
+
|
|
454
|
+
/**
 * Format a dollar amount with four decimal places.
 *
 * @param {?number} n - Cost in dollars; null/undefined means "no cost available".
 * @returns {string} "$<amount>" with 4 decimals, or " N/A" when n is null/undefined.
 */
function fmtCost(n) {
  const hasValue = n !== null && n !== undefined;
  if (!hasValue) {
    return ' N/A';
  }
  return '$' + n.toFixed(4);
}
|
|
458
|
+
|
|
459
|
+
/**
 * Dispatch to the report printer for the requested output format.
 *
 * @param {object[]} results - Per-call results.
 * @param {object} summary - Aggregate summary.
 * @param {?object} ratesData - Rates data used for pricing.
 * @param {?object} adminSummary - Admin API summary, if available.
 * @param {string} format - 'json' or 'md'; any other value selects the text report.
 * @returns {*} Whatever the selected printer returns.
 */
function printReport(results, summary, ratesData, adminSummary, format) {
  switch (format) {
    case 'json':
      return printJsonReport(results, summary, ratesData, adminSummary);
    case 'md':
      return printMarkdownReport(results, summary, ratesData, adminSummary);
    default:
      return printTextReport(results, summary, ratesData, adminSummary);
  }
}
|
|
464
|
+
|
|
465
|
+
// ─── JSON output ────────────────────────────────────────────────────────────
|
|
466
|
+
|
|
467
|
+
/**
 * Print the cost report to stdout as pretty-printed (2-space) JSON.
 *
 * The report contains a generation timestamp, the pricing source, one record
 * per call, and a summary. Degradation details are included only when
 * present, and an `admin_api` section (with the difference between the Admin
 * API cost and the computed cost) is added only when adminSummary is given.
 *
 * @param {object[]} results - Per-call results from calculateCosts.
 * @param {object} summary - Summary from calculateCosts.
 * @param {?object} ratesData - Rates data; `source` and `last_updated` are reported.
 * @param {?object} adminSummary - Admin API summary ({ totalCost, byModel }), if any.
 */
function printJsonReport(results, summary, ratesData, adminSummary) {
  // Shape one per-call result for output.
  const toCallRecord = (call) => {
    const hasDegradation = call.degradation.length > 0;
    return {
      timestamp: call.timestamp,
      model: call.model,
      input_tokens: call.input_tokens,
      output_tokens: call.output_tokens,
      cache_read: call.cache_read,
      cache_write_1h: call.cw1h || 0,
      cache_write_5m: call.cw5m || 0,
      cost: call.cost,
      degradation_steps: hasDegradation ? call.degradation : undefined,
    };
  };

  let degradation;
  if (summary.degradedCalls > 0) {
    degradation = {
      degraded_calls: summary.degradedCalls,
      exceeded_calls: summary.exceededCalls,
      steps: summary.degradationSteps,
    };
  }

  const report = {
    generated: new Date().toISOString(),
    pricing: { source: ratesData?.source || 'bundled', last_updated: ratesData?.last_updated },
    calls: results.map(toCallRecord),
    summary: {
      total_calls: summary.calls,
      total_cost: summary.totalCost,
      avg_cost_per_call: summary.totalCost / summary.calls,
      tokens: summary.totals,
      by_model: summary.byModel,
      degradation,
    },
  };

  if (adminSummary) {
    const { totalCost: adminTotal, byModel: adminByModel } = adminSummary;
    report.admin_api = {
      total_cost: adminTotal,
      delta: adminTotal - summary.totalCost,
      by_model: adminByModel,
    };
  }

  console.log(JSON.stringify(report, null, 2));
}
|
|
504
|
+
|
|
505
|
+
// ─── Markdown output ────────────────────────────────────────────────────────
|
|
506
|
+
|
|
507
|
+
/**
 * Print the report to stdout as a Markdown document.
 *
 * Sections: a per-call table (only when there are 50 or fewer results), a summary
 * table, a by-model table, a degradation table (only when `summary.degradedCalls`
 * is positive) and an Admin API comparison (only when `adminSummary` is truthy).
 *
 * @param {Array<object>} results - Per-call records (`timestamp`, `model`, token
 *   counts, `cw1h`, `cw5m`, `cost`, `degradation`).
 * @param {object} summary - Aggregates (`calls`, `totalCost`, `totals`, `byModel`,
 *   `degradedCalls`, `degradationSteps`).
 * @param {object} [ratesData] - Pricing metadata (`last_updated`, `source`).
 * @param {object} [adminSummary] - Optional Admin API summary (`totalCost`).
 * @returns {void}
 */
function printMarkdownReport(results, summary, ratesData, adminSummary) {
  const rateSource = ratesData?.last_updated ? `rates from ${ratesData.last_updated}` : 'unknown rates';
  // Avoid 0 / 0 = NaN in the average when the summary contains no calls.
  const avgCost = summary.calls > 0 ? summary.totalCost / summary.calls : 0;
  const lines = [];

  lines.push('# Claude API Cost Report');
  lines.push('');
  lines.push(`Pricing: ${rateSource} (${ratesData?.source || 'bundled'})`);
  lines.push('');

  // Per-call table
  if (results.length <= 50) {
    lines.push('## Per-Call Breakdown');
    lines.push('');
    lines.push('| # | Timestamp | Model | Input | Output | Cache Rd | Cache Wr | Cost | Degradation |');
    lines.push('|---|-----------|-------|------:|-------:|---------:|---------:|-----:|-------------|');

    for (let i = 0; i < results.length; i++) {
      const r = results[i];
      // Keep the first 19 characters of the timestamp (for ISO strings, date and time to the second).
      const ts = r.timestamp ? r.timestamp.slice(0, 19) : '—';
      // Drop the "claude-" prefix and a trailing 8-digit date suffix.
      const modelShort = r.model.replace('claude-', '').replace(/-\d{8}$/, '');
      const cacheWr = (r.cw1h || 0) + (r.cw5m || 0);
      const deg = r.degradation.length > 0 ? r.degradation.length + ' steps' : '';
      lines.push(`| ${i + 1} | ${ts} | ${modelShort} | ${fmt(r.input_tokens)} | ${fmt(r.output_tokens)} | ${fmt(r.cache_read)} | ${fmt(cacheWr)} | ${fmtCost(r.cost)} | ${deg} |`);
    }
    lines.push('');
  }

  // Summary
  lines.push('## Summary');
  lines.push('');
  lines.push(`| Metric | Value |`);
  lines.push(`|--------|------:|`);
  lines.push(`| Total API calls | ${summary.calls} |`);
  lines.push(`| Total input tokens | ${fmt(summary.totals.input)} |`);
  lines.push(`| Total output tokens | ${fmt(summary.totals.output)} |`);
  lines.push(`| Total cache read | ${fmt(summary.totals.cache_read)} |`);
  lines.push(`| Total cache write 1h | ${fmt(summary.totals.cache_1h)} |`);
  lines.push(`| Total cache write 5m | ${fmt(summary.totals.cache_5m)} |`);
  lines.push(`| **Total cost** | **${fmtCost(summary.totalCost)}** |`);
  lines.push(`| Avg cost per call | ${fmtCost(avgCost)} |`);
  lines.push('');

  // By model
  lines.push('## By Model');
  lines.push('');
  lines.push('| Model | Calls | Cost |');
  lines.push('|-------|------:|-----:|');
  for (const [model, info] of Object.entries(summary.byModel)) {
    lines.push(`| ${model} | ${info.calls} | ${fmtCost(info.cost)} |`);
  }
  lines.push('');

  // Degradation
  if (summary.degradedCalls > 0) {
    lines.push('## Degradation');
    lines.push('');
    lines.push(`Calls with degradation: ${summary.degradedCalls}/${summary.calls}`);
    lines.push('');
    lines.push('| Step | Count |');
    lines.push('|------|------:|');
    // Most frequent degradation steps first.
    for (const [step, count] of Object.entries(summary.degradationSteps).sort((a, b) => b[1] - a[1])) {
      lines.push(`| ${step} | ${count}/${summary.calls} |`);
    }
    lines.push('');
  }

  // Admin API
  if (adminSummary) {
    const delta = adminSummary.totalCost - summary.totalCost;
    // Three-way label: an exact match must not be reported as "telemetry higher".
    let deltaLabel = 'no difference';
    if (delta > 0) deltaLabel = 'API higher';
    else if (delta < 0) deltaLabel = 'telemetry higher';

    lines.push('## Admin API (Actual Billed)');
    lines.push('');
    lines.push(`| Source | Cost |`);
    lines.push(`|--------|-----:|`);
    lines.push(`| API-reported | ${fmtCost(adminSummary.totalCost)} |`);
    lines.push(`| Telemetry | ${fmtCost(summary.totalCost)} |`);
    lines.push(`| Delta | ${fmtCost(Math.abs(delta))} (${deltaLabel}) |`);
    lines.push('');
    lines.push('> Note: Admin API reports all usage for the time window, which may include other concurrent API activity.');
    lines.push('');
  }

  console.log(lines.join('\n'));
}
|
|
590
|
+
|
|
591
|
+
// ─── Text output ────────────────────────────────────────────────────────────
|
|
592
|
+
|
|
593
|
+
/**
 * Print the report to stdout as plain text.
 *
 * Sections: header, per-call table (only when there are 50 or fewer results),
 * token summary (with a preflight line when `summary.totals.preflight` is positive),
 * per-model breakdown, cost (with a cache-read savings estimate when exactly one
 * model was used and its rates can be looked up), degradation (only when
 * `summary.degradedCalls` is positive) and an Admin API comparison (only when
 * `adminSummary` is truthy).
 *
 * @param {Array<object>} results - Per-call records (`timestamp`, `model`, token
 *   counts, `cw1h`, `cw5m`, `cost`, `degradation`).
 * @param {object} summary - Aggregates (`calls`, `totalCost`, `totals`, `byModel`,
 *   `degradedCalls`, `exceededCalls`, `degradationSteps`).
 * @param {object} [ratesData] - Pricing data; also passed to `lookupRates`.
 * @param {object} [adminSummary] - Optional Admin API summary (`totalCost`, `byModel`).
 * @returns {void}
 */
function printTextReport(results, summary, ratesData, adminSummary) {
  const rateSource = ratesData?.last_updated ? `rates from ${ratesData.last_updated}` : 'unknown rates';
  // Avoid 0 / 0 = NaN in the average when the summary contains no calls.
  const avgCost = summary.calls > 0 ? summary.totalCost / summary.calls : 0;

  console.log('');
  console.log('='.repeat(80));
  console.log(' CLAUDE API COST REPORT');
  console.log('='.repeat(80));
  console.log(` Pricing: ${rateSource} (${ratesData?.source || 'bundled'})`);
  console.log('');

  // ── Per-call table ──
  if (results.length <= 50) {
    console.log('─── Per-Call Breakdown ─────────────────────────────────────────────────────────');
    console.log(
      ' #'.padEnd(5) +
      'Timestamp'.padEnd(28) +
      'Model'.padEnd(10) +
      'Input'.padStart(10) +
      'Output'.padStart(9) +
      'CacheRd'.padStart(9) +
      'CacheWr'.padStart(9) +
      'Cost'.padStart(10) +
      ' Degradation'
    );
    console.log(' ' + '─'.repeat(78));

    for (let i = 0; i < results.length; i++) {
      const r = results[i];
      const ts = r.timestamp ? r.timestamp.slice(0, 19) : '—';
      // Strip the "claude-" prefix and date suffix, then clip to fit the 10-wide column.
      const modelShort = r.model.replace('claude-', '').replace(/-\d{8}$/, '').slice(0, 8);
      const cacheWr = (r.cw1h || 0) + (r.cw5m || 0);
      const deg = r.degradation.length > 0 ? r.degradation.length + ' steps' : '';

      console.log(
        ` ${String(i + 1).padStart(2)} ` +
        ts.padEnd(28) +
        modelShort.padEnd(10) +
        fmt(r.input_tokens).padStart(10) +
        fmt(r.output_tokens).padStart(9) +
        fmt(r.cache_read).padStart(9) +
        fmt(cacheWr).padStart(9) +
        fmtCost(r.cost).padStart(10) +
        ' ' + deg
      );
    }
    console.log('');
  }

  // ── Summary ──
  console.log('─── Summary ────────────────────────────────────────────────────────────────────');
  console.log(` Total API calls: ${summary.calls}`);
  console.log(` Total input tokens: ${fmt(summary.totals.input)}`);
  console.log(` Total output tokens: ${fmt(summary.totals.output)}`);
  console.log(` Total cache read: ${fmt(summary.totals.cache_read)}`);
  console.log(` Total cache write 1h: ${fmt(summary.totals.cache_1h)}`);
  console.log(` Total cache write 5m: ${fmt(summary.totals.cache_5m)}`);
  if (summary.totals.preflight > 0) {
    // Difference between the preflight estimate and the input tokens actually recorded.
    const saved = summary.totals.preflight - summary.totals.input;
    const pct = (saved / summary.totals.preflight * 100).toFixed(1);
    console.log(` Preflight estimate: ${fmt(summary.totals.preflight)} (degradation saved ${fmt(saved)} tokens, ${pct}%)`);
  }
  console.log('');

  // ── By model ──
  console.log(' By model:');
  for (const [model, info] of Object.entries(summary.byModel)) {
    const modelShort = model.replace('claude-', '');
    console.log(` ${modelShort}: ${info.calls} calls, ${fmtCost(info.cost)}`);
  }
  console.log('');

  // ── Cost ──
  console.log('─── Cost ───────────────────────────────────────────────────────────────────────');
  console.log(` Telemetry-calculated: ${fmtCost(summary.totalCost)}`);
  console.log(` Avg cost per call: ${fmtCost(avgCost)}`);

  // Cache savings estimate
  if (summary.totals.cache_read > 0) {
    // What cache reads would have cost at full input rate. Only computed when a
    // single model was used, since the totals are not split per model here.
    const models = Object.keys(summary.byModel);
    if (models.length === 1) {
      const rates = lookupRates(ratesData, models[0]);
      if (rates) {
        // Rates are divided by 1,000,000, i.e. treated as price per million tokens.
        const fullCost = summary.totals.cache_read * rates.input / 1_000_000;
        const cacheCost = summary.totals.cache_read * rates.cache_read / 1_000_000;
        const saved = fullCost - cacheCost;
        console.log(` Cache read savings: ${fmtCost(saved)} (${(saved / summary.totalCost * 100).toFixed(1)}% of total)`);
      }
    }
  }
  console.log('');

  // ── Degradation ──
  if (summary.degradedCalls > 0) {
    console.log('─── Degradation ────────────────────────────────────────────────────────────────');
    console.log(` Calls with degradation: ${summary.degradedCalls}/${summary.calls}`);
    console.log(` Budget exceeded: ${summary.exceededCalls}/${summary.calls}`);
    // Most frequent degradation steps first.
    for (const [step, count] of Object.entries(summary.degradationSteps).sort((a, b) => b[1] - a[1])) {
      console.log(` ${step}: ${count}/${summary.calls}`);
    }
    console.log('');
  }

  // ── Admin API comparison ──
  if (adminSummary) {
    console.log('─── Admin API (Actual Billed) ──────────────────────────────────────────────────');
    console.log(` API-reported total: ${fmtCost(adminSummary.totalCost)}`);
    console.log(` Telemetry total: ${fmtCost(summary.totalCost)}`);
    const delta = adminSummary.totalCost - summary.totalCost;
    // Three-way label: an exact match must not be reported as "telemetry higher".
    let deltaLabel = 'no difference';
    if (delta > 0) deltaLabel = 'API higher';
    else if (delta < 0) deltaLabel = 'telemetry higher';
    console.log(` Delta: ${fmtCost(Math.abs(delta))} (${deltaLabel})`);
    console.log('');
    console.log(' API breakdown by model:');
    for (const [model, m] of Object.entries(adminSummary.byModel)) {
      const modelShort = model.replace('claude-', '');
      console.log(` ${modelShort}:`);
      console.log(` Uncached input: ${fmt(m.uncached)}`);
      console.log(` Cache read: ${fmt(m.cache_read)}`);
      console.log(` Cache write (1h): ${fmt(m.cache_1h)}`);
      console.log(` Cache write (5m): ${fmt(m.cache_5m)}`);
      console.log(` Output: ${fmt(m.output)}`);
      console.log(` Cost: ${fmtCost(m.cost)}`);
    }
    console.log('');
    console.log(' NOTE: Admin API reports all usage for the sim\'s time window,');
    console.log(' which may include other concurrent API activity.');
  }

  console.log('='.repeat(80));
  console.log('');
}
|
|
723
|
+
|
|
724
|
+
// ─── Time window extraction ─────────────────────────────────────────────────
|
|
725
|
+
|
|
726
|
+
/**
 * Compute the earliest and latest timestamp found in `entries`.
 *
 * Entries without a `timestamp`, or whose timestamp cannot be parsed by `Date`,
 * are ignored so that one malformed record cannot turn the whole window into an
 * Invalid Date (which would make a later `toISOString()` throw).
 *
 * @param {Array<{timestamp?: string|number|Date}>} entries - Telemetry entries.
 * @returns {{start: Date, end: Date}|null} The covering window, or null when no
 *   entry carries a usable timestamp.
 */
function getTimeWindow(entries) {
  let earliest = Infinity;
  let latest = -Infinity;

  // Single pass instead of Math.min(...array): spreading a very large array into
  // call arguments can exceed the engine's argument limit.
  for (const entry of entries) {
    if (!entry.timestamp) continue;
    const ms = new Date(entry.timestamp).getTime();
    if (Number.isNaN(ms)) continue; // unparseable timestamp
    if (ms < earliest) earliest = ms;
    if (ms > latest) latest = ms;
  }

  if (earliest === Infinity) return null;

  return {
    start: new Date(earliest),
    end: new Date(latest),
  };
}
|
|
738
|
+
|
|
739
|
+
// ─── Time filtering ─────────────────────────────────────────────────────────
|
|
740
|
+
|
|
741
|
+
/**
 * Turn a relative duration such as "2h", "30m" or "1d" into the absolute point in
 * time that lies that far before now.
 *
 * The unit letter is case-insensitive and may be separated from the number by
 * whitespace; only hours (h), minutes (m) and days (d) are recognised.
 *
 * @param {string} since - Duration expression.
 * @returns {Date|null} `Date.now()` minus the duration, or null when `since` does
 *   not match the expected shape.
 */
function parseSinceDuration(since) {
  const parts = since.match(/^(\d+)\s*(h|m|d)$/i);
  if (parts === null) return null;

  const [, digits, unitLetter] = parts;
  // Milliseconds per supported unit.
  const msPerUnit = { h: 3600000, m: 60000, d: 86400000 };
  const span = Number.parseInt(digits, 10) * msPerUnit[unitLetter.toLowerCase()];

  return new Date(Date.now() - span);
}
|
|
749
|
+
|
|
750
|
+
/**
 * Restrict telemetry entries to a calendar date (`opts.date`) or to a trailing
 * duration (`opts.since`). `opts.date` takes precedence when both are given.
 *
 * Entries without a `timestamp` are always kept. A filter that cannot be parsed
 * produces a warning on stderr and leaves the entries unfiltered; previously an
 * unparseable `--date` was ignored silently because comparisons against an
 * Invalid Date are always false.
 *
 * @param {Array<object>} entries - Telemetry entries, optionally with `timestamp`.
 * @param {{date?: string, since?: string}} opts - Filter options.
 * @returns {Array<object>} `entries` itself when no (valid) filter applies,
 *   otherwise a new filtered array.
 */
function filterByTime(entries, opts) {
  if (!opts.date && !opts.since) return entries;

  let cutoff = null;
  let dateEnd = null;

  if (opts.date) {
    // Filter to a specific date (YYYY-MM-DD). The strings carry no UTC offset, so
    // the Date constructor interprets them in the local time zone.
    cutoff = new Date(opts.date + 'T00:00:00');
    dateEnd = new Date(opts.date + 'T23:59:59.999');
    if (Number.isNaN(cutoff.getTime()) || Number.isNaN(dateEnd.getTime())) {
      console.error(`WARNING: Could not parse --date "${opts.date}". Use format YYYY-MM-DD.`);
      return entries;
    }
  } else if (opts.since) {
    cutoff = parseSinceDuration(opts.since);
    if (!cutoff) {
      console.error(`WARNING: Could not parse --since "${opts.since}". Use format like 2h, 30m, 1d.`);
      return entries;
    }
  }

  const before = entries.length;
  const filtered = entries.filter(e => {
    if (!e.timestamp) return true; // keep entries without timestamps
    const ts = new Date(e.timestamp);
    if (cutoff && ts < cutoff) return false;
    if (dateEnd && ts > dateEnd) return false;
    return true;
  });

  // Only report when the filter actually removed something.
  if (filtered.length < before) {
    console.error(`Filtered: ${before} → ${filtered.length} entries (${opts.date ? 'date ' + opts.date : 'since ' + opts.since}).`);
  }

  return filtered;
}
|
|
783
|
+
|
|
784
|
+
// ─── Main ───────────────────────────────────────────────────────────────────
|
|
785
|
+
|
|
786
|
+
/**
 * CLI entry point.
 *
 * In order: parse arguments; handle --help and the update-rates mode (both exit);
 * load pricing (live when requested, bundled otherwise); load telemetry from a sim
 * log, a file, stdin or the default interceptor log; apply the time filter;
 * compute costs; optionally cross-reference the Admin API; print the report in the
 * requested format. Fatal conditions are reported on stderr and exit with status 1.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const opts = parseArgs();

  if (opts.help) {
    printUsage();
    process.exit(0);
  }

  // ── Update rates mode ──
  if (opts.updateRates) {
    console.log(`Fetching rates from ${PRICING_URL}...`);
    const fetched = await fetchLiveRates();
    if (!fetched) {
      console.error('Failed to fetch rates. Bundled rates unchanged.');
      process.exit(1);
    } else {
      writeFileSync(RATES_PATH, JSON.stringify(fetched, null, 2) + '\n');
      console.log(`Updated ${RATES_PATH} with ${Object.keys(fetched.models).length} models (${fetched.last_updated}).`);
    }
    process.exit(0);
  }

  // ── Load rates ──
  // Live rates are tried only on request; bundled rates are the fallback.
  let ratesData;
  if (opts.liveRates) {
    ratesData = await fetchLiveRates();
  }
  if (!ratesData) {
    ratesData = loadBundledRates();
  }
  if (!ratesData) {
    console.error('ERROR: No rate data available. Run with --update-rates first.');
    process.exit(1);
  }

  // ── Load telemetry ──
  // Source precedence: --sim-log, --file, piped stdin, default interceptor log.
  let rawEntries;
  if (opts.simLog) {
    rawEntries = extractFromSimLog(opts.simLog);
  } else if (opts.file) {
    const fileText = readFileSync(opts.file, 'utf8');
    rawEntries = parseJsonLines(fileText);
  } else if (!process.stdin.isTTY) {
    const piped = await readStdin();
    rawEntries = parseJsonLines(piped);
  } else if (existsSync(DEFAULT_USAGE_LOG)) {
    // Default: read interceptor usage log
    const logText = readFileSync(DEFAULT_USAGE_LOG, 'utf8');
    rawEntries = parseJsonLines(logText);
    if (rawEntries.length > 0) {
      console.error(`Reading from ${DEFAULT_USAGE_LOG}`);
    }
  } else {
    console.error(`ERROR: No input found. Expected interceptor log at ${DEFAULT_USAGE_LOG}`);
    console.error(' Use --file, --sim-log, or pipe JSON-lines to stdin.');
    printUsage();
    process.exit(1);
  }

  if (!rawEntries || rawEntries.length === 0) {
    console.error('ERROR: No telemetry entries found.');
    process.exit(1);
  }

  // ── Apply time filters ──
  rawEntries = filterByTime(rawEntries, opts);

  if (rawEntries.length === 0) {
    console.error('ERROR: No entries match the time filter.');
    process.exit(1);
  }

  // Status messages go to stderr; the report itself is printed on stdout.
  console.error(`Loaded ${rawEntries.length} telemetry entries.`);

  // ── Normalize and calculate ──
  const entries = rawEntries.map(normalizeEntry);
  const { results, summary } = calculateCosts(entries, ratesData);

  // ── Admin API cross-reference ──
  let adminSummary = null;
  if (opts.adminKey) {
    const span = getTimeWindow(entries);
    if (!span) {
      console.error('WARNING: No timestamps in telemetry; cannot query Admin API.');
    } else {
      console.error(`Querying Admin API for ${span.start.toISOString()} → ${span.end.toISOString()}...`);
      const adminUsage = await fetchAdminUsage(opts.adminKey, span.start, span.end);
      if (adminUsage) {
        adminSummary = summarizeAdminData(adminUsage, ratesData);
      }
    }
  }

  // ── Output ──
  printReport(results, summary, ratesData, adminSummary, opts.format);
}
|
|
876
|
+
|
|
877
|
+
// Run the CLI; any rejection escaping main() is reported on stderr and the
// process exits with status 1.
main().catch((fatal) => {
  console.error(`FATAL: ${fatal.message}`);
  process.exit(1);
});
|
package/tools/rates.json
ADDED
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
{
|
|
2
|
+
"last_updated": "2026-04-09",
|
|
3
|
+
"source": "https://platform.claude.com/docs/en/about-claude/pricing",
|
|
4
|
+
"notes": "Auto-fetched from Anthropic docs.",
|
|
5
|
+
"models": {
|
|
6
|
+
"claude-opus-4-6": {
|
|
7
|
+
"input": 5,
|
|
8
|
+
"output": 25,
|
|
9
|
+
"cache_read": 0.5,
|
|
10
|
+
"cache_write_5m": 6.25,
|
|
11
|
+
"cache_write_1h": 10
|
|
12
|
+
},
|
|
13
|
+
"claude-opus-4-5-20251101": {
|
|
14
|
+
"input": 5,
|
|
15
|
+
"output": 25,
|
|
16
|
+
"cache_read": 0.5,
|
|
17
|
+
"cache_write_5m": 6.25,
|
|
18
|
+
"cache_write_1h": 10
|
|
19
|
+
},
|
|
20
|
+
"claude-opus-4-1-20250805": {
|
|
21
|
+
"input": 15,
|
|
22
|
+
"output": 75,
|
|
23
|
+
"cache_read": 1.5,
|
|
24
|
+
"cache_write_5m": 18.75,
|
|
25
|
+
"cache_write_1h": 30
|
|
26
|
+
},
|
|
27
|
+
"claude-opus-4-20250514": {
|
|
28
|
+
"input": 15,
|
|
29
|
+
"output": 75,
|
|
30
|
+
"cache_read": 1.5,
|
|
31
|
+
"cache_write_5m": 18.75,
|
|
32
|
+
"cache_write_1h": 30
|
|
33
|
+
},
|
|
34
|
+
"claude-sonnet-4-6": {
|
|
35
|
+
"input": 3,
|
|
36
|
+
"output": 15,
|
|
37
|
+
"cache_read": 0.3,
|
|
38
|
+
"cache_write_5m": 3.75,
|
|
39
|
+
"cache_write_1h": 6
|
|
40
|
+
},
|
|
41
|
+
"claude-sonnet-4-5-20250929": {
|
|
42
|
+
"input": 3,
|
|
43
|
+
"output": 15,
|
|
44
|
+
"cache_read": 0.3,
|
|
45
|
+
"cache_write_5m": 3.75,
|
|
46
|
+
"cache_write_1h": 6
|
|
47
|
+
},
|
|
48
|
+
"claude-sonnet-4-20250514": {
|
|
49
|
+
"input": 3,
|
|
50
|
+
"output": 15,
|
|
51
|
+
"cache_read": 0.3,
|
|
52
|
+
"cache_write_5m": 3.75,
|
|
53
|
+
"cache_write_1h": 6
|
|
54
|
+
},
|
|
55
|
+
"claude-sonnet-3-7-20250219": {
|
|
56
|
+
"input": 3,
|
|
57
|
+
"output": 15,
|
|
58
|
+
"cache_read": 0.3,
|
|
59
|
+
"cache_write_5m": 3.75,
|
|
60
|
+
"cache_write_1h": 6
|
|
61
|
+
},
|
|
62
|
+
"claude-haiku-4-5-20251001": {
|
|
63
|
+
"input": 1,
|
|
64
|
+
"output": 5,
|
|
65
|
+
"cache_read": 0.1,
|
|
66
|
+
"cache_write_5m": 1.25,
|
|
67
|
+
"cache_write_1h": 2
|
|
68
|
+
},
|
|
69
|
+
"claude-haiku-3-5-20241022": {
|
|
70
|
+
"input": 0.8,
|
|
71
|
+
"output": 4,
|
|
72
|
+
"cache_read": 0.08,
|
|
73
|
+
"cache_write_5m": 1,
|
|
74
|
+
"cache_write_1h": 1.6
|
|
75
|
+
},
|
|
76
|
+
"claude-3-opus-20240229": {
|
|
77
|
+
"input": 15,
|
|
78
|
+
"output": 75,
|
|
79
|
+
"cache_read": 1.5,
|
|
80
|
+
"cache_write_5m": 18.75,
|
|
81
|
+
"cache_write_1h": 30
|
|
82
|
+
},
|
|
83
|
+
"claude-3-haiku-20240307": {
|
|
84
|
+
"input": 0.25,
|
|
85
|
+
"output": 1.25,
|
|
86
|
+
"cache_read": 0.03,
|
|
87
|
+
"cache_write_5m": 0.3,
|
|
88
|
+
"cache_write_1h": 0.5
|
|
89
|
+
}
|
|
90
|
+
}
|
|
91
|
+
}
|