cctally 1.6.3 → 1.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,520 @@
1
+ """Model pricing tables and cost-computation primitives.
2
+
3
+ Pure-fn layer (no I/O at import time): holds the embedded Anthropic
4
+ (`CLAUDE_MODEL_PRICING`) and OpenAI Codex (`CODEX_MODEL_PRICING`) pricing
5
+ snapshots plus the helpers that consume them — model-name normalization,
6
+ chip palette, per-entry cost calculation for both providers.
7
+
8
+ `bin/cctally` re-exports every symbol below so internal call sites resolve
9
+ unchanged. Tests reach into this layer via the re-exported names on the
10
+ `cctally` module; no direct import of `_lib_pricing` is expected from tests.
11
+
12
+ A private `_eprint` duplicates `bin/cctally:eprint` (two-line stderr helper)
13
+ so this pure layer carries zero back-imports per the split design's
14
+ Section 5.3 contract.
15
+
16
+ Spec: docs/superpowers/specs/2026-05-13-bin-cctally-split-design.md
17
+ """
18
+ from __future__ import annotations
19
+
20
+ import re
21
+ import sys
22
+ from typing import Any
23
+
24
+
25
def _eprint(*args: Any) -> None:
    """Write ``args`` to standard error using ``print``'s default formatting."""
    # Look the stream up at call time so a redirected sys.stderr is honoured.
    stream = sys.stderr
    print(*args, file=stream)
27
+
28
+
29
# Per-entry token count above which `_calculate_entry_cost` switches to a
# model's `*_above_200k_tokens` rates (when the pricing entry defines them).
TIERED_THRESHOLD = 200_000
30
+
31
+
32
def _chip_for_model(name: str) -> str:
    """Map a model id onto the small chip palette used for per-model coloring.

    The id is lower-cased and tested for the family names as substrings, in
    the order opus, sonnet, haiku; the first family found is returned. An
    empty or ``None`` name, or one mentioning none of the families, yields
    ``'other'``.
    """
    lowered = (name or "").lower()
    for family in ("opus", "sonnet", "haiku"):
        if family in lowered:
            return family
    return "other"
47
+
48
+
49
+ # Anthropic API pricing snapshot:
50
+ # - Source: https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json
51
+ # - Captured: 2026-05-04
52
+ # - Verified by maintainer against docs.claude.com/en/docs/about-claude/pricing;
53
+ # update in PRs touching this table.
54
# One row per model id. Rates are positional and follow the key tuple below:
# the four base per-token rates first, optionally followed by the four
# above-200k-token rates. `zip` stops at the shorter sequence, so a 4-rate row
# yields an entry without any `*_above_200k_tokens` keys. Every model gets its
# own dict object.
CLAUDE_MODEL_PRICING: dict[str, dict[str, Any]] = {
    model_id: dict(
        zip(
            (
                "input_cost_per_token",
                "output_cost_per_token",
                "cache_creation_input_token_cost",
                "cache_read_input_token_cost",
                "input_cost_per_token_above_200k_tokens",
                "output_cost_per_token_above_200k_tokens",
                "cache_creation_input_token_cost_above_200k_tokens",
                "cache_read_input_token_cost_above_200k_tokens",
            ),
            rates,
        )
    )
    for model_id, rates in (
        ("claude-3-5-haiku-20241022", (8e-07, 4e-06, 1e-06, 8e-08)),
        ("claude-3-5-haiku-latest", (1e-06, 5e-06, 1.25e-06, 1e-07)),
        ("claude-3-5-sonnet-20240620", (3e-06, 1.5e-05, 3.75e-06, 3e-07)),
        ("claude-3-5-sonnet-20241022", (3e-06, 1.5e-05, 3.75e-06, 3e-07)),
        ("claude-3-5-sonnet-latest", (3e-06, 1.5e-05, 3.75e-06, 3e-07)),
        ("claude-3-7-sonnet-20250219", (3e-06, 1.5e-05, 3.75e-06, 3e-07)),
        ("claude-3-7-sonnet-latest", (3e-06, 1.5e-05, 3.75e-06, 3e-07)),
        ("claude-3-haiku-20240307", (2.5e-07, 1.25e-06, 3e-07, 3e-08)),
        ("claude-3-opus-20240229", (1.5e-05, 7.5e-05, 1.875e-05, 1.5e-06)),
        ("claude-3-opus-latest", (1.5e-05, 7.5e-05, 1.875e-05, 1.5e-06)),
        ("claude-4-opus-20250514", (1.5e-05, 7.5e-05, 1.875e-05, 1.5e-06)),
        (
            "claude-4-sonnet-20250514",
            (3e-06, 1.5e-05, 3.75e-06, 3e-07, 6e-06, 2.25e-05, 7.5e-06, 6e-07),
        ),
        ("claude-haiku-4-5", (1e-06, 5e-06, 1.25e-06, 1e-07)),
        ("claude-haiku-4-5-20251001", (1e-06, 5e-06, 1.25e-06, 1e-07)),
        ("claude-opus-4-1", (1.5e-05, 7.5e-05, 1.875e-05, 1.5e-06)),
        ("claude-opus-4-1-20250805", (1.5e-05, 7.5e-05, 1.875e-05, 1.5e-06)),
        ("claude-opus-4-20250514", (1.5e-05, 7.5e-05, 1.875e-05, 1.5e-06)),
        ("claude-opus-4-5", (5e-06, 2.5e-05, 6.25e-06, 5e-07)),
        ("claude-opus-4-5-20251101", (5e-06, 2.5e-05, 6.25e-06, 5e-07)),
        ("claude-opus-4-6", (5e-06, 2.5e-05, 6.25e-06, 5e-07)),
        ("claude-opus-4-6-20260205", (5e-06, 2.5e-05, 6.25e-06, 5e-07)),
        ("claude-opus-4-7", (5e-06, 2.5e-05, 6.25e-06, 5e-07)),
        ("claude-opus-4-7-20260416", (5e-06, 2.5e-05, 6.25e-06, 5e-07)),
        (
            "claude-sonnet-4-20250514",
            (3e-06, 1.5e-05, 3.75e-06, 3e-07, 6e-06, 2.25e-05, 7.5e-06, 6e-07),
        ),
        (
            "claude-sonnet-4-5",
            (3e-06, 1.5e-05, 3.75e-06, 3e-07, 6e-06, 2.25e-05, 7.5e-06, 6e-07),
        ),
        (
            "claude-sonnet-4-5-20250929",
            (3e-06, 1.5e-05, 3.75e-06, 3e-07, 6e-06, 2.25e-05, 7.5e-06, 6e-07),
        ),
        ("claude-sonnet-4-6", (3e-06, 1.5e-05, 3.75e-06, 3e-07)),
    )
}
234
+
235
# Model names `_resolve_model_pricing` has already warned about; membership
# here suppresses repeat stderr warnings for the same name.
_unknown_model_warnings: set[str] = set()
236
+
237
+ # ---------------------------------------------------------------------------
238
+ # Codex / GPT-5 pricing table
239
+ # ---------------------------------------------------------------------------
240
+ #
241
+ # Codex (OpenAI) API pricing snapshot:
242
+ # - Source: https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json
243
+ # - Captured: 2026-05-04
244
+ # - Models listed are those observed in ~/.codex/sessions/ at implementation
245
+ # time plus common Codex/GPT-5 variants. Models absent from this table fall
246
+ # back to `gpt-5` pricing with isFallback=true (matches upstream's
247
+ # LEGACY_FALLBACK_MODEL behavior); a one-shot stderr warning is emitted per
248
+ # unknown model name.
249
+ #
250
+ # Billing rules:
251
+ # - reasoning_output_tokens is billed at the *output* rate (matches
252
+ # LiteLLM / upstream).
253
+ # - If cache_read_input_token_cost is absent for a model, we fall back to
254
+ # input_cost_per_token / 4 (matches LiteLLM's documented fallback).
255
+ # - Above-272k tiered rates are applied per-turn (row), mirroring the Claude
256
+ # pattern via a dedicated CODEX_TIERED_THRESHOLD.
257
CODEX_TIERED_THRESHOLD = 272_000

# One row per model id. Rates are positional and follow the key tuple below:
# input, cached-input, output, optionally followed by the same three rates for
# the above-272k tier. `zip` stops at the shorter sequence, so a 3-rate row
# yields an entry without any `*_above_272k_tokens` keys.
CODEX_MODEL_PRICING: dict[str, dict[str, Any]] = {
    model_id: dict(
        zip(
            (
                "input_cost_per_token",
                "cache_read_input_token_cost",
                "output_cost_per_token",
                "input_cost_per_token_above_272k_tokens",
                "cache_read_input_token_cost_above_272k_tokens",
                "output_cost_per_token_above_272k_tokens",
            ),
            rates,
        )
    )
    for model_id, rates in (
        ("gpt-5", (1.25e-06, 1.25e-07, 1e-05)),
        ("gpt-5-codex", (1.25e-06, 1.25e-07, 1e-05)),
        ("gpt-5.1-codex", (1.25e-06, 1.25e-07, 1e-05)),
        ("gpt-5.1-codex-max", (1.25e-06, 1.25e-07, 1e-05)),
        ("gpt-5.1-codex-mini", (2.5e-07, 2.5e-08, 2e-06)),
        ("gpt-5.2", (1.75e-06, 1.75e-07, 1.4e-05)),
        ("gpt-5.2-codex", (1.75e-06, 1.75e-07, 1.4e-05)),
        ("gpt-5.3-codex", (1.75e-06, 1.75e-07, 1.4e-05)),
        # Not in LiteLLM snapshot. Upstream emits isFallback: false with $0
        # billing contribution, so we mirror with an all-zero pricing entry
        # rather than the unknown-model warn-path.
        ("gpt-5.3-codex-spark", (0.0, 0.0, 0.0)),
        # Source: LiteLLM model_prices_and_context_window.json (openai
        # provider entry for "gpt-5.4"). Also matches OpenAI's published
        # /api/docs/pricing page (input $2.50/M, cached $0.25/M, output
        # $15.00/M; above-272k tier doubles input/cache and 1.5x's output).
        ("gpt-5.4", (2.5e-06, 2.5e-07, 1.5e-05, 5e-06, 5e-07, 2.25e-05)),
        # Source: LiteLLM model_prices_and_context_window.json (openai
        # provider entry for "gpt-5.4-mini"). Matches OpenAI published
        # pricing: input $0.75/M, cached $0.075/M, output $4.50/M. No
        # above-272k tier (max_input_tokens in LiteLLM is 272000 — the
        # ceiling, not a tier break).
        ("gpt-5.4-mini", (7.5e-07, 7.5e-08, 4.5e-06)),
        # Source: OpenAI published pricing (announced 2026-04-23). Input
        # $5.00/M, cached $0.50/M, output $30.00/M. No above-272k tier
        # announced. Add tiered rates here when LiteLLM publishes them.
        ("gpt-5.5", (5e-06, 5e-07, 3e-05)),
    )
}
338
+
339
# Codex model names `_warn_unknown_codex_model` has already reported;
# membership here suppresses repeat stderr warnings for the same name.
_unknown_codex_model_warnings: set[str] = set()

# Upstream ccusage-codex prices unknown Codex model names as `gpt-5` (its
# LEGACY_FALLBACK_MODEL) and marks them isFallback: true. We mirror that
# behavior so cost figures stay directly comparable with what a user would
# see from `ccusage-codex` on the same JSONL data.
CODEX_LEGACY_FALLBACK_MODEL = "gpt-5"
348
+
349
+
350
def _resolve_codex_pricing(model: str) -> tuple[dict[str, Any] | None, bool]:
    """Resolve the pricing entry for a Codex model name.

    Returns:
        ``(entry, False)`` when ``model`` has its own entry in
        ``CODEX_MODEL_PRICING``. Otherwise ``(fallback, True)``, where
        ``fallback`` is the entry for ``CODEX_LEGACY_FALLBACK_MODEL`` — or
        ``None`` if that entry is itself missing from the table (a
        programming error). This function emits no warning itself; that is
        left to the caller.
    """
    entry = CODEX_MODEL_PRICING.get(model)
    if entry is None:
        # Unknown model: price it like the legacy fallback model.
        return CODEX_MODEL_PRICING.get(CODEX_LEGACY_FALLBACK_MODEL), True
    return entry, False
363
+
364
+
365
def _is_codex_fallback(model: str) -> bool:
    """Return True when ``model`` has no key of its own in ``CODEX_MODEL_PRICING``."""
    has_direct_entry = model in CODEX_MODEL_PRICING
    return not has_direct_entry
368
+
369
+
370
def _resolve_model_pricing(model: str) -> dict[str, Any] | None:
    """Find the ``CLAUDE_MODEL_PRICING`` entry for a model name.

    The name is looked up as given first. If that misses and the name starts
    with ``anthropic/`` or ``anthropic.``, the lookup is retried with that
    prefix removed.

    Returns:
        The pricing dict, or ``None`` when the model is unknown. The first
        time a given unknown name is seen, a warning is written to stderr.
    """
    exact = CLAUDE_MODEL_PRICING.get(model)
    if exact is not None:
        return exact

    # Both provider prefixes have the same length, and at most one can match.
    if model.startswith(("anthropic/", "anthropic.")):
        unqualified = CLAUDE_MODEL_PRICING.get(model[len("anthropic/"):])
        if unqualified is not None:
            return unqualified

    first_sighting = model not in _unknown_model_warnings
    if first_sighting:
        _unknown_model_warnings.add(model)
        _eprint(f"[cost] unknown model, treating cost as $0: {model}")
    return None
385
+
386
+
387
def _calculate_entry_cost(
    model: str,
    usage: dict[str, Any],
    mode: str = "auto",
    cost_usd: float | None = None,
) -> float:
    """Calculate USD cost for a single API call entry.

    Args:
        model: Model name, resolved through ``_resolve_model_pricing``.
        usage: Token counts keyed by ``input_tokens``, ``output_tokens``,
            ``cache_creation_input_tokens`` and ``cache_read_input_tokens``.
            A missing key and an explicit ``None`` value both count as zero
            tokens.
        mode: ``"display"`` returns ``cost_usd`` as recorded (0.0 when it is
            ``None``) and never consults the pricing table. ``"auto"``
            returns ``cost_usd`` when it is not ``None`` and otherwise
            computes from tokens. Any other value always computes from
            tokens.
        cost_usd: Cost already recorded with the entry, if any.

    Returns:
        Cost in USD; 0.0 when the model has no pricing entry.
    """
    if mode == "display":
        return cost_usd if cost_usd is not None else 0.0
    if mode == "auto" and cost_usd is not None:
        return cost_usd

    pricing = _resolve_model_pricing(model)
    if pricing is None:
        return 0.0

    def _tiered(field: str, base_key: str, tiered_key: str) -> float:
        # `usage.get(field, 0)` only substitutes 0 when the key is absent; a
        # key present with a null value (e.g. JSON `null` for the cache
        # token fields) would come back as None and make `None <= 0` raise
        # TypeError. Treat None as zero tokens instead.
        tokens = usage.get(field) or 0
        if tokens <= 0:
            return 0.0
        base_rate = pricing.get(base_key, 0.0)
        tiered_rate = pricing.get(tiered_key)
        if tokens > TIERED_THRESHOLD and tiered_rate is not None:
            # The first TIERED_THRESHOLD tokens bill at the base rate; only
            # the excess bills at the above-threshold rate.
            return (
                TIERED_THRESHOLD * base_rate
                + (tokens - TIERED_THRESHOLD) * tiered_rate
            )
        return tokens * base_rate

    input_cost = _tiered(
        "input_tokens",
        "input_cost_per_token",
        "input_cost_per_token_above_200k_tokens",
    )
    output_cost = _tiered(
        "output_tokens",
        "output_cost_per_token",
        "output_cost_per_token_above_200k_tokens",
    )
    cache_create_cost = _tiered(
        "cache_creation_input_tokens",
        "cache_creation_input_token_cost",
        "cache_creation_input_token_cost_above_200k_tokens",
    )
    cache_read_cost = _tiered(
        "cache_read_input_tokens",
        "cache_read_input_token_cost",
        "cache_read_input_token_cost_above_200k_tokens",
    )
    return input_cost + output_cost + cache_create_cost + cache_read_cost
437
+
438
+
439
def _warn_unknown_codex_model(model: str) -> None:
    """Warn on stderr, once per distinct name, that ``model`` has no Codex pricing entry."""
    if model not in _unknown_codex_model_warnings:
        # Record the name before printing so later calls with it are silent.
        _unknown_codex_model_warnings.add(model)
        _eprint(f"[codex] unknown model, using gpt-5 fallback pricing (isFallback=true): {model}")
445
+
446
+
447
def _calculate_codex_entry_cost(
    model: str,
    input_tokens: int,
    cached_input_tokens: int,
    output_tokens: int,
    reasoning_output_tokens: int,
) -> float:
    """Compute USD cost for one Codex `token_count` event.

    Token-field semantics (LiteLLM convention, matched by upstream
    ccusage-codex):
      - input_tokens INCLUDES cached_input_tokens (cached is a subset).
      - output_tokens INCLUDES reasoning_output_tokens (reasoning is a subset).

    Cost formula:
        non_cached_input = max(0, input_tokens - cached_input_tokens)
        cost = non_cached_input    * input_cost_per_token
             + cached_input_tokens * cache_read_input_token_cost
             + output_tokens       * output_cost_per_token

    Each of the three token classes is tiered on its own: when its count
    exceeds CODEX_TIERED_THRESHOLD and the pricing entry carries the matching
    `_above_272k_tokens` key, only the excess is billed at the tiered rate.
    A token class whose base rate is absent from the entry (or zero)
    contributes $0.

    `reasoning_output_tokens` is accepted for API stability but unused — its
    contribution is already billed inside `output_tokens`.

    Models without their own entry are priced via `_resolve_codex_pricing`'s
    fallback and trigger a one-shot stderr warning. Returns 0.0 if even the
    fallback entry is missing.
    """
    del reasoning_output_tokens  # already billed inside output_tokens

    pricing, is_fallback = _resolve_codex_pricing(model)
    if pricing is None or is_fallback:
        _warn_unknown_codex_model(model)  # one-shot per unique model name
    if pricing is None:
        # Only possible if CODEX_LEGACY_FALLBACK_MODEL itself is missing from
        # the table — a programming error; bill $0 rather than crash.
        return 0.0

    def _bill(count: int, rate_key: str, tier_key: str) -> float:
        rate = pricing.get(rate_key, 0.0)
        if count <= 0 or not rate:
            return 0.0
        tier_rate = pricing.get(tier_key)
        if tier_rate is None or count <= CODEX_TIERED_THRESHOLD:
            return count * rate
        overflow = count - CODEX_TIERED_THRESHOLD
        return CODEX_TIERED_THRESHOLD * rate + overflow * tier_rate

    # Cached tokens are a subset of input tokens; bill only the remainder at
    # the full input rate.
    non_cached_input = max(0, input_tokens - cached_input_tokens)

    input_cost = _bill(
        non_cached_input,
        "input_cost_per_token",
        "input_cost_per_token_above_272k_tokens",
    )
    cached_input_cost = _bill(
        cached_input_tokens,
        "cache_read_input_token_cost",
        "cache_read_input_token_cost_above_272k_tokens",
    )
    output_cost = _bill(
        output_tokens,
        "output_cost_per_token",
        "output_cost_per_token_above_272k_tokens",
    )
    return input_cost + cached_input_cost + output_cost
509
+
510
+
511
def _short_model_name(model: str) -> str:
    """Return a compact display form of a model name.

    A leading ``claude-`` is removed, then a trailing ``-`` plus eight-digit
    date stamp (such as ``-20251001``) is removed if present:
    ``'claude-opus-4-6'`` -> ``'opus-4-6'``.
    """
    vendor_prefix = "claude-"
    short = model[len(vendor_prefix):] if model.startswith(vendor_prefix) else model
    if re.match(r".*-\d{8}$", short):
        # Drop the hyphen and the eight date digits.
        short = short[:-9]
    return short