cctally 1.7.0 → 1.7.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +7 -0
- package/bin/_cctally_alerts.py +231 -0
- package/bin/_cctally_cache.py +1432 -0
- package/bin/_cctally_config.py +560 -0
- package/bin/_cctally_dashboard.py +5218 -0
- package/bin/_cctally_db.py +1729 -0
- package/bin/_cctally_record.py +2120 -0
- package/bin/_cctally_refresh.py +812 -0
- package/bin/_cctally_release.py +751 -0
- package/bin/_cctally_setup.py +1571 -0
- package/bin/_cctally_sync_week.py +110 -0
- package/bin/_cctally_tui.py +4381 -0
- package/bin/_cctally_update.py +2132 -0
- package/bin/_lib_aggregators.py +712 -0
- package/bin/_lib_alerts_payload.py +194 -0
- package/bin/_lib_blocks.py +414 -0
- package/bin/_lib_diff_kernel.py +1618 -0
- package/bin/_lib_display_tz.py +361 -0
- package/bin/_lib_doctor.py +58 -0
- package/bin/_lib_five_hour.py +82 -0
- package/bin/_lib_jsonl.py +403 -0
- package/bin/_lib_pricing.py +520 -0
- package/bin/_lib_render.py +2785 -0
- package/bin/_lib_semver.py +105 -0
- package/bin/_lib_subscription_weeks.py +492 -0
- package/bin/cctally +11034 -35415
- package/package.json +24 -1
|
@@ -0,0 +1,520 @@
|
|
|
1
|
+
"""Model pricing tables and cost-computation primitives.
|
|
2
|
+
|
|
3
|
+
Pure-fn layer (no I/O at import time): holds the embedded Anthropic
|
|
4
|
+
(`CLAUDE_MODEL_PRICING`) and OpenAI Codex (`CODEX_MODEL_PRICING`) pricing
|
|
5
|
+
snapshots plus the helpers that consume them — model-name normalization,
|
|
6
|
+
chip palette, per-entry cost calculation for both providers.
|
|
7
|
+
|
|
8
|
+
`bin/cctally` re-exports every symbol below so internal call sites resolve
|
|
9
|
+
unchanged. Tests reach into this layer via the re-exported names on the
|
|
10
|
+
`cctally` module; no direct import of `_lib_pricing` is expected from tests.
|
|
11
|
+
|
|
12
|
+
A private `_eprint` duplicates `bin/cctally:eprint` (two-line stderr helper)
|
|
13
|
+
so this pure layer carries zero back-imports per the split design's
|
|
14
|
+
Section 5.3 contract.
|
|
15
|
+
|
|
16
|
+
Spec: docs/superpowers/specs/2026-05-13-bin-cctally-split-design.md
|
|
17
|
+
"""
|
|
18
|
+
from __future__ import annotations
|
|
19
|
+
|
|
20
|
+
import re
|
|
21
|
+
import sys
|
|
22
|
+
from typing import Any
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def _eprint(*args: Any) -> None:
|
|
26
|
+
print(*args, file=sys.stderr)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
TIERED_THRESHOLD = 200_000
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def _chip_for_model(name: str) -> str:
|
|
33
|
+
"""Bucket a canonical model id into a small chip palette.
|
|
34
|
+
|
|
35
|
+
Returns one of 'opus' | 'sonnet' | 'haiku' | 'other'. Used by the
|
|
36
|
+
dashboard's Weekly / Monthly panels and modals so per-model
|
|
37
|
+
coloring stays consistent across the UI.
|
|
38
|
+
"""
|
|
39
|
+
n = (name or "").lower()
|
|
40
|
+
if "opus" in n:
|
|
41
|
+
return "opus"
|
|
42
|
+
if "sonnet" in n:
|
|
43
|
+
return "sonnet"
|
|
44
|
+
if "haiku" in n:
|
|
45
|
+
return "haiku"
|
|
46
|
+
return "other"
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
# Anthropic API pricing snapshot:
|
|
50
|
+
# - Source: https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json
|
|
51
|
+
# - Captured: 2026-05-04
|
|
52
|
+
# - Verified by maintainer against docs.claude.com/en/docs/about-claude/pricing;
|
|
53
|
+
# update in PRs touching this table.
|
|
54
|
+
CLAUDE_MODEL_PRICING: dict[str, dict[str, Any]] = {
|
|
55
|
+
"claude-3-5-haiku-20241022": {
|
|
56
|
+
"input_cost_per_token": 8e-07,
|
|
57
|
+
"output_cost_per_token": 4e-06,
|
|
58
|
+
"cache_creation_input_token_cost": 1e-06,
|
|
59
|
+
"cache_read_input_token_cost": 8e-08,
|
|
60
|
+
},
|
|
61
|
+
"claude-3-5-haiku-latest": {
|
|
62
|
+
"input_cost_per_token": 1e-06,
|
|
63
|
+
"output_cost_per_token": 5e-06,
|
|
64
|
+
"cache_creation_input_token_cost": 1.25e-06,
|
|
65
|
+
"cache_read_input_token_cost": 1e-07,
|
|
66
|
+
},
|
|
67
|
+
"claude-3-5-sonnet-20240620": {
|
|
68
|
+
"input_cost_per_token": 3e-06,
|
|
69
|
+
"output_cost_per_token": 1.5e-05,
|
|
70
|
+
"cache_creation_input_token_cost": 3.75e-06,
|
|
71
|
+
"cache_read_input_token_cost": 3e-07,
|
|
72
|
+
},
|
|
73
|
+
"claude-3-5-sonnet-20241022": {
|
|
74
|
+
"input_cost_per_token": 3e-06,
|
|
75
|
+
"output_cost_per_token": 1.5e-05,
|
|
76
|
+
"cache_creation_input_token_cost": 3.75e-06,
|
|
77
|
+
"cache_read_input_token_cost": 3e-07,
|
|
78
|
+
},
|
|
79
|
+
"claude-3-5-sonnet-latest": {
|
|
80
|
+
"input_cost_per_token": 3e-06,
|
|
81
|
+
"output_cost_per_token": 1.5e-05,
|
|
82
|
+
"cache_creation_input_token_cost": 3.75e-06,
|
|
83
|
+
"cache_read_input_token_cost": 3e-07,
|
|
84
|
+
},
|
|
85
|
+
"claude-3-7-sonnet-20250219": {
|
|
86
|
+
"input_cost_per_token": 3e-06,
|
|
87
|
+
"output_cost_per_token": 1.5e-05,
|
|
88
|
+
"cache_creation_input_token_cost": 3.75e-06,
|
|
89
|
+
"cache_read_input_token_cost": 3e-07,
|
|
90
|
+
},
|
|
91
|
+
"claude-3-7-sonnet-latest": {
|
|
92
|
+
"input_cost_per_token": 3e-06,
|
|
93
|
+
"output_cost_per_token": 1.5e-05,
|
|
94
|
+
"cache_creation_input_token_cost": 3.75e-06,
|
|
95
|
+
"cache_read_input_token_cost": 3e-07,
|
|
96
|
+
},
|
|
97
|
+
"claude-3-haiku-20240307": {
|
|
98
|
+
"input_cost_per_token": 2.5e-07,
|
|
99
|
+
"output_cost_per_token": 1.25e-06,
|
|
100
|
+
"cache_creation_input_token_cost": 3e-07,
|
|
101
|
+
"cache_read_input_token_cost": 3e-08,
|
|
102
|
+
},
|
|
103
|
+
"claude-3-opus-20240229": {
|
|
104
|
+
"input_cost_per_token": 1.5e-05,
|
|
105
|
+
"output_cost_per_token": 7.5e-05,
|
|
106
|
+
"cache_creation_input_token_cost": 1.875e-05,
|
|
107
|
+
"cache_read_input_token_cost": 1.5e-06,
|
|
108
|
+
},
|
|
109
|
+
"claude-3-opus-latest": {
|
|
110
|
+
"input_cost_per_token": 1.5e-05,
|
|
111
|
+
"output_cost_per_token": 7.5e-05,
|
|
112
|
+
"cache_creation_input_token_cost": 1.875e-05,
|
|
113
|
+
"cache_read_input_token_cost": 1.5e-06,
|
|
114
|
+
},
|
|
115
|
+
"claude-4-opus-20250514": {
|
|
116
|
+
"input_cost_per_token": 1.5e-05,
|
|
117
|
+
"output_cost_per_token": 7.5e-05,
|
|
118
|
+
"cache_creation_input_token_cost": 1.875e-05,
|
|
119
|
+
"cache_read_input_token_cost": 1.5e-06,
|
|
120
|
+
},
|
|
121
|
+
"claude-4-sonnet-20250514": {
|
|
122
|
+
"input_cost_per_token": 3e-06,
|
|
123
|
+
"output_cost_per_token": 1.5e-05,
|
|
124
|
+
"cache_creation_input_token_cost": 3.75e-06,
|
|
125
|
+
"cache_read_input_token_cost": 3e-07,
|
|
126
|
+
"input_cost_per_token_above_200k_tokens": 6e-06,
|
|
127
|
+
"output_cost_per_token_above_200k_tokens": 2.25e-05,
|
|
128
|
+
"cache_creation_input_token_cost_above_200k_tokens": 7.5e-06,
|
|
129
|
+
"cache_read_input_token_cost_above_200k_tokens": 6e-07,
|
|
130
|
+
},
|
|
131
|
+
"claude-haiku-4-5": {
|
|
132
|
+
"input_cost_per_token": 1e-06,
|
|
133
|
+
"output_cost_per_token": 5e-06,
|
|
134
|
+
"cache_creation_input_token_cost": 1.25e-06,
|
|
135
|
+
"cache_read_input_token_cost": 1e-07,
|
|
136
|
+
},
|
|
137
|
+
"claude-haiku-4-5-20251001": {
|
|
138
|
+
"input_cost_per_token": 1e-06,
|
|
139
|
+
"output_cost_per_token": 5e-06,
|
|
140
|
+
"cache_creation_input_token_cost": 1.25e-06,
|
|
141
|
+
"cache_read_input_token_cost": 1e-07,
|
|
142
|
+
},
|
|
143
|
+
"claude-opus-4-1": {
|
|
144
|
+
"input_cost_per_token": 1.5e-05,
|
|
145
|
+
"output_cost_per_token": 7.5e-05,
|
|
146
|
+
"cache_creation_input_token_cost": 1.875e-05,
|
|
147
|
+
"cache_read_input_token_cost": 1.5e-06,
|
|
148
|
+
},
|
|
149
|
+
"claude-opus-4-1-20250805": {
|
|
150
|
+
"input_cost_per_token": 1.5e-05,
|
|
151
|
+
"output_cost_per_token": 7.5e-05,
|
|
152
|
+
"cache_creation_input_token_cost": 1.875e-05,
|
|
153
|
+
"cache_read_input_token_cost": 1.5e-06,
|
|
154
|
+
},
|
|
155
|
+
"claude-opus-4-20250514": {
|
|
156
|
+
"input_cost_per_token": 1.5e-05,
|
|
157
|
+
"output_cost_per_token": 7.5e-05,
|
|
158
|
+
"cache_creation_input_token_cost": 1.875e-05,
|
|
159
|
+
"cache_read_input_token_cost": 1.5e-06,
|
|
160
|
+
},
|
|
161
|
+
"claude-opus-4-5": {
|
|
162
|
+
"input_cost_per_token": 5e-06,
|
|
163
|
+
"output_cost_per_token": 2.5e-05,
|
|
164
|
+
"cache_creation_input_token_cost": 6.25e-06,
|
|
165
|
+
"cache_read_input_token_cost": 5e-07,
|
|
166
|
+
},
|
|
167
|
+
"claude-opus-4-5-20251101": {
|
|
168
|
+
"input_cost_per_token": 5e-06,
|
|
169
|
+
"output_cost_per_token": 2.5e-05,
|
|
170
|
+
"cache_creation_input_token_cost": 6.25e-06,
|
|
171
|
+
"cache_read_input_token_cost": 5e-07,
|
|
172
|
+
},
|
|
173
|
+
"claude-opus-4-6": {
|
|
174
|
+
"input_cost_per_token": 5e-06,
|
|
175
|
+
"output_cost_per_token": 2.5e-05,
|
|
176
|
+
"cache_creation_input_token_cost": 6.25e-06,
|
|
177
|
+
"cache_read_input_token_cost": 5e-07,
|
|
178
|
+
},
|
|
179
|
+
"claude-opus-4-6-20260205": {
|
|
180
|
+
"input_cost_per_token": 5e-06,
|
|
181
|
+
"output_cost_per_token": 2.5e-05,
|
|
182
|
+
"cache_creation_input_token_cost": 6.25e-06,
|
|
183
|
+
"cache_read_input_token_cost": 5e-07,
|
|
184
|
+
},
|
|
185
|
+
"claude-opus-4-7": {
|
|
186
|
+
"input_cost_per_token": 5e-06,
|
|
187
|
+
"output_cost_per_token": 2.5e-05,
|
|
188
|
+
"cache_creation_input_token_cost": 6.25e-06,
|
|
189
|
+
"cache_read_input_token_cost": 5e-07,
|
|
190
|
+
},
|
|
191
|
+
"claude-opus-4-7-20260416": {
|
|
192
|
+
"input_cost_per_token": 5e-06,
|
|
193
|
+
"output_cost_per_token": 2.5e-05,
|
|
194
|
+
"cache_creation_input_token_cost": 6.25e-06,
|
|
195
|
+
"cache_read_input_token_cost": 5e-07,
|
|
196
|
+
},
|
|
197
|
+
"claude-sonnet-4-20250514": {
|
|
198
|
+
"input_cost_per_token": 3e-06,
|
|
199
|
+
"output_cost_per_token": 1.5e-05,
|
|
200
|
+
"cache_creation_input_token_cost": 3.75e-06,
|
|
201
|
+
"cache_read_input_token_cost": 3e-07,
|
|
202
|
+
"input_cost_per_token_above_200k_tokens": 6e-06,
|
|
203
|
+
"output_cost_per_token_above_200k_tokens": 2.25e-05,
|
|
204
|
+
"cache_creation_input_token_cost_above_200k_tokens": 7.5e-06,
|
|
205
|
+
"cache_read_input_token_cost_above_200k_tokens": 6e-07,
|
|
206
|
+
},
|
|
207
|
+
"claude-sonnet-4-5": {
|
|
208
|
+
"input_cost_per_token": 3e-06,
|
|
209
|
+
"output_cost_per_token": 1.5e-05,
|
|
210
|
+
"cache_creation_input_token_cost": 3.75e-06,
|
|
211
|
+
"cache_read_input_token_cost": 3e-07,
|
|
212
|
+
"input_cost_per_token_above_200k_tokens": 6e-06,
|
|
213
|
+
"output_cost_per_token_above_200k_tokens": 2.25e-05,
|
|
214
|
+
"cache_creation_input_token_cost_above_200k_tokens": 7.5e-06,
|
|
215
|
+
"cache_read_input_token_cost_above_200k_tokens": 6e-07,
|
|
216
|
+
},
|
|
217
|
+
"claude-sonnet-4-5-20250929": {
|
|
218
|
+
"input_cost_per_token": 3e-06,
|
|
219
|
+
"output_cost_per_token": 1.5e-05,
|
|
220
|
+
"cache_creation_input_token_cost": 3.75e-06,
|
|
221
|
+
"cache_read_input_token_cost": 3e-07,
|
|
222
|
+
"input_cost_per_token_above_200k_tokens": 6e-06,
|
|
223
|
+
"output_cost_per_token_above_200k_tokens": 2.25e-05,
|
|
224
|
+
"cache_creation_input_token_cost_above_200k_tokens": 7.5e-06,
|
|
225
|
+
"cache_read_input_token_cost_above_200k_tokens": 6e-07,
|
|
226
|
+
},
|
|
227
|
+
"claude-sonnet-4-6": {
|
|
228
|
+
"input_cost_per_token": 3e-06,
|
|
229
|
+
"output_cost_per_token": 1.5e-05,
|
|
230
|
+
"cache_creation_input_token_cost": 3.75e-06,
|
|
231
|
+
"cache_read_input_token_cost": 3e-07,
|
|
232
|
+
},
|
|
233
|
+
}
|
|
234
|
+
|
|
235
|
+
_unknown_model_warnings: set[str] = set()
|
|
236
|
+
|
|
237
|
+
# ---------------------------------------------------------------------------
|
|
238
|
+
# Codex / GPT-5 pricing table
|
|
239
|
+
# ---------------------------------------------------------------------------
|
|
240
|
+
#
|
|
241
|
+
# Codex (OpenAI) API pricing snapshot:
|
|
242
|
+
# - Source: https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json
|
|
243
|
+
# - Captured: 2026-05-04
|
|
244
|
+
# - Models listed are those observed in ~/.codex/sessions/ at implementation
|
|
245
|
+
# time plus common Codex/GPT-5 variants. Models absent from this table fall
|
|
246
|
+
# back to `gpt-5` pricing with isFallback=true (matches upstream's
|
|
247
|
+
# LEGACY_FALLBACK_MODEL behavior); a one-shot stderr warning is emitted per
|
|
248
|
+
# unknown model name.
|
|
249
|
+
#
|
|
250
|
+
# Billing rules:
|
|
251
|
+
# - reasoning_output_tokens is billed at the *output* rate (matches
|
|
252
|
+
# LiteLLM / upstream).
|
|
253
|
+
# - If cache_read_input_token_cost is absent for a model, we fall back to
|
|
254
|
+
# input_cost_per_token / 4 (matches LiteLLM's documented fallback).
|
|
255
|
+
# - Above-272k tiered rates are applied per-turn (row), mirroring the Claude
|
|
256
|
+
# pattern via a dedicated CODEX_TIERED_THRESHOLD.
|
|
257
|
+
CODEX_TIERED_THRESHOLD = 272_000
|
|
258
|
+
|
|
259
|
+
CODEX_MODEL_PRICING: dict[str, dict[str, Any]] = {
|
|
260
|
+
"gpt-5": {
|
|
261
|
+
"input_cost_per_token": 1.25e-06,
|
|
262
|
+
"cache_read_input_token_cost": 1.25e-07,
|
|
263
|
+
"output_cost_per_token": 1e-05,
|
|
264
|
+
},
|
|
265
|
+
"gpt-5-codex": {
|
|
266
|
+
"input_cost_per_token": 1.25e-06,
|
|
267
|
+
"cache_read_input_token_cost": 1.25e-07,
|
|
268
|
+
"output_cost_per_token": 1e-05,
|
|
269
|
+
},
|
|
270
|
+
"gpt-5.1-codex": {
|
|
271
|
+
"input_cost_per_token": 1.25e-06,
|
|
272
|
+
"cache_read_input_token_cost": 1.25e-07,
|
|
273
|
+
"output_cost_per_token": 1e-05,
|
|
274
|
+
},
|
|
275
|
+
"gpt-5.1-codex-max": {
|
|
276
|
+
"input_cost_per_token": 1.25e-06,
|
|
277
|
+
"cache_read_input_token_cost": 1.25e-07,
|
|
278
|
+
"output_cost_per_token": 1e-05,
|
|
279
|
+
},
|
|
280
|
+
"gpt-5.1-codex-mini": {
|
|
281
|
+
"input_cost_per_token": 2.5e-07,
|
|
282
|
+
"cache_read_input_token_cost": 2.5e-08,
|
|
283
|
+
"output_cost_per_token": 2e-06,
|
|
284
|
+
},
|
|
285
|
+
"gpt-5.2": {
|
|
286
|
+
"input_cost_per_token": 1.75e-06,
|
|
287
|
+
"cache_read_input_token_cost": 1.75e-07,
|
|
288
|
+
"output_cost_per_token": 1.4e-05,
|
|
289
|
+
},
|
|
290
|
+
"gpt-5.2-codex": {
|
|
291
|
+
"input_cost_per_token": 1.75e-06,
|
|
292
|
+
"cache_read_input_token_cost": 1.75e-07,
|
|
293
|
+
"output_cost_per_token": 1.4e-05,
|
|
294
|
+
},
|
|
295
|
+
"gpt-5.3-codex": {
|
|
296
|
+
"input_cost_per_token": 1.75e-06,
|
|
297
|
+
"cache_read_input_token_cost": 1.75e-07,
|
|
298
|
+
"output_cost_per_token": 1.4e-05,
|
|
299
|
+
},
|
|
300
|
+
"gpt-5.3-codex-spark": {
|
|
301
|
+
# Not in LiteLLM snapshot. Upstream emits isFallback: false with $0
|
|
302
|
+
# billing contribution, so we mirror with an all-zero pricing entry
|
|
303
|
+
# rather than the unknown-model warn-path.
|
|
304
|
+
"input_cost_per_token": 0.0,
|
|
305
|
+
"cache_read_input_token_cost": 0.0,
|
|
306
|
+
"output_cost_per_token": 0.0,
|
|
307
|
+
},
|
|
308
|
+
"gpt-5.4": {
|
|
309
|
+
# Source: LiteLLM model_prices_and_context_window.json (openai provider entry
|
|
310
|
+
# for "gpt-5.4"). Also matches OpenAI's published /api/docs/pricing page
|
|
311
|
+
# (input $2.50/M, cached $0.25/M, output $15.00/M; above-272k tier doubles
|
|
312
|
+
# input/cache and 1.5x's output).
|
|
313
|
+
"input_cost_per_token": 2.5e-06,
|
|
314
|
+
"cache_read_input_token_cost": 2.5e-07,
|
|
315
|
+
"output_cost_per_token": 1.5e-05,
|
|
316
|
+
"input_cost_per_token_above_272k_tokens": 5e-06,
|
|
317
|
+
"cache_read_input_token_cost_above_272k_tokens": 5e-07,
|
|
318
|
+
"output_cost_per_token_above_272k_tokens": 2.25e-05,
|
|
319
|
+
},
|
|
320
|
+
"gpt-5.4-mini": {
|
|
321
|
+
# Source: LiteLLM model_prices_and_context_window.json (openai provider
|
|
322
|
+
# entry for "gpt-5.4-mini"). Matches OpenAI published pricing: input
|
|
323
|
+
# $0.75/M, cached $0.075/M, output $4.50/M. No above-272k tier
|
|
324
|
+
# (max_input_tokens in LiteLLM is 272000 — the ceiling, not a tier break).
|
|
325
|
+
"input_cost_per_token": 7.5e-07,
|
|
326
|
+
"cache_read_input_token_cost": 7.5e-08,
|
|
327
|
+
"output_cost_per_token": 4.5e-06,
|
|
328
|
+
},
|
|
329
|
+
"gpt-5.5": {
|
|
330
|
+
# Source: OpenAI published pricing (announced 2026-04-23). Input
|
|
331
|
+
# $5.00/M, cached $0.50/M, output $30.00/M. No above-272k tier
|
|
332
|
+
# announced. Add tiered fields here when LiteLLM publishes them.
|
|
333
|
+
"input_cost_per_token": 5e-06,
|
|
334
|
+
"cache_read_input_token_cost": 5e-07,
|
|
335
|
+
"output_cost_per_token": 3e-05,
|
|
336
|
+
},
|
|
337
|
+
}
|
|
338
|
+
|
|
339
|
+
_unknown_codex_model_warnings: set[str] = set()
|
|
340
|
+
|
|
341
|
+
# Upstream ccusage-codex maps unknown Codex model names to `gpt-5` pricing
|
|
342
|
+
# and marks them isFallback: true. We mirror that behavior so cost figures
|
|
343
|
+
# match what a user would see with `ccusage-codex` on the same JSONL data.
|
|
344
|
+
# Behavior matches LEGACY_FALLBACK_MODEL in upstream ccusage-codex — both
|
|
345
|
+
# tools fall back to gpt-5 pricing for unknown model names so output remains
|
|
346
|
+
# directly comparable.
|
|
347
|
+
CODEX_LEGACY_FALLBACK_MODEL = "gpt-5"
|
|
348
|
+
|
|
349
|
+
|
|
350
|
+
def _resolve_codex_pricing(model: str) -> tuple[dict[str, Any] | None, bool]:
    """Look up the Codex pricing entry for ``model``.

    Returns a ``(pricing, is_fallback)`` pair:

    - ``(entry, False)`` when ``model`` has its own entry in
      ``CODEX_MODEL_PRICING``.
    - ``(fallback_entry, True)`` when it does not; the fallback is the entry
      for ``CODEX_LEGACY_FALLBACK_MODEL``.
    - ``(None, True)`` when the fallback model is itself missing from the
      table, which indicates a programming error.

    No warning is emitted here; reporting is left to the caller.
    """
    entry = CODEX_MODEL_PRICING.get(model)
    if entry is None:
        # Unknown model: hand back the legacy fallback entry (possibly None).
        return CODEX_MODEL_PRICING.get(CODEX_LEGACY_FALLBACK_MODEL), True
    return entry, False
|
|
363
|
+
|
|
364
|
+
|
|
365
|
+
def _is_codex_fallback(model: str) -> bool:
    """Report whether ``model`` lacks its own ``CODEX_MODEL_PRICING`` entry.

    Such models are the ones priced through ``CODEX_LEGACY_FALLBACK_MODEL``.
    """
    has_direct_entry = model in CODEX_MODEL_PRICING
    return not has_direct_entry
|
|
368
|
+
|
|
369
|
+
|
|
370
|
+
def _resolve_model_pricing(model: str) -> dict[str, Any] | None:
    """Return the ``CLAUDE_MODEL_PRICING`` entry for ``model``, if any.

    The name is tried verbatim first, then with a leading ``anthropic/`` or
    ``anthropic.`` provider prefix removed. When nothing matches, a warning
    is written to stderr the first time that model name is seen and ``None``
    is returned.
    """
    entry = CLAUDE_MODEL_PRICING.get(model)
    if entry is None:
        for provider_prefix in ("anthropic/", "anthropic."):
            if not model.startswith(provider_prefix):
                continue
            entry = CLAUDE_MODEL_PRICING.get(model[len(provider_prefix):])
            if entry is not None:
                break
    if entry is None and model not in _unknown_model_warnings:
        # Remember the name so each unknown model is reported only once.
        _unknown_model_warnings.add(model)
        _eprint(f"[cost] unknown model, treating cost as $0: {model}")
    return entry
|
|
385
|
+
|
|
386
|
+
|
|
387
|
+
def _calculate_entry_cost(
|
|
388
|
+
model: str,
|
|
389
|
+
usage: dict[str, Any],
|
|
390
|
+
mode: str = "auto",
|
|
391
|
+
cost_usd: float | None = None,
|
|
392
|
+
) -> float:
|
|
393
|
+
"""Calculate USD cost for a single API call entry."""
|
|
394
|
+
if mode == "display":
|
|
395
|
+
return cost_usd if cost_usd is not None else 0.0
|
|
396
|
+
if mode == "auto" and cost_usd is not None:
|
|
397
|
+
return cost_usd
|
|
398
|
+
|
|
399
|
+
pricing = _resolve_model_pricing(model)
|
|
400
|
+
if pricing is None:
|
|
401
|
+
return 0.0
|
|
402
|
+
|
|
403
|
+
def _tiered(tokens: int, base_key: str, tiered_key: str) -> float:
|
|
404
|
+
base_rate = pricing.get(base_key, 0.0)
|
|
405
|
+
tiered_rate = pricing.get(tiered_key)
|
|
406
|
+
if tokens <= 0:
|
|
407
|
+
return 0.0
|
|
408
|
+
if tokens > TIERED_THRESHOLD and tiered_rate is not None:
|
|
409
|
+
below = min(tokens, TIERED_THRESHOLD)
|
|
410
|
+
above = tokens - TIERED_THRESHOLD
|
|
411
|
+
return below * base_rate + above * tiered_rate
|
|
412
|
+
return tokens * base_rate
|
|
413
|
+
|
|
414
|
+
input_cost = _tiered(
|
|
415
|
+
usage.get("input_tokens", 0),
|
|
416
|
+
"input_cost_per_token",
|
|
417
|
+
"input_cost_per_token_above_200k_tokens",
|
|
418
|
+
)
|
|
419
|
+
output_cost = _tiered(
|
|
420
|
+
usage.get("output_tokens", 0),
|
|
421
|
+
"output_cost_per_token",
|
|
422
|
+
"output_cost_per_token_above_200k_tokens",
|
|
423
|
+
)
|
|
424
|
+
cache_create_cost = _tiered(
|
|
425
|
+
usage.get("cache_creation_input_tokens", 0),
|
|
426
|
+
"cache_creation_input_token_cost",
|
|
427
|
+
"cache_creation_input_token_cost_above_200k_tokens",
|
|
428
|
+
)
|
|
429
|
+
cache_read_cost = _tiered(
|
|
430
|
+
usage.get("cache_read_input_tokens", 0),
|
|
431
|
+
"cache_read_input_token_cost",
|
|
432
|
+
"cache_read_input_token_cost_above_200k_tokens",
|
|
433
|
+
)
|
|
434
|
+
total = input_cost + output_cost + cache_create_cost + cache_read_cost
|
|
435
|
+
|
|
436
|
+
return total
|
|
437
|
+
|
|
438
|
+
|
|
439
|
+
def _warn_unknown_codex_model(model: str) -> None:
    """Warn on stderr, once per model name, that ``model`` has no Codex pricing.

    Names already reported are tracked in ``_unknown_codex_model_warnings``,
    so repeated calls with the same name stay silent.
    """
    if model not in _unknown_codex_model_warnings:
        _unknown_codex_model_warnings.add(model)
        _eprint(f"[codex] unknown model, using gpt-5 fallback pricing (isFallback=true): {model}")
|
|
445
|
+
|
|
446
|
+
|
|
447
|
+
def _calculate_codex_entry_cost(
    model: str,
    input_tokens: int,
    cached_input_tokens: int,
    output_tokens: int,
    reasoning_output_tokens: int,
) -> float:
    """Compute USD cost for one Codex `token_count` event.

    Token-field semantics (LiteLLM convention, matched by upstream ccusage-codex):
      - input_tokens INCLUDES cached_input_tokens (cached is a subset).
      - output_tokens INCLUDES reasoning_output_tokens (reasoning is a subset).

    Cost formula:
        non_cached_input = max(0, input_tokens - cached_input_tokens)
        cost = non_cached_input * input_cost_per_token
             + cached_input_tokens * cache_read_input_token_cost
             + output_tokens * output_cost_per_token

    When a pricing entry has no ``cache_read_input_token_cost`` key, cached
    tokens are billed at ``input_cost_per_token / 4``, as the billing rules
    documented beside ``CODEX_MODEL_PRICING`` state. An explicit ``0.0`` rate
    is respected and bills nothing.

    The above-272k tier is applied per turn to any bucket whose
    ``*_above_272k_tokens`` key is present in the pricing entry.

    Args:
        model: Codex model name; unknown names are priced via the legacy
            fallback model and reported once on stderr.
        input_tokens: Total input tokens, cached ones included.
        cached_input_tokens: Input tokens served from cache.
        output_tokens: Total output tokens, reasoning ones included.
        reasoning_output_tokens: Accepted for API stability but unused; its
            contribution is already billed inside ``output_tokens``.

    Returns:
        The event cost in USD, or ``0.0`` when no pricing entry (not even the
        fallback) could be resolved.
    """
    del reasoning_output_tokens  # already billed inside output_tokens
    pricing, is_fallback = _resolve_codex_pricing(model)
    if pricing is None:
        # Only possible if CODEX_LEGACY_FALLBACK_MODEL itself is missing — treat
        # as $0 to avoid crashing; a programming error we want to notice.
        _warn_unknown_codex_model(model)
        return 0.0
    if is_fallback:
        _warn_unknown_codex_model(model)  # one-shot per unique model name

    def _base_rate(base_key: str) -> float:
        """Return the per-token rate for ``base_key`` (0.0 when unavailable)."""
        rate = pricing.get(base_key)
        if rate is None and base_key == "cache_read_input_token_cost":
            # Documented billing rule: an absent cache-read rate falls back to
            # a quarter of the input rate rather than billing cached tokens $0.
            rate = (pricing.get("input_cost_per_token") or 0.0) / 4
        return rate or 0.0

    def _tiered(tokens: int, base_key: str, tiered_key: str) -> float:
        """Price one token bucket, applying the above-threshold rate if set."""
        if tokens <= 0:
            return 0.0
        base_rate = _base_rate(base_key)
        if not base_rate:
            return 0.0
        tiered_rate = pricing.get(tiered_key)
        if tokens > CODEX_TIERED_THRESHOLD and tiered_rate is not None:
            # Only the tokens beyond the threshold use the tiered rate.
            return (
                CODEX_TIERED_THRESHOLD * base_rate
                + (tokens - CODEX_TIERED_THRESHOLD) * tiered_rate
            )
        return tokens * base_rate

    # Cached tokens are a subset of input_tokens; clamp so inconsistent counts
    # can never produce a negative non-cached amount.
    non_cached_input = max(0, input_tokens - cached_input_tokens)

    input_cost = _tiered(
        non_cached_input,
        "input_cost_per_token",
        "input_cost_per_token_above_272k_tokens",
    )
    cached_input_cost = _tiered(
        cached_input_tokens,
        "cache_read_input_token_cost",
        "cache_read_input_token_cost_above_272k_tokens",
    )
    output_cost = _tiered(
        output_tokens,
        "output_cost_per_token",
        "output_cost_per_token_above_272k_tokens",
    )
    return input_cost + cached_input_cost + output_cost
|
|
509
|
+
|
|
510
|
+
|
|
511
|
+
def _short_model_name(model: str) -> str:
|
|
512
|
+
"""Shorten model name for display: 'claude-opus-4-6' -> 'opus-4-6'."""
|
|
513
|
+
name = model
|
|
514
|
+
# Strip 'claude-' prefix
|
|
515
|
+
if name.startswith("claude-"):
|
|
516
|
+
name = name[len("claude-"):]
|
|
517
|
+
# Strip date suffixes like '-20251001'
|
|
518
|
+
if re.match(r".*-\d{8}$", name):
|
|
519
|
+
name = name[:-9]
|
|
520
|
+
return name
|