codex-meter 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
codex_meter/pricing.py ADDED
@@ -0,0 +1,311 @@
1
+ """Embedded Codex rate card + RateCard resolver (offline-first)."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ from dataclasses import dataclass, replace
7
+ from decimal import Decimal
8
+ from pathlib import Path
9
+
10
+ from codex_meter.models import (
11
+ CostTotals,
12
+ LongContextRule,
13
+ PricingSource,
14
+ Rates,
15
+ Usage,
16
+ decimal_value,
17
+ )
18
+
19
# Input-token count above which the long-context surcharge applies.
LONG_CONTEXT_INPUT_THRESHOLD = 272_000
# Reusable Decimal zero so cost arithmetic never mixes in float literals.
ZERO = Decimal("0")

# Flat-mode fallback prices (per 1M tokens) used when pricing_mode == "flat".
DEFAULT_API_RATES = Rates(input=5.0, cached_input=0.5, output=30.0)
DEFAULT_CREDIT_RATES = Rates(input=125.0, cached_input=12.5, output=750.0)

# Shared long-context rule: past the threshold, input is billed at 2x and
# output at 1.5x (see PRICING_SOURCES for the upstream references).
LONG_CONTEXT_1050K = LongContextRule(
    threshold=LONG_CONTEXT_INPUT_THRESHOLD, input_mult=2.0, output_mult=1.5
)
28
+
29
+
30
@dataclass(frozen=True)
class ModelCard:
    """One Codex model's pricing. Missing rates are intentionally unpriced."""

    # Canonical (normalized) model name, e.g. "gpt-5.5".
    name: str
    # Dollars per 1M tokens on the API meter; None means no known API price.
    api_rates: Rates | None
    # Credits per 1M tokens on the subscription meter; None means unpriced.
    credit_rates: Rates | None
    # Credit multiplier applied when the service tier resolves to "fast".
    fast_multiplier: float = 1.0
    # Optional surcharge rule for requests past the long-context threshold.
    long_context: LongContextRule | None = None
39
+
40
+
41
# Embedded rate card, checked against the sources listed in PRICING_SOURCES.
# A None rates field deliberately marks that meter as unpriced for the model.
MODEL_CARDS: tuple[ModelCard, ...] = (
    ModelCard(
        name="gpt-5.5",
        api_rates=Rates(5.0, 0.5, 30.0),
        credit_rates=Rates(125.0, 12.5, 750.0),
        fast_multiplier=2.5,
        long_context=LONG_CONTEXT_1050K,
    ),
    ModelCard(
        name="gpt-5.4",
        api_rates=Rates(2.5, 0.25, 15.0),
        credit_rates=Rates(62.5, 6.25, 375.0),
        fast_multiplier=2.0,
        long_context=LONG_CONTEXT_1050K,
    ),
    ModelCard(
        name="gpt-5.4-mini",
        api_rates=Rates(0.75, 0.075, 4.5),
        credit_rates=Rates(18.75, 1.875, 113.0),
    ),
    ModelCard(
        name="gpt-5.3-codex",
        api_rates=Rates(1.75, 0.175, 14.0),
        credit_rates=Rates(43.75, 4.375, 350.0),
    ),
    ModelCard(
        name="gpt-5.2",
        api_rates=Rates(1.75, 0.175, 14.0),
        credit_rates=Rates(43.75, 4.375, 350.0),
    ),
    ModelCard(
        name="gpt-5.1-codex-max",
        # API-only pricing: no credit price is published for this model.
        api_rates=Rates(1.25, 0.125, 10.0),
        credit_rates=None,
    ),
)

# Fast lookup from canonical model name to its card.
MODELS_BY_NAME: dict[str, ModelCard] = {card.name: card for card in MODEL_CARDS}
79
+
80
# Provenance for the embedded rate card: each entry records the public page
# the numbers were taken from and the date they were last checked.
PRICING_SOURCES = [
    PricingSource(
        name="OpenAI API pricing",
        url="https://developers.openai.com/api/docs/pricing",
        checked="2026-05-13",
    ),
    PricingSource(
        name="GPT-5.5 model pricing and long-context rule",
        url="https://developers.openai.com/api/docs/models/gpt-5.5",
        checked="2026-05-13",
    ),
    PricingSource(
        name="GPT-5.4 model pricing and long-context rule",
        url="https://developers.openai.com/api/docs/models/gpt-5.4",
        checked="2026-05-13",
    ),
    PricingSource(
        name="Codex rate card",
        url="https://help.openai.com/en/articles/20001106-codex-rate-card",
        checked="2026-05-13",
    ),
    PricingSource(
        name="Codex fast mode multipliers",
        url="https://developers.openai.com/codex/speed",
        checked="2026-05-13",
    ),
    PricingSource(
        name="GPT-5.1-Codex-Max model pricing",
        url="https://developers.openai.com/api/docs/models/gpt-5.1-codex-max",
        checked="2026-05-13",
    ),
]
112
+
113
+
114
def normalize_model(model: str | None) -> str:
    """Map a raw model string (possibly dated or suffixed) to a canonical name."""
    candidate = (model or "").strip().lower()
    if not candidate:
        return ""
    # Alias families that have both dotted and dashed spellings; checked before
    # the generic card lookup. Order matters: "-spark" precedes its prefix.
    alias_map = {
        "gpt-5.4-mini": ("gpt-5.4-mini", "gpt-5-4-mini"),
        "gpt-5.3-codex-spark": ("gpt-5.3-codex-spark", "gpt-5-3-codex-spark"),
        "gpt-5.3-codex": ("gpt-5.3-codex", "gpt-5-3-codex"),
        "gpt-5.1-codex-max": ("gpt-5.1-codex-max", "gpt-5-1-codex-max"),
    }
    for canonical, spellings in alias_map.items():
        exact = candidate in spellings
        dated = any(candidate.startswith(spelling + "-") for spelling in spellings)
        if exact or dated:
            return canonical
    # Fall back to known cards, longest names first so more specific variants
    # (e.g. "-mini") win over their prefixes.
    for known in sorted(MODELS_BY_NAME, key=len, reverse=True):
        if candidate == known or candidate.startswith(known + "-"):
            return known
    # Unknown model: return it normalized so callers can still report it.
    return candidate
131
+
132
+
133
def normalize_service_tier(value: str | None) -> str:
    """Collapse a raw service-tier string to "fast", "standard", or ""."""
    tier = (value or "").strip().lower()
    if tier in ("fast", "priority"):
        return "fast"
    if tier in ("standard", "default", "regular"):
        return "standard"
    # Unknown or empty tier: signal "no tier information".
    return ""
140
+
141
+
142
def _parse_overrides(raw: object, section: str) -> dict[str, Rates]:
    """Parse one section ("api" or "credits") of a local rates-override file.

    Returns a mapping of normalized model name -> Rates. A non-dict top level
    yields no overrides (best-effort); malformed entries raise ValueError so
    RateCard.load surfaces a single, consistent error type to callers.
    """
    if not isinstance(raw, dict):
        return {}
    values = raw.get(section, {})
    if not isinstance(values, dict):
        raise ValueError(f"rates file section {section!r} must be an object")
    parsed: dict[str, Rates] = {}
    for model, item in values.items():
        if not isinstance(item, dict):
            raise ValueError(f"rates for {model!r} must be an object")
        # Previously a missing required key escaped as a bare KeyError with an
        # opaque message; report it as a ValueError like every other
        # malformation in this parser.
        missing = [key for key in ("input", "cached_input", "output") if key not in item]
        if missing:
            raise ValueError(
                f"rates for {model!r} missing required key(s): {', '.join(missing)}"
            )
        # "reasoning_output" is optional; None means "use the default".
        reasoning_raw = item.get("reasoning_output")
        parsed[normalize_model(model)] = Rates(
            input=decimal_value(item["input"]),
            cached_input=decimal_value(item["cached_input"]),
            output=decimal_value(item["output"]),
            reasoning_output=decimal_value(reasoning_raw) if reasoning_raw is not None else None,
        )
    return parsed
160
+
161
+
162
@dataclass(frozen=True)
class RateCard:
    """Per-run rate resolver. Loads any local overrides exactly once."""

    # Per-model Rates overriding the embedded API dollar prices.
    api_overrides: dict[str, Rates]
    # Per-model Rates overriding the embedded credit prices.
    credit_overrides: dict[str, Rates]
    # "flat" prices everything with the DEFAULT_* rates; any other value
    # (typically "model") uses the per-model cards.
    pricing_mode: str

    @classmethod
    def load(cls, path: Path | None, pricing_mode: str = "model") -> RateCard:
        """Build a RateCard, reading JSON overrides from *path* when given.

        Raises ValueError when the file is unreadable or not valid JSON.
        """
        if path is None:
            return cls(api_overrides={}, credit_overrides={}, pricing_mode=pricing_mode)
        try:
            raw = json.loads(path.expanduser().read_text())
        except (OSError, json.JSONDecodeError) as exc:
            raise ValueError(f"Could not read rates file {path}: {exc}") from exc
        return cls(
            api_overrides=_parse_overrides(raw, "api"),
            credit_overrides=_parse_overrides(raw, "credits"),
            pricing_mode=pricing_mode,
        )

    def cost_for(
        self, usage: Usage, model: str, service_tier: str
    ) -> tuple[CostTotals, bool, bool]:
        """Price one usage event.

        Returns (totals, long_context_applied, model_unknown); model_unknown is
        True only when there is no card AND both meters ended up unpriced.
        """
        normalized = normalize_model(model)
        flat = self.pricing_mode == "flat"
        card = MODELS_BY_NAME.get(normalized)
        # Long-context surcharges never apply in flat mode.
        rule = card.long_context if (card and not flat) else None
        long_context = rule is not None and usage.input_tokens > rule.threshold
        input_mult = decimal_value(rule.input_mult if long_context and rule else 1)
        output_mult = decimal_value(rule.output_mult if long_context and rule else 1)
        billable_usage, ambiguous_reasoning = _billable_usage(usage)
        api_rates, api_unpriced, api_local = self._resolve_api_rates(normalized, card, flat)
        credit_rates, credit_unpriced, credit_local = self._resolve_credit_rates(
            normalized, card, flat
        )
        standard_credits = _estimate(billable_usage, credit_rates, input_mult, output_mult)
        adjusted_credits = standard_credits
        # Flat-mode numbers are always estimates by definition.
        estimated = flat
        if service_tier == "fast" and card is not None and credit_rates is not None:
            # Known card on the fast tier: apply its credit multiplier.
            adjusted_credits = standard_credits * decimal_value(card.fast_multiplier)
        elif service_tier == "fast" and credit_rates is not None:
            # Fast tier without a card: no multiplier is known, flag as estimate.
            estimated = True
        return (
            CostTotals(
                api_dollars=_estimate(billable_usage, api_rates, input_mult, output_mult),
                standard_credits=standard_credits,
                adjusted_credits=adjusted_credits,
                api_unpriced_events=int(api_unpriced),
                credit_unpriced_events=int(credit_unpriced),
                estimated_events=int(estimated),
                ambiguous_reasoning_events=int(ambiguous_reasoning),
                local_override_events=int(api_local or credit_local),
            ),
            long_context,
            card is None and api_unpriced and credit_unpriced,
        )

    def cache_savings_for(self, usage: Usage, model: str, service_tier: str) -> CostTotals:
        """Estimate what caching saved: cost(uncached) - cost(actual), floored at 0."""
        if not usage.cached_input_tokens:
            return CostTotals()
        # Re-price the same event as if no input had been cached.
        uncached_usage = replace(usage, cached_input_tokens=0)
        uncached_cost, _, _ = self.cost_for(uncached_usage, model, service_tier)
        actual_cost, _, _ = self.cost_for(usage, model, service_tier)
        return CostTotals(
            api_dollars=max(ZERO, uncached_cost.api_dollars - actual_cost.api_dollars),
            standard_credits=max(
                ZERO, uncached_cost.standard_credits - actual_cost.standard_credits
            ),
            adjusted_credits=max(
                ZERO, uncached_cost.adjusted_credits - actual_cost.adjusted_credits
            ),
            # Event flags are max()ed so a flag raised on either leg survives.
            api_unpriced_events=max(
                uncached_cost.api_unpriced_events, actual_cost.api_unpriced_events
            ),
            credit_unpriced_events=max(
                uncached_cost.credit_unpriced_events, actual_cost.credit_unpriced_events
            ),
            estimated_events=max(uncached_cost.estimated_events, actual_cost.estimated_events),
            ambiguous_reasoning_events=max(
                uncached_cost.ambiguous_reasoning_events,
                actual_cost.ambiguous_reasoning_events,
            ),
            local_override_events=max(
                uncached_cost.local_override_events, actual_cost.local_override_events
            ),
        )

    def _resolve_api_rates(
        self, normalized: str, card: ModelCard | None, flat: bool
    ) -> tuple[Rates | None, bool, bool]:
        """Return (rates, unpriced, local_override) for the API dollar meter.

        Precedence: flat default > local override > embedded card > unpriced.
        """
        if flat:
            return DEFAULT_API_RATES, False, False
        if normalized in self.api_overrides:
            return self.api_overrides[normalized], False, True
        if card and card.api_rates is not None:
            return card.api_rates, False, False
        return None, True, False

    def _resolve_credit_rates(
        self, normalized: str, card: ModelCard | None, flat: bool
    ) -> tuple[Rates | None, bool, bool]:
        """Return (rates, unpriced, local_override) for the credit meter.

        Same precedence as _resolve_api_rates, over the credit tables.
        """
        if flat:
            return DEFAULT_CREDIT_RATES, False, False
        if normalized in self.credit_overrides:
            return self.credit_overrides[normalized], False, True
        if card and card.credit_rates is not None:
            return card.credit_rates, False, False
        return None, True, False
272
+
273
+
274
+ def _billable_usage(usage: Usage) -> tuple[Usage, bool]:
275
+ reasoning = usage.reasoning_output_tokens
276
+ if not reasoning:
277
+ return usage, False
278
+ total_with_reasoning_in_output = usage.input_tokens + usage.output_tokens
279
+ total_with_reasoning_separate = usage.input_tokens + usage.output_tokens + reasoning
280
+ if usage.total_tokens == total_with_reasoning_in_output:
281
+ return replace(usage, reasoning_output_tokens=0), False
282
+ if usage.total_tokens == total_with_reasoning_separate:
283
+ return usage, False
284
+ return replace(usage, reasoning_output_tokens=0), True
285
+
286
+
287
def _estimate(
    usage: Usage, rates: Rates | None, input_mult: Decimal, output_mult: Decimal
) -> Decimal:
    """Price *usage* against per-1M-token *rates*; ZERO when rates are missing."""
    if rates is None:
        return ZERO
    # Long-context multipliers scale the input side and the output side.
    input_part = decimal_value(usage.uncached_input_tokens) * rates.input * input_mult
    cached_part = decimal_value(usage.cached_input_tokens) * rates.cached_input * input_mult
    output_part = decimal_value(usage.output_tokens) * rates.output * output_mult
    reasoning_part = (
        decimal_value(usage.reasoning_output_tokens)
        * rates.effective_reasoning_output
        * output_mult
    )
    total = input_part + cached_part + output_part + reasoning_part
    # Prices are quoted per 1M tokens.
    return total / Decimal("1000000")
300
+
301
+
302
def estimate_event_cost(
    usage: Usage,
    model: str,
    service_tier: str,
    pricing_mode: str,
    rates_file: Path | None = None,
) -> tuple[CostTotals, bool, bool]:
    """Single-event pricing. Builds a fresh RateCard — prefer caching one per run."""
    rate_card = RateCard.load(rates_file, pricing_mode)
    return rate_card.cost_for(usage, model, service_tier)
@@ -0,0 +1,116 @@
1
+ """Prometheus exporter. Opt-in via the `[prom]` extra.
2
+
3
+ Why a separate module? `prometheus_client` is an optional dependency. Importing
4
+ this module triggers the dependency check; the rest of codex-meter stays import-clean.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import http.server
10
+ from collections.abc import Callable
11
+ from dataclasses import dataclass, field
12
+
13
+ try:
14
+ from prometheus_client import CollectorRegistry, Gauge, generate_latest
15
+ except ImportError as exc: # pragma: no cover - import guard tested via integration only
16
+ raise ImportError(
17
+ "prometheus-client is not installed. Install with: pip install 'codex-meter[prom]'"
18
+ ) from exc
19
+
20
+
21
# Prometheus text exposition format 0.0.4 content type for HTTP responses.
PROM_CONTENT_TYPE = "text/plain; version=0.0.4; charset=utf-8"


@dataclass(frozen=True)
class MetricsSnapshot:
    """Point-in-time metric values rendered as one /metrics payload."""

    # Adjusted credits consumed in the current rolling window.
    credits_used: float
    # Burn rate, in credits per hour.
    burn_per_hour: float
    # Rate-limit window usage percentages (0..100).
    primary_window_percent: float
    secondary_window_percent: float
    # Token-count events observed in the current window.
    events_total: int
    # Events whose input exceeded the long-context threshold.
    long_context_events_total: int
    # (model, tier, kind) -> token count; kind is input|cached|output|reasoning.
    tokens_total: dict[tuple[str, str, str], int] = field(default_factory=dict)
33
+
34
+
35
def build_metrics_text(snapshot: MetricsSnapshot) -> bytes:
    """Render a Prometheus text-format payload from a snapshot.

    A fresh CollectorRegistry per call keeps the payload self-contained.
    Registration order is preserved, since generate_latest emits metrics in
    the order they were registered.
    """
    registry = CollectorRegistry()

    def scalar(metric_name: str, help_text: str, value: float) -> None:
        # Register-and-set helper for unlabeled gauges.
        Gauge(metric_name, help_text, registry=registry).set(value)

    scalar(
        "codex_meter_credits_used",
        "Adjusted credits in the current rolling window.",
        snapshot.credits_used,
    )
    scalar(
        "codex_meter_burn_per_hour",
        "Burn rate (credits per hour) for the active window.",
        snapshot.burn_per_hour,
    )

    window_gauge = Gauge(
        "codex_meter_window_used_percent",
        "Rate-limit window used percent (0..100).",
        ["window"],
        registry=registry,
    )
    window_gauge.labels(window="primary").set(snapshot.primary_window_percent)
    window_gauge.labels(window="secondary").set(snapshot.secondary_window_percent)

    token_gauge = Gauge(
        "codex_meter_tokens_total",
        "Token counts by model, tier, and kind (input|cached|output|reasoning).",
        ["model", "tier", "kind"],
        registry=registry,
    )
    for (model, tier, kind), count in snapshot.tokens_total.items():
        token_gauge.labels(model=model, tier=tier, kind=kind).set(count)

    scalar(
        "codex_meter_events_total",
        "Token-count events observed in the current window.",
        snapshot.events_total,
    )
    scalar(
        "codex_meter_long_context_events_total",
        "Long-context events (>=threshold input tokens) observed in the current window.",
        snapshot.long_context_events_total,
    )

    return generate_latest(registry)
85
+
86
+
87
def make_handler(snapshot_fn: Callable[[], MetricsSnapshot]):
    """Build a BaseHTTPRequestHandler subclass that exposes /metrics.

    The snapshot callable is re-invoked on every GET, so each scrape sees
    fresh values.
    """

    class Handler(http.server.BaseHTTPRequestHandler):
        def do_GET(self):  # noqa: N802 — HTTPServer naming convention
            if self.path == "/metrics":
                payload = build_metrics_text(snapshot_fn())
                self.send_response(200)
                self.send_header("Content-Type", PROM_CONTENT_TYPE)
                self.send_header("Content-Length", str(len(payload)))
                self.end_headers()
                self.wfile.write(payload)
            else:
                self.send_error(404, "Only /metrics is exported")

        def log_message(self, fmt: str, *args: object) -> None:
            # Suppress the default per-request stderr logging.
            return

    return Handler
106
+
107
+
108
def serve_forever(host: str, port: int, snapshot_fn: Callable[[], MetricsSnapshot]) -> None:
    """Bind and serve /metrics until interrupted. Default bind is 127.0.0.1."""
    httpd = http.server.ThreadingHTTPServer((host, port), make_handler(snapshot_fn))
    try:
        httpd.serve_forever()
    except KeyboardInterrupt:
        # Ctrl-C is a normal shutdown path, not an error.
        pass
    finally:
        # Always release the listening socket.
        httpd.server_close()
codex_meter/py.typed ADDED
File without changes