cctally 1.21.3 → 1.22.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,201 @@
1
+ """Pure-fn kernel for the pricing-freshness check (spec 2026-05-29).
2
+
3
+ No I/O, no import of `cctally`/`_lib_pricing` at module scope — every
4
+ dependency (pricing predicates, tables, observed rows, LiteLLM snapshot)
5
+ is passed in by the I/O glue in bin/cctally. Re-exported there like the
6
+ other _lib_* kernels.
7
+ """
8
+ from __future__ import annotations
9
+
10
+ import dataclasses
11
+
12
+
13
@dataclasses.dataclass(frozen=True)
class CoverageGap:
    """One observed model that the pricing tables do not price directly.

    Instances are immutable. `classify_coverage` builds them from observed
    `(provider, model, entry_count, token_total)` rows.
    """

    # Which side the model was observed on: "claude" | "codex".
    provider: str
    # Model name, exactly as it appeared in the observed row.
    model: str
    # Why it is a gap: "unpriced" | "fallback".
    kind: str
    # Entry count carried through unchanged from the observed row.
    entry_count: int
    # Token total carried through unchanged from the observed row.
    token_total: int
20
+
21
+
22
@dataclasses.dataclass(frozen=True)
class DriftRow:
    """A single price disagreement between our table and the upstream one.

    Instances are immutable.
    """

    # Model name the disagreement belongs to.
    model: str
    # Cost field that differs; "" for whole-model categories.
    field: str
    # The value from our embedded table (None when not applicable).
    ours: float | None
    # The value from the upstream snapshot (None when not applicable).
    theirs: float | None
28
+
29
+
30
@dataclasses.dataclass(frozen=True)
class DriftResult:
    """Outcome of comparing our pricing tables with the upstream snapshot.

    The attributes cannot be rebound (frozen), but the lists themselves are
    ordinary mutable lists.
    """

    # Per-field price disagreements on models both sides know about.
    value_drift: list[DriftRow]
    # Upstream model names that are absent from our tables.
    missing_from_us: list[str]
    # Model names we price that upstream lacks.
    # Informational; never actionable.
    ahead_of_litellm: list[str]
35
+
36
+
37
def classify_coverage(observed, resolve_claude, is_codex_fallback) -> list[CoverageGap]:
    """Turn observed usage rows into the list of pricing coverage gaps.

    observed: iterable of (provider, model, entry_count, token_total).
    resolve_claude: callable(model); a None result means the Claude model
        has no price, producing a gap with kind="unpriced".
    is_codex_fallback: callable(model); a truthy result means the Codex
        model is priced via fallback, producing a gap with kind="fallback".

    Priced models and rows from any other provider produce no gap. The
    output keeps the order of `observed`.
    """
    found: list[CoverageGap] = []
    for row in observed:
        provider, model, entry_count, token_total = row
        if provider == "claude":
            if resolve_claude(model) is not None:
                continue  # priced directly: nothing to report
            gap = CoverageGap("claude", model, "unpriced", entry_count, token_total)
        elif provider == "codex":
            if not is_codex_fallback(model):
                continue  # priced directly: nothing to report
            gap = CoverageGap("codex", model, "fallback", entry_count, token_total)
        else:
            continue  # unknown providers are ignored
        found.append(gap)
    return found
53
+
54
+
55
def _is_codex_scope(name: str) -> bool:
    """Return True when *name* belongs to the Codex family we track.

    The tracked Codex models are the gpt-5* family (including -codex
    variants). Keep the prefix in sync with CODEX_MODEL_PRICING's key
    prefixes.
    """
    tracked_prefix = "gpt-5"
    return name.startswith(tracked_prefix)
59
+
60
+
61
def scope_litellm(litellm: dict) -> dict[str, dict]:
    """Reduce a full LiteLLM model_prices map to the models we track.

    Kept entries are:
      * provider "anthropic" with a name starting with "claude-", and
      * provider "openai" with a name accepted by `_is_codex_scope`
        (the gpt-5* Codex family).

    Entries whose body is not a dict are dropped, as is anything else that
    does not match (which covers the `sample_spec` doc entry). Bodies are
    returned as-is (not copied), in the input's iteration order.
    """

    def _tracked(name, body) -> bool:
        # Decide whether a single (name, body) pair is in scope.
        if not isinstance(body, dict):
            return False
        provider = body.get("litellm_provider")
        if provider == "anthropic":
            return name.startswith("claude-")
        if provider == "openai":
            return _is_codex_scope(name)
        return False

    return {name: body for name, body in litellm.items() if _tracked(name, body)}
75
+
76
+
77
# Absolute tolerance for comparing two cost-per-token values. Those values are
# tiny, so the comparison uses a small absolute epsilon.
_DRIFT_EPS = 1e-12
78
+
79
+
80
def _allow_index(allowlist):
    """Index allowlist entries for fast membership tests.

    allowlist: iterable of dicts with a "model" key and an optional "field"
        key, or None / empty for "no allowlist".

    Returns a `(field_suppress, model_suppress)` pair of sets:
      * field_suppress holds `(model, field)` tuples for entries with a
        truthy "field";
      * model_suppress holds the model names of entries with no (or an
        empty) "field" — those suppress missing_from_us reports.
    """
    by_field = set()
    by_model = set()
    for entry in allowlist or ():
        model = entry["model"]
        field = entry.get("field")
        if field:
            by_field.add((model, field))
        else:
            by_model.add(model)
    return by_field, by_model
89
+
90
+
91
def diff_pricing(claude_tbl, codex_tbl, litellm_scoped, allowlist=None) -> DriftResult:
    """Direction-aware drift between our embedded tables and the scoped LiteLLM
    snapshot.

    claude_tbl / codex_tbl: our pricing tables, `{model: {field: value}}`;
        they are merged, with codex_tbl winning on a duplicate model key.
    litellm_scoped: the upstream snapshot, already filtered to tracked models.
    allowlist: optional iterable of `{"model": ..., "field": ...}` entries.

    Result categories:
      value_drift      — shared model, a cost field differs beyond _DRIFT_EPS
                         (actionable, unless allowlisted by model+field).
      missing_from_us  — scoped LiteLLM model absent from our tables
                         (actionable, unless allowlisted by model with no
                         field).
      ahead_of_litellm — model we price that scoped LiteLLM lacks
                         (informational; NEVER actionable — we may
                         legitimately lead the source).

    Value-drift is one-directional: only fields LiteLLM carries are compared,
    so a cost field present in our table but absent upstream is not
    value-compared (ahead_of_litellm reports at model granularity only). That
    matches the feature's intent — catch vendor price moves on fields we
    track.
    """
    field_suppress, model_suppress = _allow_index(allowlist)
    ours = {**claude_tbl, **codex_tbl}
    value_drift: list[DriftRow] = []
    missing: list[str] = []

    for model, upstream in litellm_scoped.items():
        if model not in ours:
            # Upstream knows a model we do not price.
            if model not in model_suppress:
                missing.append(model)
            continue

        our_body = ours[model]
        for field, theirs in upstream.items():
            # Deliberately broad cost-field filter. The `mine is None` guard
            # further down is what makes it safe (it skips upstream cost
            # fields we do not carry), so that guard must stay.
            if not (field.endswith("_cost_per_token") or "cost" in field):
                continue
            # bool is an int subclass — reject it so a non-numeric "cost"
            # flag can never be read as a 0/1 price.
            if isinstance(theirs, bool) or not isinstance(theirs, (int, float)):
                continue
            if (model, field) in field_suppress:
                continue
            mine = our_body.get(field)
            if mine is None:
                continue  # we don't carry this field; not a value-drift signal
            if abs(float(mine) - float(theirs)) > _DRIFT_EPS:
                value_drift.append(DriftRow(model, field, float(mine), float(theirs)))

    ahead = [model for model in ours if model not in litellm_scoped]

    return DriftResult(value_drift=value_drift, missing_from_us=missing, ahead_of_litellm=ahead)
141
+
142
+
143
def stale_allowlist_entries(allowlist, claude_tbl, codex_tbl, litellm_scoped) -> list:
    """Return allowlist entries that NO LONGER correspond to a real divergence.

    An entry is stale if, with it removed, diff_pricing reports nothing it would
    have suppressed (i.e. the value now matches / the model is now present).

    allowlist: iterable of `{"model": ..., "field": ...}` dicts ("field" is
        optional), or None / empty.
    claude_tbl / codex_tbl: our pricing tables, `{model: {field: value}}`.
    litellm_scoped: the scoped upstream snapshot, `{model: {field: value}}`.

    Returns the stale entries (the same dict objects), in allowlist order.

    The per-field test applies the same filters diff_pricing uses before it
    compares a value: the field name must contain "cost", and the upstream
    value must be an int/float that is not a bool. An entry whose upstream
    value fails those filters can never suppress anything, so it is reported
    as stale rather than being fed to float() (which would raise on a string
    or treat True/False as a 1/0 price).
    """
    ours = {**claude_tbl, **codex_tbl}
    stale: list = []
    for entry in allowlist or []:
        model = entry["model"]
        field = entry.get("field")
        if field:
            theirs = (litellm_scoped.get(model) or {}).get(field)
            mine = (ours.get(model) or {}).get(field)
            # Would diff_pricing even compare this (model, field) pair?
            comparable = (
                "cost" in field
                and isinstance(theirs, (int, float))
                and not isinstance(theirs, bool)
                and mine is not None
            )
            real = comparable and abs(float(mine) - float(theirs)) > _DRIFT_EPS
        else:
            # model-suppress entry: real only if litellm has it AND we don't
            real = model in litellm_scoped and model not in ours
        if not real:
            stale.append(entry)
    return stale
163
+
164
+
165
# Cost fields every Claude pricing entry must define.
_CLAUDE_REQUIRED = ("input_cost_per_token", "output_cost_per_token",
                    "cache_creation_input_token_cost", "cache_read_input_token_cost")
# Cost fields every Codex pricing entry must define (cache_creation is not required).
_CODEX_REQUIRED = ("input_cost_per_token", "cache_read_input_token_cost",
                   "output_cost_per_token")


def check_table_shapes(claude_tbl, codex_tbl, zero_sentinels) -> list:
    """Provider-specific well-formedness check of the pricing tables.

    Claude entries need the 4 required fields; Codex entries need the 3 base
    fields (cache_creation is not required) and may carry additional cost
    fields such as the optional *_above_272k_tokens tiers. Every present
    field whose name contains "cost" must be a real number >= 0 (bools and
    NaN are rejected). An all-zero entry is allowed ONLY for a Codex model
    listed in `zero_sentinels` (e.g. gpt-5.3-codex-spark mirroring upstream
    $0); Claude entries are never allowed to be all-zero.

    claude_tbl / codex_tbl: `{model: {field: value}}` tables.
    zero_sentinels: container of Codex model names permitted to be all-zero.

    Returns a list of human-readable problem strings; empty when both tables
    are well-formed. Malformed values are reported, never raised on.
    """
    problems: list = []

    def _is_price(value) -> bool:
        # bool is an int subclass, so exclude it explicitly; `value >= 0` is
        # False for NaN, which rejects it without needing math.isnan.
        return (isinstance(value, (int, float))
                and not isinstance(value, bool)
                and value >= 0)

    def _check(model, body, required, allow_zero):
        for f in required:
            if f not in body:
                problems.append(f"{model}: missing required field {f}")
        cost_fields = {k: v for k, v in body.items() if "cost" in k}
        all_valid = True
        for k, v in cost_fields.items():
            if not _is_price(v):
                all_valid = False
                problems.append(f"{model}: field {k} not a non-negative number ({v!r})")
        # Only judge "all zero" when every value is a valid number: malformed
        # values were already reported above and must not reach a numeric
        # comparison (float(None) / float("x") would raise).
        if (cost_fields and all_valid and not allow_zero
                and all(v == 0 for v in cost_fields.values())):
            problems.append(f"{model}: all cost fields zero but not a documented sentinel")

    for model, body in claude_tbl.items():
        _check(model, body, _CLAUDE_REQUIRED, allow_zero=False)
    for model, body in codex_tbl.items():
        _check(model, body, _CODEX_REQUIRED, allow_zero=model in zero_sentinels)
    return problems
195
+
196
+
197
def pricing_issue_action(drift_present: bool, existing_open: bool) -> str:
    """Decide the cron's GitHub-issue action. Pure; the YAML executes it.

    drift_present: whether the current run found drift.
    existing_open: whether an issue is already open.

    Returns one of "create", "update", "close" or "noop".
    """
    # (drift_present, existing_open) -> action
    actions = {
        (True, True): "update",
        (True, False): "create",
        (False, True): "close",
        (False, False): "noop",
    }
    return actions[bool(drift_present), bool(existing_open)]