cctally 1.21.3 → 1.22.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -130,6 +130,16 @@ class DoctorState:
130
130
  cctally_reachable_on_path: Optional[bool] = None
131
131
  symlinks_path_pinned: bool = False
132
132
  install_is_brew: bool = False
133
+ # Pricing coverage (spec §5.1): the list[CoverageGap] of unpriced (Claude
134
+ # $0) / fallback (Codex gpt-5) models observed in the trailing 30-day
135
+ # window, populated by `doctor_gather_state` via `_pricing_observed_models`
136
+ # + `classify_coverage`. None means the cache could not be read (or the
137
+ # classification raised) — the check degrades to OK ("no cached usage to
138
+ # assess"), consistent with the kernel's degradation posture. Each element
139
+ # is a `_lib_pricing_check.CoverageGap` (provider/model/kind/entry_count/
140
+ # token_total); the kernel only reads `.kind`/`.model`/`.entry_count`/
141
+ # `.token_total`, so any duck-typed equivalent works for tests.
142
+ pricing_coverage: Optional[list] = None
133
143
 
134
144
 
135
145
  @dataclasses.dataclass(frozen=True)
@@ -761,6 +771,67 @@ def _check_data_post_credit_milestones(s: DoctorState) -> CheckResult:
761
771
  )
762
772
 
763
773
 
774
def _check_pricing_coverage(s: DoctorState) -> CheckResult:
    """WARN when recent (30-day) session data contains a model cctally cannot
    price exactly (spec §5.1).

    Two gap kinds are recognised via each gap's ``.kind`` attribute:
      * ``unpriced`` — summarised as "unpriced (Claude $0)".
      * ``fallback`` — summarised as "fallback (Codex gpt-5)".

    Outcomes, by the value of ``s.pricing_coverage``:
      * ``None``  → OK, summary "no cached usage to assess" (coverage could
        not be evaluated, so the check must not claim everything is priced).
      * ``[]``    → OK, summary "all observed models priced".
      * any gap   → WARN (a data-quality signal, deliberately NOT FAIL).

    ``details`` is always a dict with two lists, ``unpriced`` and
    ``fallback``, of ``{model, entry_count, token_total}`` rows so a
    machine consumer can read each gap. A gap whose kind is neither of the
    two still triggers WARN (generic summary) but has no row in ``details``.

    Only ``.kind``, ``.model``, ``.entry_count`` and ``.token_total`` are read
    from each gap, so any duck-typed object works.
    """
    gaps = s.pricing_coverage
    if not gaps:
        # Keep "could not assess" (None) distinguishable from "assessed and
        # clean" (empty list); both are OK but they mean different things.
        ok_summary = (
            "no cached usage to assess" if gaps is None
            else "all observed models priced"
        )
        return CheckResult(
            id="pricing.coverage", title="Coverage",
            severity="ok",
            summary=ok_summary,
            remediation=None,
            details={"unpriced": [], "fallback": []},
        )

    def _rows(kind: str) -> list:
        # Structured rows for every gap of the requested kind, input order kept.
        return [
            {
                "model": g.model,
                "entry_count": g.entry_count,
                "token_total": g.token_total,
            }
            for g in gaps
            if g.kind == kind
        ]

    unpriced = _rows("unpriced")
    fallback = _rows("fallback")

    parts: list[str] = []
    if unpriced:
        parts.append(f"{len(unpriced)} unpriced (Claude $0)")
    if fallback:
        parts.append(f"{len(fallback)} fallback (Codex gpt-5)")
    # Defensive: a gap whose kind is neither (shouldn't happen) still WARNs.
    summary = "; ".join(parts) if parts else f"{len(gaps)} coverage gaps"

    return CheckResult(
        id="pricing.coverage", title="Coverage",
        severity="warn",
        summary=summary,
        remediation=(
            "Run `cctally pricing-check`, then update CLAUDE_MODEL_PRICING / "
            "CODEX_MODEL_PRICING in bin/_lib_pricing.py"
        ),
        details={"unpriced": unpriced, "fallback": fallback},
    )
833
+
834
+
764
835
  _LOOPBACK_HOSTS = frozenset({"loopback", "127.0.0.1", "::1", "localhost"})
765
836
 
766
837
 
@@ -991,6 +1062,9 @@ _CATEGORY_DEFINITIONS: tuple[tuple[str, str, tuple[tuple[str, str], ...]], ...]
991
1062
  ("data.forked_buckets", "_check_data_forked_buckets"),
992
1063
  ("data.post_credit_milestones", "_check_data_post_credit_milestones"),
993
1064
  )),
1065
+ ("pricing", "Pricing", (
1066
+ ("pricing.coverage", "_check_pricing_coverage"),
1067
+ )),
994
1068
  ("safety", "Safety", (
995
1069
  ("safety.dashboard_bind", "_check_safety_dashboard_bind"),
996
1070
  ("safety.config_json_valid", "_check_safety_config_json_valid"),
@@ -46,9 +46,28 @@ def _chip_for_model(name: str) -> str:
46
46
  return "other"
47
47
 
48
48
 
49
+ # Date the embedded pricing snapshots below were last verified against
50
+ # vendor sources. Bump whenever CLAUDE_MODEL_PRICING / CODEX_MODEL_PRICING
51
+ # is synced. Read by `pricing-check` + the release pre-flight staleness nudge.
52
+ PRICING_SNAPSHOT_DATE = "2026-05-04"
53
+ PRICING_STALENESS_DAYS = 60 # release pre-flight WARNs past this age
54
+
55
+ # Canonical machine-readable pricing source (Claude values + Codex values).
56
+ LITELLM_PRICES_URL = (
57
+ "https://raw.githubusercontent.com/BerriAI/litellm/main/"
58
+ "model_prices_and_context_window.json"
59
+ )
60
+
61
+ # Deliberate divergences from LiteLLM the drift check must NOT flag. Each
62
+ # entry suppresses either a specific value mismatch ({"model","field","reason"})
63
+ # or an intentionally-omitted in-scope model ({"model","reason"} — no field).
64
+ # Guarded by `stale_allowlist_entries` (tests/test_pricing_check.py): an entry
65
+ # that no longer corresponds to a real divergence fails the suite.
66
+ PRICING_DRIFT_ALLOWLIST: list[dict] = []
67
+
49
68
  # Anthropic API pricing snapshot:
50
69
  # - Source: https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json
51
- # - Captured: 2026-05-04
70
+ # - Captured: 2026-05-04 (see PRICING_SNAPSHOT_DATE)
52
71
  # - Verified by maintainer against docs.claude.com/en/docs/about-claude/pricing;
53
72
  # update in PRs touching this table.
54
73
  CLAUDE_MODEL_PRICING: dict[str, dict[str, Any]] = {
@@ -246,7 +265,7 @@ _unknown_model_warnings: set[str] = set()
246
265
  #
247
266
  # Codex (OpenAI) API pricing snapshot:
248
267
  # - Source: https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json
249
- # - Captured: 2026-05-04
268
+ # - Captured: 2026-05-04 (see PRICING_SNAPSHOT_DATE)
250
269
  # - Models listed are those observed in ~/.codex/sessions/ at implementation
251
270
  # time plus common Codex/GPT-5 variants. Models absent from this table fall
252
271
  # back to `gpt-5` pricing with isFallback=true (matches upstream's
@@ -411,8 +430,16 @@ def _is_codex_fallback(model: str) -> bool:
411
430
  return model not in CODEX_MODEL_PRICING
412
431
 
413
432
 
414
- def _resolve_model_pricing(model: str) -> dict[str, Any] | None:
415
- """Look up pricing for a model name. Returns None if unknown."""
433
+ def _resolve_model_pricing(model: str, warn: bool = True) -> dict[str, Any] | None:
434
+ """Look up pricing for a model name. Returns None if unknown.
435
+
436
+ `warn=True` (default) emits a one-shot `[cost] unknown model` stderr warning
437
+ on a miss — correct for cost computation. Detection-only callers (e.g. the
438
+ doctor pricing-coverage scan, whose whole job is to find unpriced models)
439
+ pass `warn=False` so they don't fire the cost-engine warning as a side
440
+ effect, and don't poison `_unknown_model_warnings` (which would suppress a
441
+ later genuine cost-path warning for the same model).
442
+ """
416
443
  pricing = CLAUDE_MODEL_PRICING.get(model)
417
444
  if pricing is not None:
418
445
  return pricing
@@ -422,7 +449,7 @@ def _resolve_model_pricing(model: str) -> dict[str, Any] | None:
422
449
  pricing = CLAUDE_MODEL_PRICING.get(stripped)
423
450
  if pricing is not None:
424
451
  return pricing
425
- if model not in _unknown_model_warnings:
452
+ if warn and model not in _unknown_model_warnings:
426
453
  _unknown_model_warnings.add(model)
427
454
  _eprint(f"[cost] unknown model, treating cost as $0: {model}")
428
455
  return None
@@ -0,0 +1,201 @@
1
+ """Pure-fn kernel for the pricing-freshness check (spec 2026-05-29).
2
+
3
+ No I/O, no import of `cctally`/`_lib_pricing` at module scope — every
4
+ dependency (pricing predicates, tables, observed rows, LiteLLM snapshot)
5
+ is passed in by the I/O glue in bin/cctally. Re-exported there like the
6
+ other _lib_* kernels.
7
+ """
8
+ from __future__ import annotations
9
+
10
+ import dataclasses
11
+
12
+
13
@dataclasses.dataclass(frozen=True)
class CoverageGap:
    """One observed model that cannot be priced exactly.

    Immutable record; field order is part of the positional constructor
    contract (provider, model, kind, entry_count, token_total).
    """

    # "claude" | "codex"
    provider: str
    model: str
    # "unpriced" | "fallback"
    kind: str
    entry_count: int
    token_total: int
20
+
21
+
22
@dataclasses.dataclass(frozen=True)
class DriftRow:
    """A single pricing difference between our table and the upstream one.

    Immutable record; positional order is (model, field, ours, theirs).
    """

    model: str
    # "" for whole-model categories
    field: str
    ours: float | None
    theirs: float | None
28
+
29
+
30
@dataclasses.dataclass(frozen=True)
class DriftResult:
    """Outcome of a pricing drift comparison, split by direction.

    Immutable record; positional order is
    (value_drift, missing_from_us, ahead_of_litellm).
    """

    value_drift: list[DriftRow]
    missing_from_us: list[str]
    # informational; never actionable
    ahead_of_litellm: list[str]
35
+
36
+
37
def classify_coverage(observed, resolve_claude, is_codex_fallback) -> list[CoverageGap]:
    """Turn observed usage rows into the list of pricing coverage gaps.

    ``observed`` is an iterable of ``(provider, model, entry_count,
    token_total)`` tuples.

    * provider ``"claude"`` and ``resolve_claude(model) is None``
      -> gap of kind ``"unpriced"``.
    * provider ``"codex"`` and ``is_codex_fallback(model)`` truthy
      -> gap of kind ``"fallback"``.

    Rows for priced models, and rows for any other provider, yield nothing.
    Gaps are returned in the order their rows were observed.
    """

    def _gap_kind(provider, model):
        # Name of the gap this row represents, or None when it is covered.
        if provider == "claude":
            return "unpriced" if resolve_claude(model) is None else None
        if provider == "codex":
            return "fallback" if is_codex_fallback(model) else None
        return None

    found: list[CoverageGap] = []
    for provider, model, entry_count, token_total in observed:
        kind = _gap_kind(provider, model)
        if kind is not None:
            found.append(CoverageGap(provider, model, kind, entry_count, token_total))
    return found
53
+
54
+
55
+ def _is_codex_scope(name: str) -> bool:
56
+ # The Codex models we track are the gpt-5* family (incl. -codex variants).
57
+ # Keep this in sync with CODEX_MODEL_PRICING's key prefixes.
58
+ return name.startswith("gpt-5")
59
+
60
+
61
def scope_litellm(litellm: dict) -> dict[str, dict]:
    """Reduce a full LiteLLM model_prices map to the models we track.

    Kept entries are those whose body is a dict and which are either
    ``litellm_provider == "anthropic"`` with a name starting ``claude-``, or
    ``litellm_provider == "openai"`` with a name accepted by
    ``_is_codex_scope``. Everything else (non-dict bodies, other providers)
    is dropped. Insertion order of the input is preserved.
    """

    def _tracked(name, body) -> bool:
        # Non-dict bodies carry no provider information and are never tracked.
        if not isinstance(body, dict):
            return False
        provider = body.get("litellm_provider")
        if provider == "anthropic":
            return name.startswith("claude-")
        if provider == "openai":
            return _is_codex_scope(name)
        return False

    return {name: body for name, body in litellm.items() if _tracked(name, body)}
75
+
76
+
77
+ _DRIFT_EPS = 1e-12 # cost-per-token values are tiny; compare with a small abs epsilon
78
+
79
+
80
+ def _allow_index(allowlist):
81
+ field_suppress = set() # (model, field)
82
+ model_suppress = set() # model (no field -> suppresses missing_from_us)
83
+ for e in allowlist or []:
84
+ if e.get("field"):
85
+ field_suppress.add((e["model"], e["field"]))
86
+ else:
87
+ model_suppress.add(e["model"])
88
+ return field_suppress, model_suppress
89
+
90
+
91
def diff_pricing(claude_tbl, codex_tbl, litellm_scoped, allowlist=None) -> DriftResult:
    """Compare our embedded pricing tables against the scoped LiteLLM snapshot.

    The result has three direction-aware lists:

    value_drift      — a model present on both sides where a cost field
                       differs by more than _DRIFT_EPS (actionable, unless
                       allowlisted by model+field).
    missing_from_us  — a scoped LiteLLM model absent from our tables
                       (actionable, unless allowlisted by model with no field).
    ahead_of_litellm — a model we price that the scoped snapshot lacks
                       (informational; NEVER actionable).

    Value comparison is one-directional: only fields LiteLLM carries are
    examined, so a cost field we have but upstream lacks is never compared.
    """
    field_suppress, model_suppress = _allow_index(allowlist)
    ours = {**claude_tbl, **codex_tbl}

    def _is_price(value) -> bool:
        # bool is an int subclass — exclude it so a non-numeric "cost" flag
        # can never be read as a 0/1 price.
        return not isinstance(value, bool) and isinstance(value, (int, float))

    drifted: list[DriftRow] = []
    absent: list[str] = []

    for model, upstream in litellm_scoped.items():
        if model not in ours:
            if model not in model_suppress:
                absent.append(model)
            continue

        local = ours[model]
        for field, theirs in upstream.items():
            # Broad cost-field filter; the `mine is None` guard below is what
            # keeps it safe (skips any upstream cost field we don't carry), so
            # don't remove that guard thinking this filter is precise.
            if not (field.endswith("_cost_per_token") or "cost" in field):
                continue
            if not _is_price(theirs):
                continue
            if (model, field) in field_suppress:
                continue
            mine = local.get(field)
            if mine is None:
                continue  # we don't carry this field; not a value-drift signal
            our_price, their_price = float(mine), float(theirs)
            if abs(our_price - their_price) > _DRIFT_EPS:
                drifted.append(DriftRow(model, field, our_price, their_price))

    leading = [model for model in ours if model not in litellm_scoped]

    return DriftResult(value_drift=drifted, missing_from_us=absent, ahead_of_litellm=leading)
141
+
142
+
143
def stale_allowlist_entries(allowlist, claude_tbl, codex_tbl, litellm_scoped) -> list:
    """Return allowlist entries that NO LONGER correspond to a real divergence.

    An entry is stale if, with it removed, diff_pricing would report nothing
    it could have suppressed. The test therefore mirrors diff_pricing's own
    filters:

    * Field entry (truthy ``field``): real only when the field name contains
      "cost", the upstream value is a real number (bools excluded), we carry
      the field, and the two values differ by more than _DRIFT_EPS.
    * Model entry (no ``field``): real only when the scoped LiteLLM snapshot
      has the model and our tables do not.

    A non-numeric or bool upstream value, or a non-cost field, can never
    produce a value_drift row, so such an entry is reported as stale rather
    than crashing in ``float()`` or being counted as a live divergence.

    ``None`` or an empty allowlist returns ``[]``. Entries are returned in
    allowlist order, as the original dict objects.
    """
    ours = {**claude_tbl, **codex_tbl}
    stale: list = []
    for e in allowlist or []:
        model = e["model"]
        field = e.get("field")
        if field:
            theirs = (litellm_scoped.get(model) or {}).get(field)
            mine = (ours.get(model) or {}).get(field)
            # Ordered so the numeric comparison runs only once every cheaper
            # precondition holds (isinstance also rejects a missing/None value).
            real = (
                "cost" in field
                and isinstance(theirs, (int, float))
                and not isinstance(theirs, bool)
                and mine is not None
                and abs(float(mine) - float(theirs)) > _DRIFT_EPS
            )
        else:
            # model-suppress entry: real only if litellm has it AND we don't
            real = model in litellm_scoped and model not in ours
        if not real:
            stale.append(e)
    return stale
163
+
164
+
165
_CLAUDE_REQUIRED = ("input_cost_per_token", "output_cost_per_token",
                    "cache_creation_input_token_cost", "cache_read_input_token_cost")
_CODEX_REQUIRED = ("input_cost_per_token", "cache_read_input_token_cost",
                   "output_cost_per_token")


def check_table_shapes(claude_tbl, codex_tbl, zero_sentinels) -> list:
    """Provider-specific well-formedness check for the pricing tables.

    Claude entries need the 4 fields in _CLAUDE_REQUIRED; Codex entries need
    the 3 fields in _CODEX_REQUIRED (NO cache_creation). Any key containing
    "cost" is treated as a cost field and must hold a non-negative real
    number — bools, NaN, None and strings are all rejected. An entry whose
    cost fields are all zero is allowed ONLY for a Codex model listed in
    `zero_sentinels`.

    Returns a list of human-readable problem strings (empty when the tables
    are well-formed). Per model the order is: missing required fields, then
    invalid cost values, then the all-zero complaint. Malformed values are
    reported, never raised.
    """
    problems: list = []

    def _is_cost_value(v) -> bool:
        # bool is an int subclass and must not pass as a price; `v >= 0`
        # is False for NaN, so NaN is rejected without a math import.
        return isinstance(v, (int, float)) and not isinstance(v, bool) and v >= 0

    def _check(model, body, required, allow_zero):
        problems.extend(
            f"{model}: missing required field {f}" for f in required if f not in body
        )
        cost_fields = {k: v for k, v in body.items() if "cost" in k}
        all_valid = True
        for k, v in cost_fields.items():
            if not _is_cost_value(v):
                all_valid = False
                problems.append(f"{model}: field {k} not a non-negative number ({v!r})")
        # Only judge "all zero" once every value is known to be a real number;
        # otherwise the comparison would crash on the malformed value that
        # was just reported above.
        if (cost_fields and all_valid and not allow_zero
                and all(v == 0 for v in cost_fields.values())):
            problems.append(f"{model}: all cost fields zero but not a documented sentinel")

    for model, body in claude_tbl.items():
        _check(model, body, _CLAUDE_REQUIRED, allow_zero=False)
    for model, body in codex_tbl.items():
        _check(model, body, _CODEX_REQUIRED, allow_zero=model in zero_sentinels)
    return problems
195
+
196
+
197
def pricing_issue_action(drift_present: bool, existing_open: bool) -> str:
    """Decide the cron's GitHub-issue action. Pure; the YAML executes it.

    Returns one of "create", "update", "close" or "noop" depending on whether
    drift exists and whether an issue is already open.
    """
    # Keyed by (drift present?, issue already open?).
    actions = {
        (True, True): "update",
        (True, False): "create",
        (False, True): "close",
        (False, False): "noop",
    }
    return actions[(bool(drift_present), bool(existing_open))]