cctally 1.22.1 → 1.22.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,182 @@
1
+ """Pure-fn kernel: the ccusage-parity "Pricing Mismatch Debug Report".
2
+
3
+ No I/O at import time; no import of `cctally`. The two cost primitives it
4
+ consumes (`_resolve_model_pricing`, `_calculate_entry_cost`) are honest-
5
+ imported from `_lib_pricing` (same `sys.modules` instance bin/cctally
6
+ re-exports). `UsageEntry` is duck-typed (attribute reads only). bin/cctally
7
+ re-exports every symbol below so internal call sites resolve unchanged.
8
+
9
+ Extracted from bin/cctally (#125 Batch E, C9). Spec:
10
+ docs/superpowers/specs/2026-06-01-extract-pricing-setup-glue-design.md
11
+ Original feature: issue #89 (ccusage detectMismatches/printMismatchReport).
12
+ """
13
+ from __future__ import annotations
14
+
15
+ import json
16
+ import os
17
+ from dataclasses import dataclass, field
18
+
19
+ from _lib_pricing import _resolve_model_pricing, _calculate_entry_cost
20
+
21
+
22
@dataclass
class _MismatchModelStat:
    """Per-model counters accumulated while comparing recorded vs. recomputed cost."""

    # How many compared entries were seen for this model.
    total: int = 0
    # Entries whose percent difference fell under the match threshold.
    matches: int = 0
    # Entries at or above the threshold.
    mismatches: int = 0
    # Running mean of the per-entry percent difference for this model.
    avg_percent_diff: float = 0.0
28
+
29
+
30
@dataclass
class _MismatchSample:
    """A single entry whose recorded cost disagreed with the recomputed cost."""

    # Basename of the entry's source path.
    file: str
    # Result of ``entry.timestamp.isoformat()``.
    timestamp: str
    # Model name taken from the entry.
    model: str
    # Cost recorded on the entry (``cost_usd`` as a float).
    original_cost: float
    # Cost returned by ``_calculate_entry_cost`` in "calculate" mode.
    calculated_cost: float
    # Absolute gap between the two costs.
    difference: float
    # ``difference`` as a percentage of ``original_cost`` (0.0 when that is not > 0).
    percent_diff: float
    # Shallow copy of the entry's usage mapping.
    usage: dict
40
+
41
+
42
@dataclass
class _MismatchStats:
    """Whole-run totals consumed by ``_render_pricing_mismatch_report``."""

    # Optional label; when truthy the report prints "Command: cctally <label>".
    command_label: str | None = None
    # Every non-synthetic entry that was walked.
    total_entries: int = 0
    # Entries that had a recorded cost and a model with resolvable pricing.
    entries_with_both: int = 0
    # Overall match / mismatch counts across all models.
    matches: int = 0
    mismatches: int = 0
    # Model name -> ``_MismatchModelStat``.
    model_stats: dict = field(default_factory=dict)
    # ``_MismatchSample`` objects, appended in iteration order.
    discrepancies: list = field(default_factory=list)
51
+
52
+
53
def _compute_pricing_mismatch_stats(entries):
    """Scan usage entries and tally recorded-vs-recomputed cost agreement.

    ``entries`` is any iterable of ``UsageEntry``-like objects; only the
    attributes ``model``, ``cost_usd``, ``usage``, ``source_path`` and
    ``timestamp`` are read. The returned ``_MismatchStats`` is what
    ``_render_pricing_mismatch_report`` consumes.

    Parity target is ccusage upstream's ``detectMismatches``
    (``~/.npm/_npx/.../node_modules/ccusage/dist/debug-DvI5DUKR.js:6-95``):

    - ``entries_with_both`` counts an entry only when its ``cost_usd`` is
      not None AND ``_resolve_model_pricing`` finds pricing for its model.
    - ``percent_diff < 0.1`` is a match; everything else is a mismatch and
      is appended to ``discrepancies`` in iteration order.
    - ``percent_diff`` is ``0.0`` when the recorded cost is not positive
      (parity with upstream's divide-by-zero guard).
    - Each model's ``avg_percent_diff`` is maintained as a streaming mean,
      matching upstream's per-row accumulation.
    """
    result = _MismatchStats()
    per_model = result.model_stats
    for item in entries:
        model = item.model
        # P1.1 (issue #89 review-loop): same precondition filter as upstream
        # ``detectMismatches`` (debug-DvI5DUKR.js:42). Synthetic entries are
        # left out of total_entries and never reach _resolve_model_pricing,
        # which would otherwise emit a ``[cost] unknown model: <synthetic>``
        # warning and mutate the module-level _unknown_model_warnings set,
        # suppressing future legitimate emissions.
        if model == "<synthetic>":
            continue
        result.total_entries += 1
        if item.cost_usd is None or _resolve_model_pricing(model) is None:
            continue
        result.entries_with_both += 1

        recomputed = _calculate_entry_cost(model, item.usage, mode="calculate")
        recorded = float(item.cost_usd)
        delta = abs(recorded - recomputed)
        pct = (delta / recorded * 100) if recorded > 0 else 0.0

        bucket = per_model.setdefault(model, _MismatchModelStat())
        seen_before = bucket.total
        bucket.total = seen_before + 1
        if pct < 0.1:
            result.matches += 1
            bucket.matches += 1
        else:
            result.mismatches += 1
            bucket.mismatches += 1
            sample = _MismatchSample(
                file=os.path.basename(item.source_path),
                timestamp=item.timestamp.isoformat(),
                model=model,
                original_cost=recorded,
                calculated_cost=recomputed,
                difference=delta,
                percent_diff=pct,
                usage=dict(item.usage),
            )
            result.discrepancies.append(sample)
        # Streaming mean: old mean weighted by the previous count, plus the
        # new observation, over the new count (matches upstream).
        bucket.avg_percent_diff = (
            bucket.avg_percent_diff * seen_before + pct
        ) / bucket.total
    return result
115
+
116
+
117
def _render_pricing_mismatch_report(stats, sample_limit):
    """Format ``stats`` as a list of stderr lines (caller prints \\n-joined).

    Follows ccusage upstream's ``printMismatchReport``
    (debug-DvI5DUKR.js:97-145):
    - When ``entries_with_both == 0`` the only line returned is
      ``"No pricing data found to analyze."``.
    - The Model Statistics section is skipped when ``mismatches == 0``,
      and models with no mismatches are left out of it.
    - The Sample Discrepancies section is skipped when there are no
      discrepancies or ``sample_limit`` is not positive; its header shows
      the requested ``sample_limit`` (not min with discrepancies length).
    One deliberate non-upstream addition: a ``Command: cctally <label>``
    line under the header so the report self-identifies (issue #89
    acceptance re: "command in each sample's context").

    Args:
        stats: object exposing the ``_MismatchStats`` attributes.
        sample_limit: maximum number of discrepancy samples to print.

    Returns:
        The report lines, in print order.
    """
    compared = stats.entries_with_both
    if compared == 0:
        return ["No pricing data found to analyze."]

    overall_rate = stats.matches / compared * 100
    lines = ["", "=== Pricing Mismatch Debug Report ==="]
    if stats.command_label:
        lines.append(f"Command: cctally {stats.command_label}")
    lines += [
        f"Total entries processed: {stats.total_entries:,}",
        f"Entries with both costUSD and model: {compared:,}",
        f"Matches (within 0.1%): {stats.matches:,}",
        f"Mismatches: {stats.mismatches:,}",
        f"Match rate: {overall_rate:.2f}%",
    ]

    if stats.mismatches > 0 and stats.model_stats:
        lines += ["", "=== Model Statistics ==="]
        # Most mismatches first; the sort is stable, so ties keep dict order.
        ranked = sorted(
            stats.model_stats.items(),
            key=lambda pair: pair[1].mismatches,
            reverse=True,
        )
        for name, tally in ranked:
            if tally.mismatches != 0:
                share = tally.matches / tally.total * 100
                lines += [
                    f"{name}:",
                    f" Total entries: {tally.total:,}",
                    f" Matches: {tally.matches:,} ({share:.1f}%)",
                    f" Mismatches: {tally.mismatches:,}",
                    f" Avg % difference: {tally.avg_percent_diff:.1f}%",
                ]

    if stats.discrepancies and sample_limit > 0:
        lines += ["", f"=== Sample Discrepancies (first {sample_limit}) ==="]
        for sample in stats.discrepancies[:sample_limit]:
            lines += [
                f"File: {sample.file}",
                f"Timestamp: {sample.timestamp}",
                f"Model: {sample.model}",
                f"Original cost: ${sample.original_cost:.6f}",
                f"Calculated cost: ${sample.calculated_cost:.6f}",
                f"Difference: ${sample.difference:.6f} ({sample.percent_diff:.2f}%)",
                f"Tokens: {json.dumps(sample.usage)}",
                "---",
            ]
    return lines
@@ -26,8 +26,8 @@ implicit), and `_compute_subscription_weeks` calls
26
26
  Moving both keeps the subscription-week domain self-contained and avoids
27
27
  inventing a call-time back-reference to `_apply_reset_events_to_subweeks`.
28
28
  `_apply_overlap_clamp_to_weekrefs` (operates on `WeekRef`, NOT `SubWeek`)
29
- stays in `bin/cctally` and reaches `_clamp_end_ats_to_next_start` through
30
- the re-export block.
29
+ lives in `bin/_cctally_weekrefs.py` and reaches `_clamp_end_ats_to_next_start`
30
+ through the cctally namespace (the re-export block + its call-time `c.` accessor).
31
31
 
32
32
  `bin/cctally` re-exports every public symbol below so the ~50 internal
33
33
  call sites + SourceFileLoader-based tests (`tests/test_subweek_display_dates`,