cctally 1.6.3 → 1.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1618 @@
1
+ """Diff kernel for the ``cctally diff`` subcommand.
2
+
3
+ Pure-fn layer (no I/O at import time): holds every window parser,
4
+ aggregator, builder, cell formatter, table/JSON renderer, and anchor
5
+ resolver used by ``cmd_diff`` (which itself stays in ``bin/cctally``
6
+ as the CLI ingress). One contiguous source region collapses into this
7
+ sibling (was bin/cctally L8379-L9825, ~1,447 LOC).
8
+
9
+ Symbol inventory:
10
+
11
+ * Dataclasses / exceptions: ``ParsedWindow``, ``WindowMismatchError``,
12
+ ``NoAnchorError``, ``MetricBundle``, ``DeltaBundle``, ``ColumnSpec``,
13
+ ``DiffRow``, ``DiffSection``, ``NoiseThreshold``, ``DiffResult``.
14
+ * Window parsing: ``_parse_diff_window`` plus the five
15
+ ``_DIFF_*_RE`` regex constants.
16
+ * Numeric helper: ``_humanize_tokens``.
17
+ * Aggregators: ``_diff_iter_claude_entries``,
18
+ ``_diff_aggregate_overall``, ``_diff_aggregate_models``,
19
+ ``_diff_aggregate_projects``, ``_diff_aggregate_cache``,
20
+ ``_diff_resolve_used_pct``.
21
+ * Default column tables: ``_DIFF_DEFAULT_COLUMNS_{OVERALL,MODELS,PROJECTS,CACHE}``.
22
+ * Section builders: ``_diff_sort_rows``, ``_apply_noise_threshold``,
23
+ ``_diff_build_section``, ``_normalize_metric_bundle_per_day``,
24
+ ``_sum_metric_bundles``, ``_build_diff_result``,
25
+ ``_check_diff_invariants``.
26
+ * Cell formatters: ``_DIFF_EM_DASH``, ``_diff_or_emdash``,
27
+ ``_diff_fmt_cost_cell``, ``_diff_fmt_delta_cost_cell``,
28
+ ``_diff_fmt_pct_cell``, ``_diff_fmt_pp_cell``,
29
+ ``_diff_fmt_tokens_cell``, ``_diff_fmt_delta_tokens_cell``,
30
+ ``_diff_color_for_delta``.
31
+ * Renderers: ``_diff_render_banner``, ``_diff_render_window_header``,
32
+ ``_diff_box_chars``, ``_diff_section_heading``,
33
+ ``_diff_render_section_table``, ``_diff_render_full_output``.
34
+ * JSON shapers: ``_diff_metric_to_json``, ``_diff_delta_to_json``,
35
+ ``_diff_window_to_json``, ``_diff_to_json_payload``,
36
+ ``_diff_render_json``.
37
+ * Anchor resolution: ``_diff_resolve_anchor``.
38
+
39
+ Sibling dependencies (loaded at module-load time via ``_load_lib``):
40
+
41
+ * ``_lib_pricing`` — ``_calculate_entry_cost`` (cost computation used
42
+ by every aggregator).
43
+ * ``_lib_display_tz`` — ``_resolve_tz`` (IANA tz resolution for
44
+ month/range parsers) and ``format_display_dt`` (date labels in the
45
+ diff window header).
46
+
47
+ ``bin/cctally`` back-references via module-level callable shims
48
+ (spec §5.5; same precedent as ``bin/_lib_render.py``'s 7 shims and
49
+ ``bin/_cctally_record.py``'s 34 shims):
50
+
51
+ * ``get_claude_session_entries`` — JSONL/cache reader shared with
52
+ every JSONL-walking subcommand.
53
+ * ``_resolve_project_key`` — git-root resolver used by the projects
54
+ aggregator.
55
+ * ``open_db`` — sqlite3 connection helper for the stats DB.
56
+ * ``_iso_z`` — ISO-8601 ``Z`` suffix formatter.
57
+ * ``_supports_unicode_stdout`` / ``_style_ansi`` — terminal-capability
58
+ primitives used by the banner / window-header / section-table /
59
+ box-char renderers.
60
+ * ``_command_as_of`` — ``CCTALLY_AS_OF`` env hook for deterministic
61
+ ``generated_at`` in JSON output (fixture testing).
62
+ * ``_canonicalize_optional_iso`` — ISO-canonicalizer used to look up
63
+ ``week_reset_events`` rows in the anchor resolver.
64
+ * ``parse_iso_datetime`` — strict ISO-8601 parser used to decode
65
+ ``effective_reset_at_utc`` from the same.
66
+
67
+ Each shim resolves ``sys.modules['cctally'].X`` at CALL TIME (not
68
+ bind time), so monkeypatches on cctally's namespace propagate into
69
+ the moved code unchanged.
70
+
71
+ ``bin/cctally`` eager-re-exports every public symbol below so the
72
+ ~7 internal ``cmd_diff`` call sites + the extensive SourceFileLoader
73
+ test surface (``tests/test_diff_*.py``: ``ns["ParsedWindow"]``,
74
+ ``ns["MetricBundle"]``, ``ns["_build_diff_result"]``, etc.) resolve
75
+ unchanged. Eager pattern is mandatory per spec §4.8 carve-out — PEP
76
+ 562 ``__getattr__`` does NOT fire for ``mod.__dict__["X"]`` dict
77
+ access, which is how every ``test_diff_*.py`` reaches in.
78
+
79
+ Spec: docs/superpowers/specs/2026-05-13-bin-cctally-split-design.md
80
+ """
81
+ from __future__ import annotations
82
+
83
+ import dataclasses
84
+ import datetime as dt
85
+ import json
86
+ import pathlib
87
+ import re
88
+ import sqlite3
89
+ import sys
90
+ from dataclasses import dataclass, field
91
+
92
+
93
+ def _cctally():
94
+ """Resolve the current ``cctally`` module at call-time (spec §5.5)."""
95
+ return sys.modules["cctally"]
96
+
97
+
98
+ def _load_lib(name: str):
99
+ cached = sys.modules.get(name)
100
+ if cached is not None:
101
+ return cached
102
+ import importlib.util as _ilu
103
+ p = pathlib.Path(__file__).resolve().parent / f"{name}.py"
104
+ spec = _ilu.spec_from_file_location(name, p)
105
+ mod = _ilu.module_from_spec(spec)
106
+ sys.modules[name] = mod
107
+ spec.loader.exec_module(mod)
108
+ return mod
109
+
110
+
111
# Sibling: pricing. Every aggregator below prices entries through
# ``_calculate_entry_cost``.
_lib_pricing = _load_lib("_lib_pricing")
_calculate_entry_cost = _lib_pricing._calculate_entry_cost

# Sibling: display timezone helpers (tz resolution for the month/range
# parsers, date labels for the window header).
_lib_display_tz = _load_lib("_lib_display_tz")
_resolve_tz = _lib_display_tz._resolve_tz
format_display_dt = _lib_display_tz.format_display_dt
117
+
118
+
119
+ # Module-level back-ref shims. Each shim resolves
120
+ # ``sys.modules['cctally'].X`` at CALL TIME (not bind time), so
121
+ # monkeypatches on cctally's namespace propagate into the moved code
122
+ # unchanged. Mirrors the precedent established in
123
+ # ``bin/_lib_render.py`` / ``bin/_cctally_record.py``.
124
def get_claude_session_entries(*args, **kwargs):
    """Forward to ``cctally.get_claude_session_entries``, resolved at call time."""
    target = sys.modules["cctally"].get_claude_session_entries
    return target(*args, **kwargs)
126
+
127
+
128
+ def _resolve_project_key(*args, **kwargs):
129
+ return sys.modules["cctally"]._resolve_project_key(*args, **kwargs)
130
+
131
+
132
def open_db(*args, **kwargs):
    """Forward to ``cctally.open_db``, resolved at call time."""
    target = sys.modules["cctally"].open_db
    return target(*args, **kwargs)
134
+
135
+
136
+ def _iso_z(*args, **kwargs):
137
+ return sys.modules["cctally"]._iso_z(*args, **kwargs)
138
+
139
+
140
+ def _supports_unicode_stdout(*args, **kwargs):
141
+ return sys.modules["cctally"]._supports_unicode_stdout(*args, **kwargs)
142
+
143
+
144
+ def _style_ansi(*args, **kwargs):
145
+ return sys.modules["cctally"]._style_ansi(*args, **kwargs)
146
+
147
+
148
+ def _command_as_of(*args, **kwargs):
149
+ return sys.modules["cctally"]._command_as_of(*args, **kwargs)
150
+
151
+
152
+ def _canonicalize_optional_iso(*args, **kwargs):
153
+ return sys.modules["cctally"]._canonicalize_optional_iso(*args, **kwargs)
154
+
155
+
156
def parse_iso_datetime(*args, **kwargs):
    """Forward to ``cctally.parse_iso_datetime``, resolved at call time."""
    target = sys.modules["cctally"].parse_iso_datetime
    return target(*args, **kwargs)
158
+
159
+
160
+ # Private eprint shim per spec §5.3 (pure layer does not back-import
161
+ # cctally for ubiquitous helpers; eprint isn't actually called by the
162
+ # moved code, but kept here as the canonical pure-layer pattern so
163
+ # follow-up edits that need stderr have it ready).
164
+ def _eprint(*args):
165
+ print(*args, file=sys.stderr)
166
+
167
+
168
+ # Optional dependency: zoneinfo.ZoneInfo is referenced only as a
169
+ # string annotation in moved code; no runtime import needed.
170
+
171
+
172
@dataclass(frozen=True)
class ParsedWindow:
    """Immutable description of one resolved diff window (spec §2).

    The time span is half-open: ``start_utc`` is included, ``end_utc``
    is not.
    """
    # Token the window was resolved from (e.g. "last-week").
    label: str
    # Inclusive start of the window.
    start_utc: dt.datetime
    # Exclusive end of the window (half-open interval).
    end_utc: dt.datetime
    # Window length in days; may be fractional.
    length_days: float
    # One of "week" | "month" | "day-range" | "explicit-range".
    kind: str
    # True when the window ends on a subscription-week boundary.
    week_aligned: bool
    # Number of full weeks the window is treated as covering.
    full_weeks_count: int
182
+
183
+
184
class WindowMismatchError(ValueError):
    """Raised when the two diff windows differ in length and
    ``--allow-mismatch`` was not given."""
186
+
187
+
188
class NoAnchorError(RuntimeError):
    """Raised when a subscription-week token cannot be resolved because
    no week anchor is available."""
190
+
191
+
192
@dataclass(frozen=True)
class MetricBundle:
    """Metric values of a single row for a single window (spec §4).

    Every field except ``cost_usd`` may be ``None`` when the metric is
    not available for the row.
    """
    # Total cost in USD.
    cost_usd: float
    # Token totals, split by category.
    tokens_input: "int | None"
    tokens_output: "int | None"
    tokens_cache_read: "int | None"
    tokens_cache_write: "int | None"
    # Cache hit rate in percent.
    cache_hit_pct: "float | None"
    # Subscription "Used %" for the window.
    used_pct: "float | None"
202
+
203
+
204
@dataclass(frozen=True)
class DeltaBundle:
    """Differences (window b minus window a) for one row (spec §4).

    Field families:

    * ``<metric>``      — absolute difference.
    * ``<metric>_pct``  — relative change in percent.
    * ``<metric>_pp``   — percentage-point difference of a ratio metric.

    Asymmetric rows: when a row exists on one side only (new/dropped),
    the absolute delta is the full b-side value, or the negated a-side
    value, and the matching ``_pct`` field is ``None`` because a relative
    change against zero or a missing baseline is undefined.
    """
    cost_usd: "float | None"
    cost_usd_pct: "float | None"
    tokens_input: "int | None"
    tokens_input_pct: "float | None"
    tokens_output: "int | None"
    tokens_output_pct: "float | None"
    tokens_cache_read: "int | None"
    tokens_cache_read_pct: "float | None"
    tokens_cache_write: "int | None"
    tokens_cache_write_pct: "float | None"
    cache_hit_pct_pp: "float | None"
    used_pct_pp: "float | None"
225
+
226
+
227
def _build_delta_bundle(
    a: "MetricBundle | None", b: "MetricBundle | None"
) -> DeltaBundle:
    """Return the ``b - a`` delta for every metric (spec §4).

    Asymmetric-row rules:

    * a-side missing  -> absolute delta is the full b-side value.
    * b-side missing  -> absolute delta is the negated a-side value.
    * percent column  -> ``None`` whenever either side is missing or the
      a-side value is zero.
    * percentage-point columns -> ``None`` unless both sides have a value.
    """

    def _side_value(bundle, attr):
        # A missing bundle contributes None for every metric.
        return getattr(bundle, attr) if bundle else None

    def _abs_and_pct(attr):
        before = _side_value(a, attr)
        after = _side_value(b, attr)
        if before is None:
            # Both sides missing yields (None, None); a-side missing
            # yields the full b-side value.
            return after, None
        if after is None:
            return -before, None
        change = after - before
        if before == 0:
            return change, None
        return change, (change / before) * 100.0

    def _points(attr):
        before = _side_value(a, attr)
        after = _side_value(b, attr)
        if before is None or after is None:
            return None
        return after - before

    values = {}
    for attr in (
        "cost_usd",
        "tokens_input",
        "tokens_output",
        "tokens_cache_read",
        "tokens_cache_write",
    ):
        values[attr], values[f"{attr}_pct"] = _abs_and_pct(attr)
    for attr in ("cache_hit_pct", "used_pct"):
        values[f"{attr}_pp"] = _points(attr)
    return DeltaBundle(**values)
273
+
274
+
275
@dataclass(frozen=True)
class ColumnSpec:
    """Description of one rendered column of a diff section (spec §4)."""
    # Name of the metric attribute the column displays.
    field: str
    # Column heading text.
    header: str
    # Cell format: "usd" | "pct" | "tokens".
    format: str
    # Flag carried for the overall section (True only in the overall table).
    show_in_overall: bool
282
+
283
+
284
@dataclass
class DiffRow:
    """A single row of a DiffSection (spec §4).

    ``status`` is one of "changed", "new" or "dropped". ``sort_key`` is
    the magnitude the default delta sort orders by (typically |Δ$|).
    """
    # Section-qualified row key.
    key: str
    # Display label.
    label: str
    # "changed" | "new" | "dropped".
    status: str
    # Window-a metrics; None for a "new" row.
    a: "MetricBundle | None"
    # Window-b metrics; None for a "dropped" row.
    b: "MetricBundle | None"
    # b - a deltas.
    delta: DeltaBundle
    # Magnitude used by the delta sort.
    sort_key: float
298
+
299
+
300
@dataclass
class DiffSection:
    """A named group of rows (overall / models / projects / cache).

    ``hidden_count`` is the number of changed rows that were filtered
    out; the table renderer reports it as "(N hidden, N% of total)".
    """
    # Section name.
    name: str
    # Section scope string.
    scope: str
    # Visible DiffRow objects, already sorted.
    rows: list
    # Count of rows filtered out of ``rows``.
    hidden_count: int
    # ColumnSpec objects describing the rendered columns.
    columns: list
313
+
314
+
315
@dataclass(frozen=True)
class NoiseThreshold:
    """Parameters of the filter that hides tiny changed rows (spec §4)."""
    # A changed row is noise only if |Δ$| is below this value...
    min_delta_usd: float = 0.10
    # ...and |Δ%| is below this value as well.
    min_delta_pct: float = 1.0
    # When True the filter is disabled and every row is shown.
    show_all: bool = False
    # Flag recording a user override; stored only, not read by the filter.
    user_override: bool = False
322
+
323
+
324
@dataclass
class DiffResult:
    """Top-level result object built by `_build_diff_result` (spec §4)."""
    # The two windows being compared.
    window_a: ParsedWindow
    window_b: ParsedWindow
    # True when the two windows differ in length.
    mismatched_length: bool
    # Normalization label applied to the result.
    normalization: str
    # Used-% resolution mode per window.
    used_pct_mode_a: str
    used_pct_mode_b: str
    # DiffSection objects, one per requested section.
    sections: list
    # Noise filter the sections were built with.
    threshold: NoiseThreshold
    auto_normalized: bool = False
    # Per-key (a, b) bundle pairs; a fresh dict per instance.
    raw_totals: "dict[str, tuple[MetricBundle | None, MetricBundle | None]]" = field(default_factory=dict)
337
+
338
+
339
+ _DIFF_NW_AGO_RE = re.compile(r"^(\d+)w-ago$")
340
+ _DIFF_NM_AGO_RE = re.compile(r"^(\d+)m-ago$")
341
+ _DIFF_LAST_ND_RE = re.compile(r"^last-(\d+)d$")
342
+ _DIFF_PREV_ND_RE = re.compile(r"^prev-(\d+)d$")
343
+ _DIFF_RANGE_RE = re.compile(r"^(\d{4}-\d{2}-\d{2})\.\.(\d{4}-\d{2}-\d{2})$")
344
+
345
+
346
def _parse_diff_window(
    token: str,
    *,
    now_utc: dt.datetime,
    anchor_resets_at: dt.datetime | None,
    anchor_week_start: dt.datetime | None,
    tz_name: str,
) -> ParsedWindow:
    """Resolve a window token to a ParsedWindow. See spec §2.

    Recognised tokens:

    * ``this-week`` / ``last-week`` / ``<N>w-ago`` — subscription weeks
      counted back from ``anchor_week_start``. The current week
      (``this-week`` and, equivalently, ``0w-ago``) ends at
      ``min(now_utc, anchor_resets_at)``, so it only counts as a full,
      week-aligned window once the reset has been reached.
    * ``this-month`` / ``last-month`` / ``<N>m-ago`` — calendar months in
      the timezone named by ``tz_name``.
    * ``last-<N>d`` / ``prev-<N>d`` — the N days ending at ``now_utc``,
      and the N days immediately before those.
    * ``YYYY-MM-DD..YYYY-MM-DD`` — explicit range with both dates
      included, interpreted in the ``tz_name`` timezone.

    Args:
        token: The window token to resolve.
        now_utc: Reference "now".
        anchor_resets_at: Most recent subscription-week reset, or None.
        anchor_week_start: Week start matching ``anchor_resets_at``, or
            None. Both anchors may be None when the user has never run
            `record-usage`.
        tz_name: Timezone name used by the month and range tokens.

    Returns:
        The resolved half-open window (``end_utc`` is exclusive).

    Raises:
        NoAnchorError: A week token was given but an anchor is None.
        ValueError: The token is not recognised, a range has its start
            after its end, or a date component is out of range.
    """
    # --- subscription weeks ------------------------------------------------
    nw_match = _DIFF_NW_AGO_RE.match(token)
    if token in ("this-week", "last-week") or nw_match:
        if anchor_week_start is None or anchor_resets_at is None:
            raise NoAnchorError(
                f"cannot resolve week token {token!r}: no subscription-week "
                f"anchor available (run record-usage first)"
            )
        if token == "this-week":
            weeks_back = 0
        elif token == "last-week":
            weeks_back = 1
        else:
            weeks_back = int(nw_match.group(1))
        if weeks_back == 0:
            # Current week (also reached via "0w-ago"): clamp to now while
            # mid-week, else to the next reset. A clamped window is
            # neither week-aligned nor a full week.
            start = anchor_week_start
            end = min(now_utc, anchor_resets_at)
            week_aligned = end == anchor_resets_at
            full_weeks_count = 1 if week_aligned else 0
        else:
            # Past weeks: both endpoints are subscription-week boundaries.
            start = anchor_week_start - dt.timedelta(days=7 * weeks_back)
            end = start + dt.timedelta(days=7)
            week_aligned = True
            full_weeks_count = 1
        length = (end - start).total_seconds() / 86400.0
        return ParsedWindow(
            label=token, start_utc=start, end_utc=end,
            length_days=length, kind="week",
            week_aligned=week_aligned,
            full_weeks_count=full_weeks_count,
        )

    # --- calendar months ---------------------------------------------------
    nm_match = _DIFF_NM_AGO_RE.match(token)
    if token in ("this-month", "last-month") or nm_match:
        tz = _resolve_tz(tz_name, strict_iana=True, fallback=dt.timezone.utc)
        now_local = now_utc.astimezone(tz)
        if token == "this-month":
            months_back = 0
        elif token == "last-month":
            months_back = 1
        else:
            months_back = int(nm_match.group(1))
        # Closed-form month arithmetic on a zero-based month index. Unlike
        # a step-back loop, an absurdly large N fails fast: the
        # out-of-range year makes dt.datetime raise ValueError.
        month_index = now_local.year * 12 + (now_local.month - 1) - months_back
        year, month0 = divmod(month_index, 12)
        end_year, end_month0 = divmod(month_index + 1, 12)
        start_local = dt.datetime(year, month0 + 1, 1, tzinfo=tz)
        end_local = dt.datetime(end_year, end_month0 + 1, 1, tzinfo=tz)
        start = start_local.astimezone(dt.timezone.utc)
        end = end_local.astimezone(dt.timezone.utc)
        length = (end - start).total_seconds() / 86400.0
        return ParsedWindow(
            label=token, start_utc=start, end_utc=end,
            length_days=length, kind="month",
            week_aligned=False,
            full_weeks_count=max(1, int(round(length / 7.0))),
        )

    # --- rolling day ranges ------------------------------------------------
    nd_match = _DIFF_LAST_ND_RE.match(token) or _DIFF_PREV_ND_RE.match(token)
    if nd_match:
        days = int(nd_match.group(1))
        span = dt.timedelta(days=days)
        # "last-Nd" ends now; "prev-Nd" ends where "last-Nd" starts.
        end = now_utc if token.startswith("last-") else now_utc - span
        start = end - span
        return ParsedWindow(
            label=token, start_utc=start, end_utc=end,
            length_days=float(days), kind="day-range",
            week_aligned=False, full_weeks_count=0,
        )

    # --- explicit date ranges ----------------------------------------------
    range_match = _DIFF_RANGE_RE.match(token)
    if range_match:
        start_d = dt.date.fromisoformat(range_match.group(1))
        end_d = dt.date.fromisoformat(range_match.group(2))
        if start_d > end_d:
            raise ValueError(
                f"invalid range {token!r}: range start must be on or before end"
            )
        tz = _resolve_tz(tz_name, strict_iana=True, fallback=dt.timezone.utc)
        midnight = dt.time(0, 0)
        start = dt.datetime.combine(
            start_d, midnight, tzinfo=tz
        ).astimezone(dt.timezone.utc)
        # The end date is inclusive, so the exclusive bound is the
        # following local midnight.
        end = dt.datetime.combine(
            end_d + dt.timedelta(days=1), midnight, tzinfo=tz
        ).astimezone(dt.timezone.utc)
        length = (end - start).total_seconds() / 86400.0
        return ParsedWindow(
            label=token, start_utc=start, end_utc=end,
            length_days=length, kind="explicit-range",
            week_aligned=False, full_weeks_count=0,
        )

    raise ValueError(f"invalid window token: {token!r}")
459
+
460
+
461
+ def _humanize_tokens(n: "int | None") -> str:
462
+ """Compact int rendering for diff cells: 1234 -> '1.2K', 1_500_000 -> '1.5M'."""
463
+ if n is None:
464
+ return "—"
465
+ a = abs(n)
466
+ sign = "-" if n < 0 else ""
467
+ if a < 1_000:
468
+ return f"{sign}{a}"
469
+ if a < 1_000_000:
470
+ return f"{sign}{a / 1_000:.1f}K"
471
+ if a < 1_000_000_000:
472
+ return f"{sign}{a / 1_000_000:.1f}M"
473
+ return f"{sign}{a / 1_000_000_000:.1f}B"
474
+
475
+
476
def _diff_iter_claude_entries(window: ParsedWindow, *, skip_sync: bool):
    """Fetch the Claude entries for ``window``, respecting its half-open end.

    `get_claude_session_entries` treats its end bound as inclusive; it is
    shared with daily/monthly/blocks/range-cost/cache-report/sync-week,
    which rely on inclusive end-of-day semantics for date-only inputs, so
    the helper's SQL cannot be tightened. `ParsedWindow.end_utc` is
    documented exclusive, so the end bound is pulled back by one
    microsecond here to bridge the two conventions.
    """
    one_microsecond = dt.timedelta(microseconds=1)
    inclusive_end = window.end_utc - one_microsecond
    return get_claude_session_entries(
        window.start_utc, inclusive_end, skip_sync=skip_sync
    )
490
+
491
+
492
def _diff_aggregate_overall(
    window: ParsedWindow,
    *,
    skip_sync: bool = False,
) -> MetricBundle:
    """Total cost, tokens and cache stats over every entry in `window`.

    Entries whose model is "<synthetic>" are ignored. `cache_hit_pct`
    follows cache-report semantics: cache_read / (cache_read +
    non_cached_input) * 100, or None when that denominator is not
    positive. `used_pct` is intentionally None — it is filled in
    separately by `_diff_resolve_used_pct` so the stats DB is hit only
    once per window.
    """
    total_cost = 0.0
    input_tokens = output_tokens = cache_read = cache_write = 0
    for entry in _diff_iter_claude_entries(window, skip_sync=skip_sync):
        if entry.model == "<synthetic>":
            continue
        usage = {
            "input_tokens": entry.input_tokens,
            "output_tokens": entry.output_tokens,
            "cache_creation_input_tokens": entry.cache_creation_tokens,
            "cache_read_input_tokens": entry.cache_read_tokens,
        }
        total_cost += _calculate_entry_cost(
            entry.model, usage, mode="auto", cost_usd=entry.cost_usd,
        )
        input_tokens += entry.input_tokens
        output_tokens += entry.output_tokens
        cache_read += entry.cache_read_tokens
        cache_write += entry.cache_creation_tokens
    hit_base = cache_read + input_tokens
    if hit_base > 0:
        cache_hit = cache_read / hit_base * 100.0
    else:
        cache_hit = None
    return MetricBundle(
        cost_usd=total_cost,
        tokens_input=input_tokens,
        tokens_output=output_tokens,
        tokens_cache_read=cache_read,
        tokens_cache_write=cache_write,
        cache_hit_pct=cache_hit,
        used_pct=None,
    )
532
+
533
+
534
def _diff_aggregate_models(
    window: ParsedWindow,
    *,
    skip_sync: bool = False,
) -> dict:
    """Aggregate the window's entries into one MetricBundle per model id.

    Entries whose model is "<synthetic>" are ignored. The result maps
    model id -> MetricBundle, in first-seen order.
    """
    per_model: dict = {}
    for entry in _diff_iter_claude_entries(window, skip_sync=skip_sync):
        if entry.model == "<synthetic>":
            continue
        totals = per_model.get(entry.model)
        if totals is None:
            totals = per_model[entry.model] = {
                "cost": 0.0, "ti": 0, "to": 0, "tcr": 0, "tcw": 0,
            }
        usage = {
            "input_tokens": entry.input_tokens,
            "output_tokens": entry.output_tokens,
            "cache_creation_input_tokens": entry.cache_creation_tokens,
            "cache_read_input_tokens": entry.cache_read_tokens,
        }
        totals["cost"] += _calculate_entry_cost(
            entry.model, usage, mode="auto", cost_usd=entry.cost_usd,
        )
        totals["ti"] += entry.input_tokens
        totals["to"] += entry.output_tokens
        totals["tcr"] += entry.cache_read_tokens
        totals["tcw"] += entry.cache_creation_tokens

    def _to_bundle(totals):
        # Cache hit rate is undefined without any cached or plain input.
        hit_base = totals["tcr"] + totals["ti"]
        cache_hit = (totals["tcr"] / hit_base * 100.0) if hit_base > 0 else None
        return MetricBundle(
            cost_usd=totals["cost"],
            tokens_input=totals["ti"],
            tokens_output=totals["to"],
            tokens_cache_read=totals["tcr"],
            tokens_cache_write=totals["tcw"],
            cache_hit_pct=cache_hit,
            used_pct=None,
        )

    return {model: _to_bundle(totals) for model, totals in per_model.items()}
568
+
569
+
570
def _diff_aggregate_projects(
    window: ParsedWindow,
    *,
    skip_sync: bool = False,
    group_mode: str = "git-root",
) -> dict:
    """Aggregate the window's entries into one MetricBundle per project.

    Each entry's `project_path` is resolved through `_resolve_project_key`
    (with `group_mode` and a per-call resolver cache); entries are grouped
    by the resolved key's `display_key`. Entries whose model is
    "<synthetic>" are ignored.
    """
    resolver_cache: dict = {}
    per_project: dict = {}
    for entry in _diff_iter_claude_entries(window, skip_sync=skip_sync):
        if entry.model == "<synthetic>":
            continue
        project = _resolve_project_key(
            entry.project_path, group_mode, resolver_cache
        )
        totals = per_project.get(project.display_key)
        if totals is None:
            totals = per_project[project.display_key] = {
                "cost": 0.0, "ti": 0, "to": 0, "tcr": 0, "tcw": 0,
            }
        usage = {
            "input_tokens": entry.input_tokens,
            "output_tokens": entry.output_tokens,
            "cache_creation_input_tokens": entry.cache_creation_tokens,
            "cache_read_input_tokens": entry.cache_read_tokens,
        }
        totals["cost"] += _calculate_entry_cost(
            entry.model, usage, mode="auto", cost_usd=entry.cost_usd,
        )
        totals["ti"] += entry.input_tokens
        totals["to"] += entry.output_tokens
        totals["tcr"] += entry.cache_read_tokens
        totals["tcw"] += entry.cache_creation_tokens

    def _to_bundle(totals):
        # Cache hit rate is undefined without any cached or plain input.
        hit_base = totals["tcr"] + totals["ti"]
        cache_hit = (totals["tcr"] / hit_base * 100.0) if hit_base > 0 else None
        return MetricBundle(
            cost_usd=totals["cost"],
            tokens_input=totals["ti"],
            tokens_output=totals["to"],
            tokens_cache_read=totals["tcr"],
            tokens_cache_write=totals["tcw"],
            cache_hit_pct=cache_hit,
            used_pct=None,
        )

    return {name: _to_bundle(totals) for name, totals in per_project.items()}
607
+
608
+
609
def _diff_aggregate_cache(
    window: ParsedWindow,
    *,
    skip_sync: bool = False,
) -> dict:
    """Aggregate only the entries that touched the cache.

    An entry counts when `cache_creation_tokens` or `cache_read_tokens`
    is non-zero; entries whose model is "<synthetic>" are ignored.

    Returns either {} (nothing accumulated) or two keys:
      * `cache:overall` — totals over every counted entry.
      * `cache:claude`  — the very same bundle, since only Claude
        entries are read here; kept for spec parity with a future Codex
        extension.
    `tokens_output` is not tracked in this scope and is None.
    """
    total_cost = 0.0
    cache_read = cache_write = plain_input = 0
    for entry in _diff_iter_claude_entries(window, skip_sync=skip_sync):
        if entry.model == "<synthetic>":
            continue
        touched_cache = (
            entry.cache_creation_tokens != 0 or entry.cache_read_tokens != 0
        )
        if not touched_cache:
            continue
        usage = {
            "input_tokens": entry.input_tokens,
            "output_tokens": entry.output_tokens,
            "cache_creation_input_tokens": entry.cache_creation_tokens,
            "cache_read_input_tokens": entry.cache_read_tokens,
        }
        total_cost += _calculate_entry_cost(
            entry.model, usage, mode="auto", cost_usd=entry.cost_usd,
        )
        cache_read += entry.cache_read_tokens
        cache_write += entry.cache_creation_tokens
        plain_input += entry.input_tokens
    if total_cost == 0.0 and cache_read == 0 and cache_write == 0:
        return {}
    hit_base = cache_read + plain_input
    if hit_base > 0:
        cache_hit = cache_read / hit_base * 100.0
    else:
        cache_hit = None
    overall = MetricBundle(
        cost_usd=total_cost,
        tokens_input=plain_input,
        tokens_output=None,
        tokens_cache_read=cache_read,
        tokens_cache_write=cache_write,
        cache_hit_pct=cache_hit,
        used_pct=None,
    )
    # TODO(codex-diff): when Codex entries are walked, compute a separate
    # claude-scope MetricBundle (currently shares the overall ref because
    # claude is the only source).
    return {"cache:overall": overall, "cache:claude": overall}
653
+
654
+
655
+ def _diff_resolve_used_pct(window: ParsedWindow) -> tuple:
656
+ """Return (used_pct_value, mode) for a window. See spec §1+§4.
657
+
658
+ mode ∈ {"exact", "avg", "n/a"}.
659
+
660
+ "exact" requires window.kind == "week" AND window.week_aligned AND
661
+ window.full_weeks_count == 1 — i.e., a single complete subscription
662
+ week. Anything partial (this-week mid-week) is "n/a" by design.
663
+
664
+ "avg" fires for windows spanning >= 2 full weeks (e.g., last-30d
665
+ over 4-5 weeks): we average max(weekly_percent) across the weeks
666
+ that have snapshot rows. If any subscription week in [start, end)
667
+ lacks a snapshot, mode is downgraded to `n/a`.
668
+
669
+ The "exact" lookup is constrained to the target week's
670
+ `week_start_date` so a window with no recorded snapshots correctly
671
+ falls through to `n/a` instead of returning a stale cross-week value.
672
+ (The `_apply_midweek_reset_override` gotcha doesn't apply on this
673
+ code path: a mid-week `this-week` window has `week_aligned=False`
674
+ and routes to `n/a`, never reaching the exact branch.)
675
+ """
676
+ if window.kind == "week" and window.week_aligned and window.full_weeks_count == 1:
677
+ try:
678
+ conn = open_db()
679
+ except Exception:
680
+ return None, "n/a"
681
+ try:
682
+ row = conn.execute(
683
+ "SELECT weekly_percent FROM weekly_usage_snapshots "
684
+ "WHERE week_start_date = ? "
685
+ " AND captured_at_utc <= ? "
686
+ "ORDER BY captured_at_utc DESC, id DESC LIMIT 1",
687
+ (window.start_utc.date().isoformat(), _iso_z(window.end_utc)),
688
+ ).fetchone()
689
+ if row is None or row[0] is None:
690
+ return None, "n/a"
691
+ return float(row[0]), "exact"
692
+ finally:
693
+ conn.close()
694
+ if window.full_weeks_count >= 2:
695
+ try:
696
+ conn = open_db()
697
+ except Exception:
698
+ return None, "n/a"
699
+ try:
700
+ rows = conn.execute(
701
+ "SELECT week_start_date, MAX(weekly_percent) "
702
+ "FROM weekly_usage_snapshots "
703
+ "WHERE captured_at_utc >= ? AND captured_at_utc < ? "
704
+ "GROUP BY week_start_date",
705
+ (_iso_z(window.start_utc), _iso_z(window.end_utc)),
706
+ ).fetchall()
707
+ vals = [r[1] for r in rows if r[1] is not None]
708
+ if len(vals) < window.full_weeks_count:
709
+ # Spec §9.3: missing-coverage weeks would skew the avg —
710
+ # downgrade to n/a instead of reporting a misleading number.
711
+ return None, "n/a"
712
+ if not vals:
713
+ return None, "n/a"
714
+ return sum(vals) / len(vals), "avg"
715
+ finally:
716
+ conn.close()
717
+ return None, "n/a"
718
+
719
+
720
# Default column layouts, one list per section.
# ColumnSpec arguments: (field, header, format, show_in_overall).
_DIFF_DEFAULT_COLUMNS_OVERALL = [
    ColumnSpec("cost_usd", "Cost", "usd", True),
    ColumnSpec("used_pct", "Used %", "pct", True),
    ColumnSpec("cache_hit_pct", "Cache %", "pct", True),
    ColumnSpec("tokens_input", "Tokens", "tokens", True),
]
_DIFF_DEFAULT_COLUMNS_MODELS = [
    ColumnSpec("cost_usd", "Cost", "usd", False),
    ColumnSpec("cache_hit_pct", "Cache %", "pct", False),
    ColumnSpec("tokens_input", "Tokens", "tokens", False),
]
# Projects use the same columns as models, held in an independent list.
_DIFF_DEFAULT_COLUMNS_PROJECTS = list(_DIFF_DEFAULT_COLUMNS_MODELS)
_DIFF_DEFAULT_COLUMNS_CACHE = [
    ColumnSpec("cost_usd", "Cost", "usd", False),
    ColumnSpec("cache_hit_pct", "Cache %", "pct", False),
]
736
+
737
+
738
+ def _diff_sort_rows(rows: list, sort: str) -> list:
739
+ """Stable sort with deterministic tiebreak on label."""
740
+ if sort == "delta":
741
+ keyfn = lambda r: (-(r.sort_key or 0.0), r.label)
742
+ elif sort == "cost-a":
743
+ keyfn = lambda r: (-(r.a.cost_usd if r.a else 0.0), r.label)
744
+ elif sort == "cost-b":
745
+ keyfn = lambda r: (-(r.b.cost_usd if r.b else 0.0), r.label)
746
+ elif sort == "name":
747
+ keyfn = lambda r: (r.label,)
748
+ elif sort == "status":
749
+ order = {"dropped": 0, "changed": 1, "new": 2}
750
+ keyfn = lambda r: (order.get(r.status, 3), -(r.sort_key or 0.0), r.label)
751
+ else:
752
+ keyfn = lambda r: (-(r.sort_key or 0.0), r.label)
753
+ return sorted(rows, key=keyfn)
754
+
755
+
756
+ def _apply_noise_threshold(rows: list, threshold: NoiseThreshold) -> tuple:
757
+ """Hide changed rows where |Δ$| < min_delta_usd AND |Δ%| < min_delta_pct.
758
+
759
+ new/dropped rows are NEVER hidden — a wholly-appearing or wholly-
760
+ disappearing model/project is always interesting regardless of $.
761
+
762
+ Returns (visible_rows, hidden_count). With show_all=True, every row
763
+ is visible and hidden_count is 0.
764
+ """
765
+ if threshold.show_all:
766
+ return list(rows), 0
767
+ visible: list = []
768
+ hidden = 0
769
+ for r in rows:
770
+ if r.status != "changed":
771
+ visible.append(r)
772
+ continue
773
+ d_usd = abs(r.delta.cost_usd or 0.0)
774
+ d_pct = abs(r.delta.cost_usd_pct or 0.0)
775
+ if d_usd < threshold.min_delta_usd and d_pct < threshold.min_delta_pct:
776
+ hidden += 1
777
+ continue
778
+ visible.append(r)
779
+ return visible, hidden
780
+
781
+
782
def _diff_build_section(
    name: str,
    scope: str,
    a_map: dict,
    b_map: dict,
    columns: list,
    threshold: NoiseThreshold,
    sort: str,
    *,
    label_for_key=None,
    top: "int | None" = None,
) -> DiffSection:
    """Assemble one section from the per-key bundles of both windows.

    Steps: take the union of the keys of `a_map` and `b_map`, classify
    each key as changed/new/dropped, sort, then apply the noise filter.

    `top` (when not None and >= 0) caps how many `changed` rows survive
    after sort+filter. `new`/`dropped` rows are exempt — a
    wholly-appearing or wholly-disappearing entry is always interesting.
    Rows removed by the cap are added to `hidden_count` so the footer
    accounts for them.
    """
    prefix = name + ":"

    def _make_row(k):
        a = a_map.get(k)
        b = b_map.get(k)
        if a is None:
            status = "new"
        elif b is None:
            status = "dropped"
        else:
            status = "changed"
        delta = _build_delta_bundle(a, b)
        return DiffRow(
            # Qualify the key with the section name unless it already is.
            key=k if k.startswith(prefix) else f"{name}:{k}",
            label=label_for_key(k) if label_for_key else k,
            status=status,
            a=a,
            b=b,
            delta=delta,
            sort_key=abs(delta.cost_usd or 0.0),
        )

    all_keys = set(a_map.keys()) | set(b_map.keys())
    ordered = _diff_sort_rows([_make_row(k) for k in all_keys], sort)
    visible, hidden = _apply_noise_threshold(ordered, threshold)

    if top is not None and top >= 0:
        # --top caps `changed` rows only; new/dropped rows are exempt.
        exempt = []
        changed = []
        for row in visible:
            (changed if row.status == "changed" else exempt).append(row)
        kept_changed = changed[:top]
        # Roll capped rows into hidden_count so the footer is accurate.
        hidden += len(changed) - len(kept_changed)
        # Re-sort the union to keep the visual order stable under the
        # caller's chosen sort key.
        visible = _diff_sort_rows(exempt + kept_changed, sort)

    return DiffSection(
        name=name, scope=scope, rows=visible,
        hidden_count=hidden, columns=columns,
    )
838
+
839
+
840
+ def _normalize_metric_bundle_per_day(
841
+ b: "MetricBundle", length_days: float,
842
+ ) -> "MetricBundle":
843
+ """Return a NEW MetricBundle with `cost_usd` divided by `length_days`.
844
+
845
+ Per spec §2 (mismatched-length rule, line 121): "divide every
846
+ absolute-cost / Δ$ value by `length_days` before display. Δ% stays
847
+ a ratio (always meaningful). `Used %` is NOT normalized." Token
848
+ counts are also left raw — the spec only mentions cost. Cache % is
849
+ already a ratio. `used_pct` is preserved untouched (the overall
850
+ section splices Used % AFTER normalization runs).
851
+ """
852
+ if length_days <= 0:
853
+ return b
854
+ return dataclasses.replace(b, cost_usd=b.cost_usd / length_days)
855
+
856
+
857
+ def _sum_metric_bundles(bundles) -> "MetricBundle | None":
858
+ """Sum a sequence of MetricBundles into a single MetricBundle.
859
+ Returns None if the sequence is empty."""
860
+ bundles = list(bundles)
861
+ if not bundles:
862
+ return None
863
+ cost = sum(b.cost_usd for b in bundles)
864
+ ti = sum(b.tokens_input or 0 for b in bundles)
865
+ to = sum(b.tokens_output or 0 for b in bundles)
866
+ tcr = sum(b.tokens_cache_read or 0 for b in bundles)
867
+ tcw = sum(b.tokens_cache_write or 0 for b in bundles)
868
+ denom = tcr + ti
869
+ cache_hit = (tcr / denom * 100.0) if denom > 0 else None
870
+ return MetricBundle(
871
+ cost_usd=cost, tokens_input=ti, tokens_output=to,
872
+ tokens_cache_read=tcr, tokens_cache_write=tcw,
873
+ cache_hit_pct=cache_hit, used_pct=None,
874
+ )
875
+
876
+
877
def _build_diff_result(
    window_a: ParsedWindow,
    window_b: ParsedWindow,
    *,
    threshold: NoiseThreshold,
    sections_requested: list,
    sort: str,
    allow_mismatch: bool = False,
    skip_sync: bool = False,
    top: "int | None" = None,
) -> DiffResult:
    """Top-level diff builder: compare window_a with window_b section by section.

    Window lengths that differ by more than 0.01 days count as mismatched.
    A mismatched pair is handled one of three ways:

    * same-kind week/month pair: auto-normalized per day (spec §2 rule 3),
      no flag needed, `auto_normalized` is set on the result;
    * any other pair with `allow_mismatch=True`: normalized per day;
    * otherwise: WindowMismatchError is raised.

    When normalization applies, every bundle's cost is divided by its own
    window's `length_days` (spec §2, line 121). Δ% is invariant under that
    uniform scaling. Used % is never normalized, so it is spliced into the
    overall bundles only after normalization.

    Sections are always emitted in the order overall, models, projects,
    cache, restricted to the names present in `sections_requested`. `top`
    is forwarded to the models and projects sections only; overall and
    cache are built with a show-all threshold. `raw_totals` maps each
    built section name to its (side A, side B) totals.
    """
    mismatched = abs(window_a.length_days - window_b.length_days) > 0.01
    # Spec §2 rule 3: same-kind week/month pairs auto-normalize per-day;
    # --allow-mismatch is silently a no-op for them.
    auto_normalized = (
        mismatched
        and window_a.kind == window_b.kind
        and window_a.kind in {"week", "month"}
    )
    if mismatched and not auto_normalized and not allow_mismatch:
        raise WindowMismatchError(
            f"window A is {window_a.length_days:.1f} days, "
            f"window B is {window_b.length_days:.1f} days; "
            f"pass --allow-mismatch to compare anyway with per-day normalization"
        )
    normalization = "per-day" if mismatched else "none"

    used_a, mode_a = _diff_resolve_used_pct(window_a)
    used_b, mode_b = _diff_resolve_used_pct(window_b)

    def _per_day(bundle: "MetricBundle", window) -> "MetricBundle":
        # Matched windows are shown raw; mismatched ones become "$ per day".
        if not mismatched:
            return bundle
        return _normalize_metric_bundle_per_day(bundle, window.length_days)

    def _aggregate_both(aggregate) -> tuple:
        # Window A is aggregated (and normalized) before window B.
        side_a = {
            key: _per_day(bundle, window_a)
            for key, bundle in aggregate(window_a, skip_sync=skip_sync).items()
        }
        side_b = {
            key: _per_day(bundle, window_b)
            for key, bundle in aggregate(window_b, skip_sync=skip_sync).items()
        }
        return side_a, side_b

    sections: list = []
    raw_totals: "dict[str, tuple[MetricBundle | None, MetricBundle | None]]" = {}

    if "overall" in sections_requested:
        overall_a = _per_day(
            _diff_aggregate_overall(window_a, skip_sync=skip_sync), window_a,
        )
        overall_b = _per_day(
            _diff_aggregate_overall(window_b, skip_sync=skip_sync), window_b,
        )
        # Used % goes in AFTER normalization: it is a weekly ceiling ratio
        # and must never be divided by the window length.
        overall_a = dataclasses.replace(overall_a, used_pct=used_a)
        overall_b = dataclasses.replace(overall_b, used_pct=used_b)
        sections.append(_diff_build_section(
            "overall", "all",
            {"overall": overall_a}, {"overall": overall_b},
            _DIFF_DEFAULT_COLUMNS_OVERALL,
            threshold=NoiseThreshold(show_all=True),
            sort=sort,
            label_for_key=lambda k: "Overall",
        ))
        raw_totals["overall"] = (overall_a, overall_b)

    # models and projects share identical wiring: caller's threshold, --top
    # cap, and totals obtained by summing every (unfiltered) bundle.
    for section_name, aggregate, columns in (
        ("models", _diff_aggregate_models, _DIFF_DEFAULT_COLUMNS_MODELS),
        ("projects", _diff_aggregate_projects, _DIFF_DEFAULT_COLUMNS_PROJECTS),
    ):
        if section_name not in sections_requested:
            continue
        map_a, map_b = _aggregate_both(aggregate)
        sections.append(_diff_build_section(
            section_name, "all", map_a, map_b,
            columns, threshold, sort,
            top=top,
        ))
        raw_totals[section_name] = (
            _sum_metric_bundles(map_a.values()),
            _sum_metric_bundles(map_b.values()),
        )

    if "cache" in sections_requested:
        map_a, map_b = _aggregate_both(_diff_aggregate_cache)
        sections.append(_diff_build_section(
            "cache", "cache-active-entries", map_a, map_b,
            _DIFF_DEFAULT_COLUMNS_CACHE,
            threshold=NoiseThreshold(show_all=True),
            sort=sort,
        ))
        raw_totals["cache"] = (
            map_a.get("cache:overall"),
            map_b.get("cache:overall"),
        )

    return DiffResult(
        window_a=window_a,
        window_b=window_b,
        mismatched_length=mismatched,
        normalization=normalization,
        used_pct_mode_a=mode_a,
        used_pct_mode_b=mode_b,
        sections=sections,
        threshold=threshold,
        auto_normalized=auto_normalized,
        raw_totals=raw_totals,
    )
1001
+
1002
+
1003
+ def _check_diff_invariants(result: DiffResult) -> None:
1004
+ """Assert spec §4 runtime invariants. Raises AssertionError on drift.
1005
+
1006
+ Invariant: for each side (a, b), sum(cost_usd) over models section
1007
+ rows == overall.cost_usd, and likewise for projects. Tolerance is
1008
+ 1e-9 USD (per the reconcile-test pattern — IEEE-754 ULP drift on
1009
+ aggregation order is normal). Sums are skipped when hidden_count > 0
1010
+ because the noise filter changes the visible total.
1011
+ """
1012
+ sections = {s.name: s for s in result.sections}
1013
+ if "overall" not in sections:
1014
+ return
1015
+ overall = sections["overall"].rows[0]
1016
+
1017
+ def _sum(side: str, section_name: str):
1018
+ s = sections.get(section_name)
1019
+ if s is None:
1020
+ return None
1021
+ if s.hidden_count > 0:
1022
+ # Filter changes the visible sum — invariant doesn't hold.
1023
+ return None
1024
+ total = 0.0
1025
+ for r in s.rows:
1026
+ mb = r.a if side == "a" else r.b
1027
+ if mb is not None:
1028
+ total += mb.cost_usd
1029
+ return total
1030
+
1031
+ for side in ("a", "b"):
1032
+ m_sum = _sum(side, "models")
1033
+ p_sum = _sum(side, "projects")
1034
+ o_mb = overall.a if side == "a" else overall.b
1035
+ if o_mb is None:
1036
+ continue
1037
+ o_val = o_mb.cost_usd
1038
+ if m_sum is not None:
1039
+ assert abs(m_sum - o_val) < 1e-9, (
1040
+ f"models {side} sum {m_sum} != overall {o_val} "
1041
+ f"(Δ={m_sum - o_val})"
1042
+ )
1043
+ if p_sum is not None:
1044
+ assert abs(p_sum - o_val) < 1e-9, (
1045
+ f"projects {side} sum {p_sum} != overall {o_val} "
1046
+ f"(Δ={p_sum - o_val})"
1047
+ )
1048
+
1049
+
1050
+ # ─────────────────────────────────────────────────────────────────────
1051
+ # diff renderer — cell formatters
1052
+ # ─────────────────────────────────────────────────────────────────────
1053
+
1054
+
1055
# Single source of truth for the "missing value" glyph, so a future style
# change (e.g. swapping the em-dash for "n/a") only touches one constant.
# The cell formatters below either go through `_diff_or_emdash` or reference
# this constant directly when the inline pattern reads more clearly.
_DIFF_EM_DASH = "—"
1060
+
1061
+
1062
+ def _diff_or_emdash(value, fmt) -> str:
1063
+ """Format `value` via `fmt(value)`, or return em-dash if `value` is None."""
1064
+ if value is None:
1065
+ return _DIFF_EM_DASH
1066
+ return fmt(value)
1067
+
1068
+
1069
def _diff_fmt_cost_cell(a: "float | None", b: "float | None") -> str:
    """Format the cost pair as `$X.XX → $Y.YY`, with `—` for a missing side."""

    def _usd(amount: float) -> str:
        return f"${amount:.2f}"

    left = _diff_or_emdash(a, _usd)
    right = _diff_or_emdash(b, _usd)
    return f"{left} → {right}"
1073
+
1074
+
1075
def _diff_fmt_delta_cost_cell(delta: "float | None", pct: "float | None") -> str:
    """Format a cost delta as `+$X.XX (+N%)` / `-$X.XX (-N%)`.

    A None `delta` renders the whole cell as the em-dash glyph. A None
    `pct` renders just the parenthesised part as the glyph. The dollar
    amount is shown as an absolute value behind an explicit sign; the
    percentage gets a `+` prefix only when it is non-negative (negative
    values already carry their own minus sign).
    """
    if delta is None:
        return _DIFF_EM_DASH

    def _signed_pct(value: float) -> str:
        prefix = "+" if value >= 0 else ""
        return f"{prefix}{value:.0f}%"

    if delta >= 0:
        sign = "+"
    else:
        sign = "-"
    pct_text = _diff_or_emdash(pct, _signed_pct)
    return f"{sign}${abs(delta):.2f} ({pct_text})"
1083
+
1084
+
1085
def _diff_fmt_pct_cell(a: "float | None", b: "float | None") -> str:
    """Format a percentage pair as `N% → M%` (no decimals), `—` when missing."""

    def _whole_pct(value: float) -> str:
        return f"{value:.0f}%"

    left = _diff_or_emdash(a, _whole_pct)
    right = _diff_or_emdash(b, _whole_pct)
    return f"{left} → {right}"
1088
+
1089
+
1090
+ def _diff_fmt_pp_cell(pp: "float | None") -> str:
1091
+ if pp is None:
1092
+ return _DIFF_EM_DASH
1093
+ sign = "+" if pp >= 0 else "-"
1094
+ return f"{sign}{abs(pp):.0f}pp"
1095
+
1096
+
1097
+ def _diff_fmt_tokens_cell(a: "int | None", b: "int | None") -> str:
1098
+ def _fmt(n: "int | None") -> str:
1099
+ if n is None:
1100
+ return _DIFF_EM_DASH
1101
+ if abs(n) < 1000:
1102
+ return str(n)
1103
+ return _humanize_tokens(n)
1104
+ return f"{_fmt(a)} → {_fmt(b)}"
1105
+
1106
+
1107
+ def _diff_fmt_delta_tokens_cell(delta: "int | None") -> str:
1108
+ if delta is None:
1109
+ return _DIFF_EM_DASH
1110
+ if abs(delta) < 1000:
1111
+ return f"{'+' if delta >= 0 else ''}{delta}"
1112
+ return f"{'+' if delta >= 0 else ''}{_humanize_tokens(delta)}"
1113
+
1114
+
1115
+ def _diff_color_for_delta(metric: str, delta: "float | None", *, enabled: bool) -> str:
1116
+ """Return the ANSI code for a delta cell. Red for "spent more"; green
1117
+ for "spent less" or "more cache hits"; empty when disabled or zero."""
1118
+ if not enabled or delta is None or delta == 0:
1119
+ return ""
1120
+ if metric == "cost":
1121
+ return "31" if delta > 0 else "32"
1122
+ if metric == "cache_pp":
1123
+ return "32" if delta > 0 else "31"
1124
+ return ""
1125
+
1126
+
1127
+ # ─────────────────────────────────────────────────────────────────────
1128
+ # diff renderer — banner + window header
1129
+ # ─────────────────────────────────────────────────────────────────────
1130
+
1131
+
1132
def _diff_render_banner() -> str:
    """`╭─╮` title-banner box matching daily/weekly/monthly style.

    Returns five newline-joined lines: top border, blank row, title row,
    blank row, bottom border. Rounded box-drawing glyphs are used when
    `_supports_unicode_stdout()` is true, otherwise a `+ - |` ASCII
    fallback. Every row is indented by one space.

    The interior is `len(title) + 4` columns wide and the title is centred
    in exactly that width, so the title row's right edge lines up with the
    borders and blank rows.
    """
    title = "Claude Code Token Usage Report - Diff"
    inner_w = len(title) + 4
    if _supports_unicode_stdout():
        tl, tr, bl, br, h, v = "╭", "╮", "╰", "╯", "─", "│"
    else:
        tl, tr, bl, br, h, v = "+", "+", "+", "+", "-", "|"
    rule = h * inner_w
    blank = f" {v}{' ' * inner_w}{v}"
    # Centre within inner_w (two spaces of padding per side) rather than
    # hard-coding the padding, so the row can never drift out of sync with
    # the border width computed above.
    mid = f" {v}{title.center(inner_w)}{v}"
    return "\n".join([
        f" {tl}{rule}{tr}",
        blank,
        mid,
        blank,
        f" {bl}{rule}{br}",
    ])
1145
+
1146
+
1147
def _diff_render_window_header(
    result: DiffResult, *, color: bool, tz: "ZoneInfo | None" = None,
) -> str:
    """Render the A/B window lines, plus a notice line for uneven windows.

    One line is produced per window with its label, start/end dates,
    length in days, and how Used % was resolved ("exact", "avg" or "n/a";
    any other mode raises KeyError). ``tz`` is the resolved display zone
    for the date labels routed through ``format_display_dt``; ``tz=None``
    means host-local.

    When ``result.mismatched_length`` is set a third line is appended:

    * auto-normalized pair: a cyan info line naming the shorter
      ("partial") and longer ("full") window with their lengths;
    * otherwise (explicit --allow-mismatch): a yellow warning line.

    The ``ℹ``/``—``/``⚠`` glyphs in that notice are swapped for ASCII when
    ``_supports_unicode_stdout()`` is false. ``color`` controls whether
    the notice is ANSI-styled.
    """
    mode_text = {
        "exact": "exact Used %",
        "avg": "avg Used %/wk",
        "n/a": "Used % n/a",
    }
    header_lines: list = []
    for letter, window, mode in (
        ("A", result.window_a, result.used_pct_mode_a),
        ("B", result.window_b, result.used_pct_mode_b),
    ):
        first_day = format_display_dt(
            window.start_utc, tz, fmt="%Y-%m-%d", suffix=False,
        )
        last_day = format_display_dt(
            window.end_utc, tz, fmt="%Y-%m-%d", suffix=False,
        )
        used_text = mode_text[mode]
        header_lines.append(
            f" {letter}: {window.label:<14} {first_day} → {last_day} "
            f"({window.length_days:.1f}d, {used_text})"
        )

    if not result.mismatched_length:
        return "\n".join(header_lines)

    if result.auto_normalized:
        # Auto-fire (same-kind week/month pair): softer info banner naming
        # both sides and their lengths so the reader sees WHY values are
        # per-day. The shorter window is reported as the partial one.
        win_a, win_b = result.window_a, result.window_b
        if win_a.length_days < win_b.length_days:
            partial, full = win_a, win_b
        else:
            partial, full = win_b, win_a
        notice = (
            f" ℹ Comparing partial {partial.label} "
            f"({partial.length_days:.1f}d) against full {full.label} "
            f"({full.length_days:.1f}d) — values shown per-day."
        )
        if not _supports_unicode_stdout():
            notice = notice.replace("ℹ", "i").replace("—", "--")
        ansi_code = "36"  # cyan
    else:
        # Explicit --allow-mismatch on a non-eligible pair: warning.
        notice = (
            " ⚠ Mismatched window lengths; absolute $ values are "
            "normalized per-day."
        )
        if not _supports_unicode_stdout():
            notice = notice.replace("⚠", "!!")
        ansi_code = "33"  # yellow
    header_lines.append(_style_ansi(notice, ansi_code, enabled=color))
    return "\n".join(header_lines)
1199
+
1200
+
1201
+ # ─────────────────────────────────────────────────────────────────────
1202
+ # diff renderer — section table
1203
+ # ─────────────────────────────────────────────────────────────────────
1204
+
1205
+
1206
def _diff_box_chars() -> dict:
    """Return the table-border glyphs keyed by position.

    Keys: ``tl/tm/tr`` (top row), ``ml/mm/mr`` (separator row),
    ``bl/bm/br`` (bottom row), ``h`` (horizontal fill) and ``v``
    (vertical bar). Unicode box-drawing characters are used when
    ``_supports_unicode_stdout()`` is true; otherwise every corner and
    junction is ``+`` with ``-`` / ``|`` for the lines. A fresh dict is
    built on every call.
    """
    positions = (
        "tl", "tm", "tr",
        "ml", "mm", "mr",
        "bl", "bm", "br",
        "h", "v",
    )
    # One glyph per position, in the same order as `positions`.
    glyphs = "┌┬┐├┼┤└┴┘─│" if _supports_unicode_stdout() else "+++++++++-|"
    return dict(zip(positions, glyphs))
1221
+
1222
+
1223
def _diff_section_heading(name: str, width: int) -> str:
    """Build a `─── Name ─────…` heading rule for a section.

    The capitalized section name sits between a 3-character lead-in and a
    trailing rule sized so the whole heading spans `width` columns; the
    trailing rule shrinks to nothing (never negative) when `width` is too
    small. `-` replaces `─` when unicode output is unsupported.
    """
    rule_char = "─" if _supports_unicode_stdout() else "-"
    title = name.capitalize()
    # 5 = the 3-char lead-in plus the two spaces around the title.
    tail_len = width - len(title) - 5
    if tail_len < 0:
        tail_len = 0
    return " ".join((rule_char * 3, title, rule_char * tail_len))
1227
+
1228
+
1229
def _diff_render_section_table(
    section: DiffSection,
    *,
    total_a: "MetricBundle | None",
    total_b: "MetricBundle | None",
    width: int,
    color: bool,
    used_pct_mode_a: str,
    used_pct_mode_b: str,
    threshold: "NoiseThreshold | None" = None,
) -> str:
    """Render one section as a heading plus a bordered table.

    Layout: section heading, blank line, header row, one body row per
    entry in `section.rows` (new/dropped rows get a second label line with
    their status), and a final "Total" row when either `total_a` or
    `total_b` is given. The caller passes the unfiltered aggregates as
    `total_a`/`total_b` so hidden rows still contribute to the Total row
    (spec §4 invariant).

    The Used % column pair is shown only when both windows resolved Used %
    in the same mode and that mode is not "n/a". When `section.hidden_count`
    is positive a footer explains the noise filter, quoting the limits from
    `threshold` (or "$0.10" / "1.0" when no threshold is supplied).

    `width` only affects the heading rule; column widths are derived from
    the cell text. `color` enables ANSI styling of the delta cells.
    """
    boxes = _diff_box_chars()
    out: list = [_diff_section_heading(section.name, width), ""]

    # ── header ──────────────────────────────────────────────────────
    first_header = {
        "models": "Model",
        "projects": "Project",
        "cache": "Scope",
        "overall": "Scope",
    }.get(section.name)
    if first_header is None:
        first_header = section.name.capitalize()
    has_used = used_pct_mode_a == used_pct_mode_b and used_pct_mode_a != "n/a"

    header_cells: list = [first_header, "Cost (A → B)", "Δ Cost"]
    if has_used:
        if used_pct_mode_a == "avg":
            header_cells.append("Avg Used %/wk (A → B)")
        else:
            header_cells.append("Used % (A → B)")
        header_cells.append("Δ pp")
    header_cells.extend(
        ["Cache % (A → B)", "Δ pp", "Tokens (A → B)", "Δ Tokens"]
    )

    # ── body ────────────────────────────────────────────────────────
    def _field(bundle: "MetricBundle | None", attr: str):
        # A missing side contributes None for every metric.
        return getattr(bundle, attr) if bundle else None

    def _row_cells(label: str,
                   a: "MetricBundle | None", b: "MetricBundle | None",
                   delta) -> list:
        # A row is a list of (raw_text, ansi_code) pairs; ansi_code == ""
        # means "plain". Width math only ever looks at raw_text.
        cost_code = _diff_color_for_delta("cost", delta.cost_usd, enabled=color)
        cache_code = _diff_color_for_delta(
            "cache_pp", delta.cache_hit_pct_pp, enabled=color,
        )
        # Token deltas reuse the "cost" polarity.
        tokens_code = _diff_color_for_delta(
            "cost", delta.tokens_input, enabled=color,
        )
        cells: list = [
            (label, ""),
            (_diff_fmt_cost_cell(_field(a, "cost_usd"), _field(b, "cost_usd")), ""),
            (_diff_fmt_delta_cost_cell(delta.cost_usd, delta.cost_usd_pct),
             cost_code),
        ]
        if has_used:
            cells.append(
                (_diff_fmt_pct_cell(_field(a, "used_pct"), _field(b, "used_pct")), "")
            )
            # Used % Δ pp is NOT styled — only the cache Δ pp slot is.
            cells.append((_diff_fmt_pp_cell(delta.used_pct_pp), ""))
        cells.append(
            (_diff_fmt_pct_cell(_field(a, "cache_hit_pct"),
                                _field(b, "cache_hit_pct")), "")
        )
        cells.append((_diff_fmt_pp_cell(delta.cache_hit_pct_pp), cache_code))
        cells.append(
            (_diff_fmt_tokens_cell(_field(a, "tokens_input"),
                                   _field(b, "tokens_input")), "")
        )
        cells.append((_diff_fmt_delta_tokens_cell(delta.tokens_input), tokens_code))
        return cells

    body_cells: list = []
    for entry in section.rows:
        entry_label = entry.label
        if entry.status in ("new", "dropped"):
            entry_label = f"{entry_label}\n({entry.status})"
        body_cells.append(_row_cells(entry_label, entry.a, entry.b, entry.delta))

    if total_a is not None or total_b is not None:
        total_delta = _build_delta_bundle(total_a, total_b)
        body_cells.append(_row_cells("Total", total_a, total_b, total_delta))

    # ── column widths ───────────────────────────────────────────────
    # Start from the header text and widen to the longest physical line of
    # any (possibly multi-line) body cell.
    header_row = [(text, "") for text in header_cells]
    col_w = [len(text) for text in header_cells]
    for cells in body_cells:
        for idx, (raw, _code) in enumerate(cells):
            widest = max(len(part) for part in raw.split("\n"))
            if widest > col_w[idx]:
                col_w[idx] = widest

    def _rule(left: str, mid: str, right: str) -> str:
        # Each column is its width plus one space of padding on both sides.
        segments = [boxes["h"] * (w + 2) for w in col_w]
        return left + mid.join(segments) + right

    def _render_row(cells: list) -> str:
        split_cells = [raw.split("\n") for raw, _code in cells]
        height = max([1] + [len(parts) for parts in split_cells])
        rendered: list = []
        for line_no in range(height):
            pieces = [boxes["v"]]
            for idx, parts in enumerate(split_cells):
                text = parts[line_no] if line_no < len(parts) else ""
                code = cells[idx][1]
                # Pad the raw text first, then style it, so escape codes
                # never count towards the column width and the rules line
                # up identically with or without color.
                styled = _style_ansi(
                    text.ljust(col_w[idx]), code, enabled=bool(code),
                )
                pieces.append(f" {styled} ")
                pieces.append(boxes["v"])
            rendered.append("".join(pieces))
        return "\n".join(rendered)

    # ── assemble ────────────────────────────────────────────────────
    separator = _rule(boxes["ml"], boxes["mm"], boxes["mr"])
    out.append(_rule(boxes["tl"], boxes["tm"], boxes["tr"]))
    out.append(_render_row(header_row))
    out.append(separator)
    for idx, cells in enumerate(body_cells):
        if idx:
            out.append(separator)
        out.append(_render_row(cells))
    out.append(_rule(boxes["bl"], boxes["bm"], boxes["br"]))

    if section.hidden_count > 0:
        if threshold is None:
            usd_lit, pct_lit = "$0.10", "1.0"
        else:
            usd_lit = f"${threshold.min_delta_usd:.2f}"
            pct_lit = f"{threshold.min_delta_pct:.1f}"
        out.append(
            f" ({section.hidden_count} rows hidden; "
            f"|Δ$| < {usd_lit} AND |Δ%| < {pct_lit}. "
            f"Pass --all to show, or --min-delta to override.)"
        )
    return "\n".join(out)
1372
+
1373
+
1374
def _diff_render_full_output(
    result: DiffResult,
    *,
    color: bool,
    width: int,
    raw_aggregates: dict,
    tz: "ZoneInfo | None" = None,
) -> str:
    """Assemble the complete text report for a diff.

    Output order: title banner, blank line, window header, then for every
    section in ``result.sections`` a blank line followed by its table.
    ``raw_aggregates`` maps a section name to its ``(total_a, total_b)``
    pair; sections without an entry get ``(None, None)``.

    ``tz`` is forwarded to ``_diff_render_window_header`` for the date
    labels; ``tz=None`` means host-local.
    """
    chunks: list = [
        _diff_render_banner(),
        "",
        _diff_render_window_header(result, color=color, tz=tz),
    ]
    for section in result.sections:
        total_a, total_b = raw_aggregates.get(section.name, (None, None))
        table = _diff_render_section_table(
            section,
            total_a=total_a,
            total_b=total_b,
            width=width,
            color=color,
            used_pct_mode_a=result.used_pct_mode_a,
            used_pct_mode_b=result.used_pct_mode_b,
            threshold=result.threshold,
        )
        chunks.extend(("", table))
    return "\n".join(chunks)
1402
+
1403
+
1404
+ # ─────────────────────────────────────────────────────────────────────
1405
+ # diff renderer — JSON
1406
+ # ─────────────────────────────────────────────────────────────────────
1407
+
1408
+
1409
+ def _diff_metric_to_json(mb: "MetricBundle | None") -> "dict | None":
1410
+ if mb is None:
1411
+ return None
1412
+ return {
1413
+ "cost_usd": round(mb.cost_usd, 6),
1414
+ "tokens_input": mb.tokens_input,
1415
+ "tokens_output": mb.tokens_output,
1416
+ "tokens_cache_read": mb.tokens_cache_read,
1417
+ "tokens_cache_write": mb.tokens_cache_write,
1418
+ "cache_hit_pct": (None if mb.cache_hit_pct is None
1419
+ else round(mb.cache_hit_pct, 3)),
1420
+ "used_pct": (None if mb.used_pct is None
1421
+ else round(mb.used_pct, 3)),
1422
+ }
1423
+
1424
+
1425
+ def _diff_delta_to_json(d: DeltaBundle) -> dict:
1426
+ return {
1427
+ "cost_usd": (None if d.cost_usd is None else round(d.cost_usd, 6)),
1428
+ "cost_usd_pct": (None if d.cost_usd_pct is None
1429
+ else round(d.cost_usd_pct, 3)),
1430
+ "tokens_input": d.tokens_input,
1431
+ "tokens_input_pct": (None if d.tokens_input_pct is None
1432
+ else round(d.tokens_input_pct, 3)),
1433
+ "tokens_output": d.tokens_output,
1434
+ "tokens_output_pct": (None if d.tokens_output_pct is None
1435
+ else round(d.tokens_output_pct, 3)),
1436
+ "tokens_cache_read": d.tokens_cache_read,
1437
+ "tokens_cache_read_pct": (None if d.tokens_cache_read_pct is None
1438
+ else round(d.tokens_cache_read_pct, 3)),
1439
+ "tokens_cache_write": d.tokens_cache_write,
1440
+ "tokens_cache_write_pct": (None if d.tokens_cache_write_pct is None
1441
+ else round(d.tokens_cache_write_pct, 3)),
1442
+ "cache_hit_pct_pp": (None if d.cache_hit_pct_pp is None
1443
+ else round(d.cache_hit_pct_pp, 3)),
1444
+ "used_pct_pp": (None if d.used_pct_pp is None
1445
+ else round(d.used_pct_pp, 3)),
1446
+ }
1447
+
1448
+
1449
def _diff_window_to_json(pw: ParsedWindow, used_pct_mode: str) -> dict:
    """Describe one parsed window for the JSON envelope.

    Start and end instants are serialized through `_iso_z`, the length is
    rounded to 3 decimals, and `used_pct_mode` is attached verbatim; the
    remaining fields are copied straight from `pw`.
    """
    started = _iso_z(pw.start_utc)
    ended = _iso_z(pw.end_utc)
    return dict(
        label=pw.label,
        kind=pw.kind,
        start_at=started,
        end_at=ended,
        length_days=round(pw.length_days, 3),
        week_aligned=pw.week_aligned,
        full_weeks_count=pw.full_weeks_count,
        used_pct_mode=used_pct_mode,
    )
1460
+
1461
+
1462
def _diff_to_json_payload(
    result: DiffResult,
    *,
    options: dict,
    now: "dt.datetime | None" = None,
) -> dict:
    """Render a DiffResult as the spec §6 envelope shape.

    The envelope carries `schema_version` 1, the `generated_at` timestamp,
    both windows, the mismatch/normalization flags, the caller-supplied
    `options` (embedded as-is) and one entry per section with its rows,
    hidden-row count and column specs.

    The `now` kwarg defaults to `_command_as_of()` so a CCTALLY_AS_OF env
    var pins `generated_at` deterministically in tests/fixtures. Pass an
    explicit `now` to override.
    """
    if now is None:
        now = _command_as_of()

    def _row_json(row) -> dict:
        return {
            "key": row.key,
            "label": row.label,
            "status": row.status,
            "a": _diff_metric_to_json(row.a),
            "b": _diff_metric_to_json(row.b),
            "delta": _diff_delta_to_json(row.delta),
            "sort_key": row.sort_key,
        }

    def _column_json(col) -> dict:
        return {
            "field": col.field,
            "header": col.header,
            "format": col.format,
            "show_in_overall": col.show_in_overall,
        }

    def _section_json(section) -> dict:
        rows_json = [_row_json(row) for row in section.rows]
        return {
            "name": section.name,
            "scope": section.scope,
            "rows": rows_json,
            "hidden_count": section.hidden_count,
            "columns": [_column_json(col) for col in section.columns],
        }

    sections_json = [_section_json(section) for section in result.sections]
    return {
        "schema_version": 1,
        "generated_at": _iso_z(now),
        "subcommand": "diff",
        "windows": {
            "a": _diff_window_to_json(result.window_a, result.used_pct_mode_a),
            "b": _diff_window_to_json(result.window_b, result.used_pct_mode_b),
        },
        "mismatched_length": result.mismatched_length,
        "normalization": result.normalization,
        "options": options,
        "sections": sections_json,
    }
1513
+
1514
+
1515
def _diff_render_json(
    result: DiffResult,
    *,
    options: dict,
    now: "dt.datetime | None" = None,
) -> str:
    """Serialize the diff envelope to a JSON string indented by 2 spaces.

    `options` and `now` are passed straight through to
    `_diff_to_json_payload`, which builds the envelope.
    """
    payload = _diff_to_json_payload(result, options=options, now=now)
    return json.dumps(payload, indent=2)
1525
+
1526
+
1527
def _diff_resolve_anchor(
    now_utc: dt.datetime,
) -> "tuple[dt.datetime | None, dt.datetime | None]":
    """Resolve the (anchor_week_start, anchor_resets_at) pair for week tokens.

    The pair is read from the most recently captured row of
    `weekly_usage_snapshots` and then post-processed so week-token
    resolution stays correct even when that snapshot does not describe
    the current subscription week:

    1. **Roll forward stale anchor.** If the snapshot's `week_end_at` is
       strictly earlier than `now_utc` — i.e. no `record-usage`
       invocation has fired since the most recent reset — both endpoints
       are advanced in 7-day steps until the end is no longer before
       `now_utc` (at most 100 steps). The reset-event lookup is skipped
       in this branch because any recorded event pertained to a now-past
       week. The synthesized week has no row in `weekly_usage_snapshots`
       for its `week_start_date`, so `_diff_resolve_used_pct`'s exact
       branch (constrained by `WHERE week_start_date = ?`) returns `n/a`
       without extra plumbing.

    2. **Apply mid-week reset event override.** When the snapshot is
       current, `week_reset_events` is searched for a row whose
       `new_week_end_at` matches the snapshot's `week_end_at`. If one is
       found and its `effective_reset_at_utc` is later than the
       snapshot's `week_start_at`, the start is replaced by that reset
       moment (mirrors `_apply_midweek_reset_override` /
       `_apply_reset_events_to_weekrefs` POST-reset rule). Database and
       value errors in this step are ignored and the un-overridden pair
       is returned.

    Returns (None, None) when the DB cannot be opened or the snapshot
    table has no rows. If either timestamp column of the latest row is
    empty, the pair is returned as parsed (with None for the empty side)
    and no post-processing is attempted. The connection is always closed.
    """
    try:
        conn = open_db()
    except Exception:
        return None, None

    def _as_utc(raw):
        # Empty/NULL column -> None; a trailing "Z" is rewritten to an
        # explicit offset before parsing.
        if not raw:
            return None
        parsed = dt.datetime.fromisoformat(raw.replace("Z", "+00:00"))
        return parsed.astimezone(dt.timezone.utc)

    try:
        latest = conn.execute(
            "SELECT week_start_at, week_end_at "
            "FROM weekly_usage_snapshots "
            "ORDER BY captured_at_utc DESC, id DESC LIMIT 1"
        ).fetchone()
        if latest is None:
            return None, None

        week_start = _as_utc(latest[0])
        week_end = _as_utc(latest[1])
        if week_start is None or week_end is None:
            return week_start, week_end

        # Step 1: stale snapshot — slide the window forward a week at a
        # time. The step budget (100 ≈ 700 days) bounds the loop so a
        # clock skew can't spin forever.
        if week_end < now_utc:
            one_week = dt.timedelta(days=7)
            steps_left = 100
            while steps_left > 0 and week_end < now_utc:
                week_start = week_start + one_week
                week_end = week_end + one_week
                steps_left -= 1
            return week_start, week_end

        # Step 2: snapshot is current — look for a mid-week reset event
        # that ends on this window's end and, if it started later than
        # the snapshot's start, use the reset moment as the start.
        try:
            end_iso = _canonicalize_optional_iso(
                week_end.isoformat(timespec="seconds"),
                "diff.anchor.end",
            )
            event = None
            if end_iso is not None:
                event = conn.execute(
                    "SELECT effective_reset_at_utc FROM week_reset_events "
                    "WHERE new_week_end_at = ?",
                    (end_iso,),
                ).fetchone()
            if event and event[0]:
                reset_at = parse_iso_datetime(
                    event[0], "reset_event.effective"
                )
                if reset_at > week_start:
                    week_start = reset_at
        except (sqlite3.DatabaseError, ValueError):
            # Best effort: the override is optional, keep the snapshot pair.
            pass
        return week_start, week_end
    finally:
        conn.close()