cctally 1.22.2 → 1.22.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1688 @@
1
+ """5-hour-window command family.
2
+
3
+ Holds the three 5h commands — `cmd_blocks`, `cmd_five_hour_blocks`,
4
+ `cmd_five_hour_breakdown` — their family-local helpers, the shared 5h
5
+ recorded-window resolution layer (`_load_recorded_five_hour_windows`,
6
+ `_select_non_overlapping_recorded_windows`, `_maybe_swap_active_block_to_canonical`,
7
+ `_resolve_block_selector`, `_CANONICAL_WEIGHT_THRESHOLD`), AND
8
+ `_backfill_five_hour_blocks` — the one-shot historical backfill of
9
+ `five_hour_blocks` from `weekly_usage_snapshots` (idempotent via
10
+ `UNIQUE(five_hour_window_key)` + `INSERT OR IGNORE`, `BEGIN IMMEDIATE` per
11
+ #87 — `tests/test_stats_db_busy_timeout.py` reads its source from this file;
12
+ `five_hour_milestones` is never backfilled, write-once gotcha).
13
+
14
+ Honest *name* imports are KERNEL-ONLY (`_cctally_core`). This module
15
+ references the bin/cctally RE-EXPORTED names of every library kernel it
16
+ needs (`BLOCK_DURATION`, `_canonical_5h_window_key`, `_render_blocks_table`,
17
+ `build_blocks_view`, …) — NOT the `_lib_*` module objects — so NO qualified
18
+ `_lib_*` import is required; every such name is reached via the call-time
19
+ `_cctally()` accessor so test monkeypatches through `cctally`'s namespace
20
+ are preserved (spec §3.1). The accessor is bound to ``_c`` (not the usual
21
+ ``c``) here because several moved functions already use ``c`` as a real
22
+ ``for c in ...`` loop variable over ``sqlite3.Row`` rows — binding the
23
+ accessor to ``c`` would shadow the module after the loop. The five
24
+ ``_cctally_core`` kernel symbols this module imports (``open_db``,
25
+ ``_command_as_of``, ``eprint``, ``now_utc_iso``, ``parse_iso_datetime``) are honest-imported
26
+ (kernel-extraction invariant — ``tests/test_kernel_extraction_invariants.py``),
27
+ not reached via ``_c``.
28
+
29
+ bin/cctally re-exports EVERY moved symbol (eager): the parser resolves
30
+ `c.cmd_blocks` / `c.cmd_five_hour_blocks` / `c.cmd_five_hour_breakdown`;
31
+ the dashboard reaches `sys.modules["cctally"]._load_recorded_five_hour_windows`;
32
+ `_lib_render` reaches `sys.modules["cctally"]._format_block_start`; tests
33
+ retrieve `ns["cmd_blocks"]` / `ns["_resolve_block_selector"]` /
34
+ `ns["_maybe_swap_active_block_to_canonical"]`.
35
+
36
+ Spec: docs/superpowers/specs/2026-05-30-extract-five-hour-statusline-cmd-design.md
37
+ """
38
+ from __future__ import annotations
39
+
40
+ import argparse
41
+ import datetime as dt
42
+ import json
43
+ import sqlite3
44
+ import sys
45
+
46
+ from _cctally_core import _command_as_of, eprint, now_utc_iso, open_db, parse_iso_datetime
47
+
48
+
49
def _cctally():
    """Return the live ``cctally`` module object, looked up on every call.

    The lookup goes through ``sys.modules`` each time instead of caching an
    import (spec §3.1). Raises ``KeyError`` if no module is registered under
    the name ``"cctally"``.
    """
    registry = sys.modules
    return registry["cctally"]
52
+
53
+
54
def _resolve_block_selector(
    conn: sqlite3.Connection,
    *,
    block_start: str | None,
    ago: int | None,
) -> dict | None:
    """Resolve a five-hour-breakdown selector to one ``five_hour_blocks`` row.

    Selector rules (spec §3.1):
      * Both ``None``  -> most-recent block (highest ``block_start_at``).
      * ``ago=N``      -> the (N+1)-th most-recent block; ``N=0`` == default.
      * ``block_start=<iso>`` -> parsed as ISO 8601; naive forms are UTC and
        a trailing ``Z`` is accepted as UTC on every supported Python. The
        row is matched by computing
        ``_canonical_5h_window_key(parsed_epoch + 5*3600)`` and looking up
        ``five_hour_window_key``.
      * ``block_start`` + ``ago`` together -> ``ValueError``.
      * Date-only ``block_start`` (no ``T``/space separator) -> ``ValueError``
        (cannot derive a unique canonical 5h key from a date alone).

    Args:
        conn: open connection whose rows can be passed to ``dict(...)``
            (i.e. ``row_factory`` yields mapping-like rows such as
            ``sqlite3.Row``).
        block_start: ISO 8601 date-time string, or ``None``.
        ago: non-negative offset from the most recent block, or ``None``.

    Returns:
        The matching row as a plain ``dict``, or ``None`` if no block matches.

    Raises:
        ValueError: on conflicting, malformed, date-only, or negative input.
    """
    if block_start is not None and ago is not None:
        raise ValueError(
            "--block-start and --ago are mutually exclusive"
        )

    if block_start is not None:
        # Reject date-only forms — can't compute a unique canonical key.
        if "T" not in block_start and " " not in block_start:
            raise ValueError(
                f"--block-start requires HH:MM (got '{block_start}')"
            )
        # ``datetime.fromisoformat`` only learned the ``Z`` suffix in
        # Python 3.11; spell it as an explicit offset so older interpreters
        # accept the same input.
        iso_text = block_start
        if iso_text.endswith("Z"):
            iso_text = iso_text[:-1] + "+00:00"
        try:
            parsed = dt.datetime.fromisoformat(iso_text)
        except ValueError as e:
            raise ValueError(f"--block-start: {e}") from e
        # Naive -> UTC.
        if parsed.tzinfo is None:
            parsed = parsed.replace(tzinfo=dt.timezone.utc)
        # The canonical key is derived from the window's reset moment,
        # which is the block start plus five hours.
        resets_epoch = int(parsed.timestamp()) + 5 * 3600
        # Resolve the accessor only here: this is the sole branch that
        # needs a re-exported kernel, so validation errors and the
        # default/--ago path do not depend on it.
        key = _cctally()._canonical_5h_window_key(resets_epoch)
        row = conn.execute(
            "SELECT * FROM five_hour_blocks WHERE five_hour_window_key = ?",
            (key,),
        ).fetchone()
        return dict(row) if row else None

    # Default or --ago: order DESC by block_start_at, take the (ago or 0)-th.
    offset = int(ago) if ago is not None else 0
    if offset < 0:
        raise ValueError(f"--ago must be non-negative (got {ago})")
    row = conn.execute(
        """
        SELECT * FROM five_hour_blocks
        ORDER BY block_start_at DESC, id DESC
        LIMIT 1 OFFSET ?
        """,
        (offset,),
    ).fetchone()
    return dict(row) if row else None
116
+
117
+
118
# Weight overlay applied per canonical (``five_hour_blocks``) row by
# ``_load_recorded_five_hour_windows``:
# ``counts[snapped] += _CANONICAL_WEIGHT_THRESHOLD``.
#
# Gives canonical anchors dominant weight inside the
# ``_select_non_overlapping_recorded_windows`` DP, so any non-canonical
# phantom adjacent to a canonical anchor loses on weight comparison.
#
# NOT used as a provenance check — the selector takes an explicit
# ``canonical_anchors`` set from the loader for the force-restore bypass
# (issue #116 review follow-up: raw-only buckets with bulk-imported /
# high-frequency snapshot histories can also accumulate >= 1000 weight,
# so the threshold conflates provenance with support count).
_CANONICAL_WEIGHT_THRESHOLD = 1000
129
+
130
+
131
def _select_non_overlapping_recorded_windows(
    items: list[tuple[dt.datetime, int]],
    *,
    canonical_anchors: set[dt.datetime] | None = None,
    block_duration: dt.timedelta | None = None,
) -> list[dt.datetime]:
    """Pick the max-weight subset of recorded ``R`` values that respect
    the 5h non-overlap constraint, with canonical anchors guaranteed
    to survive.

    Anthropic 5h windows cannot truly overlap: the next window only
    opens once the previous one resets, so consecutive real ``R``
    values are always at least ``BLOCK_DURATION`` apart. When two
    recorded ``R`` values fall within ``BLOCK_DURATION`` of each other
    (e.g. a 2-row anomaly captured during a brief status-line glitch
    sitting next to the 78-row real reset), at most one is genuine.
    This solves weighted interval scheduling where each ``R`` "owns"
    its preceding window and the weight is the number of supporting
    snapshots: the subset that maximizes total support wins. Ties
    between taking and skipping an ``R`` resolve to taking it.

    Canonical bypass (issue #116): any ``R`` passed in
    ``canonical_anchors`` came from the authoritative ``five_hour_blocks``
    rollup. Two canonical rows only appear "in conflict" here when their
    10-min-floored keys land less than ``BLOCK_DURATION`` apart, which
    happens at real reset boundaries when ``resets_at`` jitters across the
    floor boundary (e.g. OLD ``R=09:00:01Z`` floors to ``09:00``, NEW
    ``R=13:59:59Z`` floors to ``13:50``). The DP still runs over the full
    item set so non-canonical phantoms next to a canonical anchor are
    dropped by weight comparison; the bypass only force-restores anchors
    the caller marked canonical and never adds back a raw-only phantom,
    whatever its weight.

    Args:
        items: ``(R, support_count)`` pairs.
        canonical_anchors: explicit set of ``R`` values sourced from
            ``five_hour_blocks``. Any present in ``items`` is guaranteed to
            appear in the result, even if the DP dropped it on the
            non-overlap constraint. ``None`` / empty set = pure DP
            behavior (no bypass).
        block_duration: minimum spacing between two selected ``R`` values.
            ``None`` (the default) uses ``BLOCK_DURATION`` as re-exported
            by the ``cctally`` module, resolved at call time.

    Returns:
        Sorted ascending list of selected ``R`` values.
    """
    from bisect import bisect_right

    if not items:
        return []
    if block_duration is None:
        # Resolved once per call instead of once per binary search.
        block_duration = _cctally().BLOCK_DURATION

    items_sorted = sorted(items, key=lambda x: x[0])
    ends = [r for r, _ in items_sorted]
    n = len(ends)

    # prev[i] = index of the latest j < i with ends[j] <= ends[i] - duration,
    # or -1 when no earlier R is compatible. Computed once and shared by the
    # forward DP pass and the backtrack.
    prev = [
        bisect_right(ends, r - block_duration, 0, i) - 1
        for i, r in enumerate(ends)
    ]

    opt = [0] * n
    chose = [False] * n
    for i, (_, weight) in enumerate(items_sorted):
        skip = opt[i - 1] if i > 0 else 0
        j = prev[i]
        take = weight + (opt[j] if j >= 0 else 0)
        # ``>=`` so a tie keeps this R rather than dropping it.
        if take >= skip:
            opt[i], chose[i] = take, True
        else:
            opt[i], chose[i] = skip, False

    # Walk back from the last item, jumping to the compatible predecessor
    # whenever an R was taken.
    chosen: list[dt.datetime] = []
    i = n - 1
    while i >= 0:
        if chose[i]:
            chosen.append(ends[i])
            i = prev[i]
        else:
            i -= 1
    chosen.reverse()

    # Canonical bypass: force-restore any canonical anchor the DP dropped
    # (issue #116). Intersect with items' keys so a caller passing anchors
    # outside the item set can't corrupt the result.
    if canonical_anchors:
        present_canonical = canonical_anchors & set(ends)
        if present_canonical and not present_canonical.issubset(chosen):
            return sorted(set(chosen) | present_canonical)
    return chosen
228
+
229
+
230
def _load_recorded_five_hour_windows(
    range_start: dt.datetime,
    range_end: dt.datetime,
) -> tuple[
    list[dt.datetime],
    dict[dt.datetime, dt.datetime],
    dict[dt.datetime, tuple[dt.datetime, dt.datetime]],
]:
    """Return sorted, UTC-aware recorded ``five_hour_resets_at`` values
    that anchor real 5h windows in ``[range_start, range_end]``.

    Returns a 3-tuple ``(selected, block_start_overrides, canonical_intervals)``:

    * ``selected``: list of 10-min-floored ``R`` anchors (sorted),
      each representing one accepted canonical 5h window. Drives
      `_group_entries_into_blocks`'s ``recorded_windows=`` kwarg.

    * ``block_start_overrides``: ``{R_floored → block_start_at_utc}``
      for credit-truncated anchors (Bug J). When a credit moment
      falls inside a canonical block's overlap with the next block,
      the earlier ``R`` is replaced by the credit moment (floored to
      10 min) and the original ``block_start_at`` is recorded here so
      the renderer keeps the real display start.

    * ``canonical_intervals``: ``{R_floored → (bs_utc, rs_utc)}``
      carrying the **exact** ``(block_start_at, five_hour_resets_at)``
      for every selected anchor that has a canonical
      ``five_hour_blocks`` row. ``rs_utc`` is the un-floored reset
      moment (jitter intact), ``bs_utc`` is the API-derived block
      start normalized to UTC. Drives `_group_entries_into_blocks`'s
      partition predicate AND Phase 1.5 block construction
      (issue #76 — 10-min-floor partition trap). Anchors with no
      canonical row (legacy weekly-snapshots-only) are absent from
      the map and the partitioner falls back to ``(R - 5h, R)``.
      Credit-truncated anchors land here with the truncated upper
      bound (``rs`` = the floored credit moment) and the
      override-supplied ``bs`` (the real pre-truncation block start).

    Two sources contribute to the merged anchor set:

    1. ``weekly_usage_snapshots.five_hour_resets_at`` — every
       record-usage tick stores the API-derived reset moment here. The
       count of supporting rows weights each anchor (low-count anchors
       are downvoted in ``_select_non_overlapping_recorded_windows``).

    2. ``five_hour_blocks.five_hour_resets_at`` — the canonical
       API-anchored rollup table. Each row represents ONE accepted 5h
       window after ``maybe_update_five_hour_block`` has merged jittered
       reset values via ``_canonical_5h_window_key``. These are the
       authoritative anchors; each row adds
       ``_CANONICAL_WEIGHT_THRESHOLD`` (1000) to its bucket so it
       dominates jittered raw snapshot values when both sources see the
       same physical window. Without this source, ``cctally blocks``
       falls back to the heuristic anchor for the ACTIVE row whenever
       the most recent ``weekly_usage_snapshots.five_hour_resets_at``
       value disagrees with the canonical anchor — Bug C in v1.7.2
       round 3. Tied windows (jitter within 10-minute floor) collapse
       to the same key and the canonical weight dominates.

    Each value is parsed as ISO-8601 and normalized to UTC. Naive
    datetimes are treated as already-UTC; values that fail to parse are
    skipped. Values are floored to the previous 10-minute boundary
    (jitter tolerance) and grouped — each bucket's weight is the count
    of supporting snapshots. Finally, when two floored ``R`` values fall
    within ``BLOCK_DURATION`` of each other,
    ``_select_non_overlapping_recorded_windows`` resolves the conflict
    by keeping the better-supported one (real Anthropic 5h windows do
    not overlap; a low-row-count ``R`` adjacent to a high-row-count
    one is almost always a transient bad reading from the status line).

    Failure handling: returns the empty 3-tuple ``([], {}, {})`` when
    ``open_db()`` raises ``sqlite3.DatabaseError`` / ``OSError`` or the
    ``weekly_usage_snapshots`` query fails; the same shape falls out
    naturally when no rows are found. A failing ``five_hour_blocks`` or
    ``week_reset_events`` query is swallowed on its own and just leaves
    that source empty. This keeps ``cmd_blocks`` on the pre-existing
    heuristic path whenever recorded-anchor data is unavailable.
    """
    _c = _cctally()
    try:
        with open_db() as conn:
            rows = conn.execute(
                "SELECT five_hour_resets_at "
                "FROM weekly_usage_snapshots "
                "WHERE five_hour_resets_at IS NOT NULL "
                " AND five_hour_resets_at >= ? "
                " AND five_hour_resets_at <= ?",
                (range_start.isoformat(), range_end.isoformat()),
            ).fetchall()
            # Canonical API-anchored windows from the rollup table.
            # Heavy-weight (1000 per row) so they always dominate over
            # any jittered raw-snapshot value sharing the same floored
            # 10-minute bucket. Wrapped in a defensive try in case the
            # five_hour_blocks table doesn't exist yet (very-old DB on
            # first open before the bootstrap migration ran).
            # Pull ``block_start_at`` alongside ``five_hour_resets_at``
            # so Bug J's overlap-truncation step (below) can preserve
            # the real display start for credit-truncated blocks.
            canonical_rows: list[Any] = []
            try:
                canonical_rows = conn.execute(
                    "SELECT five_hour_resets_at, block_start_at "
                    "FROM five_hour_blocks "
                    "WHERE five_hour_resets_at IS NOT NULL "
                    " AND five_hour_resets_at >= ? "
                    " AND five_hour_resets_at <= ?",
                    (range_start.isoformat(), range_end.isoformat()),
                ).fetchall()
            except sqlite3.DatabaseError:
                canonical_rows = []
            # In-place credit events — used by Bug J to detect canonical
            # block overlaps that should be resolved by truncating the
            # earlier block at the credit moment (rather than dropping
            # one via _select_non_overlapping_recorded_windows, which
            # leaves the dropped block's entries unanchored and
            # rendered as a phantom heuristic "~" row).
            credit_moments: list[dt.datetime] = []
            try:
                credit_rows = conn.execute(
                    "SELECT effective_reset_at_utc "
                    "FROM week_reset_events "
                    "WHERE old_week_end_at = effective_reset_at_utc"
                ).fetchall()
                for c in credit_rows:
                    raw = c["effective_reset_at_utc"]
                    try:
                        d = dt.datetime.fromisoformat(str(raw))
                    except ValueError:
                        continue
                    if d.tzinfo is None:
                        d = d.replace(tzinfo=dt.timezone.utc)
                    else:
                        d = d.astimezone(dt.timezone.utc)
                    credit_moments.append(d)
                # Issue #44: the inner-loop break below latches onto the
                # first credit in [next_bs, rs]. With two credits inside
                # the same pre-credit canonical 5h window, the wrong one
                # (the later one) wins when SQLite returns rows in
                # insertion order rather than time order — collapsing
                # two distinct truncated anchors onto the same floored
                # bucket and silently dropping one via override-map
                # overwrite. Sort once so the break consistently picks
                # the EARLIEST credit, which is the one that actually
                # ended the earlier block (its floor equals the next
                # block's block_start_at by construction).
                credit_moments.sort()
            except sqlite3.DatabaseError:
                credit_moments = []
    except (sqlite3.DatabaseError, OSError):
        # OSError covers ensure_dirs() failures (read-only FS, permission
        # denied on parent dir) that propagate from open_db() before any
        # SQL runs. Either way, fall back to the heuristic anchor path.
        return [], {}, {}
    # Bucket raw snapshot resets by their 10-minute floor; the bucket's
    # weight is the number of snapshots that landed in it.
    counts: dict[dt.datetime, int] = {}
    for row in rows:
        # Tolerate both mapping-style rows and plain tuples.
        raw = row["five_hour_resets_at"] if hasattr(row, "keys") else row[0]
        if raw is None:
            continue
        try:
            d = dt.datetime.fromisoformat(str(raw))
        except ValueError:
            continue
        if d.tzinfo is None:
            d = d.replace(tzinfo=dt.timezone.utc)
        else:
            d = d.astimezone(dt.timezone.utc)
        snapped = _c._floor_to_ten_minutes(d)
        counts[snapped] = counts.get(snapped, 0) + 1
    # Overlay canonical rollup anchors at heavy weight. Same flooring
    # rule so a jittered raw value (e.g. 17:48Z) and its canonicalized
    # rollup (e.g. 17:50Z) collapse into the same bucket; without that
    # the high-weight canonical entry would create a NEW bucket and
    # both would be reported as separate windows, then
    # `_select_non_overlapping_recorded_windows` (5h-disjoint
    # invariant) would drop the lower-weight one — but the wrong
    # one would win when jitter exceeds 10 minutes.
    #
    # Bug J (v1.7.2 round-5): collect canonical (block_start, R) pairs
    # so we can detect in-place-credit overlaps before flattening into
    # the weighted scheduler. When two canonical 5h blocks overlap AND
    # an in-place credit event falls inside the overlap, truncate the
    # EARLIER block's R to the credit moment (floored to 10 min so it
    # collapses with any same-bucket raw-snapshot value). The
    # truncated R keeps both blocks visible — without this fix the
    # earlier block's entries are silently rendered as a phantom
    # heuristic "~" row by `_group_entries_into_blocks`.
    canonical_pairs: list[tuple[dt.datetime, dt.datetime]] = []
    for row in canonical_rows:
        rs_raw = row["five_hour_resets_at"] if hasattr(row, "keys") else row[0]
        bs_raw = row["block_start_at"] if hasattr(row, "keys") else row[1]
        if rs_raw is None or bs_raw is None:
            continue
        try:
            rs = dt.datetime.fromisoformat(str(rs_raw))
            bs = dt.datetime.fromisoformat(str(bs_raw))
        except ValueError:
            continue
        if rs.tzinfo is None:
            rs = rs.replace(tzinfo=dt.timezone.utc)
        else:
            rs = rs.astimezone(dt.timezone.utc)
        if bs.tzinfo is None:
            bs = bs.replace(tzinfo=dt.timezone.utc)
        else:
            bs = bs.astimezone(dt.timezone.utc)
        canonical_pairs.append((bs, rs))
    # Order by block start so index i+1 is the chronologically next block.
    canonical_pairs.sort(key=lambda p: p[0])

    # issue #76: canonical_intervals maps every floored R -> its EXACT
    # (block_start_at, five_hour_resets_at) — both UTC, rs un-floored
    # (jitter intact). Drives the partition predicate AND Phase 1.5
    # block construction in `_group_entries_into_blocks` so floor-band
    # entries (timestamps in [floor(R), R)) land in the right bucket
    # and the displayed window matches Anthropic's actual interval.
    # Built before the credit-truncation loop below so that loop can
    # rewrite the upper bound in-place (truncated R replaces rs).
    canonical_intervals: dict[
        dt.datetime, tuple[dt.datetime, dt.datetime]
    ] = {}
    for bs, rs in canonical_pairs:
        snapped = _c._floor_to_ten_minutes(rs)
        canonical_intervals[snapped] = (bs, rs)

    # Detect overlap-with-credit and replace the earlier R with a
    # credit-truncated anchor. The (anchor → real_block_start) map is
    # returned alongside the anchor list so the renderer can show the
    # real block_start_at on the display row (instead of the default
    # R - 5h, which would be hours earlier for a 2h-truncated block).
    block_start_overrides: dict[dt.datetime, dt.datetime] = {}
    truncated_pairs: list[tuple[dt.datetime, dt.datetime]] = []
    for i, (bs, rs) in enumerate(canonical_pairs):
        truncated_R = rs
        if i + 1 < len(canonical_pairs):
            next_bs, _next_rs = canonical_pairs[i + 1]
            if rs > next_bs:  # overlap with next block
                # Look for a credit moment inside [next_bs, rs] — the
                # part of the earlier block that overlaps the next.
                for cm in credit_moments:
                    if next_bs <= cm <= rs:
                        cm_floored = _c._floor_to_ten_minutes(cm)
                        # Only truncate if cm is strictly inside the
                        # earlier block; otherwise leave R alone and
                        # let `_select_non_overlapping_recorded_windows`
                        # drop one via its weight-tiebreaker.
                        if bs < cm_floored < rs:
                            truncated_R = cm_floored
                            block_start_overrides[cm_floored] = bs
                            # Rewrite canonical_intervals[snapped_orig]
                            # to the truncated interval under the
                            # truncated key. issue #76: the
                            # partitioner reads canonical_intervals
                            # for the exact bs/rs; the truncated entry
                            # must reflect the credit-shifted upper
                            # bound (cm_floored) AND the real bs (the
                            # override) so partition + Phase 1.5
                            # render the credit-shortened block
                            # consistently.
                            snapped_orig = _c._floor_to_ten_minutes(rs)
                            canonical_intervals.pop(snapped_orig, None)
                            canonical_intervals[cm_floored] = (
                                bs, cm_floored,
                            )
                        # NOTE(review): the break is placed at the
                        # "first credit in [next_bs, rs]" level, per the
                        # Issue #44 comment above; the exact nesting was
                        # inferred — confirm against upstream.
                        break
        truncated_pairs.append((bs, truncated_R))

    # Truncated anchors are credit-adjusted and known-good; bypass the
    # `_select_non_overlapping_recorded_windows` weighted scheduler for
    # them (the scheduler treats every R as the END of a fixed 5h
    # window and would see a truncated R conflicting with the adjacent
    # canonical block one slot earlier — e.g. truncated R=17:50 would
    # collide with the prior block's R=15:50 even though their REAL
    # intervals are [15:50, 17:50] and [10:50, 15:50] respectively —
    # adjacent, not overlapping). Add their R directly to the selector
    # input weight (so jittered same-bucket raw values still collapse)
    # but skip them when computing the overlap-safe subset.
    truncated_anchors: set[dt.datetime] = set()
    for bs, rs in truncated_pairs:
        snapped = _c._floor_to_ten_minutes(rs)
        # NOTE(review): override keys are only ever written above as
        # already-floored credit moments, so this re-keying looks like it
        # can fire only if `_floor_to_ten_minutes` is not idempotent —
        # confirm before relying on or removing it.
        if rs != _c._floor_to_ten_minutes(rs):
            if rs in block_start_overrides:
                block_start_overrides[snapped] = block_start_overrides.pop(rs)
        # Identify truncated anchors by membership in the override map
        # (only credit-truncated entries land there).
        if snapped in block_start_overrides:
            truncated_anchors.add(snapped)
        # Every canonical row — truncated or not — adds the heavy weight.
        counts[snapped] = counts.get(snapped, 0) + _CANONICAL_WEIGHT_THRESHOLD

    non_truncated_items = [
        (a, w) for a, w in counts.items() if a not in truncated_anchors
    ]
    # Pass canonical provenance explicitly: every key currently in
    # canonical_intervals came from a `five_hour_blocks` row (raw-only
    # buckets never land in this map). Subtract truncated_anchors because
    # those bypass the DP via the separate merge below — keeping them
    # out of canonical_anchors here is a no-op for correctness but
    # mirrors the same scope as non_truncated_items for clarity.
    canonical_anchors_for_dp = set(canonical_intervals.keys()) - truncated_anchors
    selected_non_truncated = _select_non_overlapping_recorded_windows(
        non_truncated_items,
        canonical_anchors=canonical_anchors_for_dp,
    )
    # Merge truncated anchors back in, sorted ascending. Their non-
    # overlap with the surrounding canonical blocks is guaranteed by
    # the credit-moment truncation: a truncated R sits strictly
    # between its real block_start (which equals the prior block's R)
    # and the next block's R.
    selected = sorted(
        list(selected_non_truncated) + list(truncated_anchors)
    )
    # Filter canonical_intervals down to selected anchors. Raw-only
    # anchors (selected via weekly_usage_snapshots but absent from
    # five_hour_blocks) stay out of the map; the partitioner falls
    # back to (R - 5h, R) for them. issue #76 / spec §1.1 D1.
    canonical_intervals = {
        R: canonical_intervals[R]
        for R in selected
        if R in canonical_intervals
    }
    return selected, block_start_overrides, canonical_intervals
547
+
548
+
549
def cmd_blocks(args: argparse.Namespace) -> int:
    """Print the usage report bucketed into 5-hour session blocks.

    Output shape depends on the flags: ``--json`` emits JSON, ``--active``
    with exactly one surviving block renders the active-block box, and
    every other combination goes through the table renderer.

    Args:
        args: parsed CLI namespace. ``since``, ``until``, ``mode``, ``json``
            and ``breakdown`` are read directly; ``session_length``,
            ``token_limit``, ``recent``, ``active`` and ``compact`` are
            optional. The resolved display timezone is stored back on
            ``args._resolved_tz``.

    Returns:
        ``0`` on success (including the "no active block" case); ``1`` when
        the session length is non-positive or ``--since`` / ``--until``
        fails to parse.
    """
    _c = _cctally()

    # #86 Session F: -n/--session-length is a documented no-op (blocks are
    # anchored to Anthropic's real 5h resets and cannot be re-sized), yet a
    # non-positive value must still error for drop-in fidelity with
    # ccusage. Checked before any data is loaded, mirroring ccusage's flow.
    session_length = getattr(args, "session_length", 5.0)
    if session_length <= 0:
        eprint("blocks: session length must be a positive number")
        return 1

    cfg = _c._load_claude_config_for_args(args)
    _c._bridge_z_into_tz(args, cfg)
    display_tz = _c.resolve_display_tz(args, cfg)
    args._resolved_tz = display_tz

    as_of = _command_as_of()
    utc = dt.timezone.utc

    # --since / --until go through the centralized dual-form helper
    # (Session A, spec §7.1.1) so YYYY-MM-DD is accepted too and the error
    # wording matches the other in-scope commands.
    if not args.since:
        # No lower bound given: take all available data, as ccusage does.
        range_start = dt.datetime(2020, 1, 1, tzinfo=utc)
    else:
        try:
            since_date = _c._parse_dual_form_date(args.since, "--since")
        except ValueError:
            return 1
        range_start = since_date.replace(tzinfo=utc)

    if not args.until:
        range_end = as_of
    else:
        try:
            until_date = _c._parse_dual_form_date(args.until, "--until")
        except ValueError:
            return 1
        # Stretch the upper bound to the last microsecond of that day.
        range_end = until_date.replace(
            hour=23,
            minute=59,
            second=59,
            microsecond=999999,
            tzinfo=utc,
        )

    entries = _c.get_entries(range_start, range_end)
    _c._emit_debug_samples_if_set(args, entries, command_label="blocks")

    # A window spans [R - 5h, R): a reset just before ``range_start`` can
    # still anchor entries near it, and a reset just after ``range_end``
    # (e.g. the active window when range_end is "now") can still anchor
    # entries inside the range. So both bounds are widened by one block.
    pad = _c.BLOCK_DURATION
    loaded = _load_recorded_five_hour_windows(range_start - pad, range_end + pad)
    recorded_windows, block_start_overrides, canonical_intervals = loaded

    # View-model kernel (issue #56). ``skip_rows=True`` (issue #60 review
    # fix) skips the per-block dashboard-row enrichment, which the CLI
    # never reads — only ``view.aggregated`` is consumed here.
    view = _c.build_blocks_view(
        entries,
        now_utc=as_of,
        recorded_windows=recorded_windows,
        block_start_overrides=block_start_overrides,
        canonical_intervals=canonical_intervals,
        range_start=range_start,
        range_end=range_end,
        display_tz=display_tz,
        mode=args.mode,
        skip_rows=True,
    )
    # Materialized as a list because the canonical swap mutates in place.
    blocks = list(view.aggregated)

    # Bug E (v1.7.2 round-4): a heuristic-anchored ACTIVE block is swapped
    # onto the canonical ``five_hour_blocks`` interval when one exists.
    # Bug F (v1.7.2 round-5): the entries are handed over too so totals are
    # re-aggregated over the canonical interval.
    _maybe_swap_active_block_to_canonical(
        blocks, entries, now=as_of, mode=args.mode
    )

    # ── Session F (#86): resolve token limit, then filter ────────────────
    # The auto-max baseline is taken over ALL blocks, before the
    # --recent / --active filters, matching ccusage's maxTokensFromAll.
    requested_limit = getattr(args, "token_limit", None)
    max_completed = _c._max_completed_block_tokens(blocks)
    token_limit = _c._parse_blocks_token_limit(requested_limit, max_completed)
    # Only an explicitly passed -t (any value, "max" included) produces a
    # Token Limit Status surface; the implicit default leaves this None.
    token_limit_explicit = None if requested_limit is None else token_limit
    if requested_limit in (None, "", "max") and token_limit and not args.json:
        # ccusage parity: the info line goes to stdout (Codex F1) and is
        # suppressed under --json so JSON goldens stay stable.
        print(f"Using max tokens from previous sessions: {_c._fmt_num(token_limit)}")

    if getattr(args, "recent", False):
        cutoff = as_of - dt.timedelta(days=3)
        blocks = [b for b in blocks if b.start_time >= cutoff or b.is_active]

    active_only = getattr(args, "active", False)
    if active_only:
        blocks = [b for b in blocks if b.is_active and not b.is_gap]
        if not blocks:
            if args.json:
                print('{\n "blocks": [],\n "message": "No active block"\n}')
            else:
                print("No active session block found.")
            return 0

    if args.json:
        payload = _c._blocks_to_json(
            blocks, token_limit_status_limit=token_limit_explicit
        )
        print(payload)
        return 0

    if active_only and len(blocks) == 1:
        box = _c._render_active_block_box(
            blocks[0],
            now=as_of,
            tz=display_tz,
            token_limit_explicit=token_limit_explicit,
            color=_c._supports_color_stdout(),
            unicode_ok=_c._supports_unicode_stdout(),
        )
        print(box)
        return 0

    # Table output. --compact is threaded through (Session A, spec §7.6.1;
    # Review-A P2-B) so the scale-down branch fires regardless of terminal
    # width; the resolved token_limit is threaded through (Session F) so an
    # explicit -t keys the %/REMAINING/PROJECTED surface.
    table = _c._render_blocks_table(
        blocks,
        breakdown=args.breakdown,
        now=as_of,
        tz=display_tz,
        compact=getattr(args, "compact", False),
        token_limit=token_limit,
    )
    print(table)
    return 0
716
+
717
+
718
def _maybe_swap_active_block_to_canonical(
    blocks: list[Any],
    all_entries: list[Any],
    *,
    now: dt.datetime,
    mode: str = "auto",
) -> None:
    """Swap the ACTIVE heuristic block, in place, for its API-anchored window.

    Reads the newest non-NULL ``five_hour_window_key`` from
    ``weekly_usage_snapshots`` and looks up the matching ``five_hour_blocks``
    row. When that row exists and its ``five_hour_resets_at`` is still in the
    future relative to ``now``, ``blocks[active_idx]`` is replaced by a block
    rebuilt via ``_build_activity_block`` over the canonical
    ``[block_start_at, five_hour_resets_at)`` interval with
    ``anchor="recorded"``. Token / cost totals are therefore re-aggregated
    from ``all_entries`` for the same interval the timestamps describe
    (Bug F).

    Args:
        blocks: Grouped blocks; mutated in place (one element replaced).
        all_entries: Every usage entry; filtered by ``timestamp`` to the
            canonical interval for re-aggregation.
        now: Reference instant, converted to UTC for comparisons.
        mode: Cost mode threaded through to ``_build_activity_block``.

    No-op when:
      - No block is active (``is_active`` and not ``is_gap``).
      - The active block's anchor is not ``"heuristic"``.
      - No snapshot carries a non-NULL ``five_hour_window_key``.
      - No ``five_hour_blocks`` row matches that key.
      - The DB cannot be opened / queried, or a stored timestamp fails to
        parse.
      - The canonical window's reset time is not after ``now``.
    """
    _c = _cctally()
    # Find the active (non-gap) block — there's at most one.
    active_idx = None
    for i, b in enumerate(blocks):
        if not b.is_gap and b.is_active:
            active_idx = i
            break
    if active_idx is None or blocks[active_idx].anchor != "heuristic":
        return
    try:
        conn = open_db()
        try:
            snap = conn.execute(
                "SELECT five_hour_window_key FROM weekly_usage_snapshots "
                "WHERE five_hour_window_key IS NOT NULL "
                "ORDER BY captured_at_utc DESC, id DESC LIMIT 1"
            ).fetchone()
            if snap is None or snap["five_hour_window_key"] is None:
                return
            key = int(snap["five_hour_window_key"])
            row = conn.execute(
                "SELECT block_start_at, five_hour_resets_at "
                "FROM five_hour_blocks WHERE five_hour_window_key = ? "
                "LIMIT 1",
                (key,),
            ).fetchone()
        finally:
            # sqlite3's ``with conn:`` only commits/rolls back; it never
            # closes. Close explicitly so the handle is not leaked.
            conn.close()
    except (sqlite3.DatabaseError, OSError):
        # Best-effort overlay: any DB trouble falls back to the heuristic.
        return
    if row is None:
        return
    try:
        block_start = parse_iso_datetime(
            row["block_start_at"], "five_hour_blocks.block_start_at"
        )
        block_end = parse_iso_datetime(
            row["five_hour_resets_at"], "five_hour_blocks.five_hour_resets_at"
        )
    except ValueError:
        return
    # Normalize to UTC for stable comparisons (block_start_at can carry
    # the host-local offset; five_hour_resets_at is UTC).
    block_start_utc = block_start.astimezone(dt.timezone.utc)
    block_end_utc = block_end.astimezone(dt.timezone.utc)
    now_utc = now.astimezone(dt.timezone.utc)
    # If the canonical window has already ended, don't displace the
    # heuristic active block — the canonical block is closed and the
    # heuristic anchor reflects real ongoing activity in a later window.
    if block_end_utc <= now_utc:
        return
    # Re-aggregate entries over the canonical interval. Build a fresh
    # Block via ``_build_activity_block`` so every total stays in one code
    # path — no field-by-field assignment that could drift if the dataclass
    # grows new fields. Thread the caller's ``mode`` so the active block's
    # cost honors --mode like the main grouping (Session C / Codex F1).
    canonical_entries = [
        e for e in all_entries
        if block_start_utc <= e.timestamp < block_end_utc
    ]
    blocks[active_idx] = _c._build_activity_block(
        canonical_entries,
        block_start_utc,
        block_end_utc,
        now_utc,
        mode,
        anchor="recorded",
    )
817
+
818
+
819
def _format_block_start(iso: str, tz: "ZoneInfo | None") -> str:
    """Render a ``block_start_at`` ISO timestamp as a dated display string.

    Shared by ``cmd_five_hour_blocks`` and ``cmd_five_hour_breakdown``.
    Output shape is ``YYYY-MM-DD HH:MM <SUFFIX>``, the suffix being the zone
    label chosen per ``display_tz_label``. Naive inputs are treated as UTC;
    passing ``tz=None`` selects host-local time via a bare ``astimezone()``.
    """
    # Resolve the formatter through the call-time accessor so monkeypatches
    # applied to the cctally namespace are honored.
    render = _cctally().format_display_dt
    return render(iso, tz, fmt="%Y-%m-%d %H:%M", suffix=True)
829
+
830
+
831
def _format_hhmm_in_tz(iso: str, tz: "ZoneInfo | None") -> str:
    """Return only the ``HH:MM`` of an ISO timestamp in the resolved tz.

    Uses the same tz resolution as ``_format_block_start`` so a row's paired
    start/end cells land in one zone. Naive inputs are treated as UTC;
    ``tz=None`` means host-local. No zone suffix is appended.
    """
    # Resolve the formatter through the call-time accessor so monkeypatches
    # applied to the cctally namespace are honored.
    render = _cctally().format_display_dt
    return render(iso, tz, fmt="%H:%M", suffix=False)
840
+
841
+
842
def _block_is_active(
    block: dict,
    latest_window_key: int | None,
    now_utc: dt.datetime,
) -> bool:
    """Decide whether a ``five_hour_blocks`` row is the live block.

    A block is active only when all three hold:
      1. ``is_closed`` equals 0 (not flag-closed);
      2. its ``five_hour_window_key`` equals ``latest_window_key``;
      3. its ``five_hour_resets_at`` is still after ``now_utc``.

    Clause 3 covers the case where the natural-expiration sweep in
    ``maybe_update_five_hour_block`` has not fired since the user last
    interacted (collector idle past the 5h reset): ``is_closed`` is still 0
    and the latest snapshot still points at the expired block, so a
    two-clause test would report it ACTIVE and the callers
    (cmd_five_hour_blocks, cmd_five_hour_breakdown) would overwrite
    ``seven_day_pct_at_block_end`` with stale data.

    ``block`` is a dict-mapped sqlite3.Row; ``latest_window_key`` comes from
    ``_latest_seven_day_and_window``; ``now_utc`` is a tz-aware UTC datetime
    (typically ``_command_as_of()``, keeping pinned harnesses deterministic).

    ``five_hour_resets_at`` is canonical UTC-Z (see ``now_utc_iso`` /
    ``_iso_z``), so comparing it as a string against ``_iso_z(now_utc)`` is
    a chronological comparison.
    """
    _c = _cctally()
    if block.get("is_closed") != 0:
        return False
    if block.get("five_hour_window_key") != latest_window_key:
        return False
    # A missing/NULL reset time sorts before any timestamp → inactive.
    resets_at = block.get("five_hour_resets_at") or ""
    return resets_at > _c._iso_z(now_utc)
874
+
875
+
876
+ def _latest_seven_day_and_window(
877
+ conn: sqlite3.Connection,
878
+ ) -> tuple[float | None, int | None]:
879
+ """Return ``(latest_7d_percent, latest_5h_window_key)`` from
880
+ ``weekly_usage_snapshots``.
881
+
882
+ Selects the most-recent snapshot row regardless of whether the 5h
883
+ fields are populated (some rows lack a ``five_hour_window_key``).
884
+ Either or both elements may be ``None``. Used by
885
+ ``cmd_five_hour_breakdown`` to override
886
+ ``seven_day_pct_at_block_end`` on the active row.
887
+ """
888
+ try:
889
+ row = conn.execute(
890
+ """
891
+ SELECT weekly_percent, five_hour_window_key
892
+ FROM weekly_usage_snapshots
893
+ ORDER BY captured_at_utc DESC, id DESC
894
+ LIMIT 1
895
+ """
896
+ ).fetchone()
897
+ except sqlite3.DatabaseError:
898
+ return None, None
899
+ if row is None:
900
+ return None, None
901
+ pct = row[0]
902
+ key = row[1]
903
+ return (
904
+ float(pct) if pct is not None else None,
905
+ int(key) if key is not None else None,
906
+ )
907
+
908
+
909
def _parse_date_filter(value: str, flag_name: str) -> str:
    """Turn a ``YYYY-MM-DD`` / ``YYYYMMDD`` flag value into an ISO date string.

    Backs ``cmd_five_hour_blocks`` ``--since``/``--until`` (the result feeds
    SQL ``WHERE`` clauses) and follows the upstream ccusage convention.
    Parsing is delegated to the centralized ``_parse_dual_form_date``
    (spec §7.1.1), so the dual-form contract and the error message are the
    ones shared with cmd_blocks / cmd_daily / etc.

    That helper already eprints its own diagnostic before raising a bare
    ``ValueError``. The exception is deliberately left to propagate
    untouched so callers can map it to an exit code without printing the
    message twice (Review-A P1-1; same bare-re-raise pattern as
    ``cmd_cache_report``).
    """
    parsed = _cctally()._parse_dual_form_date(value, flag_name)
    return parsed.date().isoformat()
924
+
925
+
926
def _load_breakdown(
    conn: sqlite3.Connection, block_id: int, axis: str,
) -> list[dict]:
    """Fetch the rollup-children rows of one block along ``axis``.

    ``axis == "model"`` reads ``five_hour_block_models``; any other value
    reads ``five_hour_block_projects``. Each child row is returned as a
    plain dict, ordered by ``cost_usd DESC, id ASC``.
    """
    if axis == "model":
        table = "five_hour_block_models"
    else:
        table = "five_hour_block_projects"
    # The table name comes from the fixed two-way choice above, never from
    # caller text; the block id is bound as a parameter.
    cursor = conn.execute(
        f"""
        SELECT * FROM {table}
        WHERE block_id = ?
        ORDER BY cost_usd DESC, id ASC
        """,
        (block_id,),
    )
    return list(map(dict, cursor.fetchall()))
947
+
948
+
949
def cmd_five_hour_blocks(args: argparse.Namespace) -> int:
    """List API-anchored 5h blocks with rollup totals + 7d-drift columns.

    Reads ``five_hour_blocks`` (newest first), optionally narrowed by
    ``--since`` / ``--until``, and emits one of three outputs: a share
    snapshot (``--format``), JSON (``--json``), or the text table.

    Returns 0 on success and 2 when a date filter fails to parse (the
    parser has already printed its own diagnostic).
    """
    _c = _cctally()
    _c._share_validate_args(args)
    config = _c._load_claude_config_for_args(args)
    # Session A (spec §7.2): -z/--timezone must be bridged into args.tz
    # before the display tz is resolved so the alias precedence applies.
    _c._bridge_z_into_tz(args, config)
    args._resolved_tz = _c.resolve_display_tz(args, config)
    # "now" is pinned once (CCTALLY_AS_OF for fixture-pinned harnesses); the
    # active predicate uses it so an idle-past-reset block is not ACTIVE.
    now_utc = _command_as_of()
    conn = open_db()
    try:
        # _parse_date_filter prints its own diagnostic and raises a bare
        # ValueError on bad input — nothing is re-emitted here
        # (Review-A P1-1).
        since_iso = None
        until_iso = None
        try:
            if args.since:
                since_iso = _parse_date_filter(args.since, "--since")
            if args.until:
                until_iso = _parse_date_filter(args.until, "--until")
        except ValueError:
            return 2

        conditions: list[str] = []
        bind_values: list[Any] = []
        if since_iso:
            conditions.append("block_start_at >= ?")
            bind_values.append(since_iso)
        if until_iso:
            # --until is inclusive: compare against the following day.
            day_after = dt.date.fromisoformat(until_iso) + dt.timedelta(days=1)
            conditions.append("block_start_at < ?")
            bind_values.append(day_after.isoformat())
        clause = "WHERE " + " AND ".join(conditions) if conditions else ""

        # Unfiltered listings are capped at 50 rows; filtered ones are not.
        cap = None if (since_iso or until_iso) else 50
        limit_clause = "" if cap is None else f"LIMIT {cap}"

        rows = conn.execute(
            f"""
            SELECT * FROM five_hour_blocks {clause}
            ORDER BY block_start_at DESC, id DESC
            {limit_clause}
            """,
            bind_values,
        ).fetchall()

        # Issue #89: the --debug report covers the time range spanned by
        # the rendered rows; with no rows an empty list short-circuits the
        # loader.
        if not rows:
            _c._emit_debug_samples_if_set(
                args, [], command_label="five-hour-blocks",
            )
        else:
            # Rows are newest-first, so the rendered window runs from the
            # last row's start to the first row's start + BLOCK_DURATION.
            block_window_start = parse_iso_datetime(
                rows[-1]["block_start_at"], "block_start_at",
            )
            block_window_end = parse_iso_datetime(
                rows[0]["block_start_at"], "block_start_at",
            ) + _c.BLOCK_DURATION
            _c._emit_debug_samples_if_set(
                args,
                lambda: _c.get_entries(block_window_start, block_window_end),
                command_label="five-hour-blocks",
            )

        # Truncation = the cap was hit AND at least one older block exists
        # beyond it (probe one row at OFFSET <cap>; the cap only applies
        # when no filter is set, so the probe carries none either).
        truncated = False
        if cap is not None and len(rows) == cap:
            probe = conn.execute(
                """
                SELECT 1 FROM five_hour_blocks
                ORDER BY block_start_at DESC, id DESC
                LIMIT 1 OFFSET ?
                """,
                (cap,),
            ).fetchone()
            truncated = probe is not None

        # Live 7d% from the newest snapshot; fills
        # seven_day_pct_at_block_end on the active row.
        latest_7d, latest_window_key = _latest_seven_day_and_window(conn)

        # Credit events are read in one pass and grouped by window key for
        # an in-process join against each block dict (no per-block SELECT).
        # Feeds both the text/JSON path and the share snapshot
        # (spec §5.1.1).
        credit_rows = conn.execute(
            "SELECT five_hour_window_key, prior_percent, post_percent, "
            " effective_reset_at_utc "
            " FROM five_hour_reset_events "
            " ORDER BY five_hour_window_key, effective_reset_at_utc"
        ).fetchall()
        credits_by_window: dict[int, list[dict]] = {}
        for cr in credit_rows:
            bucket = credits_by_window.setdefault(
                int(cr["five_hour_window_key"]), []
            )
            prior_pct = float(cr["prior_percent"])
            post_pct = float(cr["post_percent"])
            bucket.append({
                "effectiveResetAtUtc": cr["effective_reset_at_utc"],
                "priorPercent": prior_pct,
                "postPercent": post_pct,
                "deltaPp": round(post_pct - prior_pct, 1),
            })

        # Per-block dicts carry two side-channel keys: ``__is_active`` and
        # ``__credits`` (empty list when the window has no credit events).
        block_dicts: list[dict] = []
        for r in rows:
            entry = dict(r)
            active = _block_is_active(entry, latest_window_key, now_utc)
            entry["__is_active"] = active
            if active and latest_7d is not None:
                entry["seven_day_pct_at_block_end"] = latest_7d
            entry["__credits"] = credits_by_window.get(
                int(entry["five_hour_window_key"]), []
            )
            block_dicts.append(entry)

        # Shareable-reports gate: --format bypasses the JSON / table
        # dispatch via `_share_render_and_emit` (the mutex in
        # `_add_share_args` keeps --format and --json apart). --breakdown
        # is a no-op here, so the gate runs BEFORE the `_load_breakdown`
        # loop to avoid per-block queries the snapshot would discard.
        # Cross-reset blocks show `▲` markers in the BarChart and `⚡` in
        # the table cell via the share renderer's UTF-8-safe paths.
        if getattr(args, "format", None):
            display_tz_str = _c._share_display_tz_label(args._resolved_tz)

            def _period_bound(filter_iso: "str | None", edge: int) -> Any:
                # Prefer the user's filter date; else the date part of the
                # edge block's start; else "now" when nothing was listed.
                if filter_iso:
                    return _c._share_parse_date_to_dt(
                        filter_iso, args._resolved_tz,
                    )
                if block_dicts:
                    stamp = block_dicts[edge].get("block_start_at")
                    return _c._share_parse_date_to_dt(
                        (stamp or "").split("T")[0] or None,
                        args._resolved_tz,
                    )
                return _c._share_now_utc()

            # block_dicts is DESC-ordered: [-1] is oldest, [0] is newest.
            period_start = _period_bound(since_iso, -1)
            period_end = _period_bound(until_iso, 0)
            # Build a BlocksView from the table rows (issue #56) so the
            # snapshot footer reads reset-aware totals from one typed
            # source instead of re-summing inline.
            view = _c.build_blocks_view_from_table_rows(
                block_dicts,
                period_start=period_start,
                period_end=period_end,
                display_tz=args._resolved_tz,
            )
            snap = _c._build_five_hour_blocks_snapshot(
                view,
                period_start=period_start,
                period_end=period_end,
                display_tz=display_tz_str,
                version=_c._share_resolve_version(),
                theme=args.theme,
                reveal_projects=args.reveal_projects,
                tz=args._resolved_tz,
            )
            _c._share_render_and_emit(snap, args)
            return 0

        # Optional per-axis sub-rows.
        if args.breakdown:
            for bd in block_dicts:
                bd["__breakdown_rows"] = _load_breakdown(
                    conn, bd["id"], args.breakdown,
                )

        if args.json:
            payload = _c._five_hour_blocks_to_json(
                block_dicts, since_iso, until_iso,
                cap, truncated, args.breakdown,
            )
            print(json.dumps(payload, indent=2))
            return 0

        _c._render_five_hour_blocks_table(block_dicts, args)
        return 0
    finally:
        conn.close()
1176
+
1177
+
1178
def cmd_five_hour_breakdown(args: argparse.Namespace) -> int:
    """Per-percent milestone view inside one 5h block.

    Resolves one block via ``_resolve_block_selector`` (``--block-start`` /
    ``--ago`` / current), loads its milestones and in-place credit events,
    and prints either a JSON document (``--json``) or a header line followed
    by a boxed table in which milestones and credits are interleaved in time
    order.

    Returns 0 on success and 2 when the selector is invalid or matches no
    block (a message is written to stderr in both cases).
    """
    _c = _cctally()
    config = _c.load_config()
    args._resolved_tz = _c.resolve_display_tz(args, config)
    # Resolve `now` once via the as-of testing hook (env-var-only — no public
    # `--as-of` flag here, matching the existing posture for `project` and
    # other testing-hook-only commands). Used for the active-block elapsed
    # display below so fixture-pinned harnesses get deterministic output.
    now_utc = _command_as_of()
    conn = open_db()
    try:
        try:
            block = _resolve_block_selector(
                conn,
                block_start=args.block_start,
                ago=args.ago,
            )
        except ValueError as e:
            print(f"five-hour-breakdown: {e}", file=sys.stderr)
            return 2

        if block is None:
            label = (
                args.block_start if args.block_start
                else f"--ago {args.ago}" if args.ago is not None
                else "current"
            )
            print(
                f"five-hour-breakdown: no block matches '{label}'",
                file=sys.stderr,
            )
            return 2

        # Spec §5.2: ORDER BY captured_at_utc ASC (NOT percent_threshold)
        # so post-credit segments interleave with pre-credit ones in
        # time-order — same human threshold number can appear twice
        # (once per reset_event_id segment) and must render in the
        # order it crossed. Bucket B per §3.2: read ALL segments (no
        # ``reset_event_id`` filter).
        milestones = conn.execute(
            """
            SELECT percent_threshold, captured_at_utc,
                   block_cost_usd, marginal_cost_usd,
                   seven_day_pct_at_crossing, reset_event_id
            FROM five_hour_milestones
            WHERE block_id = ?
            ORDER BY captured_at_utc ASC, id ASC
            """,
            (block["id"],),
        ).fetchall()

        # Spec §5.2 — load in-place credit events for this block's
        # window, ascending by effective_reset_at_utc, so the text
        # renderer can interleave a ``⚡ CREDIT -Xpp @ HH:MM`` divider
        # row between pre- and post-credit milestone segments and JSON
        # consumers see the parallel ``credits[]`` array (Section 5.2).
        credit_rows = conn.execute(
            """
            SELECT effective_reset_at_utc, prior_percent, post_percent
            FROM five_hour_reset_events
            WHERE five_hour_window_key = ?
            ORDER BY effective_reset_at_utc ASC
            """,
            (block["five_hour_window_key"],),
        ).fetchall()
        credits_list: list[dict] = [
            {
                "effectiveResetAtUtc": c["effective_reset_at_utc"],
                "priorPercent": float(c["prior_percent"]),
                "postPercent": float(c["post_percent"]),
                "deltaPp": round(
                    float(c["post_percent"]) - float(c["prior_percent"]), 1
                ),
            }
            for c in credit_rows
        ]

        crossed = bool(block.get("crossed_seven_day_reset"))
        p_start = block.get("seven_day_pct_at_block_start")
        p_end = block.get("seven_day_pct_at_block_end")

        # Live 7d_end on active row.
        latest_7d, latest_window_key = _latest_seven_day_and_window(conn)
        is_active = _block_is_active(block, latest_window_key, now_utc)
        if is_active and latest_7d is not None:
            p_end = latest_7d

        # No delta when the block crossed a weekly reset or either endpoint
        # is unknown.
        delta = (
            None if (crossed or p_start is None or p_end is None)
            else round(p_end - p_start, 9)
        )
        pct = block.get("final_five_hour_percent") or 0.0
        cost = block.get("total_cost_usd") or 0.0
        # $/percent is only reported from 0.5% upward (also avoids /0).
        dpp = round(cost / pct, 9) if pct >= 0.5 else None

        block_out = {
            "blockStartAt": block["block_start_at"],
            "fiveHourWindowKey": block["five_hour_window_key"],
            "fiveHourResetsAt": block["five_hour_resets_at"],
            "lastObservedAtUtc": block["last_observed_at_utc"],
            "status": "active" if is_active else "closed",
            "finalFiveHourPercent": round(pct, 1),
            "totalCost": round(cost, 9),
            "dollarsPerPercent": dpp,
            "inputTokens": block.get("total_input_tokens", 0),
            "outputTokens": block.get("total_output_tokens", 0),
            "cacheCreationTokens": block.get("total_cache_create_tokens", 0),
            "cacheReadTokens": block.get("total_cache_read_tokens", 0),
            "sevenDayPctAtBlockStart": p_start,
            "sevenDayPctAtBlockEnd": p_end,
            "sevenDayPctDeltaPp": delta,
            "crossedSevenDayReset": crossed,
        }
        # Spec §5.2: expose ``resetEventId`` on each milestone so JSON
        # consumers can disambiguate post-credit threshold repeats from
        # pre-credit ones. ``0`` is the pre-credit/no-credit sentinel
        # (matches the schema default).
        ms_out = [
            {
                "percentThreshold": m["percent_threshold"],
                "capturedAt": m["captured_at_utc"],
                "blockCostUSD": round(m["block_cost_usd"], 9),
                "marginalCostUSD": (
                    None if m["marginal_cost_usd"] is None
                    else round(m["marginal_cost_usd"], 9)
                ),
                "sevenDayPctAtCrossing": m["seven_day_pct_at_crossing"],
                "resetEventId": int(m["reset_event_id"] or 0),
            }
            for m in milestones
        ]

        if args.json:
            # Spec §5.2: ``credits`` is the parallel array to
            # ``milestones`` — same shape as the ``credits`` field on
            # ``five-hour-blocks --json`` (§5.1). Stacked credits across
            # distinct 10-min slots produce multiple entries.
            print(json.dumps(
                {
                    "schemaVersion": 1,
                    "block": block_out,
                    "milestones": ms_out,
                    "credits": credits_list,
                },
                indent=2,
            ))
            return 0

        # Human-readable header line.
        formatted = _format_block_start(block["block_start_at"], args._resolved_tz)
        if is_active:
            # Anchor elapsed math to the resolved `now_utc` (CCTALLY_AS_OF
            # honored) instead of wall-clock so pinned harnesses don't see
            # the active-block header drift every run.
            started_at = dt.datetime.fromisoformat(block["block_start_at"])
            if started_at.tzinfo is None:
                # A naive stored timestamp would make the aware-minus-naive
                # subtraction raise TypeError. Treat it as UTC, the same
                # rule the header formatter applies to this value.
                started_at = started_at.replace(tzinfo=dt.timezone.utc)
            elapsed_s = max(0, int((now_utc - started_at).total_seconds()))
            status_str = (
                f"(active, {elapsed_s // 3600}h "
                f"{(elapsed_s % 3600) // 60:02d}m elapsed)"
            )
        else:
            ended = _format_hhmm_in_tz(block["five_hour_resets_at"], args._resolved_tz)
            status_str = f"(closed, ended {ended})"

        delta_str = "—" if delta is None else f"Δ {delta:+.1f}pp"
        seven_d_str = (
            f"{p_start:.1f}→{p_end:.1f}"
            if p_start is not None and p_end is not None else "—"
        )
        crossed_suffix = " ⚡ crossed weekly reset" if crossed else ""
        print(
            f"Block: {formatted} {status_str} · "
            f"5h%: {pct:.1f}% · 7d% {seven_d_str} ({delta_str}){crossed_suffix}"
        )

        if not ms_out:
            print("No milestones recorded — block did not cross 1%.")
            return 0

        headers = ["#", "Threshold", "Cumulative Cost", "Marginal Cost",
                   "7d at crossing"]
        rows = []
        # Spec §5.2 — merged event stream. Interleave milestones and
        # credits in time-order (``capturedAt`` for milestones,
        # ``effectiveResetAtUtc`` for credits). Credits render as a
        # divider row with ``⚡ CREDIT`` in the Threshold cell and the
        # delta-pp + HH:MM in the rightmost cell; the row counter (``#``)
        # continues across the divider so the ordinal still reflects
        # "the Nth event in this block."
        merged_events: list[tuple[str, dict]] = []
        for m in ms_out:
            merged_events.append(("milestone", m))
        for c in credits_list:
            merged_events.append(("credit", c))
        # list.sort is stable: on equal timestamps a milestone stays ahead
        # of a credit because milestones were appended first.
        merged_events.sort(key=lambda ev: (
            ev[1]["effectiveResetAtUtc"] if ev[0] == "credit"
            else ev[1]["capturedAt"]
        ))
        idx = 0
        for kind, ev in merged_events:
            idx += 1
            if kind == "credit":
                # Spec §5.2: ⚡ CREDIT -Xpp @ HH:MM divider row.
                # HH:MM rendered in the display tz via format_display_dt.
                # ``format_display_dt`` is the documented chokepoint for
                # human-displayed datetimes (CLAUDE.md). The deltaPp
                # value is float; format as integer pp (mirrors the
                # five-hour-blocks chip in §5.1).
                hhmm = _c.format_display_dt(
                    ev["effectiveResetAtUtc"],
                    args._resolved_tz,
                    fmt="%H:%M",
                    suffix=False,
                )
                rows.append([
                    str(idx),
                    "⚡ CREDIT",
                    f"{ev['deltaPp']:+.0f}pp",
                    "",
                    f"@ {hhmm}",
                ])
                continue
            m = ev
            cum = f"${m['blockCostUSD']:.6f}"
            marg = (
                "n/a" if m["marginalCostUSD"] is None
                else f"${m['marginalCostUSD']:.6f}"
            )
            p7d = (
                "—" if m["sevenDayPctAtCrossing"] is None
                else f"{m['sevenDayPctAtCrossing']:.0f}%"
            )
            rows.append(
                [str(idx), f"{m['percentThreshold']}%", cum, marg, p7d]
            )
        print()
        print(_c._boxed_table(headers, rows, ["right"] * 5))
        if is_active:
            print("\n(active — more milestones may appear)")
        return 0
    finally:
        conn.close()
1423
+
1424
+
1425
+ def _backfill_five_hour_blocks(conn: sqlite3.Connection) -> int:
1426
+ """One-shot historical backfill of five_hour_blocks from existing
1427
+ weekly_usage_snapshots data. Idempotent via UNIQUE(five_hour_window_key)
1428
+ + INSERT OR IGNORE. Per spec §4.3, five_hour_milestones is NEVER
1429
+ backfilled (write-once gotcha).
1430
+
1431
+ Returns the count of newly-inserted parent rows (sum of INSERT OR IGNORE
1432
+ rowcounts). Caller uses this to decide whether to re-run the
1433
+ 003_merge_5h_block_duplicates_v1 handler post-backfill — the dispatcher
1434
+ walks the registry BEFORE this backfill, so a fresh run against an
1435
+ empty five_hour_blocks no-ops the dedup migration even when the
1436
+ snapshot keys we're about to insert are jitter-forked. On failure,
1437
+ rolls back and returns 0; gate fires again on next open_db() so
1438
+ partial state is recoverable.
1439
+ """
1440
+ _c = _cctally()
1441
+ inserted = 0
1442
+ try:
1443
+ # Iterate distinct windows that have BOTH a canonical key AND a
1444
+ # 5h percent. The percent guard is critical: MAX(five_hour_percent)
1445
+ # over a NULL-only window is NULL, which would trip the
1446
+ # final_five_hour_percent NOT NULL constraint at insert time.
1447
+ keys = [
1448
+ int(r[0]) for r in conn.execute(
1449
+ """
1450
+ SELECT DISTINCT five_hour_window_key
1451
+ FROM weekly_usage_snapshots
1452
+ WHERE five_hour_window_key IS NOT NULL
1453
+ AND five_hour_percent IS NOT NULL
1454
+ """
1455
+ ).fetchall()
1456
+ ]
1457
+
1458
+ now_iso = now_utc_iso()
1459
+ now_dt = parse_iso_datetime(now_iso, "now")
1460
+
1461
+ # BEGIN IMMEDIATE (not deferred): this transaction's first DML is a
1462
+ # READ (min_row/max_row below), so a plain deferred BEGIN takes a read
1463
+ # snapshot and only tries to upgrade to the write lock at the first
1464
+ # INSERT OR IGNORE. Under concurrent first-run openers, a competing
1465
+ # commit landing between that read and the first write makes the upgrade
1466
+ # fail with SQLITE_BUSY_SNAPSHOT *immediately* — busy_timeout cannot
1467
+ # absorb it, and the whole backfill rolls back. Acquiring the write lock
1468
+ # up front serializes the backfill cleanly behind busy_timeout instead.
1469
+ # See cctally-dev#87.
1470
+ conn.execute("BEGIN IMMEDIATE")
1471
+ try:
1472
+ for key in keys:
1473
+ # MIN-captured row defines the immutable block boundary
1474
+ # values (deterministic — picking "any in-window row"
1475
+ # would be nondeterministic under seconds-level Anthropic
1476
+ # ISO jitter that the canonical key collapses).
1477
+ min_row = conn.execute(
1478
+ """
1479
+ SELECT five_hour_resets_at, captured_at_utc, weekly_percent
1480
+ FROM weekly_usage_snapshots
1481
+ WHERE five_hour_window_key = ?
1482
+ AND five_hour_percent IS NOT NULL
1483
+ ORDER BY captured_at_utc ASC, id ASC LIMIT 1
1484
+ """,
1485
+ (key,),
1486
+ ).fetchone()
1487
+ max_row = conn.execute(
1488
+ """
1489
+ SELECT captured_at_utc, weekly_percent, five_hour_percent
1490
+ FROM weekly_usage_snapshots
1491
+ WHERE five_hour_window_key = ?
1492
+ AND five_hour_percent IS NOT NULL
1493
+ ORDER BY captured_at_utc DESC, id DESC LIMIT 1
1494
+ """,
1495
+ (key,),
1496
+ ).fetchone()
1497
+ if min_row is None or max_row is None:
1498
+ continue # defensive — should be unreachable per the keys query
1499
+
1500
+ resets_at = min_row["five_hour_resets_at"]
1501
+ first_obs = min_row["captured_at_utc"]
1502
+ last_obs = max_row["captured_at_utc"]
1503
+ pct_start_7d = min_row["weekly_percent"]
1504
+ pct_end_7d = max_row["weekly_percent"]
1505
+ final_5h = float(max_row["five_hour_percent"])
1506
+
1507
+ resets_dt = parse_iso_datetime(resets_at, "resets_at backfill")
1508
+ block_start_dt = resets_dt - dt.timedelta(hours=5)
1509
+ block_start_at = block_start_dt.isoformat(timespec="seconds")
1510
+ last_obs_dt = parse_iso_datetime(last_obs, "last_obs backfill")
1511
+
1512
+ # Cross-reset detection (interval predicate, symmetric with
1513
+ # the live path's UPDATE in T4). Two sources:
1514
+ # (a) week_reset_events — Anthropic-shifted mid-week resets.
1515
+ # (b) weekly_usage_snapshots.week_start_at — natural week
1516
+ # boundaries (no event row for these).
1517
+ # Strict ``>`` on the lower bound for (b) so a block whose
1518
+ # block_start_at coincides with a week boundary is not flagged.
1519
+ # ``unixepoch()`` normalizes the comparison across mixed tz
1520
+ # suffixes (block_start_at is host-local; week_start_at /
1521
+ # effective_reset_at_utc are ``+00:00``); see the live-path
1522
+ # comment for rationale.
1523
+ cross_row = conn.execute(
1524
+ """
1525
+ SELECT 1 FROM week_reset_events
1526
+ WHERE unixepoch(effective_reset_at_utc) >= unixepoch(?)
1527
+ AND unixepoch(effective_reset_at_utc) <= unixepoch(?)
1528
+ LIMIT 1
1529
+ """,
1530
+ (block_start_at, last_obs),
1531
+ ).fetchone()
1532
+ if cross_row is None:
1533
+ cross_row = conn.execute(
1534
+ """
1535
+ SELECT 1 FROM weekly_usage_snapshots
1536
+ WHERE week_start_at IS NOT NULL
1537
+ AND unixepoch(week_start_at) > unixepoch(?)
1538
+ AND unixepoch(week_start_at) <= unixepoch(?)
1539
+ LIMIT 1
1540
+ """,
1541
+ (block_start_at, last_obs),
1542
+ ).fetchone()
1543
+ crossed = 1 if cross_row is not None else 0
1544
+
1545
+ # is_closed: 1 if the canonical reset moment is already past.
1546
+ is_closed = 1 if resets_dt < now_dt else 0
1547
+
1548
+ # Token + cost totals — recomputed via the shared helper,
1549
+ # which routes through get_entries() and falls back to
1550
+ # JSONL if cache.db is unreadable.
1551
+ # skip_sync=True: backfill runs inside open_db(); a sync_cache
1552
+ # cascade here would reopen cache.db recursively.
1553
+ totals = _c._compute_block_totals(
1554
+ block_start_dt, last_obs_dt, skip_sync=True,
1555
+ )
1556
+
1557
+ cur = conn.execute(
1558
+ """
1559
+ INSERT OR IGNORE INTO five_hour_blocks (
1560
+ five_hour_window_key,
1561
+ five_hour_resets_at,
1562
+ block_start_at,
1563
+ first_observed_at_utc,
1564
+ last_observed_at_utc,
1565
+ final_five_hour_percent,
1566
+ seven_day_pct_at_block_start,
1567
+ seven_day_pct_at_block_end,
1568
+ crossed_seven_day_reset,
1569
+ total_input_tokens,
1570
+ total_output_tokens,
1571
+ total_cache_create_tokens,
1572
+ total_cache_read_tokens,
1573
+ total_cost_usd,
1574
+ is_closed,
1575
+ created_at_utc,
1576
+ last_updated_at_utc
1577
+ )
1578
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
1579
+ """,
1580
+ (
1581
+ key,
1582
+ resets_at,
1583
+ block_start_at,
1584
+ first_obs,
1585
+ last_obs,
1586
+ final_5h,
1587
+ pct_start_7d,
1588
+ pct_end_7d,
1589
+ crossed,
1590
+ totals["input_tokens"],
1591
+ totals["output_tokens"],
1592
+ totals["cache_create_tokens"],
1593
+ totals["cache_read_tokens"],
1594
+ totals["cost_usd"],
1595
+ is_closed,
1596
+ now_iso,
1597
+ now_iso,
1598
+ ),
1599
+ )
1600
+ inserted += cur.rowcount or 0
1601
+
1602
+ # ── Write per-(block, model) and per-(block, project) child
1603
+ # rows for this window. INSERT OR IGNORE handles partial-fail
1604
+ # re-runs (UNIQUE(five_hour_window_key, model|project_path)).
1605
+ # Same transaction as the parent INSERT.
1606
+ parent_id_row = conn.execute(
1607
+ "SELECT id FROM five_hour_blocks WHERE five_hour_window_key = ?",
1608
+ (key,),
1609
+ ).fetchone()
1610
+ if parent_id_row is not None:
1611
+ parent_id = int(parent_id_row["id"])
1612
+ if totals.get("by_model"):
1613
+ conn.executemany(
1614
+ """
1615
+ INSERT OR IGNORE INTO five_hour_block_models (
1616
+ block_id, five_hour_window_key, model,
1617
+ input_tokens, output_tokens,
1618
+ cache_create_tokens, cache_read_tokens,
1619
+ cost_usd, entry_count
1620
+ )
1621
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
1622
+ """,
1623
+ [
1624
+ (
1625
+ parent_id,
1626
+ key,
1627
+ model,
1628
+ b["input_tokens"],
1629
+ b["output_tokens"],
1630
+ b["cache_create_tokens"],
1631
+ b["cache_read_tokens"],
1632
+ b["cost_usd"],
1633
+ b["entry_count"],
1634
+ )
1635
+ for model, b in totals["by_model"].items()
1636
+ ],
1637
+ )
1638
+ if totals.get("by_project"):
1639
+ conn.executemany(
1640
+ """
1641
+ INSERT OR IGNORE INTO five_hour_block_projects (
1642
+ block_id, five_hour_window_key, project_path,
1643
+ input_tokens, output_tokens,
1644
+ cache_create_tokens, cache_read_tokens,
1645
+ cost_usd, entry_count
1646
+ )
1647
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
1648
+ """,
1649
+ [
1650
+ (
1651
+ parent_id,
1652
+ key,
1653
+ project_path,
1654
+ b["input_tokens"],
1655
+ b["output_tokens"],
1656
+ b["cache_create_tokens"],
1657
+ b["cache_read_tokens"],
1658
+ b["cost_usd"],
1659
+ b["entry_count"],
1660
+ )
1661
+ for project_path, b in totals["by_project"].items()
1662
+ ],
1663
+ )
1664
+
1665
+ # Mark child-table migrations done so the upgrade-user gates in
1666
+ # open_db() don't re-fire on next open. Inside the same
1667
+ # transaction as the per-window inserts so partial-fail leaves
1668
+ # both the data AND the markers absent — gates re-fire cleanly.
1669
+ conn.executemany(
1670
+ """
1671
+ INSERT OR IGNORE INTO schema_migrations (name, applied_at_utc)
1672
+ VALUES (?, ?)
1673
+ """,
1674
+ [
1675
+ ("001_five_hour_block_models_backfill_v1", now_iso),
1676
+ ("002_five_hour_block_projects_backfill_v1", now_iso),
1677
+ ],
1678
+ )
1679
+ conn.commit()
1680
+ except Exception:
1681
+ conn.rollback()
1682
+ raise
1683
+ except Exception as exc:
1684
+ eprint(f"[5h-block backfill] failed: {exc}")
1685
+ return 0
1686
+ return inserted
1687
+
1688
+