cctally 1.7.3 → 1.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,890 @@
1
+ """Leaf-of-the-graph kernel for cctally.
2
+
3
+ Contains primitives that every sibling and bin/cctally itself depend on:
4
+ logging (eprint), datetime helpers, week-name/bounds, time-of-day,
5
+ alerts-config validation, open_db, WeekRef + make_week_ref,
6
+ get_latest_usage_for_week.
7
+
8
+ Path constants (APP_DIR, DB_PATH, LOG_DIR) intentionally live in
9
+ bin/cctally and are read here via a call-time _cctally() accessor —
10
+ this is the ONLY accessor use inside core. See
11
+ docs/superpowers/specs/2026-05-17-cctally-core-kernel-extraction.md §2.
12
+ """
13
+ from __future__ import annotations
14
+ import datetime as dt
15
+ import os
16
+ import re
17
+ import sqlite3
18
+ import sys
19
+ import traceback
20
+ from dataclasses import dataclass
21
+ from typing import Any
22
+
23
+
24
+ def _cctally():
25
+ return sys.modules["cctally"]
26
+
27
+
28
+ # === Logging =========================================================
29
+
30
+
31
def eprint(*args: Any) -> None:
    """Write ``args`` to standard error, formatted exactly as ``print`` does."""
    destination = sys.stderr  # resolved per call so stderr redirection is honored
    print(*args, file=destination)
33
+
34
+
35
+ # === Datetime helpers ================================================
36
+
37
+
38
def now_utc_iso(now_utc: dt.datetime | None = None) -> str:
    """Format a moment as a seconds-precision UTC timestamp ending in ``Z``.

    Args:
        now_utc: Moment to format. When omitted the wall clock is read,
            which is the behavior existing callers rely on. Callers that
            honor ``CCTALLY_AS_OF`` pass a tz-aware datetime (typically
            from ``_command_as_of()``) to get a stable, pinned timestamp.

    Returns:
        A string such as ``2024-01-02T03:04:05Z``.
    """
    if now_utc is None:
        moment = dt.datetime.now(dt.timezone.utc)
    else:
        moment = now_utc
    # Convert to UTC first, then drop sub-second precision.
    whole_seconds = moment.astimezone(dt.timezone.utc).replace(microsecond=0)
    stamp = whole_seconds.isoformat()
    return stamp.replace("+00:00", "Z")
54
+
55
+
56
+ def _iso_to_epoch(s: str) -> int:
57
+ """Parse an ISO-8601 timestamp and return Unix epoch seconds.
58
+
59
+ Naive ISO strings (no timezone) are treated as UTC, matching the
60
+ statusline-command.sh ``_iso_to_epoch`` helper. ``Z`` suffix is
61
+ handled by mapping to ``+00:00`` since ``datetime.fromisoformat``
62
+ accepts ``Z`` natively from Python 3.11.
63
+ """
64
+ s = s.strip()
65
+ if s.endswith("Z"):
66
+ s = s[:-1] + "+00:00"
67
+ parsed = dt.datetime.fromisoformat(s)
68
+ if parsed.tzinfo is None:
69
+ parsed = parsed.replace(tzinfo=dt.timezone.utc)
70
+ return int(parsed.timestamp())
71
+
72
+
73
+ def _format_short_duration(seconds: int) -> str:
74
+ """Format a duration as a short top-two-units string.
75
+
76
+ Examples: ``6d 4h``, ``2h 15m``, ``2h``, ``45m``, ``30s``, ``0s``.
77
+ Mirrors the shape used by ``~/.claude/statusline-command.sh``'s
78
+ format_duration helper. Negative inputs clamp to ``0s``.
79
+ """
80
+ s = max(0, int(seconds))
81
+ if s >= 86400:
82
+ days = s // 86400
83
+ hours = (s % 86400) // 3600
84
+ return f"{days}d {hours}h" if hours else f"{days}d"
85
+ if s >= 3600:
86
+ hours = s // 3600
87
+ minutes = (s % 3600) // 60
88
+ return f"{hours}h {minutes}m" if minutes else f"{hours}h"
89
+ if s >= 60:
90
+ return f"{s // 60}m"
91
+ return f"{s}s"
92
+
93
+
94
# Shape check only (four digits, two digits, two digits); calendar validity
# is checked separately by ``date.fromisoformat`` in parse_date_str.
_DATE_RE = re.compile(r"^\d{4}-\d{2}-\d{2}$")


def parse_date_str(value: str, label: str) -> dt.date:
    """Parse a ``YYYY-MM-DD`` string into a ``date``.

    Args:
        value: Text to parse; surrounding whitespace is ignored.
        label: Name of the option/field, used as the prefix of every
            error message so the user can tell which input was rejected.

    Returns:
        The parsed calendar date.

    Raises:
        ValueError: the text is not shaped like ``YYYY-MM-DD``, or it is
            shaped correctly but names an impossible date (for example
            ``2024-02-30``). Both messages start with ``label``.
    """
    s = value.strip()
    if not _DATE_RE.match(s):
        raise ValueError(f"{label} must be YYYY-MM-DD")
    try:
        return dt.date.fromisoformat(s)
    except ValueError as exc:
        # Well-formed but impossible date: re-raise with the label so the
        # message is as attributable as the one from parse_iso_datetime.
        raise ValueError(
            f"{label} must be a valid calendar date (YYYY-MM-DD), got {s!r}"
        ) from exc
102
+
103
+
104
def parse_iso_datetime(value: str, label: str) -> dt.datetime:
    """Parse an ISO-8601 datetime and return it as a host-local aware value.

    Args:
        value: Text to parse; surrounding whitespace is ignored and ``Z``
            is accepted as a spelling of ``+00:00``.
        label: Name of the field, used as the prefix of error messages.

    Returns:
        A tz-aware datetime expressed in the host's local timezone. Input
        that carries an offset is converted; input without one is taken
        to be host-local wall-clock time.

    Raises:
        ValueError: the text is empty or not a parseable ISO datetime.
    """
    s = value.strip()
    if not s:
        raise ValueError(f"{label} must be a non-empty ISO datetime")
    try:
        parsed = dt.datetime.fromisoformat(s.replace("Z", "+00:00"))
    except ValueError as exc:
        raise ValueError(f"{label} must be ISO datetime") from exc

    # internal fallback: host-local intentional
    # ``astimezone()`` on a naive value presumes host-local time and picks
    # the UTC offset in force at *that* date. Stamping the value with the
    # offset that applies right now would shift it by an hour whenever the
    # parsed date and today sit on opposite sides of a DST change.
    return parsed.astimezone()
119
+
120
+
121
def format_local_iso(d: dt.date, end_of_day: bool) -> str:
    """Return the first or last second of ``d`` as a host-local ISO string.

    Args:
        d: Calendar day to format.
        end_of_day: ``True`` selects 23:59:59, ``False`` selects 00:00:00.

    Returns:
        A seconds-precision ISO-8601 string carrying the host's UTC offset
        for that moment.
    """
    if end_of_day:
        clock = dt.time(23, 59, 59)
    else:
        clock = dt.time(0, 0, 0)
    naive_moment = dt.datetime.combine(d, clock)
    # internal fallback: host-local intentional
    return naive_moment.astimezone().isoformat(timespec="seconds")
126
+
127
+
128
+ def _normalize_week_boundary_dt(value: dt.datetime) -> dt.datetime:
129
+ """
130
+ Normalize known Anthropic boundary jitter.
131
+
132
+ Anthropic resets are always on hour boundaries. Relative reset text
133
+ ("in XX hr YY min") produces minute-level drift on every capture, and
134
+ the UI occasionally alternates between HH:00 and HH-1:59 for the same
135
+ logical reset.
136
+
137
+ Canonicalization: round to the nearest hour.
138
+ - minutes 0..29 -> HH:00
139
+ - minutes 30..59 -> (HH+1):00
140
+ """
141
+ normalized = value.replace(second=0, microsecond=0)
142
+ if normalized.minute >= 30:
143
+ normalized = (normalized + dt.timedelta(hours=1)).replace(
144
+ minute=0,
145
+ second=0,
146
+ microsecond=0,
147
+ )
148
+ elif normalized.minute > 0:
149
+ normalized = normalized.replace(
150
+ minute=0,
151
+ second=0,
152
+ microsecond=0,
153
+ )
154
+ return normalized
155
+
156
+
157
+ # === Time-of-day (CCTALLY_AS_OF hooks) ==============================
158
+
159
+
160
+ def _command_as_of() -> dt.datetime:
161
+ """Testing hook: CCTALLY_AS_OF env var overrides wall-clock `now` for
162
+ time-dependent commands. Shared by cmd_project, cmd_weekly,
163
+ cmd_cache_report, cmd_codex_weekly, cmd_diff (and any future
164
+ time-dependent command). Format: ISO-8601 with Z or explicit tz offset.
165
+ """
166
+ override = os.environ.get("CCTALLY_AS_OF")
167
+ if override:
168
+ override = override.strip()
169
+ if override.endswith("Z"):
170
+ override = override[:-1] + "+00:00"
171
+ return dt.datetime.fromisoformat(override).astimezone(dt.timezone.utc)
172
+ return dt.datetime.now(dt.timezone.utc)
173
+
174
+
175
+ def _now_utc() -> dt.datetime:
176
+ """UTC now, with CCTALLY_AS_OF env override for fixture-stability.
177
+
178
+ Single time source for the `update` subcommand and its supporting
179
+ state machine (TTL gates, ``remind_after.until_utc`` comparisons,
180
+ log timestamps, install-method detection cache). Mirrors the
181
+ documented CCTALLY_AS_OF precedent (see CLAUDE.md — `project` has
182
+ a hidden `CCTALLY_AS_OF` env hook, and `_command_as_of` /
183
+ `_share_now_utc` reuse it for `weekly`/`forecast`/share-render).
184
+ Accepts ISO-8601 with `Z` or explicit offset; result is always
185
+ tz-aware UTC.
186
+
187
+ Raises ValueError on malformed CCTALLY_AS_OF — deliberate fail-loud
188
+ for the dev hook so fixture authors notice typos immediately rather
189
+ than silently falling back to wall-clock time.
190
+ """
191
+ override = os.environ.get("CCTALLY_AS_OF")
192
+ if override:
193
+ override = override.strip()
194
+ if override.endswith("Z"):
195
+ override = override[:-1] + "+00:00"
196
+ return dt.datetime.fromisoformat(override).astimezone(dt.timezone.utc)
197
+ return dt.datetime.now(dt.timezone.utc)
198
+
199
+
200
+ # === Week-name + bounds =============================================
201
+
202
+
203
# Week-start day used when neither an override nor the config supplies one.
DEFAULT_WEEK_START = "monday"

# Lower-case weekday name -> ``date.weekday()`` index (Monday is 0).
WEEKDAY_MAP = {
    "monday": 0,
    "tuesday": 1,
    "wednesday": 2,
    "thursday": 3,
    "friday": 4,
    "saturday": 5,
    "sunday": 6,
}


def get_week_start_name(config: dict[str, Any], override: str | None = None) -> str:
    """Resolve the configured week-start weekday to a ``WEEKDAY_MAP`` key.

    Args:
        config: Parsed configuration; the value is read from
            ``config["collector"]["week_start"]``.
        override: When non-empty, takes precedence over ``config``.

    Returns:
        The weekday name, stripped and lower-cased. ``DEFAULT_WEEK_START``
        is used when ``collector`` is absent or null, or when it has no
        ``week_start`` key.

    Raises:
        ValueError: ``collector`` is present but not an object, or the
            resolved name is not a key of ``WEEKDAY_MAP``.
    """
    if override:
        name = override.strip().lower()
    else:
        # A JSON ``"collector": null`` means "no settings", the same way a
        # null ``alerts`` block is treated, rather than crashing on
        # ``None.get``.
        collector = config.get("collector") or {}
        if not isinstance(collector, dict):
            raise ValueError("collector must be an object")
        name = str(collector.get("week_start", DEFAULT_WEEK_START)).strip().lower()
    if name not in WEEKDAY_MAP:
        raise ValueError(
            f"Invalid week start '{name}'. Allowed: {', '.join(WEEKDAY_MAP)}"
        )
    return name
226
+
227
+
228
def compute_week_bounds(anchor_dt: dt.datetime, week_start_name: str) -> tuple[dt.date, dt.date]:
    """Return the first and last day of the week that contains ``anchor_dt``.

    Args:
        anchor_dt: Moment to locate; it is converted to host-local time
            before its calendar date is taken.
        week_start_name: Key of ``WEEKDAY_MAP`` naming the week's first day.

    Returns:
        ``(start, end)`` where ``start`` is the most recent
        ``week_start_name`` on or before the local date and ``end`` is six
        days later.

    Raises:
        KeyError: ``week_start_name`` is not a key of ``WEEKDAY_MAP``.
    """
    first_weekday = WEEKDAY_MAP[week_start_name]
    # internal fallback: host-local intentional
    anchor_day = anchor_dt.astimezone().date()
    days_into_week = (anchor_day.weekday() - first_weekday) % 7
    first_day = anchor_day - dt.timedelta(days=days_into_week)
    last_day = first_day + dt.timedelta(days=6)
    return first_day, last_day
237
+
238
+
239
+ # === Path primitive =================================================
240
+
241
+
242
def ensure_dirs() -> None:
    """Create the application and log directories if they do not exist.

    ``APP_DIR`` and ``LOG_DIR`` are read from the ``cctally`` module at
    call time; missing parent directories are created as well.
    """
    host = _cctally()
    for attribute in ("APP_DIR", "LOG_DIR"):
        getattr(host, attribute).mkdir(parents=True, exist_ok=True)
246
+
247
+
248
+ # === Alerts validation cluster ======================================
249
+
250
+
251
class _AlertsConfigError(ValueError):
    """Raised when the ``alerts`` config block is invalid.

    Thrown by ``_get_alerts_config`` and by ``_validate_threshold_list``,
    which it calls. Being a ``ValueError`` subclass, it is also caught by
    handlers written for ``ValueError``.
    """
253
+
254
+
255
# Sub-keys recognized under the ``alerts`` config block. _get_alerts_config
# prints a warning for any other key and otherwise ignores it.
_ALERTS_CONFIG_VALID_KEYS = {"enabled", "weekly_thresholds", "five_hour_thresholds"}
256
+
257
+
258
+ def _validate_threshold_list(name: str, value: object) -> "list[int]":
259
+ """Validate one of the alerts threshold lists.
260
+
261
+ Rules: non-empty list of plain ints (NOT bools — `bool` is an `int`
262
+ subclass), each in [1, 100], strictly increasing (no duplicates).
263
+ Error messages mention `alerts.<name>` so users can locate the
264
+ offending key in their config.json.
265
+ """
266
+ if not isinstance(value, list):
267
+ raise _AlertsConfigError(f"alerts.{name} must be a list of integers")
268
+ if len(value) == 0:
269
+ raise _AlertsConfigError(
270
+ f"alerts.{name} must not be empty (disable alerts via alerts.enabled=false)"
271
+ )
272
+ out: "list[int]" = []
273
+ prev = -1
274
+ seen: "set[int]" = set()
275
+ for item in value:
276
+ if not isinstance(item, int) or isinstance(item, bool):
277
+ raise _AlertsConfigError(
278
+ f"alerts.{name} items must be integers, got {type(item).__name__}: {item!r}"
279
+ )
280
+ if item < 1 or item > 100:
281
+ raise _AlertsConfigError(
282
+ f"alerts.{name} items must be in [1, 100], got {item}"
283
+ )
284
+ if item in seen:
285
+ raise _AlertsConfigError(
286
+ f"alerts.{name} contains duplicate value {item}"
287
+ )
288
+ if item <= prev:
289
+ raise _AlertsConfigError(
290
+ f"alerts.{name} must be strictly increasing, got {prev} then {item}"
291
+ )
292
+ seen.add(item)
293
+ prev = item
294
+ out.append(item)
295
+ return out
296
+
297
+
298
def _get_alerts_config(cfg: "dict | None") -> dict:
    """Read, default and validate the ``alerts`` block of the config.

    Defaults are applied at read time, so later default tuning reaches
    users who never customized the block. Unknown sub-keys under
    ``alerts`` produce a one-line warning on stderr and are otherwise
    ignored (mirrors the `display.tz` posture for forward compatibility).

    Args:
        cfg: Parsed configuration, or ``None``.

    Returns:
        A dict with exactly the keys ``enabled``, ``weekly_thresholds``
        and ``five_hour_thresholds``.

    Raises:
        _AlertsConfigError: ``alerts`` is a non-empty non-object,
            ``enabled`` is not a boolean, or a threshold list is invalid.
    """
    alerts = (cfg or {}).get("alerts", {}) or {}
    if not isinstance(alerts, dict):
        raise _AlertsConfigError("alerts must be an object")

    # warn-and-ignore unknown keys (forward compat; matches display.tz posture)
    unknown_keys = [k for k in alerts if k not in _ALERTS_CONFIG_VALID_KEYS]
    for k in unknown_keys:
        print(
            f"warning: ignoring unknown alerts config key: {k}",
            file=sys.stderr,
        )

    enabled = alerts.get("enabled", False)
    if not isinstance(enabled, bool):
        raise _AlertsConfigError(
            f"alerts.enabled must be a JSON boolean, got {type(enabled).__name__}: {enabled!r}"
        )

    validated = {"enabled": enabled}
    for list_key in ("weekly_thresholds", "five_hour_thresholds"):
        validated[list_key] = _validate_threshold_list(
            list_key, alerts.get(list_key, [90, 95])
        )
    return validated
332
+
333
+
334
+ # === DB primitive ===================================================
335
+
336
+
337
def open_db() -> sqlite3.Connection:
    """Open the stats SQLite database and bring its schema up to date.

    In order, this function:

    1. creates the app/log directories and connects to ``DB_PATH`` with
       ``sqlite3.Row`` rows, ``journal_mode=WAL`` and ``synchronous=NORMAL``;
    2. creates every table and index with ``IF NOT EXISTS`` and adds
       later-introduced columns through ``add_column_if_missing``;
    3. fills ``five_hour_window_key`` on snapshot rows that still lack it;
    4. runs ``_backfill_week_reset_events`` and then the migration
       dispatcher ``_run_pending_migrations``;
    5. backfills ``five_hour_blocks`` when that table is empty but usable
       snapshots exist, then re-runs the 5h dedup migration handler unless
       it is listed in ``schema_migrations_skipped``.

    Returns:
        The open connection. This function does not close it.

    A failure of the re-run dedup handler in step 5 is logged and
    swallowed. Any other exception propagates, and the connection opened
    here is not closed by this function in that case.
    """
    c = _cctally()
    # Spec §2.6 carve-out: open_db reaches the migration framework
    # (lives in _cctally_db + bin/cctally). Direct imports would
    # create a cycle (_cctally_db imports kernel from this module).
    # Local-binding via the call-time accessor preserves byte-stable
    # behavior with the reach list explicit at the top of the function.
    # Enforced by tests/test_kernel_extraction_invariants.py
    # test_core_accessor_use_is_bounded (lands in I2).
    add_column_if_missing = c.add_column_if_missing
    _canonical_5h_window_key = c._canonical_5h_window_key
    _backfill_week_reset_events = c._backfill_week_reset_events
    _backfill_five_hour_blocks = c._backfill_five_hour_blocks
    _run_pending_migrations = c._run_pending_migrations
    _STATS_MIGRATIONS = c._STATS_MIGRATIONS
    _log_migration_error = c._log_migration_error
    _clear_migration_error_log_entries = c._clear_migration_error_log_entries

    ensure_dirs()
    conn = sqlite3.connect(c.DB_PATH)
    conn.row_factory = sqlite3.Row
    conn.execute("PRAGMA journal_mode=WAL")
    conn.execute("PRAGMA synchronous=NORMAL")
    conn.execute(
        """
        CREATE TABLE IF NOT EXISTS weekly_usage_snapshots (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            captured_at_utc TEXT NOT NULL,
            week_start_date TEXT NOT NULL,
            week_end_date TEXT NOT NULL,
            week_start_at TEXT,
            week_end_at TEXT,
            weekly_percent REAL NOT NULL,
            page_url TEXT,
            source TEXT NOT NULL DEFAULT 'userscript',
            payload_json TEXT NOT NULL
        )
        """
    )
    conn.execute(
        """
        CREATE INDEX IF NOT EXISTS idx_usage_week_time
        ON weekly_usage_snapshots(week_start_date, captured_at_utc DESC, id DESC)
        """
    )
    conn.execute(
        """
        CREATE TABLE IF NOT EXISTS weekly_cost_snapshots (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            captured_at_utc TEXT NOT NULL,
            week_start_date TEXT NOT NULL,
            week_end_date TEXT NOT NULL,
            week_start_at TEXT,
            week_end_at TEXT,
            range_start_iso TEXT,
            range_end_iso TEXT,
            cost_usd REAL NOT NULL,
            source TEXT NOT NULL DEFAULT 'cctally-range-cost',
            mode TEXT NOT NULL DEFAULT 'auto',
            project TEXT
        )
        """
    )
    conn.execute(
        """
        CREATE INDEX IF NOT EXISTS idx_cost_week_time
        ON weekly_cost_snapshots(week_start_date, captured_at_utc DESC, id DESC)
        """
    )

    # Columns below also appear in the CREATE TABLE statements above; the
    # ALTER path covers databases created before those columns existed.
    add_column_if_missing(conn, "weekly_usage_snapshots", "week_start_at", "TEXT")
    add_column_if_missing(conn, "weekly_usage_snapshots", "week_end_at", "TEXT")
    add_column_if_missing(conn, "weekly_usage_snapshots", "five_hour_percent", "REAL")
    add_column_if_missing(conn, "weekly_usage_snapshots", "five_hour_resets_at", "TEXT")
    # five_hour_window_key — canonical (10-min-floored epoch) key for
    # jitter-tolerant equality. Anthropic's status-line API jitters
    # rate_limits.5h.resets_at by ~seconds within the same physical 5h
    # window; joining on the raw ISO string treats each jittered fetch as
    # a new window, escaping the monotonic clamp at cmd_record_usage.
    # Backfill is RESUMABLE: Python's sqlite3 auto-commits DDL,
    # so a process killed mid-loop would leave the column added with NULL
    # keys for unprocessed rows. The gating below detects that partial
    # state on the next open_db() call (`five_hour_resets_at IS NOT NULL
    # AND five_hour_window_key IS NULL`) and completes the backfill, so
    # the original Bug B can't silently re-emerge for half-migrated rows.
    needs_5h_key_backfill = add_column_if_missing(
        conn, "weekly_usage_snapshots", "five_hour_window_key", "INTEGER"
    )
    if not needs_5h_key_backfill and conn.execute(
        "SELECT 1 FROM weekly_usage_snapshots "
        "WHERE five_hour_resets_at IS NOT NULL "
        "  AND five_hour_window_key IS NULL "
        "LIMIT 1"
    ).fetchone() is not None:
        needs_5h_key_backfill = True

    if needs_5h_key_backfill:
        backfill_rows = conn.execute(
            "SELECT id, five_hour_resets_at FROM weekly_usage_snapshots "
            "WHERE five_hour_resets_at IS NOT NULL "
            "  AND five_hour_window_key IS NULL"
        ).fetchall()
        for row in backfill_rows:
            # An unparseable row is reported and skipped; its key stays NULL.
            try:
                iso = row[1]
                d = parse_iso_datetime(iso, "five_hour_resets_at backfill")
                epoch = int(d.timestamp())
                key = _canonical_5h_window_key(epoch)
                conn.execute(
                    "UPDATE weekly_usage_snapshots "
                    "SET five_hour_window_key = ? WHERE id = ?",
                    (key, row[0]),
                )
            except (ValueError, TypeError) as exc:
                eprint(f"[migration] skipped row {row[0]}: {exc}")
    # NOTE(review): the reviewed rendering of this file had lost its
    # indentation, so whether this CREATE INDEX + commit sit inside the
    # `if needs_5h_key_backfill:` block could not be read off the text.
    # They are placed unconditionally here (both are idempotent) — confirm
    # against the packaged file.
    conn.execute(
        "CREATE INDEX IF NOT EXISTS idx_weekly_usage_snapshots_5h_window_key "
        "ON weekly_usage_snapshots(five_hour_window_key)"
    )
    conn.commit()

    add_column_if_missing(conn, "weekly_cost_snapshots", "week_start_at", "TEXT")
    add_column_if_missing(conn, "weekly_cost_snapshots", "week_end_at", "TEXT")
    add_column_if_missing(conn, "weekly_cost_snapshots", "range_start_iso", "TEXT")
    add_column_if_missing(conn, "weekly_cost_snapshots", "range_end_iso", "TEXT")

    conn.execute(
        """
        CREATE INDEX IF NOT EXISTS idx_usage_week_start_at_time
        ON weekly_usage_snapshots(week_start_at, captured_at_utc DESC, id DESC)
        """
    )
    conn.execute(
        """
        CREATE INDEX IF NOT EXISTS idx_cost_week_start_at_time
        ON weekly_cost_snapshots(week_start_at, captured_at_utc DESC, id DESC)
        """
    )

    conn.execute(
        """
        CREATE TABLE IF NOT EXISTS percent_milestones (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            captured_at_utc TEXT NOT NULL,
            week_start_date TEXT NOT NULL,
            week_end_date TEXT NOT NULL,
            week_start_at TEXT,
            week_end_at TEXT,
            percent_threshold INTEGER NOT NULL,
            cumulative_cost_usd REAL NOT NULL,
            marginal_cost_usd REAL,
            usage_snapshot_id INTEGER NOT NULL,
            cost_snapshot_id INTEGER NOT NULL,
            reset_event_id INTEGER NOT NULL DEFAULT 0,
            UNIQUE(week_start_date, percent_threshold, reset_event_id)
        )
        """
    )

    add_column_if_missing(conn, "percent_milestones", "five_hour_percent_at_crossing", "REAL")
    # reset_event_id: segment column added by migration 005. Fresh-install
    # DBs get it via the live CREATE TABLE above + the dispatcher
    # fast-stamps the migration. Existing pre-005 DBs trip the migration's
    # rename-recreate-copy idiom (handler in _cctally_db.py); the handler's
    # fast-path probe stamps the marker when the column is already present
    # (covers the corner case where a partially-upgraded DB has the column
    # but not the new UNIQUE — re-run is safe).

    # alerted_at: populated by the alert-dispatch path when a milestone-INSERT
    # row's threshold matches the user's configured alerts.weekly_thresholds /
    # alerts.five_hour_thresholds (and alerts.enabled is true). NULL means
    # "alerts were disabled at the moment of crossing OR the threshold wasn't
    # in the configured list" — never "alert delivery failed" (dispatch is
    # best-effort and write-once forward-only). The matching ALTER for
    # `five_hour_milestones` lives right after that table's CREATE block
    # below, since the table doesn't exist yet at this point in `open_db()`.
    add_column_if_missing(conn, "percent_milestones", "alerted_at", "TEXT")

    # Mid-week reset events: when Anthropic advances `rate_limits.seven_day.
    # resets_at` before the previously-declared reset actually fires (i.e.,
    # gives the user a fresh weekly window before the old one naturally
    # expired), we record one row here so display + cost layers can treat
    # the effective reset moment as the old week's end AND the new week's
    # start — preventing the API's -7d-derived new week from overlapping
    # the old week. Inserted by cmd_record_usage on detection; read by
    # _apply_reset_events_to_weekrefs and the cost live-recompute path.
    conn.execute(
        """
        CREATE TABLE IF NOT EXISTS week_reset_events (
            id                     INTEGER PRIMARY KEY AUTOINCREMENT,
            detected_at_utc        TEXT NOT NULL,
            old_week_end_at        TEXT NOT NULL,
            new_week_end_at        TEXT NOT NULL,
            effective_reset_at_utc TEXT NOT NULL,
            observed_pre_credit_pct REAL,
            UNIQUE(old_week_end_at, new_week_end_at)
        )
        """
    )
    _backfill_week_reset_events(conn)

    # ── five_hour_reset_events (Anthropic-issued in-place 5h credits) ──
    # Parallel concept to ``week_reset_events`` for the 5h dimension; lives
    # adjacent in ``_apply_schema`` because the two carry the same kind of
    # signal at different cadences. Diverges from weekly in that the payload
    # is the *percent values* (prior + post) rather than boundary keys,
    # because the 5h variant has a stable ``five_hour_window_key`` and only
    # the percent moves. See spec
    # docs/superpowers/specs/2026-05-16-5h-in-place-credit-detection.md §3.1
    # for rationale.
    # NOTE(review): this code sits in open_db(), and no ``_apply_schema`` is
    # defined in this module — confirm whether that name above is stale.
    #
    # UNIQUE(five_hour_window_key, effective_reset_at_utc) — supports stacked
    # credits across DISTINCT 10-min slots inside one block (see spec §2.3
    # "Bounded stacked-credit resolution" for the cap statement: ~30 distinct
    # slots per 5h block when floor matches ``_canonical_5h_window_key``'s
    # 600-second floor; same-slot collisions silently absorbed by
    # INSERT OR IGNORE — an intentional cap, not a bug).
    #
    # No FK per CLAUDE.md gotcha: FKs in this codebase are documentation-only
    # (``PRAGMA foreign_keys`` not enabled). ``five_hour_window_key`` provides
    # the join key without a formal FK.
    #
    # No ``_backfill_five_hour_reset_events`` call follows (forward-only ship
    # per spec Q5; historical backfill deferred to a future issue).
    conn.execute(
        """
        CREATE TABLE IF NOT EXISTS five_hour_reset_events (
            id                     INTEGER PRIMARY KEY AUTOINCREMENT,
            detected_at_utc        TEXT    NOT NULL,
            five_hour_window_key   INTEGER NOT NULL,
            prior_percent          REAL    NOT NULL,
            post_percent           REAL    NOT NULL,
            effective_reset_at_utc TEXT    NOT NULL,
            UNIQUE(five_hour_window_key, effective_reset_at_utc)
        )
        """
    )

    # ── five_hour_blocks (rollup, one row per API-anchored 5h block) ──
    conn.execute(
        """
        CREATE TABLE IF NOT EXISTS five_hour_blocks (
            id                            INTEGER PRIMARY KEY AUTOINCREMENT,
            five_hour_window_key          INTEGER NOT NULL UNIQUE,
            five_hour_resets_at           TEXT    NOT NULL,
            block_start_at                TEXT    NOT NULL,
            first_observed_at_utc         TEXT    NOT NULL,
            last_observed_at_utc          TEXT    NOT NULL,
            final_five_hour_percent       REAL    NOT NULL,
            seven_day_pct_at_block_start  REAL,
            seven_day_pct_at_block_end    REAL,
            crossed_seven_day_reset       INTEGER NOT NULL DEFAULT 0,
            total_input_tokens            INTEGER NOT NULL DEFAULT 0,
            total_output_tokens           INTEGER NOT NULL DEFAULT 0,
            total_cache_create_tokens     INTEGER NOT NULL DEFAULT 0,
            total_cache_read_tokens       INTEGER NOT NULL DEFAULT 0,
            total_cost_usd                REAL    NOT NULL DEFAULT 0,
            is_closed                     INTEGER NOT NULL DEFAULT 0,
            created_at_utc                TEXT    NOT NULL,
            last_updated_at_utc           TEXT    NOT NULL
        )
        """
    )
    conn.execute(
        """
        CREATE INDEX IF NOT EXISTS idx_five_hour_blocks_block_start
        ON five_hour_blocks(block_start_at DESC)
        """
    )

    # ── five_hour_milestones (per-percent crossings inside a 5h block) ──
    conn.execute(
        """
        CREATE TABLE IF NOT EXISTS five_hour_milestones (
            id                          INTEGER PRIMARY KEY AUTOINCREMENT,
            block_id                    INTEGER NOT NULL,
            five_hour_window_key        INTEGER NOT NULL,
            percent_threshold           INTEGER NOT NULL,
            captured_at_utc             TEXT    NOT NULL,
            usage_snapshot_id           INTEGER NOT NULL,
            block_input_tokens          INTEGER NOT NULL DEFAULT 0,
            block_output_tokens         INTEGER NOT NULL DEFAULT 0,
            block_cache_create_tokens   INTEGER NOT NULL DEFAULT 0,
            block_cache_read_tokens     INTEGER NOT NULL DEFAULT 0,
            block_cost_usd              REAL    NOT NULL DEFAULT 0,
            marginal_cost_usd           REAL,
            seven_day_pct_at_crossing   REAL,
            reset_event_id              INTEGER NOT NULL DEFAULT 0,
            UNIQUE(five_hour_window_key, percent_threshold, reset_event_id),
            FOREIGN KEY (block_id) REFERENCES five_hour_blocks(id)
        )
        """
    )
    conn.execute(
        """
        CREATE INDEX IF NOT EXISTS idx_five_hour_milestones_block
        ON five_hour_milestones(block_id)
        """
    )

    # alerted_at: see the matching ALTER on `percent_milestones` above for
    # rationale. Same write-once forward-only semantics: the alert-dispatch
    # path stamps this column on milestone-INSERT rows whose threshold
    # matches the user's configured `alerts.five_hour_thresholds`. NULL =
    # "alerts disabled at moment of crossing OR threshold not configured"
    # — never "delivery failed".
    add_column_if_missing(conn, "five_hour_milestones", "alerted_at", "TEXT")

    # reset_event_id: segment column added by migration 006. Fresh-install
    # DBs get it via the live CREATE TABLE above + the dispatcher fast-stamps
    # the migration marker (the live DDL must carry the column AND the 3-col
    # UNIQUE for fast-stamp to be safe — see spec §3.2). Existing pre-006
    # DBs trip the migration's rename-recreate-copy idiom (handler in
    # bin/_cctally_db.py); the handler's fast-path probe stamps the marker
    # when the column is already present (covers the corner case where a
    # partially-upgraded DB has the column but not the new UNIQUE — re-run
    # is safe). Mirrors weekly migration 005 / `percent_milestones`.

    # ── five_hour_block_models (per-(block, model) rollup-child) ──
    # MUST be created BEFORE the parent-backfill gate below, because
    # _backfill_five_hour_blocks writes into this table on the fresh-install
    # path. UNIQUE keyed on (five_hour_window_key, model) — durable across
    # parent rebuilds. Live writes use DELETE WHERE five_hour_window_key = ?.
    conn.execute(
        """
        CREATE TABLE IF NOT EXISTS five_hour_block_models (
            id                    INTEGER PRIMARY KEY AUTOINCREMENT,
            block_id              INTEGER NOT NULL,
            five_hour_window_key  INTEGER NOT NULL,
            model                 TEXT    NOT NULL,
            input_tokens          INTEGER NOT NULL DEFAULT 0,
            output_tokens         INTEGER NOT NULL DEFAULT 0,
            cache_create_tokens   INTEGER NOT NULL DEFAULT 0,
            cache_read_tokens     INTEGER NOT NULL DEFAULT 0,
            cost_usd              REAL    NOT NULL DEFAULT 0,
            entry_count           INTEGER NOT NULL DEFAULT 0,
            UNIQUE(five_hour_window_key, model),
            FOREIGN KEY (block_id) REFERENCES five_hour_blocks(id)
        )
        """
    )
    conn.execute(
        """
        CREATE INDEX IF NOT EXISTS idx_five_hour_block_models_block
        ON five_hour_block_models(block_id)
        """
    )
    conn.execute(
        """
        CREATE INDEX IF NOT EXISTS idx_five_hour_block_models_window
        ON five_hour_block_models(five_hour_window_key)
        """
    )

    # ── five_hour_block_projects (per-(block, project_path) rollup-child) ──
    # NULL session_files.project_path → '(unknown)' sentinel at write time,
    # keeping reconcile invariant SUM(child.cost) == parent.total intact.
    conn.execute(
        """
        CREATE TABLE IF NOT EXISTS five_hour_block_projects (
            id                    INTEGER PRIMARY KEY AUTOINCREMENT,
            block_id              INTEGER NOT NULL,
            five_hour_window_key  INTEGER NOT NULL,
            project_path          TEXT    NOT NULL,
            input_tokens          INTEGER NOT NULL DEFAULT 0,
            output_tokens         INTEGER NOT NULL DEFAULT 0,
            cache_create_tokens   INTEGER NOT NULL DEFAULT 0,
            cache_read_tokens     INTEGER NOT NULL DEFAULT 0,
            cost_usd              REAL    NOT NULL DEFAULT 0,
            entry_count           INTEGER NOT NULL DEFAULT 0,
            UNIQUE(five_hour_window_key, project_path),
            FOREIGN KEY (block_id) REFERENCES five_hour_blocks(id)
        )
        """
    )
    conn.execute(
        """
        CREATE INDEX IF NOT EXISTS idx_five_hour_block_projects_block
        ON five_hour_block_projects(block_id)
        """
    )
    conn.execute(
        """
        CREATE INDEX IF NOT EXISTS idx_five_hour_block_projects_window
        ON five_hour_block_projects(five_hour_window_key)
        """
    )

    # Migration framework dispatcher. Replaces the prior inline gate stack
    # (has_blocks + _migration_done) with the framework's _run_pending_-
    # migrations entry point. See spec §2.3, §5.2 + the migration handlers
    # decorated with @stats_migration. Those handlers are not defined in
    # this module; per the carve-out note at the top of this function the
    # framework lives in _cctally_db + bin/cctally.
    #
    # MUST run BEFORE any DDL or write that touches `schema_migrations`
    # (Codex P1 #1 fix on c3625ee + e7fdcc8): the dispatcher's fresh-install
    # detection snapshots `schema_migrations`'s existence in sqlite_master
    # BEFORE its own CREATE TABLE IF NOT EXISTS. Pre-creating the table
    # earlier in open_db() (or letting `_backfill_five_hour_blocks` insert
    # markers first) flips that snapshot to True on a brand-new DB and
    # dead-codes the stamp-only fast path. The dispatcher is now the sole
    # creator of `schema_migrations` + `schema_migrations_skipped`.
    _run_pending_migrations(
        conn, registry=_STATS_MIGRATIONS, db_label="stats.db",
    )

    # One-time historical backfill of five_hour_blocks (rollup only;
    # milestones are forward-only per spec §4.3 / [Write-once milestones]).
    # Idempotent via UNIQUE(five_hour_window_key) + INSERT OR IGNORE.
    # Runs AFTER the dispatcher so `schema_migrations` exists for the
    # marker INSERTs inside the backfill body, and so any fresh-install
    # stamp-only path the dispatcher took above is already committed.
    existing = conn.execute(
        "SELECT 1 FROM five_hour_blocks LIMIT 1"
    ).fetchone()
    has_snapshots = conn.execute(
        "SELECT 1 FROM weekly_usage_snapshots "
        "WHERE five_hour_window_key IS NOT NULL "
        "  AND five_hour_percent IS NOT NULL "
        "LIMIT 1"
    ).fetchone()
    if not existing and has_snapshots:
        inserted = _backfill_five_hour_blocks(conn)
        # Re-run the 5h dedup migration AFTER backfill creates parents.
        # The dispatcher above ran while five_hour_blocks was empty, so
        # the dedup handler no-op'd and stamped its marker. Snapshot
        # keys can carry jitter beyond the 600s canonical floor (the
        # 003_* migration handles up to 1800s grouping), so the
        # backfill's `DISTINCT five_hour_window_key` over those keys
        # can produce duplicate parent rows for one physical 5h
        # window. Without this re-invocation those duplicates persist
        # forever — the marker says it ran. Handler owns its own
        # BEGIN/COMMIT and is idempotent (no groups → no-op).
        #
        # Honor `db skip` here as well: if the operator marked 003 as
        # skipped (e.g., poison pill on their machine), we must NOT
        # back-door run the handler. Duplicates introduced by the
        # backfill will persist until they `db unskip` — which is the
        # explicit choice the skip records. Failure path mirrors the
        # dispatcher's contract: route through _log_migration_error so
        # the next interactive command renders the banner, and clear
        # the log entry on success so the banner auto-dismisses.
        if inserted > 0:
            target_name = "003_merge_5h_block_duplicates_v1"
            try:
                skipped = {
                    row[0] for row in conn.execute(
                        "SELECT name FROM schema_migrations_skipped"
                    ).fetchall()
                }
            except sqlite3.OperationalError:
                # The skip table could not be queried: treat as "nothing
                # skipped" so the handler still runs.
                skipped = set()
            if target_name not in skipped:
                for _m in _STATS_MIGRATIONS:
                    if _m.name == target_name:
                        qualified = f"stats.db:{target_name}"
                        try:
                            _m.handler(conn)
                            _clear_migration_error_log_entries(qualified)
                        except Exception as exc:
                            # Deliberately broad: a failed re-run is logged
                            # and reported, never raised out of open_db().
                            _log_migration_error(
                                name=qualified,
                                exc=exc,
                                tb=traceback.format_exc(),
                            )
                            eprint(f"[migration {qualified}] failed: {exc}")
                        break

    conn.commit()
    return conn
806
+
807
+
808
+ # === WeekRef cluster ================================================
809
+
810
+
811
def _canonicalize_optional_iso(value: str | None, label: str) -> str | None:
    """Canonicalize an optional week-boundary timestamp.

    Args:
        value: ISO datetime text, or ``None``.
        label: Field name passed to ``parse_iso_datetime`` for its errors.

    Returns:
        ``None`` when ``value`` is ``None`` or blank. Otherwise the
        timestamp after hour-rounding by ``_normalize_week_boundary_dt``,
        converted to UTC and rendered with seconds precision.

    Raises:
        ValueError: ``value`` is non-blank but not a parseable ISO datetime.
    """
    if value is None:
        return None
    text = value.strip()
    if not text:
        return None
    boundary = _normalize_week_boundary_dt(parse_iso_datetime(text, label))
    as_utc = boundary.astimezone(dt.timezone.utc)
    return as_utc.isoformat(timespec="seconds")
819
+
820
+
821
@dataclass(frozen=True)
class WeekRef:
    """Immutable reference to one usage week.

    The per-field notes describe instances built by ``make_week_ref``; the
    class itself does not validate anything.
    """

    # First calendar day of the week.
    week_start: dt.date
    # Last calendar day of the week, or None when no end date was given.
    week_end: dt.date | None
    # Canonical hour-rounded UTC ISO strings for the exact week boundaries,
    # or None when the boundary was not supplied or was blank.
    week_start_at: str | None
    week_end_at: str | None
    # Lookup key; make_week_ref sets it to ``week_start.isoformat()``.
    key: str
828
+
829
+
830
def make_week_ref(
    week_start_date: str,
    week_end_date: str | None,
    week_start_at: str | None = None,
    week_end_at: str | None = None,
) -> WeekRef:
    """Build a ``WeekRef`` from the string columns stored for a week.

    Args:
        week_start_date: ``YYYY-MM-DD`` first day of the week (required).
        week_end_date: ``YYYY-MM-DD`` last day; ``None`` or empty is
            stored as ``None``.
        week_start_at: Optional ISO datetime of the exact week start.
        week_end_at: Optional ISO datetime of the exact week end.

    Returns:
        A ``WeekRef`` whose ``key`` is the ISO form of the start date and
        whose ``*_at`` fields went through ``_canonicalize_optional_iso``.

    Raises:
        ValueError: a date or datetime string cannot be parsed.
    """
    start_day = dt.date.fromisoformat(week_start_date)
    end_day = None
    if week_end_date:
        end_day = dt.date.fromisoformat(week_end_date)
    return WeekRef(
        week_start=start_day,
        week_end=end_day,
        week_start_at=_canonicalize_optional_iso(week_start_at, "weekStartAt"),
        week_end_at=_canonicalize_optional_iso(week_end_at, "weekEndAt"),
        key=start_day.isoformat(),
    )
848
+
849
+
850
+ # === Usage lookup ===================================================
851
+
852
+
853
+ def _get_latest_row_for_week(
854
+ conn: sqlite3.Connection,
855
+ table_name: str,
856
+ week_ref: WeekRef,
857
+ as_of_utc: str | None = None,
858
+ ) -> sqlite3.Row | None:
859
+ if as_of_utc is None:
860
+ return conn.execute(
861
+ f"""
862
+ SELECT *
863
+ FROM {table_name}
864
+ WHERE week_start_date = ?
865
+ ORDER BY captured_at_utc DESC, id DESC
866
+ LIMIT 1
867
+ """,
868
+ (week_ref.week_start.isoformat(),),
869
+ ).fetchone()
870
+ return conn.execute(
871
+ f"""
872
+ SELECT *
873
+ FROM {table_name}
874
+ WHERE week_start_date = ?
875
+ AND captured_at_utc <= ?
876
+ ORDER BY captured_at_utc DESC, id DESC
877
+ LIMIT 1
878
+ """,
879
+ (week_ref.week_start.isoformat(), as_of_utc),
880
+ ).fetchone()
881
+
882
+
883
def get_latest_usage_for_week(
    conn: sqlite3.Connection,
    week_ref: WeekRef,
    as_of_utc: str | None = None,
) -> sqlite3.Row | None:
    """Return the newest ``weekly_usage_snapshots`` row for a week.

    Delegates to ``_get_latest_row_for_week`` with the usage table fixed.
    ``as_of_utc``, when given, limits the search to rows captured at or
    before that timestamp. Returns ``None`` when no row matches.
    """
    usage_table = "weekly_usage_snapshots"
    return _get_latest_row_for_week(conn, usage_table, week_ref, as_of_utc=as_of_utc)