cctally 1.11.1 → 1.13.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +62 -0
- package/bin/_cctally_cache.py +342 -113
- package/bin/_cctally_config.py +55 -9
- package/bin/_cctally_core.py +51 -0
- package/bin/_cctally_db.py +1654 -5
- package/bin/_cctally_record.py +1 -1
- package/bin/_cctally_setup.py +11 -1
- package/bin/_lib_diff_kernel.py +14 -4
- package/bin/_lib_jsonl.py +88 -17
- package/bin/_lib_render.py +193 -22
- package/bin/_lib_subscription_weeks.py +21 -3
- package/bin/cctally +1278 -85
- package/package.json +1 -1
package/bin/_cctally_db.py
CHANGED
|
@@ -61,6 +61,7 @@ from __future__ import annotations
|
|
|
61
61
|
|
|
62
62
|
import argparse
|
|
63
63
|
import datetime as dt
|
|
64
|
+
import enum
|
|
64
65
|
import json
|
|
65
66
|
import os
|
|
66
67
|
import pathlib
|
|
@@ -89,6 +90,13 @@ from _cctally_core import (
|
|
|
89
90
|
parse_iso_datetime,
|
|
90
91
|
)
|
|
91
92
|
|
|
93
|
+
# Stats migration 008 needs the same per-entry cost computation used by
|
|
94
|
+
# the live cost-report path. Direct import keeps the kernel single-sourced
|
|
95
|
+
# (no shim drift); _lib_pricing is a stdlib-only leaf module so no cycle
|
|
96
|
+
# risk. Other siblings (_cctally_record, _cctally_dashboard) follow the
|
|
97
|
+
# same direct-import pattern.
|
|
98
|
+
from _lib_pricing import _calculate_entry_cost
|
|
99
|
+
|
|
92
100
|
|
|
93
101
|
# Module-level back-ref shim for the one Z-high callable that STAYS in
|
|
94
102
|
# bin/cctally. Resolves `sys.modules['cctally'].X` at CALL TIME (not
|
|
@@ -199,6 +207,116 @@ class DowngradeDetected(Exception):
|
|
|
199
207
|
)
|
|
200
208
|
|
|
201
209
|
|
|
210
|
+
class MigrationGateNotMet(Exception):
    """Signal that a migration must wait on an unmet cross-DB prerequisite.

    This is a transient, retryable outcome, not a failure. The dispatcher
    must NOT record it in ``migration-errors.log``, must NOT mark the
    migration as skipped, and must NOT render the error banner; the
    migration stays pending and is attempted again on the next open.

    Raised by cross-DB migrations whose body first has to confirm that a
    sibling DB's migration has applied AND that downstream ingest has
    since repopulated the data the body depends on. The canonical case is
    stats migration 008 (recompute weekly_cost_snapshots): it needs cache
    migration 001 (dedup wipe) AND a post-wipe ``sync_cache`` cycle before
    it can safely re-sum cost.

    Spec: docs/superpowers/specs/2026-05-22-ccusage-dedup-parity.md §D4.
    """
|
|
226
|
+
|
|
227
|
+
|
|
228
|
+
@dataclass(frozen=True)
class UpgradeGateInputs:
    """Immutable input record for ``resolve_upgrade_gate`` (cctally-dev#93, spec D1).

    Every field is computed by the thin I/O shell
    ``_gate_001_post_ingest_completed``; the resolver that consumes this
    record performs no I/O of its own.
    """

    # State of cache migration 001: "applied" | "skipped" | "pending".
    cache_001_state: str
    # cache_meta walk-complete marker is present; a missing table -> False.
    walk_complete_since_001: bool
    # session_entries holds at least one row; a missing table -> False.
    cache_has_entries: bool
    # The calling migration still has historical rows it would recompute.
    caller_has_historical_rows: bool
    # "jsonl_present" | "pruned" | "absent". Used for the REASON text only.
    disk_state: str
    # False -> schema_migrations missing OR any read was transiently locked.
    marker_state_readable: bool
|
|
241
|
+
|
|
242
|
+
|
|
243
|
+
class GateAction(enum.Enum):
    """The two possible outcomes of the upgrade-gate decision."""

    # Run the recompute body.
    PROCEED = "proceed"
    # Raise MigrationGateNotMet; the migration stays pending and is
    # retried on the next open.
    DEFER = "defer"
|
|
246
|
+
|
|
247
|
+
|
|
248
|
+
@dataclass(frozen=True)
class GateResolution:
    """Immutable verdict returned by ``resolve_upgrade_gate``."""

    # Whether the migration body should run or be deferred.
    action: GateAction
    # Human-readable explanation / operator guidance for the verdict.
    reason: str
|
|
252
|
+
|
|
253
|
+
|
|
254
|
+
def resolve_upgrade_gate(inp: UpgradeGateInputs) -> GateResolution:
    """Decide PROCEED vs DEFER from ``inp`` — the D3 truth table, no I/O.

    Rows are evaluated top to bottom and the first match wins:

    1. marker state unreadable                      -> DEFER
    2. cache 001 pending                            -> DEFER
    3. cache 001 skipped, historical rows remain    -> DEFER
    4. cache 001 skipped, no historical rows        -> PROCEED
    5. (001 applied) no historical rows             -> PROCEED
    6. (001 applied) walk complete AND cache filled -> PROCEED
    7. everything else                              -> DEFER; only the
       reason text varies (by ``disk_state``, or the empty-cache case).

    Any ``cache_001_state`` other than "pending"/"skipped" is handled as
    "applied".

    Spec: docs/superpowers/specs/2026-05-23-migration-gate-state-machine-design.md D1/D3.
    """

    def defer(why: str) -> GateResolution:
        return GateResolution(GateAction.DEFER, why)

    def proceed(why: str) -> GateResolution:
        return GateResolution(GateAction.PROCEED, why)

    # Row 1 — schema_migrations missing, or a read was transiently locked.
    if not inp.marker_state_readable:
        return defer(
            "cache.db migration state unreadable (no schema_migrations table yet, "
            "or transiently locked); retry on next open."
        )

    state = inp.cache_001_state
    has_history = inp.caller_has_historical_rows

    # Row 2 — 001 has not been applied.
    if state == "pending":
        return defer(
            "cache.db migration 001_dedup_highest_wins not yet applied; run any "
            "JSONL-reading command (e.g. `cctally weekly`) once, or "
            "`cctally db skip 001_dedup_highest_wins` to defer."
        )

    # Rows 3/4 — 001 was skipped by the operator.
    if state == "skipped":
        if not has_history:
            return proceed(
                "001 skipped and no historical rows to protect; proceed (body no-ops)."
            )
        return defer(
            "cache.db migration 001_dedup_highest_wins is skipped while historical "
            "rows remain; deferring to avoid recomputing over stale pre-dedup "
            "session_entries. Run `cctally db unskip 001_dedup_highest_wins` then "
            "any JSONL-reading command once."
        )

    # From here on 001 is treated as applied.

    # Row 5 — nothing to protect.
    if not has_history:
        return proceed("no historical rows to protect; proceed (body no-ops).")

    if inp.walk_complete_since_001:
        # Row 6 — complete, non-empty post-001 cache.
        if inp.cache_has_entries:
            return proceed("complete, non-empty post-001 walk observed; proceed.")
        # Row 7 (walk complete but cache empty: rebuild/truncation over
        # a pruned disk).
        return defer(
            "cache is empty after a rebuild over pruned disk; refusing to zero "
            "historical aggregates. Restore the JSONL or `cctally db skip` this "
            "migration."
        )

    # Row 7 (walk incomplete) — the decision is identical for every
    # disk_state; only the operator guidance differs.
    if inp.disk_state == "jsonl_present":
        return defer(
            "post-001 ingest walk not yet complete; run any JSONL-reading "
            "command (e.g. `cctally weekly`) once and retry."
        )
    if inp.disk_state == "pruned":
        return defer(
            "no complete post-001 walk and projects/ holds no JSONL; restore "
            "the JSONL or `cctally db skip` this migration to accept stale "
            "aggregates."
        )
    # Any other disk_state is reported as "absent".
    return defer(
        "no complete post-001 walk and no projects/ dir resolves; check "
        "CLAUDE_CONFIG_DIR or `cctally db skip` this migration."
    )
|
|
318
|
+
|
|
319
|
+
|
|
202
320
|
def _make_migration_decorator(registry: list[Migration], db_label: str, name: str):
|
|
203
321
|
"""Internal helper — builds the @stats_migration / @cache_migration decorators.
|
|
204
322
|
|
|
@@ -330,8 +448,17 @@ def _run_pending_migrations(
|
|
|
330
448
|
auto-open on the UPDATE statements, so subsequent handler
|
|
331
449
|
``conn.execute("BEGIN")`` calls start cleanly.
|
|
332
450
|
- Fresh install (schema_migrations just CREATE'd, zero rows
|
|
333
|
-
post-bootstrap
|
|
334
|
-
|
|
451
|
+
post-bootstrap, AND the DB's primary data table is empty or
|
|
452
|
+
absent) → stamp every migration applied without invoking
|
|
453
|
+
handlers. The data-emptiness probe (D1) defends against the
|
|
454
|
+
pre-framework upgrade case where cache.db was populated by
|
|
455
|
+
a pre-v1.12.0 build that wrote ``session_entries`` without
|
|
456
|
+
ever creating ``schema_migrations`` — pre-fix that landscape
|
|
457
|
+
was falsely classified as fresh and stamped every migration
|
|
458
|
+
applied without running its handler, indefinitely persisting
|
|
459
|
+
the buggy summed-tokens dedup. Probe tables:
|
|
460
|
+
``stats.db → weekly_cost_snapshots`` / ``five_hour_blocks`` / ``percent_milestones``,
|
|
461
|
+
``cache.db → session_entries``.
|
|
335
462
|
- Per migration: handler raises ``Exception`` → log + BREAK
|
|
336
463
|
(Codex P1 #3 — the FIRST failure halts the registry walk so
|
|
337
464
|
later migrations never see partial-prior state). ``BaseException``
|
|
@@ -443,10 +570,56 @@ def _run_pending_migrations(
|
|
|
443
570
|
row[0] for row in conn.execute("SELECT name FROM schema_migrations_skipped").fetchall()
|
|
444
571
|
}
|
|
445
572
|
|
|
446
|
-
#
|
|
447
|
-
#
|
|
448
|
-
#
|
|
573
|
+
# D1 — fresh install requires BOTH "schema_migrations table did not
|
|
574
|
+
# exist" AND "the DB's primary data table is empty (or absent)".
|
|
575
|
+
# Pre-fix this check was schema_migrations-only: a pre-v1.12.0
|
|
576
|
+
# cache.db (populated session_entries but no schema_migrations
|
|
577
|
+
# table — the framework didn't exist for cache.db before this
|
|
578
|
+
# release) was falsely classified as a fresh install. The
|
|
579
|
+
# fresh-install branch then stamped EVERY pending migration's
|
|
580
|
+
# marker WITHOUT invoking its handler, so the cache 001
|
|
581
|
+
# dedup-highest-wins migration silently skipped on every upgrading
|
|
582
|
+
# user. The handler is the entire fix — skipping it leaves the
|
|
583
|
+
# buggy summed-tokens data in place indefinitely.
|
|
584
|
+
#
|
|
585
|
+
# Probe tables per DB — ANY non-empty probe table means "not fresh":
|
|
586
|
+
# * stats.db → every table the recompute migrations (008/009/010)
|
|
587
|
+
# touch: ``weekly_cost_snapshots`` (008), ``five_hour_blocks``
|
|
588
|
+
# (009), ``percent_milestones`` (010). Probing ONLY
|
|
589
|
+
# ``weekly_cost_snapshots`` was a gap: a legacy stats.db with live
|
|
590
|
+
# 5h history but no weekly snapshots (e.g. a user who only ever ran
|
|
591
|
+
# 5h-window commands) was falsely classified as a fresh install,
|
|
592
|
+
# so 009 got stamped-without-running and its historical 5h totals
|
|
593
|
+
# stayed inflated forever — the exact bug this patch set exists to
|
|
594
|
+
# fix. Probe all three so non-emptiness in ANY recompute target
|
|
595
|
+
# forces the handlers to run.
|
|
596
|
+
# * cache.db → ``session_entries`` (the table 001 wipes; non-empty
|
|
597
|
+
# means real session history under the buggy old dedup rule).
|
|
598
|
+
# Probe table absent → treat as empty (a brand-new DB hasn't run
|
|
599
|
+
# the schema CREATEs yet, so the data table doesn't exist; that's a
|
|
600
|
+
# genuine fresh install).
|
|
449
601
|
fresh_install = (not schema_migrations_existed) and len(applied) == 0
|
|
602
|
+
if fresh_install:
|
|
603
|
+
probe_tables = {
|
|
604
|
+
"stats.db": (
|
|
605
|
+
"weekly_cost_snapshots",
|
|
606
|
+
"five_hour_blocks",
|
|
607
|
+
"percent_milestones",
|
|
608
|
+
),
|
|
609
|
+
"cache.db": ("session_entries",),
|
|
610
|
+
}.get(db_label, ())
|
|
611
|
+
for probe_table in probe_tables:
|
|
612
|
+
# _probe_table_nonempty centralizes the "is there data here?"
|
|
613
|
+
# probe (cctally-dev#93): a present-and-non-empty table means
|
|
614
|
+
# data exists from a pre-framework write path, so the DB is
|
|
615
|
+
# NOT a fresh install — run every handler normally so the
|
|
616
|
+
# upgrading user gets the fix. A missing table contributes no
|
|
617
|
+
# signal (genuine pre-CREATE fresh install); keep checking the
|
|
618
|
+
# rest. Transient BUSY/LOCKED and any other OperationalError
|
|
619
|
+
# propagate (corrupt DB / IO error).
|
|
620
|
+
if _probe_table_nonempty(conn, probe_table):
|
|
621
|
+
fresh_install = False
|
|
622
|
+
break
|
|
450
623
|
|
|
451
624
|
now_iso = now_utc_iso()
|
|
452
625
|
for m in registry:
|
|
@@ -464,6 +637,40 @@ def _run_pending_migrations(
|
|
|
464
637
|
m.handler(conn)
|
|
465
638
|
_clear_migration_error_log_entries(qualified_name)
|
|
466
639
|
applied.add(m.name)
|
|
640
|
+
except MigrationGateNotMet as gate_exc:
|
|
641
|
+
# Transient cross-DB gating: do NOT log to migration-errors.log,
|
|
642
|
+
# do NOT mark as skipped, do NOT render the error banner. The
|
|
643
|
+
# migration stays pending; the next open re-tries it. Spec
|
|
644
|
+
# docs/superpowers/specs/2026-05-22-ccusage-dedup-parity.md §D4.
|
|
645
|
+
#
|
|
646
|
+
# P2 — defensive log-entry clear, symmetric with the success
|
|
647
|
+
# branch above. A prior run may have logged a hard failure
|
|
648
|
+
# for this migration; if the underlying state has since
|
|
649
|
+
# shifted such that the migration now gate-defers (e.g. a
|
|
650
|
+
# prereq vanished mid-cycle, or the handler was rewritten to
|
|
651
|
+
# gate where it previously raised), the stale error log
|
|
652
|
+
# entry would persist forever and the banner would mislead.
|
|
653
|
+
# Clearing here keeps the contract crisp: any non-failure
|
|
654
|
+
# outcome (apply OR gate-defer) clears any prior failure log
|
|
655
|
+
# for this migration's qualified name.
|
|
656
|
+
_clear_migration_error_log_entries(qualified_name)
|
|
657
|
+
if os.environ.get("CCTALLY_DEBUG"):
|
|
658
|
+
eprint(
|
|
659
|
+
f"[migration {qualified_name}] deferred: {gate_exc}"
|
|
660
|
+
)
|
|
661
|
+
# D2 — ``continue``, NOT ``break``. A gate-defer leaves the DB
|
|
662
|
+
# in a fully-consistent prior state (the handler raised before
|
|
663
|
+
# touching anything, or rolled back its own BEGIN); later
|
|
664
|
+
# registry entries can legitimately attempt to run. The
|
|
665
|
+
# all-applied predicate below uses ``applied | skipped``, so
|
|
666
|
+
# this gated migration's absence from both sets keeps
|
|
667
|
+
# ``user_version`` from advancing — a future open re-tries
|
|
668
|
+
# the gated migration even if every later one succeeded.
|
|
669
|
+
#
|
|
670
|
+
# Contrast the Exception branch below, which DOES break: a
|
|
671
|
+
# generic handler exception may have left a partial transaction
|
|
672
|
+
# state, so later migrations should not see it.
|
|
673
|
+
continue
|
|
467
674
|
except Exception as exc:
|
|
468
675
|
_log_migration_error(
|
|
469
676
|
name=qualified_name,
|
|
@@ -815,6 +1022,11 @@ _BANNER_SUPPRESSED_COMMANDS = frozenset({
|
|
|
815
1022
|
"doctor", # consolidates migration + update banner state into its
|
|
816
1023
|
# own report; double-printing the banner would duplicate
|
|
817
1024
|
# findings doctor already surfaces structurally.
|
|
1025
|
+
"blocks", # stdout-formatted table replacing `ccusage blocks`;
|
|
1026
|
+
# stderr noise pollutes the visually-aligned report and
|
|
1027
|
+
# confuses scripted pipelines piping via `2>&1`.
|
|
1028
|
+
# Banner still lands on the next interactive non-report
|
|
1029
|
+
# command (`report`, `weekly`, `percent-breakdown`, etc.).
|
|
818
1030
|
# Note: `setup` carve-out handled separately (only suppressed w/o --status).
|
|
819
1031
|
# Note: `dashboard` carve-out handled separately (banner printed in cmd_dashboard).
|
|
820
1032
|
})
|
|
@@ -1679,6 +1891,1443 @@ def _migration_observed_pre_credit_pct(conn: sqlite3.Connection) -> None:
|
|
|
1679
1891
|
raise
|
|
1680
1892
|
|
|
1681
1893
|
|
|
1894
|
+
# === Region 7b: Cross-DB migration gate helper (ccusage-parity prep) ===
|
|
1895
|
+
|
|
1896
|
+
def _gate_001_post_ingest_completed(
    cache_ro: sqlite3.Connection,
    claude_projects_dirs: pathlib.Path | list[pathlib.Path],
    *,
    data_present: bool = False,
) -> None:
    """Gather the gate inputs and raise ``MigrationGateNotMet`` on DEFER.

    This is the I/O shell around the pure ``resolve_upgrade_gate``
    resolver (cctally-dev#93, spec D1/D3). It reads cache.db and inspects
    the projects directories, packs the results into ``UpgradeGateInputs``
    and lets the resolver decide. No decision logic lives here.

    How each input is derived
    -------------------------
    * ``cache_001_state`` — ``"applied"`` when ``schema_migrations`` has a
      ``001_dedup_highest_wins`` row; otherwise ``"skipped"`` when
      ``schema_migrations_skipped`` has one; otherwise ``"pending"``.
    * ``walk_complete_since_001`` — a ``claude_ingest_walk_complete`` row
      exists in ``cache_meta``. A missing ``cache_meta`` table reads as
      ``False`` rather than forcing a defer.
    * ``cache_has_entries`` — ``SELECT 1 FROM session_entries LIMIT 1``
      returns a row. The probe is inline (not ``_probe_table_nonempty``)
      because a transient error must be caught here and turned into
      ``marker_state_readable=False``. A missing table reads as ``False``.
    * ``caller_has_historical_rows`` — ``bool(data_present)``.
    * ``disk_state`` — ``"absent"`` when no directories were passed,
      ``"jsonl_present"`` when any directory contains a ``*.jsonl`` at
      any depth, else ``"pruned"``. It only affects the reason text.
    * ``marker_state_readable`` — ``False`` when ``schema_migrations`` is
      missing, or when any of the reads hits a transient error as
      classified by ``_is_transient_sqlite_error``. Once it is ``False``
      the remaining cache reads are skipped.

    Parameters
    ----------
    cache_ro
        Connection to ``cache.db``; only SELECTs are issued on it. It is
        a parameter so tests can inject a temporary database.
    claude_projects_dirs
        A single ``pathlib.Path`` (legacy form) or a list of projects/
        directories. An empty list is valid and maps to
        ``disk_state="absent"``.
    data_present
        Keyword-only, default ``False``. Whether the caller still holds
        historical rows it is about to recompute.

    Raises
    ------
    MigrationGateNotMet
        When the resolver returns ``GateAction.DEFER``; carries the
        resolver's reason.
    sqlite3.OperationalError
        Any read error that is neither transient nor "no such table".

    Spec: docs/superpowers/specs/2026-05-23-migration-gate-state-machine-design.md D1/D3.
    """
    migration_name = "001_dedup_highest_wins"

    # Accept the legacy bare-Path form by wrapping it in a list.
    if isinstance(claude_projects_dirs, pathlib.Path):
        roots = [claude_projects_dirs]
    else:
        roots = list(claude_projects_dirs)

    def _row_present(sql: str) -> bool | None:
        # True/False for row presence; None when the read hit a transient
        # error. A missing table counts as "no row".
        try:
            return cache_ro.execute(sql).fetchone() is not None
        except sqlite3.OperationalError as exc:
            if _is_transient_sqlite_error(exc):
                return None
            if _is_no_such_table_error(exc):
                return False
            raise

    # --- cache 001 state -------------------------------------------------
    # A missing schema_migrations table (the dispatcher never ran on
    # cache.db) or a transient error makes the state unreadable, so the
    # resolver defers at row 1 instead of guessing.
    marker_state_readable = True
    cache_001_state = "pending"
    try:
        applied_row = cache_ro.execute(
            "SELECT 1 FROM schema_migrations WHERE name=?",
            (migration_name,),
        ).fetchone()
    except sqlite3.OperationalError as exc:
        if not (_is_transient_sqlite_error(exc) or _is_no_such_table_error(exc)):
            raise
        marker_state_readable = False
    else:
        if applied_row is not None:
            cache_001_state = "applied"
        else:
            try:
                skipped_row = cache_ro.execute(
                    "SELECT 1 FROM schema_migrations_skipped WHERE name=?",
                    (migration_name,),
                ).fetchone()
            except sqlite3.OperationalError as exc:
                if _is_transient_sqlite_error(exc):
                    marker_state_readable = False
                elif not _is_no_such_table_error(exc):
                    raise
                # A missing _skipped table means "not skipped" (pending).
            else:
                if skipped_row is not None:
                    cache_001_state = "skipped"

    # --- walk_complete (cache_meta marker presence) -----------------------
    walk_complete = False
    if marker_state_readable:
        marker_seen = _row_present(
            "SELECT 1 FROM cache_meta WHERE key='claude_ingest_walk_complete'"
        )
        if marker_seen is None:
            marker_state_readable = False
        else:
            walk_complete = marker_seen

    # --- cache_has_entries (session_entries non-empty) --------------------
    cache_has_entries = False
    if marker_state_readable:
        entries_seen = _row_present("SELECT 1 FROM session_entries LIMIT 1")
        if entries_seen is None:
            marker_state_readable = False
        else:
            cache_has_entries = entries_seen

    # --- disk_state (reason text only; never changes the decision) --------
    disk_state = "absent"
    if roots:
        disk_state = "pruned"
        for root in roots:
            # Stop at the first JSONL found under any root.
            if next(root.glob("**/*.jsonl"), None) is not None:
                disk_state = "jsonl_present"
                break

    gate_inputs = UpgradeGateInputs(
        cache_001_state=cache_001_state,
        walk_complete_since_001=walk_complete,
        cache_has_entries=cache_has_entries,
        caller_has_historical_rows=bool(data_present),
        disk_state=disk_state,
        marker_state_readable=marker_state_readable,
    )
    resolution = resolve_upgrade_gate(gate_inputs)
    if resolution.action is GateAction.DEFER:
        raise MigrationGateNotMet(resolution.reason)
|
|
2054
|
+
|
|
2055
|
+
|
|
2056
|
+
def _is_no_such_table_error(exc: sqlite3.OperationalError) -> bool:
|
|
2057
|
+
"""Return True iff ``exc`` is SQLite's "no such table" error.
|
|
2058
|
+
|
|
2059
|
+
Two-signal predicate to defend against future SQLite version drift
|
|
2060
|
+
in the error-message format:
|
|
2061
|
+
|
|
2062
|
+
* Substring match on the lowercased message (stable for ~20 years).
|
|
2063
|
+
* ``exc.sqlite_errorcode == SQLITE_ERROR (1)`` (Python 3.11+;
|
|
2064
|
+
cctally's floor is 3.13 per ``__min_python_version__``). The
|
|
2065
|
+
``getattr(..., None) in (None, 1)`` form degrades gracefully if
|
|
2066
|
+
the attribute is ever missing — substring-only on legacy Python.
|
|
2067
|
+
|
|
2068
|
+
Centralized so the gate shell's cache-state reads and the migration
|
|
2069
|
+
table-existence checks share the same "table missing" predicate.
|
|
2070
|
+
"""
|
|
2071
|
+
return (
|
|
2072
|
+
"no such table" in str(exc).lower()
|
|
2073
|
+
and getattr(exc, "sqlite_errorcode", None) in (None, 1)
|
|
2074
|
+
)
|
|
2075
|
+
|
|
2076
|
+
|
|
2077
|
+
def _probe_table_nonempty(conn: sqlite3.Connection, table: str) -> bool:
|
|
2078
|
+
"""True iff ``table`` exists and has at least one row. Missing table -> False.
|
|
2079
|
+
|
|
2080
|
+
Single source for the 'is there data here?' probe shared by the dispatcher
|
|
2081
|
+
fresh-install fast-path and the gate shell's cache_has_entries input
|
|
2082
|
+
(cctally-dev#93). Transient BUSY/LOCKED propagates to the caller.
|
|
2083
|
+
"""
|
|
2084
|
+
try:
|
|
2085
|
+
return conn.execute(f"SELECT 1 FROM {table} LIMIT 1").fetchone() is not None
|
|
2086
|
+
except sqlite3.OperationalError as exc:
|
|
2087
|
+
if _is_no_such_table_error(exc):
|
|
2088
|
+
return False
|
|
2089
|
+
raise
|
|
2090
|
+
|
|
2091
|
+
|
|
2092
|
+
def _is_transient_sqlite_error(exc: sqlite3.OperationalError) -> bool:
|
|
2093
|
+
"""Return True iff ``exc`` is a transient SQLite condition the gate
|
|
2094
|
+
can legitimately defer on.
|
|
2095
|
+
|
|
2096
|
+
Covers:
|
|
2097
|
+
|
|
2098
|
+
* ``SQLITE_BUSY`` (errorcode 5) — another writer holds the DB.
|
|
2099
|
+
* ``SQLITE_LOCKED`` (errorcode 6) — a table within the DB is locked.
|
|
2100
|
+
* ``SQLITE_CANTOPEN``(errorcode 14) — the DB file doesn't exist /
|
|
2101
|
+
can't be opened (e.g. unlinked mid-flight between an ``exists()``
|
|
2102
|
+
probe and ``sqlite3.connect``, or never created yet).
|
|
2103
|
+
|
|
2104
|
+
Gate-defer semantics (G4 + G5): a transient error means the gate
|
|
2105
|
+
state is genuinely unknown at this instant, NOT that the migration
|
|
2106
|
+
has failed. The dispatcher should translate to ``MigrationGateNotMet``
|
|
2107
|
+
rather than logging to ``migration-errors.log`` (which would render
|
|
2108
|
+
a misleading error banner for a self-healing condition).
|
|
2109
|
+
|
|
2110
|
+
Belt-and-suspenders predicate: matches on ``sqlite_errorcode`` first
|
|
2111
|
+
(stable Python 3.11+ API), with a substring fallback for the rare
|
|
2112
|
+
case where the attribute is missing (legacy Python builds; the
|
|
2113
|
+
``getattr(..., None) in (...)`` form degrades to substring-only).
|
|
2114
|
+
"""
|
|
2115
|
+
code = getattr(exc, "sqlite_errorcode", None)
|
|
2116
|
+
if code in (5, 6, 14):
|
|
2117
|
+
return True
|
|
2118
|
+
if code is None:
|
|
2119
|
+
msg = str(exc).lower()
|
|
2120
|
+
# Stable SQLite error-message fragments for the three transient
|
|
2121
|
+
# codes; substring-only fallback when sqlite_errorcode is absent.
|
|
2122
|
+
if (
|
|
2123
|
+
"database is locked" in msg
|
|
2124
|
+
or "database table is locked" in msg
|
|
2125
|
+
or "unable to open database" in msg
|
|
2126
|
+
):
|
|
2127
|
+
return True
|
|
2128
|
+
return False
|
|
2129
|
+
|
|
2130
|
+
|
|
2131
|
+
# === Region 7b2: Eager cache-migration trigger (V4 — same-invocation 008 apply) ===
|
|
2132
|
+
|
|
2133
|
+
|
|
2134
|
+
def _apply_cache_schema(conn: sqlite3.Connection) -> None:
    """Create cache.db's tables and indexes on ``conn`` where missing.

    Single source of cache.db's schema (cctally-dev#93, spec D4). It is
    ``_cctally()``-free so that both ``open_cache_db`` (in
    _cctally_cache.py, which already imports _cctally_db) and
    ``_eagerly_apply_cache_migrations`` can call it without an import
    cycle.

    Idempotent: every statement is ``CREATE ... IF NOT EXISTS`` and the
    column additions go through ``add_column_if_missing``.

    It does NOT run the migration dispatcher, and it does NOT include the
    Codex ``last_total_tokens`` ALTER. That ALTER carries a one-time purge
    side-effect that stays in ``open_cache_db``; a future cross-DB
    migration needing a Codex column on the eager-apply path must revisit
    that exception. The eager-apply path never touches Codex (cache 001
    and the 008/009/010 RO joins are all Claude-side), so the column's
    absence here cannot surface a ``no such column``.
    """
    schema_ddl = """
        CREATE TABLE IF NOT EXISTS session_files (
            path             TEXT PRIMARY KEY,
            size_bytes       INTEGER NOT NULL,
            mtime_ns         INTEGER NOT NULL,
            last_byte_offset INTEGER NOT NULL,
            last_ingested_at TEXT NOT NULL
        );
        CREATE TABLE IF NOT EXISTS session_entries (
            id                  INTEGER PRIMARY KEY AUTOINCREMENT,
            source_path         TEXT NOT NULL,
            line_offset         INTEGER NOT NULL,
            timestamp_utc       TEXT NOT NULL,
            model               TEXT NOT NULL,
            msg_id              TEXT,
            req_id              TEXT,
            input_tokens        INTEGER NOT NULL DEFAULT 0,
            output_tokens       INTEGER NOT NULL DEFAULT 0,
            cache_create_tokens INTEGER NOT NULL DEFAULT 0,
            cache_read_tokens   INTEGER NOT NULL DEFAULT 0,
            usage_extra_json    TEXT,
            cost_usd_raw        REAL
        );
        CREATE INDEX IF NOT EXISTS idx_entries_timestamp
            ON session_entries(timestamp_utc);
        CREATE INDEX IF NOT EXISTS idx_entries_source
            ON session_entries(source_path);
        CREATE UNIQUE INDEX IF NOT EXISTS idx_entries_dedup
            ON session_entries(msg_id, req_id)
            WHERE msg_id IS NOT NULL AND req_id IS NOT NULL;

        CREATE TABLE IF NOT EXISTS codex_session_files (
            path             TEXT PRIMARY KEY,
            size_bytes       INTEGER NOT NULL,
            mtime_ns         INTEGER NOT NULL,
            last_byte_offset INTEGER NOT NULL,
            last_ingested_at TEXT NOT NULL,
            last_session_id  TEXT,
            last_model       TEXT
        );
        CREATE TABLE IF NOT EXISTS codex_session_entries (
            id                      INTEGER PRIMARY KEY AUTOINCREMENT,
            source_path             TEXT NOT NULL,
            line_offset             INTEGER NOT NULL,
            timestamp_utc           TEXT NOT NULL,
            session_id              TEXT NOT NULL,
            model                   TEXT NOT NULL,
            input_tokens            INTEGER NOT NULL DEFAULT 0,
            cached_input_tokens     INTEGER NOT NULL DEFAULT 0,
            output_tokens           INTEGER NOT NULL DEFAULT 0,
            reasoning_output_tokens INTEGER NOT NULL DEFAULT 0,
            total_tokens            INTEGER NOT NULL DEFAULT 0,
            UNIQUE(source_path, line_offset)
        );
        CREATE INDEX IF NOT EXISTS idx_codex_entries_timestamp
            ON codex_session_entries(timestamp_utc);
        CREATE INDEX IF NOT EXISTS idx_codex_entries_session
            ON codex_session_entries(session_id);
        CREATE INDEX IF NOT EXISTS idx_codex_entries_source
            ON codex_session_entries(source_path);

        CREATE TABLE IF NOT EXISTS cache_meta (
            key   TEXT PRIMARY KEY,
            value TEXT
        );
    """
    conn.executescript(schema_ddl)

    # Inline migration: session_files gained session_id / project_path for
    # the A2 `session` subcommand metadata; they are populated lazily in
    # sync_cache() / _ensure_session_files_row().
    for column in ("session_id", "project_path"):
        add_column_if_missing(conn, "session_files", column, "TEXT")

    # The index needs the session_id column, so it is created last.
    conn.execute(
        "CREATE INDEX IF NOT EXISTS idx_session_files_session_id "
        "ON session_files(session_id)"
    )
|
|
2225
|
+
|
|
2226
|
+
|
|
2227
|
+
def _eagerly_apply_cache_migrations() -> None:
    """Apply cache.db's pending migrations ahead of stats migration 008's gate.

    Opens cache.db read-write, applies the shared schema through
    ``_apply_cache_schema``, runs the cache-side dispatcher
    (``_run_pending_migrations`` over ``_CACHE_MIGRATIONS``, which
    includes ``001_dedup_highest_wins``) and closes the connection again.

    Why
    ---
    On the very first ``cctally`` invocation post-upgrade against a
    populated stats.db, the natural call order is:

      1. ``cmd_<reporting>`` opens stats.db via ``open_db()`` (runs the
         stats dispatcher -> stats 008 fires).
      2. (Maybe) ``cmd_<jsonl-reader>`` opens cache.db via
         ``open_cache_db()`` (runs the cache dispatcher -> cache 001
         fires).

    Step 1 happens BEFORE step 2, and for commands that read stats.db
    only (e.g. ``cctally report`` without ``--sync-current``) step 2
    NEVER happens. Stats 008's gate would then find no 001 marker in
    cache.db, raise ``MigrationGateNotMet``, be deferred by the
    dispatcher, and ``report`` would keep serving stale
    ``weekly_cost_snapshots`` until the user happened to run a
    JSONL-reading command.

    This helper inverts the dependency: stats 008 triggers cache.db's
    dispatcher itself BEFORE checking the gate, so cache 001's marker is
    present afterwards (``cache_001_state="applied"``). An eagerly
    applied 001 WIPES the cache and clears the ``cache_meta``
    ``claude_ingest_walk_complete`` marker (spec D5), and the gate
    (``_gate_001_post_ingest_completed`` -> ``resolve_upgrade_gate``)
    keys ingest-completeness on that walk-complete marker rather than on
    a post-001 ``session_files`` row. Consequently:

      * Users with no JSONL on disk (or no projects/ dir at all):
        ``disk_state="absent"`` lets the resolver PROCEED (no data to
        lose) and 008 completes in the SAME invocation.
      * Users with JSONL: the gate DEFERs on this first invocation; the
        next JSONL-reading command runs ``sync_cache``, which sets the
        walk-complete marker, and the invocation after that runs 008.
        Worst case is one extra invocation instead of unbounded
        deferral.

    Lock ordering
    -------------
    Stats and cache are SEPARATE SQLite files with SEPARATE WAL locks;
    nothing here touches stats.db. The 008 handler has not yet issued
    ``BEGIN`` on the stats connection when this helper runs (that
    happens later in its body), so there is no deadlock potential.

    Failure modes
    -------------
    A ``sqlite3.DatabaseError`` on the initial open/probe is treated as
    corruption: the file is unlinked and recreated (cache.db is fully
    re-derivable from JSONL). Anything raised afterwards (disk full,
    permission denied, missing parent dir, ...) propagates to the
    caller; ``_open_cache_ro_with_gate_defer`` translates transient
    ``sqlite3.OperationalError``s into ``MigrationGateNotMet`` so the
    dispatcher defers instead of reporting a migration error.

    Implementation note
    -------------------
    cache.db is opened here directly (corruption-recovery connect +
    PRAGMAs + schema + dispatcher) rather than through
    ``_cctally_cache.open_cache_db`` because the latter calls
    ``_cctally()`` (a back-reference into ``sys.modules['cctally']``)
    which the bin/cctally entrypoint sets up but test harnesses
    exercising the stats handler directly do not. The schema comes from
    the shared ``_apply_cache_schema`` helper (cctally-dev#93, D4), the
    SAME source ``open_cache_db`` uses, so the two paths cannot drift
    (the earlier hand-curated inline subset caused the ``no such
    column: sf.project_path`` landmine). The only divergence from
    ``open_cache_db`` is the Codex ``last_total_tokens`` ALTER + purge,
    which is Claude-irrelevant and never reached by this path (see
    ``_apply_cache_schema``'s docstring + spec D4/P1#3).
    """
    cache_db_path = _cctally_core.CACHE_DB_PATH
    _cctally_core.APP_DIR.mkdir(parents=True, exist_ok=True)
    conn = None
    try:
        conn = sqlite3.connect(cache_db_path)
        conn.execute("SELECT 1").fetchone()
    except sqlite3.DatabaseError as exc:
        # Corruption recovery mirrors the contract in
        # ``_cctally_cache.open_cache_db``: cache.db is fully
        # re-derivable from JSONL, so we unlink + recreate.
        eprint(f"[cache] corrupt cache DB ({exc}); recreating")
        if conn is not None:
            # Release the handle on the unusable file BEFORE deleting
            # it: otherwise the connection leaks until it is garbage
            # collected, and platforms that refuse to delete open files
            # would fail the unlink below. Closing is best-effort; the
            # file is discarded either way.
            try:
                conn.close()
            except sqlite3.Error:
                pass
        try:
            cache_db_path.unlink()
        except FileNotFoundError:
            pass
        conn = sqlite3.connect(cache_db_path)
    try:
        conn.execute("PRAGMA journal_mode=WAL")
        conn.execute("PRAGMA busy_timeout=5000")
        # Shared cache.db schema (cctally-dev#93, D4), including
        # ``session_id`` / ``project_path`` on session_files and the
        # ``cache_meta`` table. Migration 009 joins ``sf.project_path``
        # on the RO gate connection bootstrapped from this file; column
        # resolution happens at prepare time even with zero rows, so a
        # missing column would raise ``no such column:
        # sf.project_path``. The Codex ``last_total_tokens`` ALTER is
        # intentionally not applied on this Claude-only path.
        _apply_cache_schema(conn)
        # Cache-side dispatcher: runs every pending cache migration,
        # including ``001_dedup_highest_wins``. Idempotent; when 001 has
        # already applied this is a fast-path return.
        _run_pending_migrations(
            conn, registry=_CACHE_MIGRATIONS, db_label="cache.db",
        )
    finally:
        # Close immediately so any WAL writer lock is released before
        # the stats 008 body opens its read-only gate connection.
        conn.close()
|
|
2342
|
+
|
|
2343
|
+
|
|
2344
|
+
# === Region 7c: Cache migration 001_dedup_highest_wins (ccusage-parity fix) ===
|
|
2345
|
+
|
|
2346
|
+
|
|
2347
|
+
def _recompute_banner_should_emit(
    *,
    data_present: bool,
) -> bool:
    """Decide whether a recompute-style migration may print its banner.

    Shared banner-suppression gate for cache 001 and stats 008/009/010.
    The banner is printed only when BOTH conditions hold:

      (a) ``data_present`` is true: the caller found actual rows to
          recompute. Empty-data topologies (most fresh-install upgrades,
          golden fixtures without seed rows) make the migration body a
          marker-only no-op, so a banner would announce work that is
          not happening. The caller owns this check because each
          migration scopes "data" to a different table
          (``session_entries`` for 001, ``weekly_cost_snapshots`` for
          008, ``five_hour_blocks`` for 009, ``percent_milestones`` for
          010).

      (b) ``sys.argv[1]`` is NOT in ``_BANNER_SUPPRESSED_COMMANDS``.
          That set covers hot paths that machine-consume stderr
          (``record-usage``, ``hook-tick``, ``sync-week``,
          ``cache-sync``, ``refresh-usage``), screen-owning commands
          (``tui`` / ``dashboard``), commands that surface migration
          state in their own reports (``db``, ``doctor``) and
          ``blocks``, whose stdout table is consumed by scripted
          pipelines. The banner then lands once on the next interactive
          command (``report``, ``weekly``, ``percent-breakdown``, ...).

    Args:
        data_present: True when the calling migration has rows to
            recompute.

    Returns:
        True iff the banner should be printed. When no subcommand is
        present, ``None`` is looked up in the suppression set. Any
        error while reading ``sys.argv`` yields False: silence is the
        safer side under uncertainty (worst case a user misses a
        one-line announcement; not a correctness regression).

    History: SW5-extended. Replaces per-migration ad-hoc gates that had
    drifted apart (001 checked argv[1], 008/009/010 only checked table
    emptiness), which made ``cctally blocks`` emit 009's banner even
    when ``record-usage`` would not (surfaced by ``floor-band-trap``
    golden-terminal.txt drift).
    """
    if not data_present:
        return False
    try:
        argv1 = sys.argv[1] if len(sys.argv) > 1 else None
    except Exception:
        # Documented fail-silent contract: if argv cannot be inspected
        # we cannot prove the surface is interactive, so stay quiet.
        # (Previously this path set argv1 = None and fell through to
        # "print", contradicting the contract.)
        return False
    return argv1 not in _BANNER_SUPPRESSED_COMMANDS
|
|
2395
|
+
|
|
2396
|
+
|
|
2397
|
+
def _001_banner_should_emit(conn: sqlite3.Connection) -> bool:
    """Banner gate for cache migration 001 (SW5).

    Checks whether ``session_entries`` holds at least one row and hands
    that fact to the shared ``_recompute_banner_should_emit`` helper,
    which adds the suppressed-subcommand check.

    This stays a named function instead of being inlined at the call
    site because 001's data probe needs a defensive ``sqlite3.Error``
    swallow: the migration runs early and the table may not exist yet on
    certain ALTER-mid-upgrade topologies. Stats 008/009/010 run their
    gate after the schema is fully bootstrapped and need no such guard.

    Args:
        conn: Open cache.db connection.

    Returns:
        False when the probe query fails; otherwise the shared helper's
        verdict for ``data_present = (a row exists)``.
    """
    try:
        probe = conn.execute(
            "SELECT 1 FROM session_entries LIMIT 1"
        ).fetchone()
    except sqlite3.Error:
        # Table missing/unreadable: nothing to announce.
        return False
    has_rows = probe is not None
    return _recompute_banner_should_emit(data_present=has_rows)
|
|
2417
|
+
|
|
2418
|
+
|
|
2419
|
+
@cache_migration("001_dedup_highest_wins")
def _001_dedup_highest_wins(conn: sqlite3.Connection) -> None:
    """Wipe the Claude-side cache once so it is re-ingested with fixed dedup.

    Background: the old ``INSERT OR IGNORE`` kept the
    streaming-intermediate row of each (msg_id, req_id) pair
    (output_tokens=1, no ``speed`` field) and rejected the post-stream
    finalization row (output_tokens=N, ``speed='standard'``). The
    winning row was never stored, so it cannot be reconstructed from
    ``session_entries``. This handler therefore empties
    ``session_entries`` and ``session_files``; the next ``sync_cache``
    re-reads the JSONL under the new ON CONFLICT DO UPDATE clause
    (highest token total wins, ``speed`` being set breaks ties).

    The Codex tables (``codex_session_entries``,
    ``codex_session_files``) are left alone; the bug is Claude-side
    only.

    Spec: docs/superpowers/specs/2026-05-22-ccusage-dedup-parity.md §I2.

    Invariants:
      * The marker row is INSERTed in the same transaction as the
        DELETEs.
      * An empty ``session_entries`` still gets the marker;
        table-emptiness is not the sentinel (CLAUDE.md "Pricing &
        schema"). Truly fresh installs short-circuit earlier through the
        dispatcher's ``fresh_install`` fast-path, so this handler only
        sees the case where schema and migration tables already exist
        but ``session_entries`` is empty.
      * The handler never calls ``_log_migration_error`` /
        ``_clear_migration_error_log_entries``; the dispatcher owns that
        surface (CLAUDE.md "Migration error sentinel is uniform").

    Banner (SW5): printed only when ``_001_banner_should_emit`` agrees,
    i.e. ``session_entries`` is non-empty AND ``sys.argv[1]`` is not in
    ``_BANNER_SUPPRESSED_COMMANDS``. Handlers do not receive ``args``,
    so ``sys.argv`` is consulted directly. Interactive surfaces
    (``report``, ``weekly``, ``percent-breakdown``, ...) still see the
    banner once.

    Args:
        conn: Open read-write cache.db connection.

    Raises:
        Re-raises whatever fails inside the transaction, after rolling
        it back.
    """
    migration_name = "001_dedup_highest_wins"

    if _001_banner_should_emit(conn):
        eprint(
            "[cctally] Re-ingesting Claude session history with "
            "corrected dedup (one-time; may take 10-30s depending on "
            "JSONL volume)..."
        )

    # D3: take the write lock up front. The dispatcher snapshots the
    # applied set ONCE before walking the registry, so two concurrent
    # openers (e.g. dashboard + CLI) can both classify 001 as pending
    # and both land here. Under a deferred BEGIN the loser would only
    # block on its first DELETE, then run it after the winner's COMMIT
    # and destroy rows the winner's follow-up ``sync_cache`` had already
    # re-ingested. With BEGIN IMMEDIATE the loser blocks before touching
    # data, and the re-check below turns its body into a no-op.
    conn.execute("BEGIN IMMEDIATE")
    try:
        marker = conn.execute(
            "SELECT 1 FROM schema_migrations WHERE name = ? LIMIT 1",
            (migration_name,),
        ).fetchone()
        if marker is not None:
            # Lost the race: 001 is already stamped (and the winner may
            # be repopulating). Commit the empty transaction to release
            # the write lock and leave the data untouched.
            conn.commit()
            return

        conn.execute("DELETE FROM session_entries")
        conn.execute("DELETE FROM session_files")

        # Drop the walk-complete sentinel atomically with the wipe
        # (cctally-dev#93, D5/D2): a wiped ``session_entries`` must
        # never coexist with a "complete walk" marker; ``sync_cache``
        # re-establishes it after a subsequent clean walk. Production
        # callers apply ``_apply_cache_schema`` first, so ``cache_meta``
        # exists; a missing table (e.g. an older cache.db driven through
        # the handler directly) simply means there is no stale marker.
        # That "no such table" error is raised at prepare time, before
        # any write, so the enclosing transaction stays usable.
        try:
            conn.execute("DELETE FROM cache_meta WHERE key='claude_ingest_walk_complete'")
        except sqlite3.OperationalError as exc:
            if not _is_no_such_table_error(exc):
                raise

        # INSERT OR IGNORE is belt-and-suspenders against an
        # IntegrityError banner should the marker somehow exist.
        conn.execute(
            "INSERT OR IGNORE INTO schema_migrations (name, applied_at_utc) "
            "VALUES (?, ?)",
            (migration_name, now_utc_iso()),
        )
        conn.commit()
    except Exception:
        conn.rollback()
        raise
|
|
2533
|
+
|
|
2534
|
+
|
|
2535
|
+
# === Region 7d: Stats migration 008_recompute_weekly_cost_snapshots_dedup_fix ===
|
|
2536
|
+
|
|
2537
|
+
@stats_migration("008_recompute_weekly_cost_snapshots_dedup_fix")
def _008_recompute_weekly_cost_snapshots_dedup_fix(
    conn: sqlite3.Connection,
) -> None:
    """Rewrite ``weekly_cost_snapshots.cost_usd`` from corrected cache data.

    The recompute is gated on cache migration 001 having applied AND
    ``sync_cache`` having repopulated ``session_entries`` since; the
    gate (``_gate_001_post_ingest_completed``) raises when that is not
    yet true so the dispatcher can defer.

    Scope
    -----
    Only rows with ``mode='auto'`` AND ``project IS NULL`` are touched.
    ``mode='display'`` rows keep a user-supplied cost from an earlier
    ``calculate`` run (``docs/commands/sync-week.md``), and per-project
    snapshots have aggregation boundaries this fix does not know about.
    Legacy rows whose ``range_start_iso`` or ``range_end_iso`` is NULL
    are skipped and keep their pre-fix value; the CHANGELOG names this
    as the one exception to "post-fix ``report`` matches ``weekly``".

    Cross-DB plumbing
    -----------------
    cache.db is read through the separate read-only connection returned
    by ``_open_cache_ro_with_gate_defer`` (``file:...?mode=ro`` URI).
    No ``ATTACH DATABASE``: writes stay on ``conn`` inside
    ``BEGIN``/``COMMIT`` and reads stay on the read-only connection.

    Timestamp comparison
    --------------------
    ``range_start_iso`` / ``range_end_iso`` come from
    ``insert_cost_snapshot`` -> ``parse_iso_datetime(...).isoformat()``
    and keep whatever offset the caller supplied (usually ``+00:00``,
    but naive inputs end up host-local because ``parse_iso_datetime``
    returns ``parsed.astimezone()``). ``session_entries.timestamp_utc``
    is written by ``sync_cache`` as canonical UTC ISO with a ``+00:00``
    offset. Both bounds are therefore passed through
    ``_canonical_utc_iso_for_index`` so that a plain lexical comparison
    against ``timestamp_utc`` can use ``idx_entries_timestamp``, the
    same canonicalization ``iter_entries`` /
    ``get_claude_session_entries`` apply in ``bin/_cctally_cache.py``.

    Spec: docs/superpowers/specs/2026-05-22-ccusage-dedup-parity.md §I3.

    Args:
        conn: Open read-write stats.db connection.

    Raises:
        MigrationGateNotMet: From the cache bootstrap or the gate when
            the prerequisite is not satisfied yet.
        Exception: Anything raised during the recompute, after the
            stats transaction has been rolled back.
    """
    # Shared with 009/010: eagerly runs cache.db's dispatcher (V4, so
    # the 001 marker exists even on stats-only invocations) and opens
    # cache.db read-only with the G4/G5 transient-defer translation.
    cache_ro = _open_cache_ro_with_gate_defer()
    try:
        # An empty list is returned only when no projects/ dir resolves
        # at all; the gate classifies that as ``disk_state="absent"``.
        projects_dirs = _resolve_projects_dirs_for_gate()

        # F3 scope: only the rows this migration has authority over.
        snapshot_rows = conn.execute(
            "SELECT id, range_start_iso, range_end_iso "
            "FROM weekly_cost_snapshots "
            "WHERE mode = 'auto' AND project IS NULL"
        ).fetchall()
        has_snapshots = bool(snapshot_rows)

        # The gate is a pure state machine (cctally-dev#93). With no
        # projects dirs it DEFERS (row 7) when data is present and
        # PROCEEDS (row 5, body no-ops) when there is nothing to
        # protect. There is deliberately no body-level guard (spec D7):
        # the loop below recomputes every in-range value from the
        # surviving ``session_entries``, INCLUDING down to $0, so all
        # wholesale-zeroing protection lives in the gate.
        _gate_001_post_ingest_completed(
            cache_ro, projects_dirs, data_present=has_snapshots,
        )

        # Announce only when there are eligible rows AND the active
        # subcommand is not banner-suppressed. Empty-snapshot
        # topologies and hot/scripted paths stay quiet.
        if _recompute_banner_should_emit(data_present=has_snapshots):
            eprint(
                "[cctally] Recomputing weekly_cost_snapshots from "
                "corrected session_entries (one-time; may take 30-60s "
                "on heavy histories)..."
            )

        # V1: closed interval (``<=`` on the end bound) to match the
        # production writer (``iter_entries`` in bin/_cctally_cache.py).
        # A half-open end would drop an entry landing exactly on the
        # reset instant and make this recompute diverge from every
        # later ``sync-week`` row written via ``compute_week_cost``.
        entries_sql = (
            "SELECT model, input_tokens, output_tokens, "
            "cache_create_tokens, cache_read_tokens, "
            "usage_extra_json, cost_usd_raw "
            "FROM session_entries "
            "WHERE timestamp_utc >= ? AND timestamp_utc <= ?"
        )
        update_sql = (
            "UPDATE weekly_cost_snapshots "
            "SET cost_usd = ? WHERE id = ?"
        )

        conn.execute("BEGIN")
        try:
            for snap_id, range_start_iso, range_end_iso in snapshot_rows:
                if range_start_iso is None or range_end_iso is None:
                    # Legacy row from before the range_*_iso columns
                    # existed: skip it rather than crash.
                    continue

                # Canonical UTC bounds keep the lexical compare on the
                # index instead of scanning the table.
                window = (
                    _canonical_utc_iso_for_index(range_start_iso),
                    _canonical_utc_iso_for_index(range_end_iso),
                )
                entry_rows = cache_ro.execute(entries_sql, window).fetchall()

                # Plain left-to-right float accumulation, on purpose.
                total = 0.0
                for (model, tok_in, tok_out, tok_cache_create,
                     tok_cache_read, extras_json, raw_cost) in entry_rows:
                    usage = {
                        "input_tokens": tok_in,
                        "output_tokens": tok_out,
                        "cache_creation_input_tokens": tok_cache_create,
                        "cache_read_input_tokens": tok_cache_read,
                    }
                    if extras_json:
                        # Extra usage fields are layered over the base
                        # counters.
                        usage.update(json.loads(extras_json))
                    total += _calculate_entry_cost(
                        model, usage, mode="auto", cost_usd=raw_cost,
                    )

                conn.execute(update_sql, (total, snap_id))

            # D3: INSERT OR IGNORE for race safety, matching the other
            # production migrations and cache migration 001.
            conn.execute(
                "INSERT OR IGNORE INTO schema_migrations "
                "(name, applied_at_utc) VALUES (?, ?)",
                (
                    "008_recompute_weekly_cost_snapshots_dedup_fix",
                    now_utc_iso(),
                ),
            )
            conn.execute("COMMIT")
        except Exception:
            conn.execute("ROLLBACK")
            raise
    finally:
        cache_ro.close()
|
|
2714
|
+
|
|
2715
|
+
|
|
2716
|
+
# === Region 7e: Shared cross-DB gate setup for 008/009/010 ==================
|
|
2717
|
+
|
|
2718
|
+
|
|
2719
|
+
def _open_cache_ro_with_gate_defer() -> sqlite3.Connection:
    """Bootstrap cache.db and return a snapshot-pinned read-only connection.

    Shared by stats migrations 008/009/010, which recompute from
    cache.db's ``session_entries``. Three steps:

      1. ``_eagerly_apply_cache_migrations()`` so cache 001's marker is
         in place even on stats-only invocations.
      2. Open cache.db read-only through a ``file:...?mode=ro`` URI. The
         path is percent-encoded so characters that are significant in
         SQLite URI filenames (``?``, ``#``, ``%``) cannot truncate or
         corrupt the filename; ordinary paths produce the same URI as
         plain interpolation.
      3. Issue an explicit ``BEGIN`` to pin one consistent snapshot for
         the whole recompute.

    In each step a ``sqlite3.OperationalError`` that
    ``_is_transient_sqlite_error`` classifies as transient is translated
    to ``MigrationGateNotMet`` so the dispatcher's defer machinery
    handles it (no migration-error banner). Mirrors the V4 + G4/G5
    fixes.

    Returns:
        The read-only cache.db connection with an open read
        transaction. The caller is responsible for ``.close()``.

    Raises:
        MigrationGateNotMet: cache.db is not yet initialized or is
            transiently locked.
        sqlite3.OperationalError: Any non-transient failure, re-raised
            unchanged.
    """
    # Function-scope import keeps this block self-contained.
    from urllib.parse import quote

    gate_msg = (
        "cache.db not yet initialized or transiently locked; "
        "run any JSONL-reading command (e.g. `cctally weekly`) "
        "once and retry."
    )

    try:
        _eagerly_apply_cache_migrations()
    except sqlite3.OperationalError as exc:
        if _is_transient_sqlite_error(exc):
            raise MigrationGateNotMet(gate_msg) from None
        raise

    cache_db_path = _cctally_core.CACHE_DB_PATH
    # SQLite decodes %HH escapes in URI filenames and treats "?" / "#"
    # as the start of the query / fragment, so the path must be encoded
    # before it is embedded. ``quote`` leaves "/" and unreserved
    # characters untouched.
    cache_uri = f"file:{quote(str(cache_db_path))}?mode=ro"
    try:
        cache_ro = sqlite3.connect(cache_uri, uri=True)
    except sqlite3.OperationalError as exc:
        if _is_transient_sqlite_error(exc):
            raise MigrationGateNotMet(gate_msg) from None
        raise

    # Pin a SINGLE consistent cache.db snapshot for the whole recompute
    # (cctally-dev#93 review). cache.db is WAL and Python's sqlite3 only
    # auto-BEGINs before DML, so without this every SELECT on the RO
    # connection would run in autocommit and could observe a NEWER
    # ``session_entries`` state if ``record-usage`` / ``hook-tick`` /
    # ``cache-sync`` committed between loop iterations, letting one
    # migration run recompute different rows from different cache
    # states yet still stamp its marker. A deferred BEGIN starts a read
    # transaction whose snapshot is fixed at the first read (the gate's
    # schema_migrations probe) and held until the connection is closed
    # by the caller. In WAL a reader never blocks the writer, and the
    # recompute writes go to stats.db, not to this connection.
    try:
        cache_ro.execute("BEGIN")
    except sqlite3.OperationalError as exc:
        # Do not leak the handle when the transaction cannot start.
        cache_ro.close()
        if _is_transient_sqlite_error(exc):
            raise MigrationGateNotMet(gate_msg) from None
        raise
    return cache_ro
|
|
2787
|
+
|
|
2788
|
+
|
|
2789
|
+
def _resolve_projects_dirs_for_gate() -> list[pathlib.Path]:
    """Return the Claude projects/ dirs used by the 008/009/010 gate checks.

    Resolution order:

      1. Whatever ``_cctally_core._resolve_claude_projects_dirs()``
         yields (the env-aware resolver).
      2. If that is empty but ``_cctally_core.CLAUDE_PROJECTS_DIR``
         exists as a directory, a one-element list holding that default
         (covers test-time monkeypatch overrides).

    The result is fed to ``_gate_001_post_ingest_completed``. An empty
    result therefore means no projects/ dir resolved under any
    env-configured or default root: the legitimate
    ``disk_state="absent"`` topology. Callers do not fail closed on it
    themselves (Task 5 removed each caller's inline G3 block); the gate
    resolver decides, DEFERring at row 7 while historical rows remain
    and PROCEEDing at row 5 otherwise.
    """
    resolved = _cctally_core._resolve_claude_projects_dirs()
    if resolved:
        return resolved
    # Resolver came back empty: fall back to the default root only when
    # it really exists on disk.
    if _cctally_core.CLAUDE_PROJECTS_DIR.is_dir():
        return [_cctally_core.CLAUDE_PROJECTS_DIR]
    return resolved
|
|
2809
|
+
|
|
2810
|
+
|
|
2811
|
+
def _canonical_utc_iso_for_index(value: str) -> str:
    """Rewrite an ISO-8601 timestamp as a UTC ``isoformat()`` string.

    The input is parsed with ``parse_iso_datetime`` (label
    ``"migration-range-bound"``), converted to UTC, and re-serialized
    with ``datetime.isoformat()``. The result carries a ``+00:00``
    offset.

    Why the migrations need it
    --------------------------
    ``session_entries.timestamp_utc`` is written by ``sync_cache``
    (bin/_cctally_cache.py) as
    ``entry.timestamp.astimezone(dt.timezone.utc).isoformat()``, e.g.
    ``2026-05-01T12:34:56.789012+00:00``. The range bounds used by
    migrations 008/009/010 arrive in other shapes:

    * ``weekly_cost_snapshots.range_start_iso`` / ``range_end_iso`` —
      host-local offset or ``+00:00``, depending on the writer's input.
    * ``five_hour_blocks.block_start_at`` — host-local offset.
    * ``five_hour_blocks.last_observed_at_utc`` — ``Z``-suffixed
      (``now_utc_iso()``).
    * ``percent_milestones.week_start_at`` / ``captured_at_utc`` — the
      same mix.

    Putting every bound into the writer's own shape makes a plain
    string comparison in
    ``WHERE timestamp_utc >= ? AND timestamp_utc <= ?`` order-correct,
    and lets SQLite answer it with ``idx_entries_timestamp``
    (``SEARCH session_entries USING INDEX idx_entries_timestamp``).
    An earlier approach wrapped both sides in ``unixepoch(...)``; that
    compared correctly but forced ``SCAN session_entries`` on every
    range slice.

    This is the same canonicalization the read paths ``iter_entries``
    / ``get_claude_session_entries`` in bin/_cctally_cache.py apply.

    Args:
        value: ISO-8601 timestamp string in any of the shapes above.

    Returns:
        The same instant as a UTC ISO-8601 string.
    """
    parsed = parse_iso_datetime(value, "migration-range-bound")
    in_utc = parsed.astimezone(dt.timezone.utc)
    return in_utc.isoformat()
|
|
2859
|
+
|
|
2860
|
+
|
|
2861
|
+
# === Region 7f: Stats migration 009_recompute_five_hour_blocks_dedup_fix ====
|
|
2862
|
+
|
|
2863
|
+
@stats_migration("009_recompute_five_hour_blocks_dedup_fix")
def _009_recompute_five_hour_blocks_dedup_fix(
    conn: sqlite3.Connection,
) -> None:
    """Rebuild every 5h block's totals and rollup children from cache.db.

    For each row of ``five_hour_blocks`` (active and closed alike) the
    migration re-reads ``session_entries`` from the cache database and
    rewrites:

    * ``five_hour_blocks.total_*`` (four token totals plus cost), and
    * the per-model / per-project children in
      ``five_hour_block_models`` and ``five_hour_block_projects``
      (delete-then-insert, i.e. replace-all).

    Motivation (spec §I3, B1): the live writer
    (``maybe_update_five_hour_block``) only refreshes the currently
    active block. Without a one-time pass, closed historical blocks and
    their children would keep their pre-dedup numbers forever.

    Gate
    ----
    The cache connection comes from ``_open_cache_ro_with_gate_defer``.
    ``_gate_001_post_ingest_completed`` is then called with the
    resolved projects dirs and ``data_present=bool(block_rows)``. The
    gate runs before any write to ``conn``. There is no additional
    body-level guard: a block whose range has no surviving entries is
    recomputed to zero.

    Range selection
    ---------------
    Entries are selected over the closed interval
    ``[block_start_at, last_observed_at_utc]``. Both bounds pass
    through ``_canonical_utc_iso_for_index``, so the comparison against
    ``session_entries.timestamp_utc`` is a plain string comparison that
    can use ``idx_entries_timestamp``. The upper bound is inclusive
    (``<=``) so that an entry stamped exactly at
    ``last_observed_at_utc`` is counted.

    Project attribution
    -------------------
    ``session_entries`` is LEFT JOINed to ``session_files`` on
    ``source_path``. A missing or empty ``project_path`` is bucketed
    under the ``'(unknown)'`` sentinel.

    Banner
    ------
    The one-time stderr notice is printed only when
    ``_recompute_banner_should_emit(data_present=...)`` allows it, so
    an empty ``five_hour_blocks`` table stays quiet.

    Transaction
    -----------
    All writes, including the ``schema_migrations`` bookkeeping row
    (``INSERT OR IGNORE``), happen in one explicit ``BEGIN`` …
    ``COMMIT`` on ``conn``. Any exception triggers ``ROLLBACK`` and is
    re-raised. The cache connection is closed on every exit path.

    Spec: docs/superpowers/specs/2026-05-22-ccusage-dedup-parity.md §I3 (B1).

    Args:
        conn: Open connection to stats.db; receives all writes.
    """
    select_blocks_sql = (
        "SELECT id, five_hour_window_key, block_start_at, "
        "last_observed_at_utc "
        "FROM five_hour_blocks"
    )
    select_entries_sql = (
        "SELECT se.model, se.input_tokens, se.output_tokens, "
        " se.cache_create_tokens, se.cache_read_tokens, "
        " se.usage_extra_json, se.cost_usd_raw, "
        " sf.project_path "
        "FROM session_entries se "
        "LEFT JOIN session_files sf "
        " ON sf.path = se.source_path "
        "WHERE se.timestamp_utc >= ? "
        " AND se.timestamp_utc <= ?"
    )
    update_block_sql = (
        "UPDATE five_hour_blocks "
        "SET total_input_tokens = ?, "
        " total_output_tokens = ?, "
        " total_cache_create_tokens = ?, "
        " total_cache_read_tokens = ?, "
        " total_cost_usd = ? "
        "WHERE id = ?"
    )
    delete_models_sql = (
        "DELETE FROM five_hour_block_models "
        "WHERE five_hour_window_key = ?"
    )
    insert_models_sql = (
        "INSERT INTO five_hour_block_models "
        "(block_id, five_hour_window_key, model, "
        " input_tokens, output_tokens, "
        " cache_create_tokens, cache_read_tokens, "
        " cost_usd, entry_count) "
        "VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)"
    )
    delete_projects_sql = (
        "DELETE FROM five_hour_block_projects "
        "WHERE five_hour_window_key = ?"
    )
    insert_projects_sql = (
        "INSERT INTO five_hour_block_projects "
        "(block_id, five_hour_window_key, "
        " project_path, "
        " input_tokens, output_tokens, "
        " cache_create_tokens, cache_read_tokens, "
        " cost_usd, entry_count) "
        "VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)"
    )

    def _rollup_rows(block_id, window_key_int, buckets):
        # One INSERT parameter tuple per bucket, in bucket insertion order.
        return [
            (
                int(block_id),
                window_key_int,
                key,
                agg["input_tokens"],
                agg["output_tokens"],
                agg["cache_create_tokens"],
                agg["cache_read_tokens"],
                agg["cost_usd"],
                agg["entry_count"],
            )
            for key, agg in buckets.items()
        ]

    cache_ro = _open_cache_ro_with_gate_defer()
    try:
        gate_dirs = _resolve_projects_dirs_for_gate()
        block_rows = conn.execute(select_blocks_sql).fetchall()
        has_blocks = bool(block_rows)

        # Shared gate: must pass before anything is written to stats.db.
        _gate_001_post_ingest_completed(
            cache_ro, gate_dirs, data_present=has_blocks,
        )

        # Announce the one-time recompute on stderr only when the helper
        # allows it (non-empty table, subcommand not banner-suppressed).
        if _recompute_banner_should_emit(data_present=has_blocks):
            eprint(
                "[cctally] Recomputing closed 5h block totals after "
                "dedup fix (one-time; may take 30-60s on heavy "
                "histories)..."
            )

        conn.execute("BEGIN")
        try:
            for block_row in block_rows:
                (
                    block_id, window_key, block_start_at,
                    last_observed_at_utc,
                ) = block_row

                # Closed interval [block_start, last_observed], bounds
                # canonicalized so the string compare can use the
                # timestamp index.
                entry_rows = cache_ro.execute(
                    select_entries_sql,
                    (
                        _canonical_utc_iso_for_index(block_start_at),
                        _canonical_utc_iso_for_index(
                            last_observed_at_utc
                        ),
                    ),
                ).fetchall()

                sum_in = 0
                sum_out = 0
                sum_cc = 0
                sum_cr = 0
                sum_cost = 0.0
                per_model: dict[str, dict[str, Any]] = {}
                per_project: dict[str, dict[str, Any]] = {}

                for entry_row in entry_rows:
                    (
                        model, in_t, out_t, cc_t, cr_t,
                        extras_json, raw_cost, project_path,
                    ) = entry_row

                    # The pricing kernel gets the raw column values;
                    # any extra usage keys stored as JSON are merged
                    # on top.
                    usage = {
                        "input_tokens": in_t,
                        "output_tokens": out_t,
                        "cache_creation_input_tokens": cc_t,
                        "cache_read_input_tokens": cr_t,
                    }
                    if extras_json:
                        usage.update(json.loads(extras_json))
                    cost = _calculate_entry_cost(
                        model, usage, mode="auto", cost_usd=raw_cost,
                    )

                    # NULL token columns count as zero.
                    n_in = int(in_t or 0)
                    n_out = int(out_t or 0)
                    n_cc = int(cc_t or 0)
                    n_cr = int(cr_t or 0)

                    sum_in += n_in
                    sum_out += n_out
                    sum_cc += n_cc
                    sum_cr += n_cr
                    sum_cost += cost

                    # The same entry feeds one model bucket and one
                    # project bucket.
                    targets = (
                        (model, per_model),
                        (project_path or "(unknown)", per_project),
                    )
                    for key, buckets in targets:
                        agg = buckets.get(key)
                        if agg is None:
                            agg = {
                                "input_tokens": 0,
                                "output_tokens": 0,
                                "cache_create_tokens": 0,
                                "cache_read_tokens": 0,
                                "cost_usd": 0.0,
                                "entry_count": 0,
                            }
                            buckets[key] = agg
                        agg["input_tokens"] += n_in
                        agg["output_tokens"] += n_out
                        agg["cache_create_tokens"] += n_cc
                        agg["cache_read_tokens"] += n_cr
                        agg["cost_usd"] += cost
                        agg["entry_count"] += 1

                conn.execute(
                    update_block_sql,
                    (
                        sum_in, sum_out, sum_cc, sum_cr,
                        sum_cost, block_id,
                    ),
                )

                # Replace-all for both child tables. The DELETE is keyed
                # on the window key (not block_id) so it also sweeps
                # orphans left by earlier parent rebuilds.
                window_key_int = int(window_key)

                conn.execute(delete_models_sql, (window_key_int,))
                if per_model:
                    conn.executemany(
                        insert_models_sql,
                        _rollup_rows(block_id, window_key_int, per_model),
                    )

                conn.execute(delete_projects_sql, (window_key_int,))
                if per_project:
                    conn.executemany(
                        insert_projects_sql,
                        _rollup_rows(
                            block_id, window_key_int, per_project,
                        ),
                    )

            # Bookkeeping row goes in the same transaction as the data.
            conn.execute(
                "INSERT OR IGNORE INTO schema_migrations "
                "(name, applied_at_utc) VALUES (?, ?)",
                (
                    "009_recompute_five_hour_blocks_dedup_fix",
                    now_utc_iso(),
                ),
            )
            conn.execute("COMMIT")
        except Exception:
            conn.execute("ROLLBACK")
            raise
    finally:
        cache_ro.close()
|
|
3139
|
+
|
|
3140
|
+
|
|
3141
|
+
# === Region 7g: Stats migration 010_recompute_percent_milestones_dedup_fix ==
|
|
3142
|
+
|
|
3143
|
+
@stats_migration("010_recompute_percent_milestones_dedup_fix")
def _010_recompute_percent_milestones_dedup_fix(
    conn: sqlite3.Connection,
) -> None:
    """Rewrite historical milestone costs from the corrected cache.

    ``percent_milestones`` rows are normally write-once: the cost at
    the moment a threshold is crossed is captured on insert and never
    touched again. This migration is the single scoped exception
    (spec §I3, B2). After the dedup fix, the stored
    ``cumulative_cost_usd`` / ``marginal_cost_usd`` values are inflated
    relative to the corrected weekly cost, so every existing row is
    recomputed once. New crossings keep their write-once behaviour.

    Per row
    -------
    * ``cumulative_cost_usd`` — sum of per-entry cost over the closed
      interval ``[week start, captured_at_utc]`` in
      ``session_entries``. The week start is ``week_start_at`` when
      set; otherwise ``week_start_date`` taken as midnight UTC. A row
      with neither is skipped and keeps its recorded values.
    * ``marginal_cost_usd`` — ``cumulative`` minus the cumulative of
      the previously processed row in the same
      ``(week_start_date, reset_event_id)`` segment, with a missing
      ``reset_event_id`` treated as ``0``. The first row of a segment
      gets ``marginal == cumulative``. Rows are read ordered by
      ``week_start_date, reset_event_id, percent_threshold, id``, which
      is what makes "previously processed" mean "immediately lower
      threshold".

    Gate
    ----
    Same as 008/009: the cache connection comes from
    ``_open_cache_ro_with_gate_defer``, and
    ``_gate_001_post_ingest_completed`` is called with the resolved
    projects dirs and ``data_present=bool(milestone_rows)`` before any
    write. There is no body-level or per-segment guard: a segment with
    no surviving entries yields ``cumulative = 0`` / ``marginal = 0``.

    Range selection
    ---------------
    Both bounds pass through ``_canonical_utc_iso_for_index``, so the
    inclusive ``>=`` / ``<=`` comparison against ``timestamp_utc`` is a
    plain string comparison that can use ``idx_entries_timestamp``.

    Banner
    ------
    Printed to stderr only when
    ``_recompute_banner_should_emit(data_present=...)`` allows it.

    Transaction
    -----------
    All updates and the ``schema_migrations`` bookkeeping row
    (``INSERT OR IGNORE``) share one explicit ``BEGIN`` … ``COMMIT`` on
    ``conn``. Any exception triggers ``ROLLBACK`` and is re-raised. The
    cache connection is closed on every exit path.

    Spec: docs/superpowers/specs/2026-05-22-ccusage-dedup-parity.md §I3 (B2).

    Args:
        conn: Open connection to stats.db; receives all writes.
    """
    select_milestones_sql = (
        "SELECT id, week_start_date, week_start_at, captured_at_utc, "
        " percent_threshold, reset_event_id "
        "FROM percent_milestones "
        "ORDER BY week_start_date ASC, reset_event_id ASC, "
        " percent_threshold ASC, id ASC"
    )
    select_entries_sql = (
        "SELECT model, input_tokens, output_tokens, "
        " cache_create_tokens, cache_read_tokens, "
        " usage_extra_json, cost_usd_raw "
        "FROM session_entries "
        "WHERE timestamp_utc >= ? AND timestamp_utc <= ?"
    )
    update_milestone_sql = (
        "UPDATE percent_milestones "
        "SET cumulative_cost_usd = ?, "
        " marginal_cost_usd = ? "
        "WHERE id = ?"
    )

    cache_ro = _open_cache_ro_with_gate_defer()
    try:
        gate_dirs = _resolve_projects_dirs_for_gate()
        milestone_rows = conn.execute(select_milestones_sql).fetchall()
        has_milestones = bool(milestone_rows)

        # Shared gate: must pass before anything is written to stats.db.
        _gate_001_post_ingest_completed(
            cache_ro, gate_dirs, data_present=has_milestones,
        )

        # Announce the one-time recompute on stderr only when the helper
        # allows it (non-empty table, subcommand not banner-suppressed).
        if _recompute_banner_should_emit(data_present=has_milestones):
            eprint(
                "[cctally] Recomputing percent milestone costs after "
                "dedup fix (one-time; may take 30-60s on heavy "
                "histories)..."
            )

        conn.execute("BEGIN")
        try:
            # Cumulative cost of the most recently processed milestone,
            # per (week_start_date, reset_event_id) segment. The SELECT's
            # ORDER BY makes that the immediately lower threshold, so
            # one pass is enough.
            last_cumulative: dict[tuple[str, int], float] = {}

            for milestone_row in milestone_rows:
                (
                    mid, week_start_date, week_start_at,
                    captured_at_utc, _threshold, reset_event_id,
                ) = milestone_row

                # No usable week boundary at all: leave the row as
                # recorded.
                if not week_start_at and not week_start_date:
                    continue
                # Prefer the exact instant; legacy rows fall back to the
                # date taken as midnight UTC.
                range_start_iso = (
                    week_start_at
                    or f"{week_start_date}T00:00:00+00:00"
                )

                entry_rows = cache_ro.execute(
                    select_entries_sql,
                    (
                        _canonical_utc_iso_for_index(range_start_iso),
                        _canonical_utc_iso_for_index(captured_at_utc),
                    ),
                ).fetchall()

                # Plain left-to-right float accumulation, one entry at a
                # time.
                cumulative = 0.0
                for entry_row in entry_rows:
                    (
                        model, in_t, out_t, cc_t, cr_t,
                        extras_json, raw_cost,
                    ) = entry_row
                    usage = {
                        "input_tokens": in_t,
                        "output_tokens": out_t,
                        "cache_creation_input_tokens": cc_t,
                        "cache_read_input_tokens": cr_t,
                    }
                    if extras_json:
                        usage.update(json.loads(extras_json))
                    cumulative += _calculate_entry_cost(
                        model, usage, mode="auto", cost_usd=raw_cost,
                    )

                segment = (week_start_date, int(reset_event_id or 0))
                previous = last_cumulative.get(segment)
                if previous is None:
                    # First milestone seen in this segment.
                    marginal = cumulative
                else:
                    marginal = cumulative - previous
                last_cumulative[segment] = cumulative

                conn.execute(
                    update_milestone_sql,
                    (cumulative, marginal, mid),
                )

            # Bookkeeping row goes in the same transaction as the data.
            conn.execute(
                "INSERT OR IGNORE INTO schema_migrations "
                "(name, applied_at_utc) VALUES (?, ?)",
                (
                    "010_recompute_percent_milestones_dedup_fix",
                    now_utc_iso(),
                ),
            )
            conn.execute("COMMIT")
        except Exception:
            conn.execute("ROLLBACK")
            raise
    finally:
        cache_ro.close()
|
|
3329
|
+
|
|
3330
|
+
|
|
1682
3331
|
# === Region 8: Test-only migration registration (was bin/cctally:12086-12140) ===
|
|
1683
3332
|
|
|
1684
3333
|
# ──────────────────────────────────────────────────────────────────────
|