cctally 1.27.1 → 1.29.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -202,9 +202,38 @@ class DowngradeDetected(Exception):
202
202
  self.db_label = db_label
203
203
  self.db_version = db_version
204
204
  self.max_known = max_known
205
+ db_key = "cache" if db_label.startswith("cache") else "stats"
205
206
  super().__init__(
206
207
  f"{db_label} is at version {db_version} but this cctally "
207
- f"only knows up to {max_known}."
208
+ f"only knows up to {max_known}. A newer/unreleased cctally likely "
209
+ f"touched this data dir. Run `cctally db recover --db {db_key}` to "
210
+ f"revert it to the known schema head (cache.db is re-derivable and "
211
+ f"recovers without --yes; stats.db needs --yes and may require a "
212
+ f"re-record afterward)."
213
+ )
214
+
215
+
216
+ class ProdMigrationRefused(Exception):
217
+ """Raised by the dispatcher when a git-checkout binary would forward-migrate
218
+ the REAL prod data dir (~/.local/share/cctally), which would brick the
219
+ installed release with DowngradeDetected (issue #142).
220
+
221
+ Escape hatch: set CCTALLY_ALLOW_PROD_MIGRATION=1. The guard is
222
+ connection-scoped + password-DB-resolved (see _would_block_prod_migration
223
+ + _cctally_core._real_prod_data_dir) so it never fires on :memory:/temp/
224
+ fake-HOME test connections. Spec:
225
+ docs/superpowers/specs/2026-06-05-prod-migration-guard-design.md."""
226
+
227
+ def __init__(self, db_label: str, next_migration: str):
228
+ self.db_label = db_label
229
+ self.next_migration = next_migration
230
+ super().__init__(
231
+ f"cctally: refusing to apply migration '{next_migration}' "
232
+ f"({db_label}) to the prod data dir (~/.local/share/cctally) from "
233
+ f"a dev checkout — a checkout may carry migrations your installed "
234
+ f"cctally can't read, which would brick it (DowngradeDetected). "
235
+ f"Point CCTALLY_DATA_DIR at a scratch/dev dir, or run the installed "
236
+ f"binary. Override with CCTALLY_ALLOW_PROD_MIGRATION=1."
208
237
  )
209
238
 
210
239
 
@@ -430,11 +459,156 @@ def _bootstrap_rename_legacy_markers(conn: sqlite3.Connection, db_label: str) ->
430
459
  _clear_migration_error_log_entries(old)
431
460
 
432
461
 
462
+ def _conn_db_dir(conn: sqlite3.Connection) -> "pathlib.Path | None":
463
+ """Resolved directory of the connection's `main` database file, or None for
464
+ an in-memory / no-file connection (PRAGMA database_list returns '' there).
465
+ Tuple-indexed so it works on the cache.db connection (no row_factory)."""
466
+ for row in conn.execute("PRAGMA database_list").fetchall():
467
+ if row[1] == "main":
468
+ db_file = row[2]
469
+ if not db_file:
470
+ return None
471
+ return pathlib.Path(db_file).resolve().parent
472
+ return None
473
+
474
+
475
+ def _would_block_prod_migration(conn: sqlite3.Connection) -> bool:
476
+ """True iff a git-checkout binary is about to migrate a DB that physically
477
+ lives in the REAL prod data dir (issue #142).
478
+
479
+ Connection-scoped (NOT global APP_DIR) so :memory:/temp/scratch connections
480
+ never trip it; HOME-faking-immune via _real_prod_data_dir (password DB, not
481
+ $HOME); suppressor-INDEPENDENT raw .git check so it still fires under the
482
+ test-suite's CCTALLY_DISABLE_DEV_AUTODETECT. Escape: CCTALLY_ALLOW_PROD_MIGRATION."""
483
+ if os.environ.get("CCTALLY_ALLOW_PROD_MIGRATION"):
484
+ return False
485
+ if not (_cctally_core._repo_root() / ".git").exists():
486
+ return False
487
+ db_dir = _conn_db_dir(conn)
488
+ if db_dir is None:
489
+ return False
490
+ try:
491
+ return db_dir == _cctally_core._real_prod_data_dir().resolve()
492
+ except OSError:
493
+ return False
494
+
495
+
496
+ def _first_pending_migration_name(
497
+ conn: sqlite3.Connection, registry: "list[Migration]", cur_version: int
498
+ ) -> str:
499
+ """Best-effort name of the first not-yet-applied migration, for the refusal
500
+ message. Marker-aware (handles skip-gaps + db-unskip's user_version=0) with
501
+ a raw-index fallback. Legacy unprefixed markers are an accepted imperfection
502
+ — the name is a human hint, not load-bearing."""
503
+ try:
504
+ applied = {r[0] for r in conn.execute(
505
+ "SELECT name FROM schema_migrations").fetchall()}
506
+ except sqlite3.OperationalError:
507
+ applied = set()
508
+ try:
509
+ skipped = {r[0] for r in conn.execute(
510
+ "SELECT name FROM schema_migrations_skipped").fetchall()}
511
+ except sqlite3.OperationalError:
512
+ skipped = set()
513
+ for m in registry:
514
+ if m.name not in applied and m.name not in skipped:
515
+ return m.name
516
+ return registry[cur_version].name
517
+
518
+
519
+ def _recover_version_ahead(
520
+ conn: sqlite3.Connection,
521
+ registry: list[Migration],
522
+ db_label: str,
523
+ ) -> dict:
524
+ """Reconcile a version-ahead DB down to this binary's known head (issue #145).
525
+
526
+ A DB whose ``PRAGMA user_version`` exceeds ``len(registry)`` was last
527
+ touched by a newer/unreleased cctally. cache.db is fully re-derivable, so
528
+ we heal in place instead of bricking: trim the unknown (ahead) markers from
529
+ BOTH ledger tables, then reconcile ``user_version``.
530
+
531
+ We DELIBERATELY do not blind-set ``user_version = len(registry)``: the
532
+ dispatcher treats ``schema_migrations_skipped`` as authoritative and only
533
+ advances ``user_version`` when every known migration is applied-or-skipped.
534
+ So we trim unknown rows from both tables (Codex review P1 #1), then set
535
+ ``user_version = len(registry)`` only if every known migration is
536
+ applied-or-skipped; otherwise ``0`` so the dispatcher's normal walk re-runs
537
+ the still-pending known migrations idempotently (Codex review P1 #2) — never
538
+ cementing a fast-path past a genuinely-missing known migration.
539
+
540
+ Extra tables/columns the unknown migration created are left inert (SQLite
541
+ tolerates them; cache is re-derivable). Idempotent: no-op when not ahead.
542
+ Tolerates absent ledger tables (Codex review P2).
543
+
544
+ Returns ``{"reverted_from", "reverted_to", "trimmed"}`` for the caller's
545
+ breadcrumb / ``db recover`` report.
546
+ """
547
+ cur_version = conn.execute("PRAGMA user_version").fetchone()[0]
548
+ if cur_version <= len(registry):
549
+ return {"reverted_from": cur_version, "reverted_to": cur_version, "trimmed": 0}
550
+
551
+ aliases = _LEGACY_MARKER_ALIASES_BY_DB.get(db_label, {})
552
+ known = {m.name for m in registry} | set(aliases.keys()) | set(aliases.values())
553
+ placeholders = ",".join("?" for _ in known) if known else "''"
554
+ params = tuple(known)
555
+
556
+ trimmed = 0
557
+ for table in ("schema_migrations", "schema_migrations_skipped"):
558
+ try:
559
+ cur = conn.execute(
560
+ f"DELETE FROM {table} WHERE name NOT IN ({placeholders})", params
561
+ )
562
+ trimmed += max(cur.rowcount, 0) # DELETE rowcount is always >= 0
563
+ except sqlite3.OperationalError:
564
+ pass # table absent → nothing to trim there
565
+
566
+ applied: set[str] = set()
567
+ skipped: set[str] = set()
568
+ for table, dest in (("schema_migrations", applied),
569
+ ("schema_migrations_skipped", skipped)):
570
+ try:
571
+ for row in conn.execute(f"SELECT name FROM {table}").fetchall():
572
+ # Normalize legacy unprefixed markers to their canonical NNN_
573
+ # name (issue #148). The alias union above keeps such a row from
574
+ # being trimmed; without this normalization the membership test
575
+ # below compares canonical m.name against the legacy alias and
576
+ # falsely concludes the migration is missing, resetting
577
+ # user_version to 0 and forcing a needless full re-walk. Mirrors
578
+ # the alias-aware read in cmd_db_status.
579
+ dest.add(aliases.get(row[0], row[0]))
580
+ except sqlite3.OperationalError:
581
+ pass
582
+
583
+ all_known_done = all((m.name in applied or m.name in skipped) for m in registry)
584
+ new_version = len(registry) if all_known_done else 0
585
+ conn.execute(f"PRAGMA user_version = {new_version}")
586
+ conn.commit()
587
+ return {"reverted_from": cur_version, "reverted_to": new_version, "trimmed": trimmed}
588
+
589
+
590
+ def _stamp_applied(conn, name, applied_at_utc=None):
591
+ """Persist the schema_migrations marker for ``name``, then commit.
592
+
593
+ Central stamp owned by the dispatcher (issue #140). Handlers no longer
594
+ self-stamp — EXCEPT cache 001, whose stamp must stay atomic with its
595
+ destructive wipe; for that one this call is an idempotent no-op.
596
+ ``INSERT OR IGNORE`` so a pre-existing row (cache 001, or a concurrent
597
+ winner) never raises.
598
+ """
599
+ conn.execute(
600
+ "INSERT OR IGNORE INTO schema_migrations (name, applied_at_utc) VALUES (?, ?)",
601
+ (name, applied_at_utc or now_utc_iso()),
602
+ )
603
+ conn.commit()
604
+
605
+
433
606
  def _run_pending_migrations(
434
607
  conn: sqlite3.Connection,
435
608
  *,
436
609
  registry: list[Migration],
437
610
  db_label: str,
611
+ recover_version_ahead: bool = False,
438
612
  ) -> None:
439
613
  """Apply pending migrations from ``registry`` against ``conn``.
440
614
 
@@ -475,9 +649,31 @@ def _run_pending_migrations(
475
649
  """
476
650
  cur_version = conn.execute("PRAGMA user_version").fetchone()[0]
477
651
  if cur_version > len(registry):
478
- raise DowngradeDetected(
479
- db_label, db_version=cur_version, max_known=len(registry),
480
- )
652
+ if recover_version_ahead:
653
+ # cache.db is re-derivable — heal in place instead of bricking (#145).
654
+ info = _recover_version_ahead(conn, registry, db_label)
655
+ eprint(
656
+ f"cctally: {db_label} was ahead (v{info['reverted_from']} > "
657
+ f"known v{len(registry)}); trimmed unknown migration state and "
658
+ f"reconciled to the known head (cache is re-derivable). Run "
659
+ f"'cctally cache-sync --rebuild' for a full rebuild."
660
+ )
661
+ cur_version = conn.execute("PRAGMA user_version").fetchone()[0]
662
+ # common case: cur_version == len(registry) → fast-path below.
663
+ # adversarial (a known marker was missing): cur_version == 0 →
664
+ # falls through to the normal pending-loop, which reconciles.
665
+ # NOTE: on that adversarial fall-through against a prod cache.db,
666
+ # _recover_version_ahead has ALREADY committed user_version=0, so
667
+ # the prod-migration guard below ("user_version provably unchanged")
668
+ # is reached with user_version already lowered. That is acceptable
669
+ # ONLY because heal opts in for cache.db, which is re-derivable — a
670
+ # reset-to-0 then ProdMigrationRefused just makes the next legit
671
+ # open re-walk. The guard's "unchanged" invariant holds for stats.db
672
+ # (never heals) and for the non-heal path.
673
+ else:
674
+ raise DowngradeDetected(
675
+ db_label, db_version=cur_version, max_known=len(registry),
676
+ )
481
677
  if cur_version == len(registry):
482
678
  # When the registry is currently empty (today's cache.db case),
483
679
  # still leave the schema_migrations table behind so a later
@@ -504,6 +700,19 @@ def _run_pending_migrations(
504
700
  )
505
701
  return # fast path
506
702
 
703
+ # Prod-migration guard (issue #142): a git-checkout binary must not
704
+ # forward-migrate the real prod data dir — that bumps user_version past
705
+ # what the installed release knows and bricks it with DowngradeDetected.
706
+ # We are past the two early returns, so cur_version < len(registry): there
707
+ # ARE pending migrations that would advance user_version. Refuse BEFORE
708
+ # bootstrap-rename / fresh-install detection / any marker write, so
709
+ # user_version is provably unchanged. Connection-scoped so it only fires
710
+ # on the real prod DB files, never on :memory:/temp/scratch test conns.
711
+ if _would_block_prod_migration(conn):
712
+ raise ProdMigrationRefused(
713
+ db_label, _first_pending_migration_name(conn, registry, cur_version)
714
+ )
715
+
507
716
  # Track whether schema_migrations existed before this open so we can
508
717
  # detect the fresh-install path. After bootstrap, even a "first time
509
718
  # opened with framework code" DB might have rows from the legacy
@@ -606,6 +815,11 @@ def _run_pending_migrations(
606
815
  "weekly_cost_snapshots",
607
816
  "five_hour_blocks",
608
817
  "percent_milestones",
818
+ # budget milestone tables tracked by 011 (#137); empty on fresh
819
+ # installs, so this only guards a hand-dropped schema_migrations DB.
820
+ "budget_milestones",
821
+ "projected_milestones",
822
+ "codex_budget_milestones",
609
823
  ),
610
824
  "cache.db": ("session_entries",),
611
825
  }.get(db_label, ())
@@ -636,6 +850,7 @@ def _run_pending_migrations(
636
850
  qualified_name = f"{db_label}:{m.name}"
637
851
  try:
638
852
  m.handler(conn)
853
+ _stamp_applied(conn, m.name, now_iso) # central stamp (#140)
639
854
  _clear_migration_error_log_entries(qualified_name)
640
855
  applied.add(m.name)
641
856
  except MigrationGateNotMet as gate_exc:
@@ -713,25 +928,18 @@ def _backfill_five_hour_block_models(conn: sqlite3.Connection) -> None:
713
928
  `DELETE FROM five_hour_blocks` followed by re-backfill doesn't
714
929
  leave duplicates.
715
930
 
716
- Always inserts the schema_migrations marker at the end (inside the
717
- same transaction) so the gate closes regardless of how many child
718
- rows were written empty `session_entries` for a block (real
719
- users with API/web-only blocks) yields zero child rows but MUST
720
- still close the gate (regression scenario Q2).
931
+ The gate closes regardless of how many child rows were written —
932
+ empty `session_entries` for a block (real users with API/web-only
933
+ blocks) yields zero child rows but MUST still be marked applied
934
+ (regression scenario Q2). The dispatcher central-stamps the
935
+ schema_migrations marker on this handler's clean return (#140).
721
936
  """
722
937
  # Empty-table fast path: with no parent five_hour_blocks rows, this
723
- # backfill has nothing to do. We still must close the gate so the
724
- # dispatcher sees us as applied. INSERT OR IGNORE the marker and
725
- # return (replaces the prior `has_blocks` outer gate from the
726
- # pre-framework era).
938
+ # backfill has nothing to do. Return cleanly so the dispatcher
939
+ # central-stamps us as applied (#140) replaces the prior
940
+ # `has_blocks` outer gate from the pre-framework era.
727
941
  if not conn.execute("SELECT 1 FROM five_hour_blocks LIMIT 1").fetchone():
728
- conn.execute(
729
- "INSERT OR IGNORE INTO schema_migrations (name, applied_at_utc) VALUES (?, ?)",
730
- ("001_five_hour_block_models_backfill_v1", now_utc_iso()),
731
- )
732
- conn.commit()
733
942
  return
734
- now_iso = now_utc_iso()
735
943
  conn.execute("BEGIN")
736
944
  try:
737
945
  # Defensive: clean up any orphans from a prior parent rebuild.
@@ -792,15 +1000,6 @@ def _backfill_five_hour_block_models(conn: sqlite3.Connection) -> None:
792
1000
  ],
793
1001
  )
794
1002
 
795
- # Mark migration done — closes the gate even when zero rows
796
- # were written (empty session_entries / API-only blocks).
797
- conn.execute(
798
- """
799
- INSERT OR IGNORE INTO schema_migrations (name, applied_at_utc)
800
- VALUES (?, ?)
801
- """,
802
- ("001_five_hour_block_models_backfill_v1", now_iso),
803
- )
804
1003
  conn.commit()
805
1004
  except Exception:
806
1005
  conn.rollback()
@@ -814,24 +1013,17 @@ def _backfill_five_hour_block_projects(conn: sqlite3.Connection) -> None:
814
1013
  """Upgrade-user backfill of five_hour_block_projects.
815
1014
 
816
1015
  Mirror of _backfill_five_hour_block_models but writes by_project
817
- buckets and inserts the projects-side schema_migrations marker.
818
- Cleans up orphan child rows defensively before the main loop.
819
- Marker insert fires regardless of child-row count so the gate
820
- closes for empty-row backfills too.
1016
+ buckets. Cleans up orphan child rows defensively before the main
1017
+ loop. The dispatcher central-stamps the projects-side
1018
+ schema_migrations marker on clean return (#140), so the gate closes
1019
+ for empty-row backfills too.
821
1020
  """
822
1021
  # Empty-table fast path: with no parent five_hour_blocks rows, this
823
- # backfill has nothing to do. We still must close the gate so the
824
- # dispatcher sees us as applied. INSERT OR IGNORE the marker and
825
- # return (replaces the prior `has_blocks` outer gate from the
826
- # pre-framework era).
1022
+ # backfill has nothing to do. Return cleanly so the dispatcher
1023
+ # central-stamps us as applied (#140) replaces the prior
1024
+ # `has_blocks` outer gate from the pre-framework era.
827
1025
  if not conn.execute("SELECT 1 FROM five_hour_blocks LIMIT 1").fetchone():
828
- conn.execute(
829
- "INSERT OR IGNORE INTO schema_migrations (name, applied_at_utc) VALUES (?, ?)",
830
- ("002_five_hour_block_projects_backfill_v1", now_utc_iso()),
831
- )
832
- conn.commit()
833
1026
  return
834
- now_iso = now_utc_iso()
835
1027
  conn.execute("BEGIN")
836
1028
  try:
837
1029
  conn.execute(
@@ -888,13 +1080,6 @@ def _backfill_five_hour_block_projects(conn: sqlite3.Connection) -> None:
888
1080
  ],
889
1081
  )
890
1082
 
891
- conn.execute(
892
- """
893
- INSERT OR IGNORE INTO schema_migrations (name, applied_at_utc)
894
- VALUES (?, ?)
895
- """,
896
- ("002_five_hour_block_projects_backfill_v1", now_iso),
897
- )
898
1083
  conn.commit()
899
1084
  except Exception:
900
1085
  conn.rollback()
@@ -1428,13 +1613,6 @@ def _migration_merge_5h_block_duplicates_v1(conn: sqlite3.Connection) -> None:
1428
1613
  dropped_ids,
1429
1614
  )
1430
1615
 
1431
- conn.execute(
1432
- """
1433
- INSERT OR IGNORE INTO schema_migrations (name, applied_at_utc)
1434
- VALUES (?, ?)
1435
- """,
1436
- ("003_merge_5h_block_duplicates_v1", now_utc_iso()),
1437
- )
1438
1616
  conn.commit()
1439
1617
  except Exception:
1440
1618
  conn.rollback()
@@ -1483,7 +1661,8 @@ def _migration_heal_forked_week_start_date_buckets(conn: sqlite3.Connection) ->
1483
1661
  external state (no ``cache.db`` open, no JSONL walk).
1484
1662
 
1485
1663
  Empty-table fast path: when none of the three tables has a forked
1486
- row, INSERT the marker and return without opening a transaction.
1664
+ row, return without opening a transaction (the dispatcher
1665
+ central-stamps the marker on clean return, #140).
1487
1666
 
1488
1667
  Spec hook: paired regression test in
1489
1668
  ``tests/test_heal_forked_week_start_date_buckets.py``.
@@ -1491,7 +1670,7 @@ def _migration_heal_forked_week_start_date_buckets(conn: sqlite3.Connection) ->
1491
1670
  # Empty-fork fast path. UNION ALL across the three tables; one
1492
1671
  # SELECT 1 / LIMIT 1 short-circuits on the first violator. When
1493
1672
  # zero rows are forked, skip the BEGIN/UPDATE block entirely and
1494
- # just stamp the marker.
1673
+ # return (the dispatcher central-stamps the marker, #140).
1495
1674
  has_fork_row = conn.execute(
1496
1675
  """
1497
1676
  SELECT 1 FROM (
@@ -1510,12 +1689,6 @@ def _migration_heal_forked_week_start_date_buckets(conn: sqlite3.Connection) ->
1510
1689
  """
1511
1690
  ).fetchone()
1512
1691
  if not has_fork_row:
1513
- conn.execute(
1514
- "INSERT OR IGNORE INTO schema_migrations (name, applied_at_utc) "
1515
- "VALUES (?, ?)",
1516
- ("004_heal_forked_week_start_date_buckets", now_utc_iso()),
1517
- )
1518
- conn.commit()
1519
1692
  return
1520
1693
 
1521
1694
  conn.execute("BEGIN")
@@ -1571,13 +1744,6 @@ def _migration_heal_forked_week_start_date_buckets(conn: sqlite3.Connection) ->
1571
1744
  """
1572
1745
  )
1573
1746
 
1574
- conn.execute(
1575
- """
1576
- INSERT OR IGNORE INTO schema_migrations (name, applied_at_utc)
1577
- VALUES (?, ?)
1578
- """,
1579
- ("004_heal_forked_week_start_date_buckets", now_utc_iso()),
1580
- )
1581
1747
  conn.commit()
1582
1748
  except Exception:
1583
1749
  conn.rollback()
@@ -1608,23 +1774,18 @@ def _migration_percent_milestones_reset_event_id(conn: sqlite3.Connection) -> No
1608
1774
 
1609
1775
  Idempotent: a second invocation finds the column already present
1610
1776
  and returns. Empty-table fast path: when the column is already
1611
- present the marker still gets stamped — no schema edit needed.
1777
+ present this handler is a no-op — no schema edit needed (the
1778
+ dispatcher central-stamps the marker on clean return, #140).
1612
1779
  """
1613
1780
  # Fast-path probe: column already present means a prior run of this
1614
1781
  # migration (or a fresh-install fast-stamp from the dispatcher that
1615
1782
  # already picked up the new live-schema CREATE TABLE) has done the
1616
- # work. Just stamp the marker and return.
1783
+ # work. Return; the dispatcher central-stamps the marker (#140).
1617
1784
  cols = {
1618
1785
  str(r[1])
1619
1786
  for r in conn.execute("PRAGMA table_info(percent_milestones)").fetchall()
1620
1787
  }
1621
1788
  if "reset_event_id" in cols:
1622
- conn.execute(
1623
- "INSERT OR IGNORE INTO schema_migrations (name, applied_at_utc) "
1624
- "VALUES (?, ?)",
1625
- ("005_percent_milestones_reset_event_id", now_utc_iso()),
1626
- )
1627
- conn.commit()
1628
1789
  return
1629
1790
 
1630
1791
  conn.execute("BEGIN")
@@ -1681,11 +1842,6 @@ def _migration_percent_milestones_reset_event_id(conn: sqlite3.Connection) -> No
1681
1842
  """
1682
1843
  )
1683
1844
  conn.execute("DROP TABLE percent_milestones_old_005")
1684
- conn.execute(
1685
- "INSERT OR IGNORE INTO schema_migrations (name, applied_at_utc) "
1686
- "VALUES (?, ?)",
1687
- ("005_percent_milestones_reset_event_id", now_utc_iso()),
1688
- )
1689
1845
  conn.commit()
1690
1846
  except Exception:
1691
1847
  conn.rollback()
@@ -1722,26 +1878,18 @@ def _migration_five_hour_milestones_reset_event_id(conn: sqlite3.Connection) ->
1722
1878
  (fresh-install fast-stamp from the dispatcher because the live
1723
1879
  ``CREATE TABLE IF NOT EXISTS five_hour_milestones`` already carries
1724
1880
  the new shape — REQUIRED for fresh-install correctness per spec §3.2),
1725
- the marker still gets stamped — no schema edit needed.
1881
+ this handler is a no-op — no schema edit needed (the dispatcher
1882
+ central-stamps the marker on clean return, #140).
1726
1883
  """
1727
1884
  # Fast-path probe: column already present means a prior run of this
1728
1885
  # migration (or a fresh-install fast-stamp from the dispatcher that
1729
1886
  # already picked up the new live-schema CREATE TABLE) has done the
1730
- # work. Just stamp the marker and return. The marker INSERT runs in
1731
- # SQLite's implicit transaction (auto-opened by the write, closed by
1732
- # ``commit()`` — same shape as migration 005's fast path); no explicit
1733
- # ``BEGIN`` is needed for a single-statement DML.
1887
+ # work. Return; the dispatcher central-stamps the marker (#140).
1734
1888
  cols = {
1735
1889
  str(r[1])
1736
1890
  for r in conn.execute("PRAGMA table_info(five_hour_milestones)").fetchall()
1737
1891
  }
1738
1892
  if "reset_event_id" in cols:
1739
- conn.execute(
1740
- "INSERT OR IGNORE INTO schema_migrations (name, applied_at_utc) "
1741
- "VALUES (?, ?)",
1742
- ("006_five_hour_milestones_reset_event_id", now_utc_iso()),
1743
- )
1744
- conn.commit()
1745
1893
  return
1746
1894
 
1747
1895
  conn.execute("BEGIN")
@@ -1813,11 +1961,6 @@ def _migration_five_hour_milestones_reset_event_id(conn: sqlite3.Connection) ->
1813
1961
  """
1814
1962
  )
1815
1963
  conn.execute("DROP TABLE five_hour_milestones_old_006")
1816
- conn.execute(
1817
- "INSERT OR IGNORE INTO schema_migrations (name, applied_at_utc) "
1818
- "VALUES (?, ?)",
1819
- ("006_five_hour_milestones_reset_event_id", now_utc_iso()),
1820
- )
1821
1964
  conn.commit()
1822
1965
  except Exception:
1823
1966
  conn.rollback()
@@ -1861,22 +2004,16 @@ def _migration_observed_pre_credit_pct(conn: sqlite3.Connection) -> None:
1861
2004
 
1862
2005
  Idempotent: a second invocation finds the column already present
1863
2006
  and returns. Empty-column fast path: when the live CREATE TABLE
1864
- already carries the column (fresh install), stamp the marker and
1865
- return without an ALTER. Simple ADD COLUMN — no UNIQUE constraint
1866
- change, so no rename-recreate-copy needed (contrast migrations
1867
- 005 / 006).
2007
+ already carries the column (fresh install), return without an ALTER
2008
+ (the dispatcher central-stamps the marker on clean return, #140).
2009
+ Simple ADD COLUMN — no UNIQUE constraint change, so no
2010
+ rename-recreate-copy needed (contrast migrations 005 / 006).
1868
2011
  """
1869
2012
  cols = {
1870
2013
  str(r[1])
1871
2014
  for r in conn.execute("PRAGMA table_info(week_reset_events)").fetchall()
1872
2015
  }
1873
2016
  if "observed_pre_credit_pct" in cols:
1874
- conn.execute(
1875
- "INSERT OR IGNORE INTO schema_migrations (name, applied_at_utc) "
1876
- "VALUES (?, ?)",
1877
- ("007_observed_pre_credit_pct", now_utc_iso()),
1878
- )
1879
- conn.commit()
1880
2017
  return
1881
2018
 
1882
2019
  conn.execute("BEGIN")
@@ -1885,11 +2022,6 @@ def _migration_observed_pre_credit_pct(conn: sqlite3.Connection) -> None:
1885
2022
  "ALTER TABLE week_reset_events "
1886
2023
  "ADD COLUMN observed_pre_credit_pct REAL"
1887
2024
  )
1888
- conn.execute(
1889
- "INSERT OR IGNORE INTO schema_migrations (name, applied_at_utc) "
1890
- "VALUES (?, ?)",
1891
- ("007_observed_pre_credit_pct", now_utc_iso()),
1892
- )
1893
2025
  conn.commit()
1894
2026
  except Exception:
1895
2027
  conn.rollback()
@@ -2182,6 +2314,34 @@ def _apply_cache_schema(conn: sqlite3.Connection) -> None:
2182
2314
  ON session_entries(msg_id, req_id)
2183
2315
  WHERE msg_id IS NOT NULL AND req_id IS NOT NULL;
2184
2316
 
2317
+ CREATE TABLE IF NOT EXISTS conversation_messages (
2318
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
2319
+ session_id TEXT,
2320
+ uuid TEXT,
2321
+ parent_uuid TEXT,
2322
+ source_path TEXT NOT NULL,
2323
+ byte_offset INTEGER NOT NULL,
2324
+ timestamp_utc TEXT,
2325
+ entry_type TEXT NOT NULL,
2326
+ text TEXT NOT NULL DEFAULT '',
2327
+ blocks_json TEXT NOT NULL DEFAULT '[]',
2328
+ model TEXT,
2329
+ msg_id TEXT,
2330
+ req_id TEXT,
2331
+ cwd TEXT,
2332
+ git_branch TEXT,
2333
+ is_sidechain INTEGER NOT NULL DEFAULT 0,
2334
+ UNIQUE(source_path, byte_offset)
2335
+ );
2336
+ CREATE INDEX IF NOT EXISTS idx_conv_session_ts
2337
+ ON conversation_messages(session_id, timestamp_utc, id);
2338
+ CREATE INDEX IF NOT EXISTS idx_conv_session_uuid
2339
+ ON conversation_messages(session_id, uuid);
2340
+ CREATE INDEX IF NOT EXISTS idx_conv_source
2341
+ ON conversation_messages(source_path);
2342
+ CREATE INDEX IF NOT EXISTS idx_conv_turnkey
2343
+ ON conversation_messages(msg_id, req_id);
2344
+
2185
2345
  CREATE TABLE IF NOT EXISTS codex_session_files (
2186
2346
  path TEXT PRIMARY KEY,
2187
2347
  size_bytes INTEGER NOT NULL,
@@ -2227,6 +2387,170 @@ def _apply_cache_schema(conn: sqlite3.Connection) -> None:
2227
2387
  "CREATE INDEX IF NOT EXISTS idx_session_files_session_id "
2228
2388
  "ON session_files(session_id)"
2229
2389
  )
2390
+ # FTS5 is optional in the sqlite build. Create the external-content index +
2391
+ # sync triggers as separate executes wrapped in one try; on failure create
2392
+ # NEITHER the table NOR the triggers (a trigger referencing a missing table
2393
+ # would itself error), set a persisted flag, and let search fall back to
2394
+ # LIKE. Spec §1. Idempotent (IF NOT EXISTS).
2395
+ if _fts5_available(conn):
2396
+ try:
2397
+ # Recovery (spec §1/P2): if a PRIOR run marked FTS unavailable,
2398
+ # conversation_messages rows were ingested (by sync_cache / the
2399
+ # backfill) WITHOUT the AFTER INSERT trigger ever indexing them —
2400
+ # or a prior downgrade dropped the index while leaving the base
2401
+ # rows. Detect that BEFORE clearing the flag so we can rebuild the
2402
+ # external-content index from conversation_messages below. A fresh
2403
+ # install never sets the flag, so this stays False and no rebuild
2404
+ # runs (the triggers index rows incrementally as they arrive).
2405
+ recovering = conn.execute(
2406
+ "SELECT 1 FROM cache_meta WHERE key='fts5_unavailable'"
2407
+ ).fetchone() is not None
2408
+ conn.execute(
2409
+ "CREATE VIRTUAL TABLE IF NOT EXISTS conversation_fts "
2410
+ "USING fts5(text, content='conversation_messages', content_rowid='id')")
2411
+ # Trigger DDL lives in ONE place (_CONV_FTS_TRIGGER_DDL) so this
2412
+ # initial create and the #138 storm-free full-clear
2413
+ # (clear_conversation_messages, which drops + recreates the
2414
+ # triggers) can never drift.
2415
+ _create_conversation_fts_triggers(conn)
2416
+ if recovering:
2417
+ # Repopulate the freshly-(re)created index from the base table
2418
+ # so pre-recovery history is searchable. Cheap no-op when
2419
+ # conversation_messages is empty.
2420
+ conn.execute(
2421
+ "INSERT INTO conversation_fts(conversation_fts) VALUES('rebuild')")
2422
+ conn.execute("DELETE FROM cache_meta WHERE key='fts5_unavailable'")
2423
+ except sqlite3.OperationalError:
2424
+ # partial create cleanup, then mark unavailable
2425
+ _drop_conversation_fts_triggers(conn)
2426
+ try:
2427
+ conn.execute("DROP TABLE IF EXISTS conversation_fts")
2428
+ except sqlite3.OperationalError:
2429
+ pass
2430
+ _set_cache_meta(conn, "fts5_unavailable", "1")
2431
+ else:
2432
+ # FTS5 is unavailable on THIS sqlite build. If a prior (FTS-capable)
2433
+ # run created the sync triggers, they now reference an unusable
2434
+ # conversation_fts and EVERY INSERT into conversation_messages would
2435
+ # raise "no such module: fts5". Because the conversation INSERT shares
2436
+ # sync_cache's per-file write transaction with session_entries, that
2437
+ # rollback would discard COST ingest too. Drop the orphan triggers so
2438
+ # writes succeed under the LIKE fallback. (The conversation_fts vtable
2439
+ # itself can't be DROPped without the fts5 module, but with no triggers
2440
+ # nothing writes to it.)
2441
+ _drop_conversation_fts_triggers(conn)
2442
+ _set_cache_meta(conn, "fts5_unavailable", "1")
2443
+ # The FTS branch above issues DML (DELETE/INSERT on cache_meta) which opens
2444
+ # an implicit transaction under sqlite3's legacy autocommit mode. Close it
2445
+ # so the migration dispatcher's subsequent ``conn.execute("BEGIN")`` starts
2446
+ # cleanly (mirrors the bootstrap-rename commit envelope rationale).
2447
+ conn.commit()
2448
+
2449
+
2450
+ def _fts5_available(conn: sqlite3.Connection) -> bool:
2451
+ """True if this sqlite build can create an FTS5 table. Cheap probe on a
2452
+ temp table that is created then dropped. Hidden test seam: tests monkeypatch
2453
+ this to False to exercise the LIKE fallback."""
2454
+ try:
2455
+ conn.execute("CREATE VIRTUAL TABLE IF NOT EXISTS _fts5_probe USING fts5(x)")
2456
+ conn.execute("DROP TABLE IF EXISTS _fts5_probe")
2457
+ return True
2458
+ except sqlite3.OperationalError:
2459
+ return False
2460
+
2461
+
2462
+ def _set_cache_meta(conn: sqlite3.Connection, key: str, value: str) -> None:
2463
+ conn.execute(
2464
+ "CREATE TABLE IF NOT EXISTS cache_meta (key TEXT PRIMARY KEY, value TEXT)")
2465
+ conn.execute("INSERT INTO cache_meta(key, value) VALUES(?, ?) "
2466
+ "ON CONFLICT(key) DO UPDATE SET value=excluded.value", (key, value))
2467
+
2468
+
2469
# Sync triggers that keep the external-content FTS5 index (conversation_fts)
# in step with conversation_messages. They are declared exactly ONCE, here, so
# the initial create in _apply_cache_schema and the #138 storm-free full-clear
# in clear_conversation_messages (which drops + recreates them) can never
# drift apart. conv_fts_ad / conv_fts_au use the external-content `'delete'`
# idiom.
_CONV_FTS_TRIGGER_DDL = (
    # AFTER INSERT: index the new row's text.
    (
        "CREATE TRIGGER IF NOT EXISTS conv_fts_ai "
        "AFTER INSERT ON conversation_messages "
        "BEGIN "
        "INSERT INTO conversation_fts(rowid, text) VALUES (new.id, new.text); "
        "END"
    ),
    # AFTER DELETE: remove the old row's postings.
    (
        "CREATE TRIGGER IF NOT EXISTS conv_fts_ad "
        "AFTER DELETE ON conversation_messages "
        "BEGIN "
        "INSERT INTO conversation_fts(conversation_fts, rowid, text) "
        "VALUES('delete', old.id, old.text); "
        "END"
    ),
    # AFTER UPDATE OF text: remove the old postings, then index the new text.
    (
        "CREATE TRIGGER IF NOT EXISTS conv_fts_au "
        "AFTER UPDATE OF text ON conversation_messages "
        "BEGIN "
        "INSERT INTO conversation_fts(conversation_fts, rowid, text) "
        "VALUES('delete', old.id, old.text); "
        "INSERT INTO conversation_fts(rowid, text) VALUES (new.id, new.text); "
        "END"
    ),
)
# DROP TRIGGER only needs the name (the body is irrelevant). The reverse order
# is cosmetic — order doesn't matter for independent triggers.
_CONV_FTS_TRIGGER_NAMES = (
    "conv_fts_au",
    "conv_fts_ad",
    "conv_fts_ai",
)
2487
+
2488
+
2489
def _create_conversation_fts_triggers(conn: sqlite3.Connection) -> None:
    """Install the three external-content FTS5 sync triggers on ``conn``.

    Executes every statement in ``_CONV_FTS_TRIGGER_DDL`` in order. Each one
    is ``CREATE TRIGGER IF NOT EXISTS``, so calling this repeatedly is
    harmless. It is the single place the trigger DDL is applied from, shared
    by ``_apply_cache_schema`` and ``clear_conversation_messages`` (#138).

    Precondition: the caller must have already created ``conversation_fts``.
    """
    for trigger_sql in _CONV_FTS_TRIGGER_DDL:
        conn.execute(trigger_sql)
2496
+
2497
+
2498
def _drop_conversation_fts_triggers(conn: sqlite3.Connection) -> None:
    """Remove the three FTS5 sync triggers named in ``_CONV_FTS_TRIGGER_NAMES``.

    Every drop is ``DROP TRIGGER IF EXISTS``, so a missing trigger is not an
    error. This is deliberately best-effort: an ``sqlite3.OperationalError``
    from one statement is swallowed and the remaining triggers are still
    attempted, so a partial or absent trigger set (e.g. an FTS-unavailable
    build) is tolerated.
    """
    for trigger_name in _CONV_FTS_TRIGGER_NAMES:
        drop_sql = f"DROP TRIGGER IF EXISTS {trigger_name}"
        try:
            conn.execute(drop_sql)
        except sqlite3.OperationalError:
            # Best-effort by design: move on to the next trigger.
            continue
2507
+
2508
+
2509
def clear_conversation_messages(conn: sqlite3.Connection) -> None:
    """Empty ``conversation_messages`` and its FTS index without a trigger storm (#138).

    Why not a plain bulk delete: ``DELETE FROM conversation_messages`` with the
    sync triggers installed fires ``conv_fts_ad`` once per row (each an FTS5
    ``'delete'`` shadow-write) and also forfeits SQLite's no-trigger truncate
    fast-path, stalling the held ``cache.db.lock`` far longer than the
    ``session_entries`` clear alone.

    Sequence used when FTS5 is in play:

        1. drop all 3 conv_fts triggers
        2. DELETE FROM conversation_messages   (truncate fast-path applies)
        3. INSERT INTO conversation_fts(conversation_fts) VALUES('delete-all')
           (resets the external-content index)
        4. recreate all 3 triggers

    The order is load-bearing. Clearing the FTS index while the per-row delete
    trigger is still live makes the base ``DELETE`` write ``'delete'`` postings
    against already-gone rows and CORRUPTS the index (``database disk image is
    malformed``; verified on SQLite 3.53.1). With the triggers dropped first
    the base ``DELETE`` does not touch the index at all, the explicit
    ``'delete-all'`` resets it cleanly, and ``integrity-check`` still passes.

    Transactions: this runs inside the caller's open transaction (the held
    ``cache.db.lock``); the caller owns the commit.

    FTS5-unavailable fallback: when ``cache_meta.fts5_unavailable`` is set
    (no triggers, no usable vtable) only the plain base ``DELETE`` runs —
    there are no triggers to storm and a ``'delete-all'`` would error on the
    absent vtable.
    """
    try:
        flag_row = conn.execute(
            "SELECT 1 FROM cache_meta WHERE key='fts5_unavailable'"
        ).fetchone()
    except sqlite3.OperationalError:
        # No cache_meta yet — only possible before the schema is applied, in
        # which case there is no FTS vtable/triggers either. Bias to the plain
        # DELETE: it can't storm what doesn't exist and won't touch a vtable.
        fts_in_play = False
    else:
        fts_in_play = flag_row is None

    if fts_in_play:
        _drop_conversation_fts_triggers(conn)

    conn.execute("DELETE FROM conversation_messages")
    if not fts_in_play:
        return

    conn.execute("INSERT INTO conversation_fts(conversation_fts) VALUES('delete-all')")
    _create_conversation_fts_triggers(conn)
2230
2554
 
2231
2555
 
2232
2556
  def _eagerly_apply_cache_migrations() -> None:
@@ -2338,6 +2662,7 @@ def _eagerly_apply_cache_migrations() -> None:
2338
2662
  # if 001 has already applied, this is a fast-path return.
2339
2663
  _run_pending_migrations(
2340
2664
  conn, registry=_CACHE_MIGRATIONS, db_label="cache.db",
2665
+ recover_version_ahead=True,
2341
2666
  )
2342
2667
  finally:
2343
2668
  # Close immediately so the WAL writer lock (if any) is
@@ -2627,6 +2952,45 @@ def _001_dedup_highest_wins_locked(conn: sqlite3.Connection) -> None:
2627
2952
  raise
2628
2953
 
2629
2954
 
2955
+ # === Region 7c: Cache migration 002_conversation_messages_backfill ===
2956
+
2957
@cache_migration("002_conversation_messages_backfill")
def _002_conversation_messages_backfill(conn: sqlite3.Connection) -> None:
    """Flag the ``conversation_messages`` backfill as pending (Plan 1 Task 5).

    The deferral itself is issue #139.

    When this runs: the table, its indexes and the FTS objects are already
    created by ``_apply_cache_schema``. On a fresh install the dispatcher
    therefore stamps THIS migration without running it, since there is no
    history to populate. The handler body executes only on an EXISTING install
    (``session_entries`` non-empty), whose message index must be populated
    from the full JSONL history.

    Why it only sets a flag: walking that history INLINE blocked the
    triggering command until the whole (potentially ~1M-line) backfill
    finished. That included a stats-only ``cctally report``, which fires the
    cache dispatcher through ``_eagerly_apply_cache_migrations`` but never
    opens cache.db for reads (issue #139). So this handler just records the
    ``conversation_backfill_pending`` cache_meta flag and returns in
    microseconds. The real offset-0 backfill happens on the next
    ``sync_cache``, which already holds the ``cache.db.lock`` flock and owns
    the walker (see ``_cctally_cache.sync_cache``). A cache-consuming command
    — or, most often, the background ``hook-tick`` — absorbs the one-time walk
    where the latency is expected/invisible. Because the handler no longer
    touches JSONL, it needs no flock and cannot contend with a concurrent
    sync, so the old non-blocking-flock + ``MigrationGateNotMet`` defer dance
    is gone.

    Stamping: the handler does NOT self-stamp its ``schema_migrations``
    marker. The dispatcher owns the central stamp on the existing-install
    success path (issue #140), calling ``_stamp_applied(conn, m.name)`` right
    after this handler returns cleanly. The migration therefore persists and
    is never re-walked (re-setting the flag) on a later ``open_cache_db()``.
    This handler commits only the cache_meta flag.

    Flag lifecycle: the first ``sync_cache`` that sees the flag consumes and
    clears it (idempotent + crash-resumable there). A ``cache-sync --rebuild``
    clears it directly, since its normal offset-0 walk repopulates the index.
    """
    flag_key, flag_value = "conversation_backfill_pending", "1"
    _set_cache_meta(conn, flag_key, flag_value)
    conn.commit()
2992
+
2993
+
2630
2994
  # === Region 7d: Stats migration 008_recompute_weekly_cost_snapshots_dedup_fix ===
2631
2995
 
2632
2996
  @stats_migration("008_recompute_weekly_cost_snapshots_dedup_fix")
@@ -2789,17 +3153,6 @@ def _008_recompute_weekly_cost_snapshots_dedup_fix(
2789
3153
  "SET cost_usd = ? WHERE id = ?",
2790
3154
  (total, snap_id),
2791
3155
  )
2792
- # D3 — INSERT OR IGNORE for race safety. Mirrors the
2793
- # convention applied to every other production migration
2794
- # and the matching change to cache migration 001.
2795
- conn.execute(
2796
- "INSERT OR IGNORE INTO schema_migrations "
2797
- "(name, applied_at_utc) VALUES (?, ?)",
2798
- (
2799
- "008_recompute_weekly_cost_snapshots_dedup_fix",
2800
- now_utc_iso(),
2801
- ),
2802
- )
2803
3156
  conn.execute("COMMIT")
2804
3157
  except Exception:
2805
3158
  conn.execute("ROLLBACK")
@@ -3217,14 +3570,6 @@ def _009_recompute_five_hour_blocks_dedup_fix(
3217
3570
  ],
3218
3571
  )
3219
3572
 
3220
- conn.execute(
3221
- "INSERT OR IGNORE INTO schema_migrations "
3222
- "(name, applied_at_utc) VALUES (?, ?)",
3223
- (
3224
- "009_recompute_five_hour_blocks_dedup_fix",
3225
- now_utc_iso(),
3226
- ),
3227
- )
3228
3573
  conn.execute("COMMIT")
3229
3574
  except Exception:
3230
3575
  conn.execute("ROLLBACK")
@@ -3407,14 +3752,6 @@ def _010_recompute_percent_milestones_dedup_fix(
3407
3752
  (cumulative, marginal, mid),
3408
3753
  )
3409
3754
 
3410
- conn.execute(
3411
- "INSERT OR IGNORE INTO schema_migrations "
3412
- "(name, applied_at_utc) VALUES (?, ?)",
3413
- (
3414
- "010_recompute_percent_milestones_dedup_fix",
3415
- now_utc_iso(),
3416
- ),
3417
- )
3418
3755
  conn.execute("COMMIT")
3419
3756
  except Exception:
3420
3757
  conn.execute("ROLLBACK")
@@ -3423,6 +3760,208 @@ def _010_recompute_percent_milestones_dedup_fix(
3423
3760
  cache_ro.close()
3424
3761
 
3425
3762
 
3763
@stats_migration("011_budget_milestone_period_keys")
def _migration_budget_milestone_period_keys(conn: sqlite3.Connection) -> None:
    """Give the three budget milestone tables a write-once ``period`` column
    that also takes part in each UNIQUE key (issue #137).

    Resulting keys:

    ``budget_milestones``       -> UNIQUE(week_start_at, period, threshold)
    ``codex_budget_milestones`` -> UNIQUE(period_start_at, period, threshold)
    ``projected_milestones``    -> UNIQUE(week_start_at, period, metric, threshold)

    This fixes (1) stale dashboard period labels and (2) the calendar-week /
    calendar-month dedup collision when the 1st of the month lands on the
    configured week-start weekday.

    Historical rows get ``period = NULL`` (the "pre-011 unknown period"
    sentinel) rather than a fabricated value, honoring write-once milestones.
    The firing pre-probe matches ``period = ? OR period IS NULL`` so
    unknown-period rows never re-fire (no spurious upgrade alert), and the
    dashboard COALESCEs NULL to the vendor-default noun.

    SQLite cannot ALTER an inline UNIQUE in place, hence the
    rename-recreate-copy idiom (same as migration 005). Idempotent: a table
    that already has ``period`` (fresh install where the live CREATE made the
    new shape, or a prior run) is skipped. When nothing needs rebuilding the
    handler simply returns and the dispatcher central-stamps the marker (#140).

    Raises:
        Any exception from the rebuild statements is re-raised after the
        transaction has been rolled back.
    """
    # Each entry: (table name, CREATE statement for the new shape, columns
    # copied target<-source). ``period`` is left out of the copy list, so every
    # historical row lands with period = NULL.
    rebuild_specs = [
        (
            "budget_milestones",
            """
            CREATE TABLE budget_milestones (
                id              INTEGER PRIMARY KEY AUTOINCREMENT,
                week_start_at   TEXT    NOT NULL,
                period          TEXT,
                threshold       INTEGER NOT NULL,
                budget_usd      REAL    NOT NULL,
                spent_usd       REAL    NOT NULL,
                consumption_pct REAL    NOT NULL,
                crossed_at_utc  TEXT    NOT NULL,
                alerted_at      TEXT,
                UNIQUE(week_start_at, period, threshold)
            )
            """,
            "id, week_start_at, threshold, budget_usd, spent_usd, "
            "consumption_pct, crossed_at_utc, alerted_at",
        ),
        (
            "codex_budget_milestones",
            """
            CREATE TABLE codex_budget_milestones (
                id              INTEGER PRIMARY KEY AUTOINCREMENT,
                period_start_at TEXT    NOT NULL,
                period          TEXT,
                threshold       INTEGER NOT NULL,
                budget_usd      REAL    NOT NULL,
                spent_usd       REAL    NOT NULL,
                consumption_pct REAL    NOT NULL,
                crossed_at_utc  TEXT    NOT NULL,
                alerted_at      TEXT,
                UNIQUE(period_start_at, period, threshold)
            )
            """,
            "id, period_start_at, threshold, budget_usd, spent_usd, "
            "consumption_pct, crossed_at_utc, alerted_at",
        ),
        (
            "projected_milestones",
            """
            CREATE TABLE projected_milestones (
                id              INTEGER PRIMARY KEY AUTOINCREMENT,
                week_start_at   TEXT    NOT NULL,
                period          TEXT,
                metric          TEXT    NOT NULL,
                threshold       INTEGER NOT NULL,
                projected_value REAL    NOT NULL,
                denominator     REAL    NOT NULL,
                crossed_at_utc  TEXT    NOT NULL,
                alerted_at      TEXT,
                UNIQUE(week_start_at, period, metric, threshold)
            )
            """,
            "id, week_start_at, metric, threshold, projected_value, "
            "denominator, crossed_at_utc, alerted_at",
        ),
    ]

    def _exists(table_name: str) -> bool:
        found = conn.execute(
            "SELECT 1 FROM sqlite_master WHERE type='table' AND name=?",
            (table_name,),
        ).fetchone()
        return found is not None

    def _already_has_period(table_name: str) -> bool:
        # Column 1 of each PRAGMA table_info row is the column name.
        info_rows = conn.execute(f"PRAGMA table_info({table_name})").fetchall()
        return any(str(info[1]) == "period" for info in info_rows)

    # Work out what needs rebuilding BEFORE any transaction (no
    # deferred-BEGIN-then-read on stats.db — SQLITE_BUSY_SNAPSHOT,
    # migrations-gotchas.md). A spec table that does not exist (e.g.
    # codex_budget_milestones on a DB predating that feature, now that v012 no
    # longer live-creates it — #143) needs no period column.
    to_rebuild = []
    for spec in rebuild_specs:
        table_name = spec[0]
        if _exists(table_name) and not _already_has_period(table_name):
            to_rebuild.append(spec)

    if not to_rebuild:
        # Fresh install (live CREATE already made the new shape) or prior run.
        return

    # Take the write lock up front; the DDL below is the first write.
    conn.execute("BEGIN IMMEDIATE")
    try:
        for table_name, create_sql, copied_cols in to_rebuild:
            parked = f"{table_name}_old_011"
            conn.execute(f"ALTER TABLE {table_name} RENAME TO {parked}")
            conn.execute(create_sql)
            # period is absent from the column list => NULL for every old row.
            conn.execute(
                f"INSERT INTO {table_name} ({copied_cols}) "
                f"SELECT {copied_cols} FROM {parked}"
            )
            conn.execute(f"DROP TABLE {parked}")
        conn.commit()
    except Exception:
        conn.rollback()
        raise
3886
+
3887
+
3888
@stats_migration("012_unify_budget_milestones_vendor")
def _migration_unify_budget_milestones_vendor(conn: sqlite3.Connection) -> None:
    """Fold ``codex_budget_milestones`` into a vendor-tagged ``budget_milestones``
    (issue #143).

    ``budget_milestones`` (Claude, keyed ``week_start_at``) and
    ``codex_budget_milestones`` (Codex, keyed ``period_start_at``) are
    structurally identical modulo vendor + key-column name. The migration:

    * rebuilds ``budget_milestones`` with a ``vendor`` column and the key
      column renamed to ``period_start_at``;
    * copies the Claude rows (``week_start_at`` -> ``period_start_at``,
      ``vendor='claude'``) and the Codex rows (``vendor='codex'``);
    * drops the Codex table.

    History, ``alerted_at`` and ``period`` are preserved verbatim; the
    write-once ``period`` NULL sentinel is carried as-is. ``id`` is NOT copied
    (AUTOINCREMENT reassigns — the envelope/dispatch ids are composite strings,
    never the row PK).

    State machine (idempotent / partial-state safe): the Claude rebuild and
    the Codex absorb are guarded independently, so a retry after a
    crash-before-stamp (table already unified, Codex maybe gone) is a clean
    no-op or a Codex-only absorb. All reads happen BEFORE ``BEGIN IMMEDIATE``
    (SQLITE_BUSY_SNAPSHOT).

    Raises:
        Any exception from the rebuild/absorb statements is re-raised after
        the transaction has been rolled back.
    """
    # --- Reads first, outside any transaction ---------------------------
    # Column 1 of each PRAGMA table_info row is the column name.
    claude_columns = {
        str(info[1])
        for info in conn.execute("PRAGMA table_info(budget_milestones)").fetchall()
    }
    rebuild_claude = "vendor" not in claude_columns
    absorb_codex = conn.execute(
        "SELECT 1 FROM sqlite_master WHERE type='table' AND name=?",
        ("codex_budget_milestones",),
    ).fetchone() is not None

    if not (rebuild_claude or absorb_codex):
        return  # already unified, no Codex leftover -> dispatcher fast-stamps

    unified_table_sql = """
        CREATE TABLE budget_milestones (
            id              INTEGER PRIMARY KEY AUTOINCREMENT,
            vendor          TEXT    NOT NULL,
            period_start_at TEXT    NOT NULL,
            period          TEXT,
            threshold       INTEGER NOT NULL,
            budget_usd      REAL    NOT NULL,
            spent_usd       REAL    NOT NULL,
            consumption_pct REAL    NOT NULL,
            crossed_at_utc  TEXT    NOT NULL,
            alerted_at      TEXT,
            UNIQUE(vendor, period_start_at, period, threshold)
        )
    """
    # Columns whose names are identical in the source tables and the target.
    shared_cols = (
        "period, threshold, budget_usd, spent_usd, "
        "consumption_pct, crossed_at_utc, alerted_at"
    )
    target_cols = f"vendor, period_start_at, {shared_cols}"

    def _copy_rows(vendor: str, key_col: str, source_table: str) -> None:
        # Copy every row of source_table into the unified table, tagging it
        # with `vendor` and mapping its key column onto period_start_at.
        conn.execute(
            f"INSERT INTO budget_milestones ({target_cols}) "
            f"SELECT '{vendor}', {key_col}, {shared_cols} "
            f"FROM {source_table}"
        )

    conn.execute("BEGIN IMMEDIATE")
    try:
        if rebuild_claude:
            conn.execute(
                "ALTER TABLE budget_milestones RENAME TO budget_milestones_old_012"
            )
            conn.execute(unified_table_sql)
            _copy_rows("claude", "week_start_at", "budget_milestones_old_012")
            conn.execute("DROP TABLE budget_milestones_old_012")
        if absorb_codex:
            _copy_rows("codex", "period_start_at", "codex_budget_milestones")
            conn.execute("DROP TABLE codex_budget_milestones")
        conn.commit()
    except Exception:
        conn.rollback()
        raise
3963
+
3964
+
3426
3965
  # === Region 8: Test-only migration registration (was bin/cctally:12086-12140) ===
3427
3966
 
3428
3967
  # ──────────────────────────────────────────────────────────────────────
@@ -3447,39 +3986,22 @@ if os.environ.get("CCTALLY_MIGRATION_TEST_MODE") == "1":
3447
3986
  @stats_migration(_stats_test_name)
3448
3987
  def _test_migration_failure_injection(conn):
3449
3988
  """Test-only migration: raises RuntimeError when test_failure_trigger
3450
- table is non-empty; otherwise inserts the marker and succeeds."""
3989
+ table is non-empty; otherwise it is a no-op (the dispatcher stamps)."""
3451
3990
  if conn.execute(
3452
3991
  "SELECT 1 FROM sqlite_master WHERE type='table' AND name='test_failure_trigger'"
3453
3992
  ).fetchone() and conn.execute(
3454
3993
  "SELECT 1 FROM test_failure_trigger LIMIT 1"
3455
3994
  ).fetchone():
3456
3995
  raise RuntimeError("test failure injected")
3457
- conn.execute("BEGIN")
3458
- try:
3459
- conn.execute(
3460
- "INSERT OR IGNORE INTO schema_migrations (name, applied_at_utc) VALUES (?, ?)",
3461
- (_stats_test_name, now_utc_iso()),
3462
- )
3463
- conn.commit()
3464
- except Exception:
3465
- conn.rollback()
3466
- raise
3996
+ return
3467
3997
 
3468
3998
  _cache_test_seq = len(_CACHE_MIGRATIONS) + 1
3469
3999
  _cache_test_name = f"{_cache_test_seq:03d}_test_cache_migration"
3470
4000
 
3471
4001
  @cache_migration(_cache_test_name)
3472
4002
  def _test_cache_migration(conn):
3473
- conn.execute("BEGIN")
3474
- try:
3475
- conn.execute(
3476
- "INSERT OR IGNORE INTO schema_migrations (name, applied_at_utc) VALUES (?, ?)",
3477
- (_cache_test_name, now_utc_iso()),
3478
- )
3479
- conn.commit()
3480
- except Exception:
3481
- conn.rollback()
3482
- raise
4003
+ """Test-only cache migration: no-op body; the dispatcher stamps."""
4004
+ return
3483
4005
 
3484
4006
 
3485
4007
  # === Region 9: db CLI subcommands (was bin/cctally:19707-20043) ===
@@ -3821,3 +4343,74 @@ def cmd_db_unskip(args: argparse.Namespace) -> int:
3821
4343
  conn.close()
3822
4344
  print(f"Unskipped: {name} (will run on next open).")
3823
4345
  return 0
4346
+
4347
+
4348
def cmd_db_recover(args: argparse.Namespace) -> int:
    """Roll a version-ahead DB back to this binary's known schema head (#145).

    * ``--db cache``: cache.db is fully re-derivable, so it heals without
      ``--yes``.
    * ``--db stats``: stats.db holds non-re-derivable snapshots/milestones, so
      it requires an explicit ``--yes`` and may need a re-record afterward. It
      also honors the #146 prod guard: a dev/worktree binary refuses to
      trim+revert the real prod stats.db unless
      ``CCTALLY_ALLOW_PROD_MIGRATION=1``.

    The DB is opened with a raw ``sqlite3.connect`` instead of
    ``open_db()``/``open_cache_db()`` so the migration dispatcher is never
    re-triggered. Idempotent: a no-op when the DB is not ahead.

    Args:
        args: Parsed CLI namespace. ``args.db`` selects the database
            ("cache" | "stats"); ``args.yes`` (optional, defaults to False)
            confirms the stats.db recovery.

    Returns:
        0 when there was nothing to recover or the recovery succeeded; 2 when
        the stats.db recovery was refused (prod guard, or missing ``--yes``).
    """
    which = args.db  # "cache" | "stats"
    if which == "cache":
        label = "cache.db"
        path = _cctally_core.CACHE_DB_PATH
        registry = _CACHE_MIGRATIONS
    else:
        label = "stats.db"
        path = _cctally_core.DB_PATH
        registry = _STATS_MIGRATIONS
    is_stats = which == "stats"

    # Absent file → nothing to recover; do NOT connect (sqlite3.connect would
    # create an empty DB file — mirrors cmd_db_unskip).
    if not path.exists():
        print(f"cctally: {label} not present; nothing to recover.")
        return 0

    conn = sqlite3.connect(path)
    try:
        head = len(registry)
        cur_version = conn.execute("PRAGMA user_version").fetchone()[0]
        if cur_version <= head:
            print(
                f"cctally: {label} is at version {cur_version} "
                f"(≤ known {head}); nothing to recover."
            )
            return 0

        if is_stats:
            # Prod guard (issue #146): a dev/worktree binary must not trim the
            # unknown migration markers + revert user_version on the installed
            # release's NON-re-derivable prod stats.db — the destructive cousin
            # of the #142 forward-migration guard (trimmed markers can't be
            # re-derived). Reuses the same connection-scoped predicate (git
            # checkout AND the DB physically in the real prod dir,
            # password-DB-resolved, honoring CCTALLY_ALLOW_PROD_MIGRATION).
            # cache.db is re-derivable and intentionally exempt — it mirrors
            # the dispatcher's opt-in auto-heal.
            if _would_block_prod_migration(conn):
                eprint(
                    "cctally: refusing to recover stats.db in the prod data dir "
                    "(~/.local/share/cctally) from a dev checkout — trimming the "
                    "unknown migration markers and reverting user_version on the "
                    "installed release's non-re-derivable stats.db could corrupt it. "
                    "Run the installed binary, or override with "
                    "CCTALLY_ALLOW_PROD_MIGRATION=1."
                )
                return 2
            # Destructive for stats.db, so insist on explicit confirmation.
            if not getattr(args, "yes", False):
                eprint(
                    f"cctally: {label} is at version {cur_version} but this cctally "
                    f"only knows up to {head}. Recovering stats.db trims the unknown "
                    f"migration markers and reverts user_version, but any schema the "
                    f"unknown migration created is left in place and a re-record/"
                    f"re-sync may be needed. Re-run with --yes to proceed, or restore "
                    f"{label} from a backup."
                )
                return 2

        info = _recover_version_ahead(conn, registry, label)
        print(
            f"cctally: reverted {label} v{info['reverted_from']} → "
            f"v{info['reverted_to']}, dropped {info['trimmed']} unknown marker(s)."
        )
        return 0
    finally:
        conn.close()