cctally 1.11.0 → 1.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +52 -0
- package/bin/_cctally_alerts.py +14 -18
- package/bin/_cctally_cache.py +366 -140
- package/bin/_cctally_config.py +31 -22
- package/bin/_cctally_core.py +145 -8
- package/bin/_cctally_dashboard.py +2 -1
- package/bin/_cctally_db.py +1696 -35
- package/bin/_cctally_record.py +27 -27
- package/bin/_cctally_setup.py +39 -27
- package/bin/_cctally_tui.py +2 -1
- package/bin/_cctally_update.py +41 -33
- package/bin/_lib_changelog.py +3 -1
- package/bin/_lib_jsonl.py +80 -16
- package/bin/_lib_share_templates.py +31 -13
- package/bin/cctally +112 -109
- package/package.json +1 -1
package/bin/_cctally_cache.py
CHANGED
|
@@ -41,21 +41,18 @@ Holds:
|
|
|
41
41
|
- ``cmd_cache_sync`` — entry point for ``cctally cache-sync
|
|
42
42
|
[--source {claude,codex,all}] [--rebuild]``.
|
|
43
43
|
|
|
44
|
-
What
|
|
44
|
+
What lives in bin/_cctally_core (promoted 2026-05-22, #84):
|
|
45
45
|
- Path constants ``APP_DIR``, ``CACHE_DB_PATH``, ``CACHE_LOCK_PATH``,
|
|
46
|
-
``CACHE_LOCK_CODEX_PATH
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
``
|
|
50
|
-
``
|
|
51
|
-
``
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
``
|
|
56
|
-
widely scattered (record-usage tick, dashboard panels, share render
|
|
57
|
-
kernel, block tests, every JSONL-reading subcommand fixture) and
|
|
58
|
-
Phase C-style inline patching would touch dozens of sites.
|
|
46
|
+
``CACHE_LOCK_CODEX_PATH``. Moved bodies read these via call-time
|
|
47
|
+
``_cctally_core.X`` and tests patch via
|
|
48
|
+
``monkeypatch.setattr(_cctally_core, "X", v)`` (or the conftest
|
|
49
|
+
``redirect_paths()`` helper). The legacy
|
|
50
|
+
``setitem(ns, "CACHE_DB_PATH", …)`` pattern is forbidden by
|
|
51
|
+
``test_no_old_style_test_patches_for_promoted_globals``.
|
|
52
|
+
|
|
53
|
+
What stays in bin/cctally:
|
|
54
|
+
- ``CODEX_SESSIONS_DIR`` — out of scope for #84; still read via the
|
|
55
|
+
``c = _cctally()`` call-time accessor (spec §5.5).
|
|
59
56
|
- ``_sum_cost_for_range`` — sits at the cache↔report boundary; 6+
|
|
60
57
|
callers outside cache (forecast, weekly, report, project, doctor),
|
|
61
58
|
so the directive keeps it on the bin/cctally side.
|
|
@@ -122,6 +119,7 @@ def _cctally():
|
|
|
122
119
|
# Spec 2026-05-17-cctally-core-kernel-extraction.md §3.3: kernel symbols
|
|
123
120
|
# (Z-leaf + Z-mid) import from _cctally_core. The legacy shim function
|
|
124
121
|
# for ``eprint`` is deleted.
|
|
122
|
+
import _cctally_core
|
|
125
123
|
from _cctally_core import eprint
|
|
126
124
|
|
|
127
125
|
|
|
@@ -167,6 +165,7 @@ _CodexIterState = _lib_jsonl._CodexIterState
|
|
|
167
165
|
_iter_jsonl_entries_with_offsets = _lib_jsonl._iter_jsonl_entries_with_offsets
|
|
168
166
|
_iter_codex_jsonl_entries_with_offsets = _lib_jsonl._iter_codex_jsonl_entries_with_offsets
|
|
169
167
|
_parse_usage_entries = _lib_jsonl._parse_usage_entries
|
|
168
|
+
_should_replace = _lib_jsonl._should_replace
|
|
170
169
|
|
|
171
170
|
_cctally_db_sib = _load_lib("_cctally_db")
|
|
172
171
|
add_column_if_missing = _cctally_db_sib.add_column_if_missing
|
|
@@ -175,11 +174,12 @@ _CACHE_MIGRATIONS = _cctally_db_sib._CACHE_MIGRATIONS
|
|
|
175
174
|
|
|
176
175
|
|
|
177
176
|
# === BEGIN MOVED REGIONS ===
|
|
178
|
-
# Path constants
|
|
179
|
-
# CACHE_LOCK_CODEX_PATH
|
|
180
|
-
#
|
|
181
|
-
#
|
|
182
|
-
# in
|
|
177
|
+
# Path constants APP_DIR / CACHE_DB_PATH / CACHE_LOCK_PATH /
|
|
178
|
+
# CACHE_LOCK_CODEX_PATH live in _cctally_core (promoted 2026-05-22, #84);
|
|
179
|
+
# moved bodies read them via call-time ``_cctally_core.X`` and tests
|
|
180
|
+
# patch via ``monkeypatch.setattr(_cctally_core, "X", v)``.
|
|
181
|
+
# CODEX_SESSIONS_DIR stays in bin/cctally (out of scope for #84) and is
|
|
182
|
+
# still accessed via the ``c = _cctally()`` call-time accessor.
|
|
183
183
|
|
|
184
184
|
# === Region 1: ProjectKey + _resolve_project_key (was bin/cctally:1994-2069) ===
|
|
185
185
|
|
|
@@ -305,7 +305,16 @@ class IngestStats:
|
|
|
305
305
|
files_processed: int = 0
|
|
306
306
|
files_skipped_unchanged: int = 0
|
|
307
307
|
files_reset_truncated: int = 0
|
|
308
|
-
|
|
308
|
+
# Count of session_entries rows written by this sync — both genuinely-
|
|
309
|
+
# new INSERTs and ccusage-parity ON CONFLICT DO UPDATE replacements
|
|
310
|
+
# (the dedup tiebreaker swaps a streaming-intermediate row for the
|
|
311
|
+
# post-stream finalization). SQLite's `total_changes` counter
|
|
312
|
+
# increments on both, so this field is "rows changed", not "rows
|
|
313
|
+
# newly inserted". Pre-dedup builds used INSERT OR IGNORE where
|
|
314
|
+
# conflicts did NOT bump the counter; the name change preserves the
|
|
315
|
+
# observability metric without misrepresenting UPSERT updates as
|
|
316
|
+
# new inserts.
|
|
317
|
+
rows_changed: int = 0
|
|
309
318
|
lock_contended: bool = False
|
|
310
319
|
|
|
311
320
|
|
|
@@ -315,7 +324,7 @@ def _progress_stderr(stats: IngestStats, *, force: bool = False) -> None:
|
|
|
315
324
|
return
|
|
316
325
|
eprint(
|
|
317
326
|
f"[cache-sync] {stats.files_processed}/{stats.files_total} files, "
|
|
318
|
-
f"{stats.
|
|
327
|
+
f"{stats.rows_changed} rows changed"
|
|
319
328
|
)
|
|
320
329
|
|
|
321
330
|
|
|
@@ -422,10 +431,10 @@ def sync_cache(
|
|
|
422
431
|
"""
|
|
423
432
|
stats = IngestStats()
|
|
424
433
|
c = _cctally()
|
|
425
|
-
|
|
426
|
-
|
|
434
|
+
_cctally_core.APP_DIR.mkdir(parents=True, exist_ok=True)
|
|
435
|
+
_cctally_core.CACHE_LOCK_PATH.touch()
|
|
427
436
|
|
|
428
|
-
lock_fh = open(
|
|
437
|
+
lock_fh = open(_cctally_core.CACHE_LOCK_PATH, "w")
|
|
429
438
|
try:
|
|
430
439
|
try:
|
|
431
440
|
fcntl.flock(lock_fh, fcntl.LOCK_EX | fcntl.LOCK_NB)
|
|
@@ -434,6 +443,30 @@ def sync_cache(
|
|
|
434
443
|
stats.lock_contended = True
|
|
435
444
|
return stats
|
|
436
445
|
|
|
446
|
+
# Walk-complete sentinel gating (cctally-dev#93, D5b/D6b). Capture
|
|
447
|
+
# whether cache 001 was already applied at the moment this sync
|
|
448
|
+
# acquired the lock. The end-of-loop marker write is gated on this so
|
|
449
|
+
# a walk whose baseline predates the 001 wipe (the "straddle" run)
|
|
450
|
+
# withholds the marker — it cannot vouch for a cache 001 wiped
|
|
451
|
+
# underneath it. On the normal first-upgrade flow open_cache_db runs
|
|
452
|
+
# the dispatcher (001 applies in-process) BEFORE sync_cache is ever
|
|
453
|
+
# called, so this is True and the marker is written as expected. If
|
|
454
|
+
# schema_migrations doesn't exist yet, treat as not-applied (False).
|
|
455
|
+
try:
|
|
456
|
+
applied_at_start = conn.execute(
|
|
457
|
+
"SELECT 1 FROM schema_migrations WHERE name='001_dedup_highest_wins'"
|
|
458
|
+
).fetchone() is not None
|
|
459
|
+
except sqlite3.OperationalError:
|
|
460
|
+
applied_at_start = False
|
|
461
|
+
|
|
462
|
+
# Tracks whether every file in this walk was either ingested cleanly
|
|
463
|
+
# or confirmed-current. Any per-file error-skip (stat/read failure or
|
|
464
|
+
# a DB error that rolls back + continues) flips it False so the marker
|
|
465
|
+
# is withheld — an incomplete walk must not look complete. The
|
|
466
|
+
# unchanged-file early-exit (`size == prev_size`) does NOT flip it: a
|
|
467
|
+
# confirmed-current file still counts as walked.
|
|
468
|
+
walk_clean = True
|
|
469
|
+
|
|
437
470
|
if rebuild:
|
|
438
471
|
# Clear INSIDE the lock — a concurrent rebuild that lost the
|
|
439
472
|
# race would otherwise have wiped this cache before bailing,
|
|
@@ -442,6 +475,11 @@ def sync_cache(
|
|
|
442
475
|
# empty baseline.
|
|
443
476
|
conn.execute("DELETE FROM session_entries")
|
|
444
477
|
conn.execute("DELETE FROM session_files")
|
|
478
|
+
# Clear the walk-complete sentinel atomically with the wipe
|
|
479
|
+
# (cctally-dev#93, D5/D2): a stale "complete" marker must never
|
|
480
|
+
# survive a destructive rebuild. The end-of-loop write below
|
|
481
|
+
# re-establishes it only after this rebuild's clean walk.
|
|
482
|
+
conn.execute("DELETE FROM cache_meta WHERE key='claude_ingest_walk_complete'")
|
|
445
483
|
conn.commit()
|
|
446
484
|
eprint("[cache-sync] rebuild: cleared Claude cached entries")
|
|
447
485
|
|
|
@@ -465,6 +503,120 @@ def sync_cache(
|
|
|
465
503
|
)
|
|
466
504
|
}
|
|
467
505
|
|
|
506
|
+
# Orphaned-tracked-file detection (cctally-dev#93 review). A path
|
|
507
|
+
# tracked in session_files (with data already ingested) but no
|
|
508
|
+
# longer present on disk leaves orphaned session_entries rows that
|
|
509
|
+
# the per-file loop below never visits — it iterates only on-disk
|
|
510
|
+
# `paths`. sync_cache deliberately does NOT prune those orphans
|
|
511
|
+
# in-place: a deleted file shares the truncation hazard (under the
|
|
512
|
+
# sticky source_path dedup a surviving file may carry the same
|
|
513
|
+
# (msg_id, req_id) yet keep its size_bytes, so a per-orphan DELETE
|
|
514
|
+
# could drop a row the survivor still owns without re-ingesting
|
|
515
|
+
# it), and a blanket full-reset would wrongly fire on the
|
|
516
|
+
# legitimate "cache seeded with synthetic source paths" fixture
|
|
517
|
+
# pattern. Instead we INVALIDATE the walk-complete marker: an
|
|
518
|
+
# orphaned cache no longer faithfully mirrors disk, so it is — by
|
|
519
|
+
# the marker's own definition — not a complete walk. We must
|
|
520
|
+
# actively DELETE any marker a PRIOR clean walk left behind (not
|
|
521
|
+
# merely withhold THIS run's end-of-loop rewrite — that rewrite is
|
|
522
|
+
# gated on walk_clean, but a stale marker from a previous sync
|
|
523
|
+
# would otherwise survive and keep vouching for completeness).
|
|
524
|
+
# Setting walk_clean=False additionally suppresses the end-of-loop
|
|
525
|
+
# rewrite so the marker stays absent for this run. With the marker
|
|
526
|
+
# gone the upgrade gate DEFERs the 008/009/010 recomputes (rather
|
|
527
|
+
# than certifying aggregates that still include data from files no
|
|
528
|
+
# longer on disk); the operator clears the orphans by running
|
|
529
|
+
# `cache-sync --rebuild` (the documented re-derive path), which
|
|
530
|
+
# re-establishes the marker. Only paths whose row carried ingested
|
|
531
|
+
# bytes (size_bytes > 0) count — a size_bytes=0 row holds no
|
|
532
|
+
# session_entries, so its absence leaves no orphan. The DELETE +
|
|
533
|
+
# commit lands BEFORE the per-file read+parse loop, so no write
|
|
534
|
+
# lock is held into that loop (same discipline as the truncation
|
|
535
|
+
# escalation just below).
|
|
536
|
+
on_disk_paths = {str(jp) for jp in paths}
|
|
537
|
+
orphaned_tracked_paths = [
|
|
538
|
+
p for p, (size_bytes, _, _) in existing.items()
|
|
539
|
+
if size_bytes and p not in on_disk_paths
|
|
540
|
+
]
|
|
541
|
+
if orphaned_tracked_paths:
|
|
542
|
+
eprint(
|
|
543
|
+
f"[cache] {len(orphaned_tracked_paths)} tracked file(s) no "
|
|
544
|
+
f"longer on disk; invalidating walk-complete marker "
|
|
545
|
+
f"(run `cache-sync --rebuild` to prune orphaned entries)"
|
|
546
|
+
)
|
|
547
|
+
conn.execute(
|
|
548
|
+
"DELETE FROM cache_meta WHERE key='claude_ingest_walk_complete'"
|
|
549
|
+
)
|
|
550
|
+
conn.commit()
|
|
551
|
+
walk_clean = False # orphaned rows -> cache doesn't mirror disk (D5a)
|
|
552
|
+
|
|
553
|
+
# Pre-scan for any truncation among tracked files. Under the
|
|
554
|
+
# ccusage-parity ON CONFLICT DO UPDATE, source_path is PINNED to
|
|
555
|
+
# whichever file first inserted a (msg_id, req_id) row (see U1
|
|
556
|
+
# in this file). Later UPSERTs from a DIFFERENT file may have
|
|
557
|
+
# updated the token columns on that row while leaving source_path
|
|
558
|
+
# pointing at the original (now possibly truncated) file. A
|
|
559
|
+
# naive per-file truncation path then deletes by source_path and
|
|
560
|
+
# loses data the other file is still carrying — but that other
|
|
561
|
+
# file's `size_bytes` is unchanged, so the per-file early-exit
|
|
562
|
+
# at `if size == prev_size: continue` skips its re-ingest.
|
|
563
|
+
#
|
|
564
|
+
# Escalation: when any file's size has shrunk, drop the entire
|
|
565
|
+
# session_entries cache and force every file to re-ingest from
|
|
566
|
+
# offset 0. The cache is fully re-derivable, this is rare (only
|
|
567
|
+
# on JSONL rotation / manual edits), and it sidesteps the
|
|
568
|
+
# per-key contributing-file bookkeeping that would otherwise be
|
|
569
|
+
# required. The lock is already held, so no concurrent sync can
|
|
570
|
+
# interleave between this reset and the per-file re-ingest.
|
|
571
|
+
truncated_paths: set[str] = set()
|
|
572
|
+
for jp in paths:
|
|
573
|
+
prev = existing.get(str(jp))
|
|
574
|
+
if prev is None:
|
|
575
|
+
continue
|
|
576
|
+
try:
|
|
577
|
+
st = jp.stat()
|
|
578
|
+
except OSError:
|
|
579
|
+
continue
|
|
580
|
+
if st.st_size < prev[0]:
|
|
581
|
+
truncated_paths.add(str(jp))
|
|
582
|
+
|
|
583
|
+
if truncated_paths:
|
|
584
|
+
eprint(
|
|
585
|
+
f"[cache-sync] truncation detected on {len(truncated_paths)} "
|
|
586
|
+
f"file(s) — re-ingesting all files (safe under ccusage-parity "
|
|
587
|
+
f"dedup)"
|
|
588
|
+
)
|
|
589
|
+
conn.execute("DELETE FROM session_entries")
|
|
590
|
+
# Clear the walk-complete sentinel atomically with the truncation
|
|
591
|
+
# full-reset (cctally-dev#93, D5/D2): the cache is being wiped, so
|
|
592
|
+
# any "complete" marker is now stale. The end-of-loop write below
|
|
593
|
+
# re-establishes it only after this run's clean re-ingest walk.
|
|
594
|
+
conn.execute("DELETE FROM cache_meta WHERE key='claude_ingest_walk_complete'")
|
|
595
|
+
# Crash-safety: also clear session_files's size/offset tracking
|
|
596
|
+
# so a partial-state recovery on the NEXT sync forces every
|
|
597
|
+
# file's per-file branch to take the fresh-ingest path. Without
|
|
598
|
+
# this, if the process is killed (kill -9, power loss) between
|
|
599
|
+
# this DELETE commit and the per-file re-ingest commits below,
|
|
600
|
+
# the next sync would only re-detect the originally-truncated
|
|
601
|
+
# file(s); other files still have matching size_bytes and the
|
|
602
|
+
# `if size == prev_size: continue` early-exit would leave them
|
|
603
|
+
# missing from session_entries until file size changes or an
|
|
604
|
+
# operator runs `cache-sync --rebuild`. UPDATE (not DELETE)
|
|
605
|
+
# preserves session_id / project_path columns lazy-backfilled
|
|
606
|
+
# by _ensure_session_files_row (used by the `session`
|
|
607
|
+
# subcommand's JOIN).
|
|
608
|
+
conn.execute(
|
|
609
|
+
"UPDATE session_files SET size_bytes = 0, last_byte_offset = 0"
|
|
610
|
+
)
|
|
611
|
+
conn.commit()
|
|
612
|
+
stats.files_reset_truncated += len(truncated_paths)
|
|
613
|
+
# Force every file to re-ingest from offset 0: clearing the
|
|
614
|
+
# `existing` map makes `prev is None` true downstream, so the
|
|
615
|
+
# per-file branch takes the fresh-ingest path (start_offset=0,
|
|
616
|
+
# truncated=False since we already wiped the table above —
|
|
617
|
+
# avoids a redundant per-file DELETE that would be a no-op).
|
|
618
|
+
existing = {}
|
|
619
|
+
|
|
468
620
|
for jp in paths:
|
|
469
621
|
path_str = str(jp)
|
|
470
622
|
# Backfill session_id/project_path for A2 `session` subcommand.
|
|
@@ -478,6 +630,7 @@ def sync_cache(
|
|
|
478
630
|
st = jp.stat()
|
|
479
631
|
except OSError as exc:
|
|
480
632
|
eprint(f"[cache] stat failed for {jp}: {exc}")
|
|
633
|
+
walk_clean = False # skipped a file without ingesting (D5a)
|
|
481
634
|
continue
|
|
482
635
|
|
|
483
636
|
size = st.st_size
|
|
@@ -536,6 +689,7 @@ def sync_cache(
|
|
|
536
689
|
final_offset = fh.tell()
|
|
537
690
|
except OSError as exc:
|
|
538
691
|
eprint(f"[cache] could not read {jp}: {exc}")
|
|
692
|
+
walk_clean = False # skipped a file without ingesting (D5a)
|
|
539
693
|
continue
|
|
540
694
|
|
|
541
695
|
# Python's sqlite3 module starts an implicit transaction on the
|
|
@@ -553,16 +707,65 @@ def sync_cache(
|
|
|
553
707
|
stats.files_reset_truncated += 1
|
|
554
708
|
if rows:
|
|
555
709
|
before = conn.total_changes
|
|
710
|
+
# ccusage-parity ON CONFLICT DO UPDATE: higher-token total
|
|
711
|
+
# wins on conflict; speed-set breaks ties. The partial
|
|
712
|
+
# UNIQUE index `idx_entries_dedup` restricts the conflict
|
|
713
|
+
# target to (msg_id IS NOT NULL AND req_id IS NOT NULL),
|
|
714
|
+
# so the WHERE clause on the conflict target MUST repeat
|
|
715
|
+
# that predicate verbatim — bare `ON CONFLICT(msg_id,
|
|
716
|
+
# req_id)` raises OperationalError. NULL-keyed rows fall
|
|
717
|
+
# through to a plain INSERT, unchanged.
|
|
718
|
+
#
|
|
719
|
+
# `source_path` is INTENTIONALLY OMITTED from the DO
|
|
720
|
+
# UPDATE SET clause: it stays pinned to whichever JSONL
|
|
721
|
+
# FIRST INSERTed the (msg_id, req_id) row. The
|
|
722
|
+
# downstream `LEFT JOIN session_files ON sf.path =
|
|
723
|
+
# se.source_path` uses source_path to attribute tokens
|
|
724
|
+
# to a `project_path`. If a later UPSERT from a
|
|
725
|
+
# different file flipped source_path, the row's
|
|
726
|
+
# project attribution would move with the winner —
|
|
727
|
+
# `cctally project` would mis-aggregate. Sticky
|
|
728
|
+
# source_path matches pre-dedup INSERT OR IGNORE
|
|
729
|
+
# behavior and the operator's mental model.
|
|
730
|
+
# (`line_offset` is similarly sticky for the same
|
|
731
|
+
# reason — the offset only makes sense within the
|
|
732
|
+
# file that originally wrote the row.)
|
|
556
733
|
conn.executemany(
|
|
557
|
-
"""INSERT
|
|
734
|
+
"""INSERT INTO session_entries
|
|
558
735
|
(source_path, line_offset, timestamp_utc, model,
|
|
559
736
|
msg_id, req_id, input_tokens, output_tokens,
|
|
560
737
|
cache_create_tokens, cache_read_tokens,
|
|
561
738
|
usage_extra_json, cost_usd_raw)
|
|
562
|
-
VALUES (?,?,?,?,?,?,?,?,?,?,?,?)
|
|
739
|
+
VALUES (?,?,?,?,?,?,?,?,?,?,?,?)
|
|
740
|
+
ON CONFLICT(msg_id, req_id)
|
|
741
|
+
WHERE msg_id IS NOT NULL AND req_id IS NOT NULL
|
|
742
|
+
DO UPDATE SET
|
|
743
|
+
timestamp_utc = excluded.timestamp_utc,
|
|
744
|
+
model = excluded.model,
|
|
745
|
+
input_tokens = excluded.input_tokens,
|
|
746
|
+
output_tokens = excluded.output_tokens,
|
|
747
|
+
cache_create_tokens = excluded.cache_create_tokens,
|
|
748
|
+
cache_read_tokens = excluded.cache_read_tokens,
|
|
749
|
+
usage_extra_json = excluded.usage_extra_json,
|
|
750
|
+
cost_usd_raw = excluded.cost_usd_raw
|
|
751
|
+
WHERE
|
|
752
|
+
(excluded.input_tokens + excluded.output_tokens
|
|
753
|
+
+ excluded.cache_create_tokens + excluded.cache_read_tokens)
|
|
754
|
+
>
|
|
755
|
+
(session_entries.input_tokens + session_entries.output_tokens
|
|
756
|
+
+ session_entries.cache_create_tokens + session_entries.cache_read_tokens)
|
|
757
|
+
OR (
|
|
758
|
+
(excluded.input_tokens + excluded.output_tokens
|
|
759
|
+
+ excluded.cache_create_tokens + excluded.cache_read_tokens)
|
|
760
|
+
=
|
|
761
|
+
(session_entries.input_tokens + session_entries.output_tokens
|
|
762
|
+
+ session_entries.cache_create_tokens + session_entries.cache_read_tokens)
|
|
763
|
+
AND json_extract(excluded.usage_extra_json, '$.speed') IS NOT NULL
|
|
764
|
+
AND json_extract(session_entries.usage_extra_json, '$.speed') IS NULL
|
|
765
|
+
)""",
|
|
563
766
|
rows,
|
|
564
767
|
)
|
|
565
|
-
stats.
|
|
768
|
+
stats.rows_changed += conn.total_changes - before
|
|
566
769
|
# UPSERT preserves session_id / project_path columns populated
|
|
567
770
|
# by _ensure_session_files_row at the top of this loop. A plain
|
|
568
771
|
# INSERT OR REPLACE would wipe them on every changed-file sync.
|
|
@@ -585,6 +788,7 @@ def sync_cache(
|
|
|
585
788
|
except sqlite3.DatabaseError as exc:
|
|
586
789
|
eprint(f"[cache] db error on {jp}: {exc}")
|
|
587
790
|
conn.rollback()
|
|
791
|
+
walk_clean = False # rolled back this file without ingesting (D5a)
|
|
588
792
|
continue
|
|
589
793
|
|
|
590
794
|
if progress is not None:
|
|
@@ -592,6 +796,22 @@ def sync_cache(
|
|
|
592
796
|
|
|
593
797
|
if progress is not None:
|
|
594
798
|
progress(stats)
|
|
799
|
+
|
|
800
|
+
# Walk-complete sentinel write (cctally-dev#93, D5a). Still inside the
|
|
801
|
+
# held fcntl lock, before the finally-unlock. Only when the entire walk
|
|
802
|
+
# was clean AND cache 001 was already applied at the start of this run
|
|
803
|
+
# (D5b): an unclean walk or a straddle run must not vouch for cache
|
|
804
|
+
# completeness. A lock-contended sync returned early above and never
|
|
805
|
+
# reaches here. Presence (not the timestamp) is the gate signal; the
|
|
806
|
+
# value stores the completion instant for doctor/debugging.
|
|
807
|
+
if walk_clean and applied_at_start:
|
|
808
|
+
conn.execute(
|
|
809
|
+
"INSERT INTO cache_meta(key, value) "
|
|
810
|
+
"VALUES('claude_ingest_walk_complete', ?) "
|
|
811
|
+
"ON CONFLICT(key) DO UPDATE SET value = excluded.value",
|
|
812
|
+
(dt.datetime.now(dt.timezone.utc).isoformat(),),
|
|
813
|
+
)
|
|
814
|
+
conn.commit()
|
|
595
815
|
return stats
|
|
596
816
|
finally:
|
|
597
817
|
try:
|
|
@@ -665,15 +885,26 @@ def _collect_entries_direct(
|
|
|
665
885
|
*,
|
|
666
886
|
project: str | None = None,
|
|
667
887
|
) -> list[UsageEntry]:
|
|
668
|
-
"""Legacy direct-parse fallback used when the cache DB can't be opened.
|
|
888
|
+
"""Legacy direct-parse fallback used when the cache DB can't be opened.
|
|
889
|
+
|
|
890
|
+
Uses the ccusage-parity dict-keyed accumulator: dedup-keyed entries
|
|
891
|
+
live in `dedupe_map` and are tiebroken via `_should_replace` (higher
|
|
892
|
+
token total wins, speed-set breaks ties). Entries with NULL msg_id or
|
|
893
|
+
req_id bypass the map and land verbatim — the partial UNIQUE index on the
|
|
894
|
+
cache mirrors this behavior. Flattened + sorted once at the end.
|
|
895
|
+
"""
|
|
669
896
|
files = _discover_session_files(range_start, project=project)
|
|
670
|
-
|
|
671
|
-
|
|
897
|
+
dedupe_map: dict[str, UsageEntry] = {}
|
|
898
|
+
no_key: list[UsageEntry] = []
|
|
672
899
|
for fp in files:
|
|
673
|
-
|
|
674
|
-
_parse_usage_entries(
|
|
900
|
+
no_key.extend(
|
|
901
|
+
_parse_usage_entries(
|
|
902
|
+
fp, range_start, range_end, dedupe_map=dedupe_map,
|
|
903
|
+
)
|
|
675
904
|
)
|
|
676
|
-
|
|
905
|
+
all_entries = list(dedupe_map.values()) + no_key
|
|
906
|
+
all_entries.sort(key=lambda e: e.timestamp)
|
|
907
|
+
return all_entries
|
|
677
908
|
|
|
678
909
|
|
|
679
910
|
# === Region 4: _JoinedClaudeEntry + get_claude_session_entries (was bin/cctally:2478-2668) ===
|
|
@@ -809,10 +1040,23 @@ def _direct_parse_claude_session_entries(
|
|
|
809
1040
|
scan the file for the first `sessionId` / `cwd` value, else fall
|
|
810
1041
|
back to the filename UUID and the decoded-escaped parent directory
|
|
811
1042
|
— same logic as `_ensure_session_files_row`.
|
|
1043
|
+
|
|
1044
|
+
Uses the ccusage-parity dict-keyed accumulator. Each per-file parse
|
|
1045
|
+
contributes into a global `(entry, source_path)` map keyed by
|
|
1046
|
+
`msg_id:req_id`; ties broken by `_should_replace`. NULL-keyed entries
|
|
1047
|
+
bypass dedup. After all files are walked, results are stamped with
|
|
1048
|
+
their owning file's session_id/cwd metadata and emitted in
|
|
1049
|
+
timestamp order.
|
|
812
1050
|
"""
|
|
813
|
-
results: list[_JoinedClaudeEntry] = []
|
|
814
1051
|
files = _discover_session_files(range_start, project=project)
|
|
815
|
-
|
|
1052
|
+
|
|
1053
|
+
# File metadata: source_path -> (session_id, project_path/cwd).
|
|
1054
|
+
meta_by_path: dict[str, tuple[str, str]] = {}
|
|
1055
|
+
|
|
1056
|
+
# Global accumulator: (msg_id:req_id) -> (UsageEntry, source_path).
|
|
1057
|
+
dedupe_map: dict[str, tuple[UsageEntry, str]] = {}
|
|
1058
|
+
# Null-key entries (rare; same as the cache's partial-index fallthrough).
|
|
1059
|
+
no_key_with_meta: list[tuple[UsageEntry, str]] = []
|
|
816
1060
|
|
|
817
1061
|
for fp in files:
|
|
818
1062
|
source_path = str(fp)
|
|
@@ -847,27 +1091,67 @@ def _direct_parse_claude_session_entries(
|
|
|
847
1091
|
session_id = os.path.splitext(os.path.basename(source_path))[0]
|
|
848
1092
|
if cwd is None:
|
|
849
1093
|
cwd = _decode_escaped_cwd(os.path.basename(os.path.dirname(source_path)))
|
|
1094
|
+
meta_by_path[source_path] = (session_id, cwd)
|
|
1095
|
+
|
|
1096
|
+
# Parse this file with a fresh per-file dedupe_map so we can attach
|
|
1097
|
+
# the source_path provenance to whatever wins this file's local
|
|
1098
|
+
# contests. Then merge into the global map using the same
|
|
1099
|
+
# `_should_replace` rule. (A shared dedupe_map across files would
|
|
1100
|
+
# lose the source_path of the winning entry — _parse_usage_entries
|
|
1101
|
+
# has no awareness of per-file metadata.)
|
|
1102
|
+
file_dedupe_map: dict[str, UsageEntry] = {}
|
|
1103
|
+
file_no_key = _parse_usage_entries(
|
|
1104
|
+
fp, range_start, range_end, dedupe_map=file_dedupe_map,
|
|
1105
|
+
)
|
|
850
1106
|
|
|
851
|
-
|
|
852
|
-
|
|
853
|
-
|
|
854
|
-
|
|
855
|
-
|
|
856
|
-
|
|
857
|
-
|
|
858
|
-
|
|
859
|
-
|
|
860
|
-
|
|
861
|
-
|
|
862
|
-
|
|
863
|
-
|
|
864
|
-
|
|
865
|
-
|
|
866
|
-
|
|
867
|
-
|
|
868
|
-
|
|
869
|
-
|
|
870
|
-
|
|
1107
|
+
# Merge file-local no-key entries directly (no dedup contest).
|
|
1108
|
+
for entry in file_no_key:
|
|
1109
|
+
no_key_with_meta.append((entry, source_path))
|
|
1110
|
+
|
|
1111
|
+
# Merge file-local dedup-keyed entries into the global map.
|
|
1112
|
+
# Same tiebreaker as the cache's ON CONFLICT DO UPDATE clause:
|
|
1113
|
+
# higher-token total wins the entry DATA. But `source_path` is
|
|
1114
|
+
# STICKY to whichever file FIRST contributed the key — it is NOT
|
|
1115
|
+
# flipped to the winner. This mirrors the cache ingest path, where
|
|
1116
|
+
# `source_path` is intentionally OMITTED from the ON CONFLICT DO
|
|
1117
|
+
# UPDATE SET clause (see this file's UPSERT, ~line 636) so the
|
|
1118
|
+
# downstream `LEFT JOIN session_files ON sf.path = se.source_path`
|
|
1119
|
+
# attributes tokens to the project of the file that first wrote the
|
|
1120
|
+
# row. Replacing it here would move project attribution to the
|
|
1121
|
+
# winner's file — `cctally project` (and any session_files join)
|
|
1122
|
+
# would then disagree with the normal cached behavior exactly when
|
|
1123
|
+
# this fallback path is exercised.
|
|
1124
|
+
for key, entry in file_dedupe_map.items():
|
|
1125
|
+
existing = dedupe_map.get(key)
|
|
1126
|
+
if existing is None:
|
|
1127
|
+
dedupe_map[key] = (entry, source_path)
|
|
1128
|
+
elif _should_replace(entry, existing[0]):
|
|
1129
|
+
# Winner's DATA, first contributor's source_path (sticky).
|
|
1130
|
+
dedupe_map[key] = (entry, existing[1])
|
|
1131
|
+
|
|
1132
|
+
# Flatten + emit.
|
|
1133
|
+
results: list[_JoinedClaudeEntry] = []
|
|
1134
|
+
flat: list[tuple[UsageEntry, str]] = list(dedupe_map.values()) + no_key_with_meta
|
|
1135
|
+
flat.sort(key=lambda pair: pair[0].timestamp)
|
|
1136
|
+
for entry, source_path in flat:
|
|
1137
|
+
usage = entry.usage
|
|
1138
|
+
sid, cwd = meta_by_path[source_path]
|
|
1139
|
+
results.append(_JoinedClaudeEntry(
|
|
1140
|
+
timestamp=entry.timestamp,
|
|
1141
|
+
model=entry.model,
|
|
1142
|
+
input_tokens=int(usage.get("input_tokens", 0) or 0),
|
|
1143
|
+
output_tokens=int(usage.get("output_tokens", 0) or 0),
|
|
1144
|
+
cache_creation_tokens=int(
|
|
1145
|
+
usage.get("cache_creation_input_tokens", 0) or 0
|
|
1146
|
+
),
|
|
1147
|
+
cache_read_tokens=int(
|
|
1148
|
+
usage.get("cache_read_input_tokens", 0) or 0
|
|
1149
|
+
),
|
|
1150
|
+
source_path=source_path,
|
|
1151
|
+
session_id=sid,
|
|
1152
|
+
project_path=cwd,
|
|
1153
|
+
cost_usd=entry.cost_usd,
|
|
1154
|
+
))
|
|
871
1155
|
|
|
872
1156
|
return results
|
|
873
1157
|
|
|
@@ -881,7 +1165,13 @@ class CodexIngestStats:
|
|
|
881
1165
|
files_processed: int = 0
|
|
882
1166
|
files_skipped_unchanged: int = 0
|
|
883
1167
|
files_reset_truncated: int = 0
|
|
884
|
-
|
|
1168
|
+
# Count of codex_session_entries rows written by this sync. Codex
|
|
1169
|
+
# ingest uses INSERT OR IGNORE — ignored conflicts do NOT bump
|
|
1170
|
+
# SQLite's `total_changes`, so this number is effectively "rows
|
|
1171
|
+
# newly inserted". Field is named ``rows_changed`` for parity with
|
|
1172
|
+
# ``IngestStats`` (Claude path) which carries an UPSERT and
|
|
1173
|
+
# therefore counts both new INSERTs and DO UPDATE replacements.
|
|
1174
|
+
rows_changed: int = 0
|
|
885
1175
|
lock_contended: bool = False
|
|
886
1176
|
|
|
887
1177
|
|
|
@@ -891,7 +1181,7 @@ def _progress_codex_stderr(stats: CodexIngestStats, *, force: bool = False) -> N
|
|
|
891
1181
|
return
|
|
892
1182
|
eprint(
|
|
893
1183
|
f"[codex-cache] {stats.files_processed}/{stats.files_total} files, "
|
|
894
|
-
f"{stats.
|
|
1184
|
+
f"{stats.rows_changed} rows changed"
|
|
895
1185
|
)
|
|
896
1186
|
|
|
897
1187
|
|
|
@@ -914,10 +1204,10 @@ def sync_codex_cache(
|
|
|
914
1204
|
"""
|
|
915
1205
|
stats = CodexIngestStats()
|
|
916
1206
|
c = _cctally()
|
|
917
|
-
|
|
918
|
-
|
|
1207
|
+
_cctally_core.APP_DIR.mkdir(parents=True, exist_ok=True)
|
|
1208
|
+
_cctally_core.CACHE_LOCK_CODEX_PATH.touch()
|
|
919
1209
|
|
|
920
|
-
lock_fh = open(
|
|
1210
|
+
lock_fh = open(_cctally_core.CACHE_LOCK_CODEX_PATH, "w")
|
|
921
1211
|
try:
|
|
922
1212
|
try:
|
|
923
1213
|
fcntl.flock(lock_fh, fcntl.LOCK_EX | fcntl.LOCK_NB)
|
|
@@ -1096,7 +1386,7 @@ def sync_codex_cache(
|
|
|
1096
1386
|
VALUES (?,?,?,?,?,?,?,?,?,?)""",
|
|
1097
1387
|
rows,
|
|
1098
1388
|
)
|
|
1099
|
-
stats.
|
|
1389
|
+
stats.rows_changed += conn.total_changes - before
|
|
1100
1390
|
conn.execute(
|
|
1101
1391
|
"""INSERT OR REPLACE INTO codex_session_files
|
|
1102
1392
|
(path, size_bytes, mtime_ns, last_byte_offset,
|
|
@@ -1263,94 +1553,30 @@ def open_cache_db() -> sqlite3.Connection:
|
|
|
1263
1553
|
recreated — the cache is fully re-derivable from JSONL, so this is safe.
|
|
1264
1554
|
"""
|
|
1265
1555
|
c = _cctally()
|
|
1266
|
-
|
|
1556
|
+
_cctally_core.APP_DIR.mkdir(parents=True, exist_ok=True)
|
|
1267
1557
|
try:
|
|
1268
|
-
conn = sqlite3.connect(
|
|
1558
|
+
conn = sqlite3.connect(_cctally_core.CACHE_DB_PATH)
|
|
1269
1559
|
conn.execute("SELECT 1").fetchone()
|
|
1270
1560
|
except sqlite3.DatabaseError as exc:
|
|
1271
1561
|
eprint(f"[cache] corrupt cache DB ({exc}); recreating")
|
|
1272
1562
|
try:
|
|
1273
|
-
|
|
1563
|
+
_cctally_core.CACHE_DB_PATH.unlink()
|
|
1274
1564
|
except FileNotFoundError:
|
|
1275
1565
|
pass
|
|
1276
|
-
conn = sqlite3.connect(
|
|
1566
|
+
conn = sqlite3.connect(_cctally_core.CACHE_DB_PATH)
|
|
1277
1567
|
|
|
1278
1568
|
conn.execute("PRAGMA journal_mode=WAL")
|
|
1279
1569
|
conn.execute("PRAGMA busy_timeout=5000")
|
|
1280
1570
|
|
|
1281
|
-
|
|
1282
|
-
|
|
1283
|
-
|
|
1284
|
-
|
|
1285
|
-
|
|
1286
|
-
|
|
1287
|
-
|
|
1288
|
-
|
|
1289
|
-
|
|
1290
|
-
CREATE TABLE IF NOT EXISTS session_entries (
|
|
1291
|
-
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
1292
|
-
source_path TEXT NOT NULL,
|
|
1293
|
-
line_offset INTEGER NOT NULL,
|
|
1294
|
-
timestamp_utc TEXT NOT NULL,
|
|
1295
|
-
model TEXT NOT NULL,
|
|
1296
|
-
msg_id TEXT,
|
|
1297
|
-
req_id TEXT,
|
|
1298
|
-
input_tokens INTEGER NOT NULL DEFAULT 0,
|
|
1299
|
-
output_tokens INTEGER NOT NULL DEFAULT 0,
|
|
1300
|
-
cache_create_tokens INTEGER NOT NULL DEFAULT 0,
|
|
1301
|
-
cache_read_tokens INTEGER NOT NULL DEFAULT 0,
|
|
1302
|
-
usage_extra_json TEXT,
|
|
1303
|
-
cost_usd_raw REAL
|
|
1304
|
-
);
|
|
1305
|
-
CREATE INDEX IF NOT EXISTS idx_entries_timestamp
|
|
1306
|
-
ON session_entries(timestamp_utc);
|
|
1307
|
-
CREATE INDEX IF NOT EXISTS idx_entries_source
|
|
1308
|
-
ON session_entries(source_path);
|
|
1309
|
-
CREATE UNIQUE INDEX IF NOT EXISTS idx_entries_dedup
|
|
1310
|
-
ON session_entries(msg_id, req_id)
|
|
1311
|
-
WHERE msg_id IS NOT NULL AND req_id IS NOT NULL;
|
|
1312
|
-
|
|
1313
|
-
CREATE TABLE IF NOT EXISTS codex_session_files (
|
|
1314
|
-
path TEXT PRIMARY KEY,
|
|
1315
|
-
size_bytes INTEGER NOT NULL,
|
|
1316
|
-
mtime_ns INTEGER NOT NULL,
|
|
1317
|
-
last_byte_offset INTEGER NOT NULL,
|
|
1318
|
-
last_ingested_at TEXT NOT NULL,
|
|
1319
|
-
last_session_id TEXT,
|
|
1320
|
-
last_model TEXT
|
|
1321
|
-
);
|
|
1322
|
-
CREATE TABLE IF NOT EXISTS codex_session_entries (
|
|
1323
|
-
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
1324
|
-
source_path TEXT NOT NULL,
|
|
1325
|
-
line_offset INTEGER NOT NULL,
|
|
1326
|
-
timestamp_utc TEXT NOT NULL,
|
|
1327
|
-
session_id TEXT NOT NULL,
|
|
1328
|
-
model TEXT NOT NULL,
|
|
1329
|
-
input_tokens INTEGER NOT NULL DEFAULT 0,
|
|
1330
|
-
cached_input_tokens INTEGER NOT NULL DEFAULT 0,
|
|
1331
|
-
output_tokens INTEGER NOT NULL DEFAULT 0,
|
|
1332
|
-
reasoning_output_tokens INTEGER NOT NULL DEFAULT 0,
|
|
1333
|
-
total_tokens INTEGER NOT NULL DEFAULT 0,
|
|
1334
|
-
UNIQUE(source_path, line_offset)
|
|
1335
|
-
);
|
|
1336
|
-
CREATE INDEX IF NOT EXISTS idx_codex_entries_timestamp
|
|
1337
|
-
ON codex_session_entries(timestamp_utc);
|
|
1338
|
-
CREATE INDEX IF NOT EXISTS idx_codex_entries_session
|
|
1339
|
-
ON codex_session_entries(session_id);
|
|
1340
|
-
CREATE INDEX IF NOT EXISTS idx_codex_entries_source
|
|
1341
|
-
ON codex_session_entries(source_path);
|
|
1342
|
-
"""
|
|
1343
|
-
)
|
|
1344
|
-
|
|
1345
|
-
# Inline migration: add session_id / project_path columns to session_files
|
|
1346
|
-
# if they're missing. These were added for A2 `session` subcommand metadata;
|
|
1347
|
-
# populated lazily in sync_cache() / _ensure_session_files_row().
|
|
1348
|
-
add_column_if_missing(conn, "session_files", "session_id", "TEXT")
|
|
1349
|
-
add_column_if_missing(conn, "session_files", "project_path", "TEXT")
|
|
1350
|
-
conn.execute(
|
|
1351
|
-
"CREATE INDEX IF NOT EXISTS idx_session_files_session_id "
|
|
1352
|
-
"ON session_files(session_id)"
|
|
1353
|
-
)
|
|
1571
|
+
# Apply the shared cache.db schema (cctally-dev#93, D4): Claude tables +
|
|
1572
|
+
# indexes, the session_id / project_path column adds on session_files
|
|
1573
|
+
# (A2 `session` metadata, populated lazily in sync_cache() /
|
|
1574
|
+
# _ensure_session_files_row()), the Codex base tables + indexes, and the
|
|
1575
|
+
# cache_meta sentinel table. This is the single cache.db schema source —
|
|
1576
|
+
# the eager-apply path (_eagerly_apply_cache_migrations) uses the SAME
|
|
1577
|
+
# helper, so the two can no longer drift. The Codex last_total_tokens
|
|
1578
|
+
# ALTER + purge stays below (out of the shared helper — D4/P1#3).
|
|
1579
|
+
_cctally_db_sib._apply_cache_schema(conn)
|
|
1354
1580
|
|
|
1355
1581
|
# Migration: add last_total_tokens to codex_session_files. When the column
|
|
1356
1582
|
# is newly added (i.e. this is the first run after upgrade), purge the
|
|
@@ -1409,7 +1635,7 @@ def cmd_cache_sync(args: argparse.Namespace) -> int:
|
|
|
1409
1635
|
f"[cache-sync] claude done: {stats.files_processed} processed, "
|
|
1410
1636
|
f"{stats.files_skipped_unchanged} skipped, "
|
|
1411
1637
|
f"{stats.files_reset_truncated} reset, "
|
|
1412
|
-
f"{stats.
|
|
1638
|
+
f"{stats.rows_changed} rows changed"
|
|
1413
1639
|
)
|
|
1414
1640
|
|
|
1415
1641
|
if source in ("codex", "all"):
|
|
@@ -1427,7 +1653,7 @@ def cmd_cache_sync(args: argparse.Namespace) -> int:
|
|
|
1427
1653
|
f"[cache-sync] codex done: {stats.files_processed} processed, "
|
|
1428
1654
|
f"{stats.files_skipped_unchanged} skipped, "
|
|
1429
1655
|
f"{stats.files_reset_truncated} reset, "
|
|
1430
|
-
f"{stats.
|
|
1656
|
+
f"{stats.rows_changed} rows changed"
|
|
1431
1657
|
)
|
|
1432
1658
|
|
|
1433
1659
|
return 0
|