cctally 1.11.1 → 1.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2350,7 +2350,7 @@ def cmd_hook_tick(args: argparse.Namespace) -> int:
2350
2350
  cache_conn = open_cache_db()
2351
2351
  try:
2352
2352
  stats = sync_cache(cache_conn)
2353
- ingested = int(stats.rows_inserted)
2353
+ ingested = int(stats.rows_changed)
2354
2354
  finally:
2355
2355
  try:
2356
2356
  cache_conn.close()
@@ -1716,13 +1716,23 @@ def _setup_install(args: argparse.Namespace) -> int:
1716
1716
  cache_conn = c.open_cache_db()
1717
1717
  try:
1718
1718
  stats = c.sync_cache(cache_conn)
1719
- rows = int(stats.rows_inserted)
1719
+ rows = int(stats.rows_changed)
1720
1720
  finally:
1721
1721
  try:
1722
1722
  cache_conn.close()
1723
1723
  except Exception:
1724
1724
  pass
1725
1725
  bootstrap_rows = rows
1726
+ # `rows` counts both genuine INSERTs and ccusage-parity DO UPDATE
1727
+ # replacements (see IngestStats.rows_changed). On first install
1728
+ # this is always 0-vs-N pure inserts (cache is empty), so "N new
1729
+ # entries" is exactly accurate. On a re-install / upgrade path
1730
+ # with active sessions, `rows` also counts UPSERT replacements
1731
+ # (streaming-vs-final tiebreaker swaps), so the count is more
1732
+ # accurately "ingest activity" than "rows newly added" — but
1733
+ # we keep "new entries" because (a) it's still a useful signal
1734
+ # to the operator that the cache is alive, and (b) the dominant
1735
+ # case (first install) reads literally.
1726
1736
  out.append(f"✓ Synced session cache ({rows} new entries)")
1727
1737
  except Exception as exc:
1728
1738
  out.append(f"⚠ sync_cache during bootstrap failed: {exc}")
package/bin/_lib_jsonl.py CHANGED
@@ -59,14 +59,64 @@ class CodexEntry:
59
59
  source_path: str
60
60
 
61
61
 
62
+ def _entry_token_total(entry: "UsageEntry") -> int:
63
+ """Sum of the four billed token fields. Mirrors ccusage's
64
+ `usage_token_total` in rust/crates/ccusage/src/claude_loader.rs:516."""
65
+ u = entry.usage
66
+ return (
67
+ int(u.get("input_tokens", 0) or 0)
68
+ + int(u.get("output_tokens", 0) or 0)
69
+ + int(u.get("cache_creation_input_tokens", 0) or 0)
70
+ + int(u.get("cache_read_input_tokens", 0) or 0)
71
+ )
72
+
73
+
74
+ def _should_replace(
75
+ candidate: "UsageEntry", existing: "UsageEntry"
76
+ ) -> bool:
77
+ """Port of ccusage's `should_replace_deduped_entry` in
78
+ rust/crates/ccusage/src/claude_loader.rs:531. Higher token total wins;
79
+ on equal totals, the row with `speed` set (non-null) wins (the post-stream
80
+ finalization row carries `speed`; streaming intermediates don't).
81
+
82
+ The `usage.get("speed") is not None` check matches the SQL UPDATE WHERE
83
+ clause's `json_extract(..., '$.speed') IS NOT NULL` in `sync_cache`'s
84
+ INSERT … ON CONFLICT … DO UPDATE, keeping the direct-parse fallback and
85
+ cache-ingest paths in lockstep on the rare-but-possible "explicit JSON
86
+ null" payload.
87
+ """
88
+ c_total = _entry_token_total(candidate)
89
+ e_total = _entry_token_total(existing)
90
+ if c_total != e_total:
91
+ return c_total > e_total
92
+ return (candidate.usage.get("speed") is not None
93
+ and existing.usage.get("speed") is None)
94
+
95
+
62
96
  def _parse_usage_entries(
63
97
  jsonl_path: pathlib.Path,
64
98
  range_start: dt.datetime,
65
99
  range_end: dt.datetime,
66
- seen_hashes: set[str] | None = None,
100
+ *,
101
+ dedupe_map: "dict[str, UsageEntry]",
67
102
  ) -> list[UsageEntry]:
68
- """Parse assistant entries from a JSONL file within the given time range."""
69
- entries: list[UsageEntry] = []
103
+ """Parse one JSONL file's assistant entries within [range_start, range_end].
104
+
105
+ Dedup contract (matches ccusage's `push_deduped_entry`):
106
+ - Entries with non-null (msg_id, req_id) go into `dedupe_map`; if a key
107
+ already maps to an entry, replace iff `_should_replace(candidate, existing)`.
108
+ - Entries with null msg_id or null req_id (rare in modern Claude Code,
109
+ but possible on synthetic / legacy emissions) skip the dedup map and
110
+ land in a separate list — partial UNIQUE index on the cache mirrors
111
+ this behavior.
112
+ - `<synthetic>` model rows are dropped entirely (matches ccusage's
113
+ claude_loader.rs:454).
114
+
115
+ Caller is responsible for sorting the returned list by timestamp if
116
+ needed; `_collect_entries_direct` does this once across all files
117
+ after flattening `dedupe_map.values()`.
118
+ """
119
+ no_key_entries: list[UsageEntry] = []
70
120
  try:
71
121
  with open(jsonl_path, "r", encoding="utf-8", errors="replace") as fh:
72
122
  for line in fh:
@@ -96,6 +146,11 @@ def _parse_usage_entries(
96
146
  model = msg.get("model") or obj.get("model")
97
147
  if not isinstance(model, str) or not model.strip():
98
148
  continue
149
+ model = model.strip()
150
+ if model == "<synthetic>":
151
+ # Matches ccusage's claude_loader.rs:454 — synthetic
152
+ # placeholder rows carry no billable usage.
153
+ continue
99
154
 
100
155
  try:
101
156
  ts = dt.datetime.fromisoformat(
@@ -109,16 +164,8 @@ def _parse_usage_entries(
109
164
  if ts < range_start or ts > range_end:
110
165
  continue
111
166
 
112
- # Deduplicate by message.id + requestId (same as ccusage)
113
167
  msg_id = msg.get("id")
114
168
  req_id = obj.get("requestId")
115
- if msg_id is not None and req_id is not None:
116
- entry_hash = f"{msg_id}:{req_id}"
117
- if seen_hashes is not None:
118
- if entry_hash in seen_hashes:
119
- continue
120
- seen_hashes.add(entry_hash)
121
-
122
169
  cost_usd_raw = obj.get("costUSD")
123
170
  cost_usd = (
124
171
  float(cost_usd_raw)
@@ -126,16 +173,26 @@ def _parse_usage_entries(
126
173
  else None
127
174
  )
128
175
 
129
- entries.append(UsageEntry(
176
+ entry = UsageEntry(
130
177
  timestamp=ts,
131
- model=model.strip(),
178
+ model=model,
132
179
  usage=usage,
133
180
  cost_usd=cost_usd,
134
- ))
181
+ )
182
+
183
+ if msg_id is None or req_id is None:
184
+ no_key_entries.append(entry)
185
+ continue
186
+ key = f"{msg_id}:{req_id}"
187
+ existing = dedupe_map.get(key)
188
+ if existing is None or _should_replace(entry, existing):
189
+ dedupe_map[key] = entry
135
190
  except OSError as exc:
136
191
  _eprint(f"[cost] could not read {jsonl_path}: {exc}")
137
192
 
138
- return entries
193
+ # The function returns ONLY this file's no-key entries; the caller
194
+ # flattens `dedupe_map.values()` once at the end across all files.
195
+ return no_key_entries
139
196
 
140
197
 
141
198
  def _iter_jsonl_entries_with_offsets(fh):
@@ -185,6 +242,13 @@ def _iter_jsonl_entries_with_offsets(fh):
185
242
  model = msg.get("model") or obj.get("model")
186
243
  if not isinstance(model, str) or not model.strip():
187
244
  continue
245
+ model = model.strip()
246
+ if model == "<synthetic>":
247
+ # Matches ccusage's claude_loader.rs:454. Filtered at the
248
+ # iterator level so the cache ingest path can't accidentally
249
+ # store these rows even if a downstream loop forgets to
250
+ # double-check (see `sync_cache` in _cctally_cache.py).
251
+ continue
188
252
 
189
253
  try:
190
254
  ts = dt.datetime.fromisoformat(ts_raw.strip().replace("Z", "+00:00"))
@@ -202,7 +266,7 @@ def _iter_jsonl_entries_with_offsets(fh):
202
266
  offset,
203
267
  UsageEntry(
204
268
  timestamp=ts,
205
- model=model.strip(),
269
+ model=model,
206
270
  usage=usage,
207
271
  cost_usd=cost_usd,
208
272
  ),
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "cctally",
3
- "version": "1.11.1",
3
+ "version": "1.12.0",
4
4
  "description": "Claude Code usage tracker and local dashboard for Pro/Max subscription limits - weekly cost-per-percent trend, quota forecasts, threshold alerts. ccusage-compatible.",
5
5
  "homepage": "https://github.com/omrikais/cctally",
6
6
  "repository": {