cctally 1.7.0 → 1.7.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +27 -0
- package/bin/_cctally_alerts.py +231 -0
- package/bin/_cctally_cache.py +1432 -0
- package/bin/_cctally_config.py +560 -0
- package/bin/_cctally_dashboard.py +5403 -0
- package/bin/_cctally_db.py +1837 -0
- package/bin/_cctally_record.py +2305 -0
- package/bin/_cctally_refresh.py +812 -0
- package/bin/_cctally_release.py +751 -0
- package/bin/_cctally_setup.py +1571 -0
- package/bin/_cctally_sync_week.py +110 -0
- package/bin/_cctally_tui.py +4487 -0
- package/bin/_cctally_update.py +2132 -0
- package/bin/_lib_aggregators.py +712 -0
- package/bin/_lib_alerts_payload.py +194 -0
- package/bin/_lib_blocks.py +441 -0
- package/bin/_lib_diff_kernel.py +1618 -0
- package/bin/_lib_display_tz.py +361 -0
- package/bin/_lib_doctor.py +137 -0
- package/bin/_lib_five_hour.py +82 -0
- package/bin/_lib_jsonl.py +403 -0
- package/bin/_lib_pricing.py +520 -0
- package/bin/_lib_render.py +2785 -0
- package/bin/_lib_semver.py +105 -0
- package/bin/_lib_subscription_weeks.py +492 -0
- package/bin/cctally +11694 -35448
- package/package.json +24 -1
|
@@ -0,0 +1,403 @@
|
|
|
1
|
+
"""JSONL entry parsing for Claude + Codex session files.
|
|
2
|
+
|
|
3
|
+
Pure-fn layer (no I/O at import time): holds the two streaming readers
|
|
4
|
+
that delta-resume Claude `~/.claude/projects/**/*.jsonl` and Codex
|
|
5
|
+
`~/.codex/sessions/**/*.jsonl` files, the bulk parser that does
|
|
6
|
+
range-filtered + msg-id/req-id-dedup'd reads (the legacy entry point
|
|
7
|
+
preserved for paths that don't go through `cache.db`), and the
|
|
8
|
+
dataclasses they produce (`UsageEntry`, `CodexEntry`) + the mutable
|
|
9
|
+
cross-call tracker (`_CodexIterState`).
|
|
10
|
+
|
|
11
|
+
`bin/cctally` re-exports every public symbol below so the ~50 internal
|
|
12
|
+
call sites + SourceFileLoader-based tests
|
|
13
|
+
(`tests/test_dashboard_api_block`, `tests/test_blocks_recorded_anchor`,
|
|
14
|
+
`bin/build-codex-fixtures.py`) resolve unchanged. Zero call-time
|
|
15
|
+
back-references to `bin/cctally`: this module is a pure leaf in the
|
|
16
|
+
sibling graph. The only cross-module helper used (`eprint`) is
|
|
17
|
+
duplicated as a private `_eprint` per the split design's §5.3 contract.
|
|
18
|
+
|
|
19
|
+
Spec: docs/superpowers/specs/2026-05-13-bin-cctally-split-design.md
|
|
20
|
+
"""
|
|
21
|
+
from __future__ import annotations
|
|
22
|
+
|
|
23
|
+
import datetime as dt
|
|
24
|
+
import json
|
|
25
|
+
import pathlib
|
|
26
|
+
import re
|
|
27
|
+
import sys
|
|
28
|
+
from dataclasses import dataclass
|
|
29
|
+
from typing import Any
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def _eprint(*args: Any) -> None:
    """Write *args* to standard error, formatted exactly as ``print`` does.

    Module-private copy of the main script's ``eprint`` helper, kept here so
    this module does not have to import anything from its siblings.
    """
    err_stream = sys.stderr
    print(*args, file=err_stream)
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
@dataclass
class UsageEntry:
    """One assistant usage record parsed from a Claude session JSONL line."""

    # When the record was emitted. The parsers in this module always store a
    # timezone-aware value (naive timestamps are interpreted as UTC).
    timestamp: dt.datetime
    # Model name with surrounding whitespace removed.
    model: str
    # The record's ``usage`` mapping, passed through unmodified.
    usage: dict[str, Any]
    # The record's ``costUSD`` value as a float, or None when it is absent.
    cost_usd: float | None
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
@dataclass
class CodexEntry:
    """A single billable Codex `token_count` event.

    The fields mirror the columns of codex_session_entries. Token counts are
    taken from the event's `last_token_usage` (the per-turn delta), never from
    the cumulative totals.
    """

    # Event timestamp (timezone-aware; naive inputs are interpreted as UTC).
    timestamp: dt.datetime
    # Session identifier: from `session_meta`, else the rollout filename UUID.
    session_id: str
    # Model from the latest `turn_context`, or "unknown" when none was seen.
    model: str
    # Per-turn token counts; missing or non-numeric values are stored as 0.
    input_tokens: int
    cached_input_tokens: int
    output_tokens: int
    reasoning_output_tokens: int
    total_tokens: int
    # Path string of the JSONL file the event was read from.
    source_path: str
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def _parse_usage_entries(
    jsonl_path: pathlib.Path,
    range_start: dt.datetime,
    range_end: dt.datetime,
    seen_hashes: set[str] | None = None,
) -> list[UsageEntry]:
    """Parse assistant usage entries from a JSONL file within a time range.

    Each non-blank line is decoded as JSON. A line contributes an entry only
    when it is a JSON object with ``type == "assistant"``, a non-empty string
    ``timestamp``, a dict ``usage`` and a non-empty string ``model``.
    ``usage`` and ``model`` are read from the nested ``message`` object when
    that is a dict, otherwise from the top-level record; ``model`` also falls
    back to the top level when the nested value is empty.

    Args:
        jsonl_path: File to read. Decoded as UTF-8 with undecodable bytes
            replaced rather than raising.
        range_start: Inclusive lower bound. Must be timezone-aware: parsed
            timestamps are always aware (naive ones are taken as UTC), and
            comparing aware with naive datetimes raises ``TypeError``.
        range_end: Inclusive upper bound; same awareness requirement.
        seen_hashes: Optional set of ``"<message id>:<requestId>"`` keys.
            When given, a record whose key is already present is skipped and
            new keys are added in place, so one set can be shared across
            several files. Records lacking either id are never deduplicated.

    Returns:
        The matching entries in file order. If the file cannot be read, a
        message is printed to stderr and whatever was parsed before the error
        (possibly nothing) is returned.
    """
    entries: list[UsageEntry] = []
    try:
        with open(jsonl_path, "r", encoding="utf-8", errors="replace") as fh:
            for raw_line in fh:
                line = raw_line.strip()
                if not line:
                    continue
                try:
                    obj = json.loads(line)
                except json.JSONDecodeError:
                    continue

                # A line can be valid JSON without being an object (`[]`,
                # `null`, a bare number or string). Such values have no
                # `.get`, so skip them instead of aborting the whole parse.
                if not isinstance(obj, dict):
                    continue

                if obj.get("type") != "assistant":
                    continue

                ts_raw = obj.get("timestamp")
                if not isinstance(ts_raw, str) or not ts_raw.strip():
                    continue

                msg = obj.get("message")
                if not isinstance(msg, dict):
                    # Flat record layout: usage/model/id live at the top level.
                    msg = obj

                usage = msg.get("usage")
                if not isinstance(usage, dict):
                    continue

                model = msg.get("model") or obj.get("model")
                if not isinstance(model, str) or not model.strip():
                    continue

                try:
                    # Older `fromisoformat` versions reject a trailing "Z".
                    ts = dt.datetime.fromisoformat(
                        ts_raw.strip().replace("Z", "+00:00")
                    )
                except ValueError:
                    continue
                if ts.tzinfo is None:
                    ts = ts.replace(tzinfo=dt.timezone.utc)

                if ts < range_start or ts > range_end:
                    continue

                # Deduplicate by message.id + requestId (same as ccusage).
                msg_id = msg.get("id")
                req_id = obj.get("requestId")
                if (
                    seen_hashes is not None
                    and msg_id is not None
                    and req_id is not None
                ):
                    entry_hash = f"{msg_id}:{req_id}"
                    if entry_hash in seen_hashes:
                        continue
                    seen_hashes.add(entry_hash)

                # A malformed costUSD (non-numeric string, list, integer too
                # large for a float) is treated as "no recorded cost" rather
                # than crashing the parse.
                cost_usd_raw = obj.get("costUSD")
                try:
                    cost_usd = (
                        float(cost_usd_raw) if cost_usd_raw is not None else None
                    )
                except (TypeError, ValueError, OverflowError):
                    cost_usd = None

                entries.append(
                    UsageEntry(
                        timestamp=ts,
                        model=model.strip(),
                        usage=usage,
                        cost_usd=cost_usd,
                    )
                )
    except OSError as exc:
        _eprint(f"[cost] could not read {jsonl_path}: {exc}")

    return entries
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
def _iter_jsonl_entries_with_offsets(fh):
    """Yield ``(byte_offset, UsageEntry, msg_id, req_id)`` per assistant entry.

    Reading starts at ``fh``'s current position. ``byte_offset`` is the value
    of ``fh.tell()`` taken just before the entry's line was read.

    ``readline()`` + ``tell()`` is used instead of ``for line in fh`` so the
    offsets are accurate for resume-from-offset after partial ingests. Blank
    lines, malformed JSON, JSON values that are not objects, and
    non-assistant records are skipped, but the position still advances past
    them so they are never re-read. Range filtering is intentionally NOT done
    here; filters are applied at query time by iter_entries().

    A final line without a trailing newline is treated as a write in
    progress: the handle is rewound to the start of that line and iteration
    stops, so the next sync re-reads it once it is complete.

    Args:
        fh: Seekable handle yielding ``str`` lines (text mode); the newline
            check compares against ``"\\n"``.

    Yields:
        Tuples of (offset, entry, message id or None, requestId or None).
    """
    while True:
        offset = fh.tell()
        line = fh.readline()
        if not line:
            return
        if not line.endswith("\n"):
            # Partial tail line: the writer is mid-flight. Rewind so the next
            # sync re-reads this line once the newline is in place. Without
            # this, sync_cache would store fh.tell() (past the partial) as
            # last_byte_offset and permanently skip the entry.
            fh.seek(offset)
            return

        stripped = line.strip()
        if not stripped:
            continue
        try:
            obj = json.loads(stripped)
        except json.JSONDecodeError:
            continue

        # Valid JSON that is not an object (`[]`, `null`, a bare scalar) has
        # no `.get`; skip it rather than letting AttributeError kill the
        # generator halfway through a file.
        if not isinstance(obj, dict):
            continue
        if obj.get("type") != "assistant":
            continue

        ts_raw = obj.get("timestamp")
        if not isinstance(ts_raw, str) or not ts_raw.strip():
            continue

        msg = obj.get("message")
        if not isinstance(msg, dict):
            # Flat record layout: usage/model/id live at the top level.
            msg = obj

        usage = msg.get("usage")
        if not isinstance(usage, dict):
            continue

        model = msg.get("model") or obj.get("model")
        if not isinstance(model, str) or not model.strip():
            continue

        try:
            # Older `fromisoformat` versions reject a trailing "Z".
            ts = dt.datetime.fromisoformat(ts_raw.strip().replace("Z", "+00:00"))
        except ValueError:
            continue
        if ts.tzinfo is None:
            ts = ts.replace(tzinfo=dt.timezone.utc)

        msg_id = msg.get("id")
        req_id = obj.get("requestId")

        # A malformed costUSD (non-numeric string, list, integer too large
        # for a float) is treated as "no recorded cost" instead of raising.
        cost_usd_raw = obj.get("costUSD")
        try:
            cost_usd = float(cost_usd_raw) if cost_usd_raw is not None else None
        except (TypeError, ValueError, OverflowError):
            cost_usd = None

        yield (
            offset,
            UsageEntry(
                timestamp=ts,
                model=model.strip(),
                usage=usage,
                cost_usd=cost_usd,
            ),
            msg_id,
            req_id,
        )
|
|
212
|
+
|
|
213
|
+
|
|
214
|
+
# Matches the tail of a Codex rollout filename of the form
# `rollout-YYYY-MM-DDTHH-MM-SS-<id>.jsonl` and captures `<id>` (36 characters
# drawn from hex digits and dashes) as group 1. Used with `.search()` on a
# path string to derive a fallback session id when no `session_meta` record
# has been seen.
_CODEX_FILENAME_UUID_RE = re.compile(
    r"rollout-\d{4}-\d{2}-\d{2}T\d{2}-\d{2}-\d{2}-([0-9a-fA-F-]{36})\.jsonl$"
)
|
|
217
|
+
|
|
218
|
+
|
|
219
|
+
@dataclass
class _CodexIterState:
    """Mutable per-file tracker shared with callers of
    `_iter_codex_jsonl_entries_with_offsets`.

    The iterator writes the most recent session_id/model into this object as
    it streams, so its terminal state stays visible even when the delta
    window ends on a `session_meta` or `turn_context` event that is not
    followed by a yielded `token_count`. Callers seed it with
    previously-persisted values and read it back once the iterator drains.
    """

    # Latest session id taken from a `session_meta` record (None until seen).
    session_id: str | None = None
    # Latest model taken from a `turn_context` record (None until seen).
    model: str | None = None
    # NOTE(review): the iterator in this file neither reads nor writes this
    # field; presumably callers maintain it themselves. Confirm.
    total_tokens: int = 0
|
|
231
|
+
|
|
232
|
+
|
|
233
|
+
def _iter_codex_jsonl_entries_with_offsets(
    fh,
    path_str: str,
    *,
    initial_session_id: str | None = None,
    initial_model: str | None = None,
    initial_total_tokens: int = 0,
    state: _CodexIterState | None = None,
):
    """Yield (line_offset, CodexEntry) for each billable `token_count` event.

    Reading starts at ``fh``'s current position; ``line_offset`` is
    ``fh.tell()`` taken just before the event's line was read.

    Per-file state (session_id, model) is maintained as records stream by.
    Callers performing a delta resume from a non-zero byte offset should pass
    the previously-observed values as ``initial_session_id`` /
    ``initial_model`` so attribution stays correct even if the new byte range
    contains no fresh `session_meta` / `turn_context` record.

    If ``state`` is supplied it is updated in place on every `session_meta`
    / `turn_context` record, whether or not a later `token_count` yields.
    This lets callers observe the iterator's terminal state even when the
    delta window ends on a metadata record; otherwise a stale model could be
    persisted and the first post-resume `token_count` mis-attributed. The
    ``initial_*`` kwargs only fill fields of ``state`` that are still None.

    Skipped without yielding: blank lines, malformed JSON, JSON values that
    are not objects, records other than `event_msg`/`token_count`, events
    whose ``payload.info`` is not a dict (rate-limit-only events), events
    without a dict ``last_token_usage``, events with an unusable timestamp,
    and events for which no session id can be determined. When no
    `session_meta` has been seen, the session id falls back to the UUID in
    the rollout filename, with a one-shot warning on stderr.

    Codex CLI emits multiple `token_count` events per completed turn (UI /
    turn_context updates re-emit the same `last_token_usage` while the
    cumulative ``info.total_token_usage.total_tokens`` stays flat). To avoid
    double counting, an event is yielded only when its cumulative total is
    strictly greater than the last counted one. Callers doing delta resumes
    should pass the last persisted cumulative as ``initial_total_tokens``.
    If ``total_token_usage`` is missing or not a dict (older Codex builds),
    the event is yielded unconditionally, preserving legacy behavior.

    ``readline()`` + ``tell()`` is used rather than ``for line in fh`` so
    byte offsets are accurate for resume-from-offset. A final line without a
    trailing newline triggers a seek back to its start, so the next sync
    re-reads it once the newline is flushed.

    Args:
        fh: Seekable handle yielding ``str`` lines (text mode).
        path_str: Path of the file; used for the filename-UUID fallback, the
            warning text, and ``CodexEntry.source_path``.
        initial_session_id: Session id persisted by a previous run, if any.
        initial_model: Model persisted by a previous run, if any.
        initial_total_tokens: Cumulative token watermark from a previous run.
        state: Optional tracker to update in place; a private one is created
            when omitted.
    """

    def _as_int(value: Any) -> int:
        """Coerce a JSON value to int; missing or invalid values become 0."""
        try:
            return int(value or 0)
        except (TypeError, ValueError, OverflowError):
            # OverflowError covers JSON `Infinity`, which decodes to a float.
            return 0

    if state is None:
        state = _CodexIterState()
    # Seed the tracker from the kwargs. Kwargs apply only where the
    # caller-supplied state has no value yet: this preserves the contract
    # for callers that pass kwargs without a state object, while honoring a
    # pre-populated state.
    if state.session_id is None and initial_session_id is not None:
        state.session_id = initial_session_id
    if state.model is None and initial_model is not None:
        state.model = initial_model
    # NOTE(review): `state.total_tokens` is neither consulted nor updated
    # here; the watermark is tracked only in this local.
    last_total_tokens: int = int(initial_total_tokens or 0)

    # Suppress the filename-UUID fallback warning when a session id is
    # already seeded (delta resume path). Otherwise every resume into a
    # slice of the file that does not re-observe session_meta would warn
    # even though attribution is correct.
    filename_session_id_warned = state.session_id is not None
    filename_uuid_match = _CODEX_FILENAME_UUID_RE.search(path_str)
    filename_uuid = filename_uuid_match.group(1) if filename_uuid_match else None

    while True:
        offset = fh.tell()
        line = fh.readline()
        if not line:
            return
        if not line.endswith("\n"):
            # Partial tail line: rewind so the next sync re-reads it.
            fh.seek(offset)
            return

        stripped = line.strip()
        if not stripped:
            continue
        try:
            obj = json.loads(stripped)
        except json.JSONDecodeError:
            continue

        # Valid JSON that is not an object (`[]`, `null`, a bare scalar) has
        # no `.get`; skip it instead of raising AttributeError mid-stream.
        if not isinstance(obj, dict):
            continue

        rtype = obj.get("type")
        payload = obj.get("payload")
        if not isinstance(payload, dict):
            payload = {}

        if rtype == "session_meta":
            sid = payload.get("id")
            if isinstance(sid, str) and sid:
                state.session_id = sid
            continue

        if rtype == "turn_context":
            m = payload.get("model")
            if isinstance(m, str) and m.strip():
                state.model = m.strip()
            continue

        if rtype != "event_msg" or payload.get("type") != "token_count":
            continue

        info = payload.get("info")
        if not isinstance(info, dict):
            continue
        ltu = info.get("last_token_usage")
        if not isinstance(ltu, dict):
            continue

        # Dedupe re-emitted token_count events: only yield when the
        # cumulative total strictly advances. `cumulative` stays None when
        # `total_token_usage` is missing or not a dict (older Codex builds),
        # in which case the guard is skipped and the event is yielded.
        cumulative: int | None = None
        ttu = info.get("total_token_usage")
        if isinstance(ttu, dict):
            cumulative = _as_int(ttu.get("total_tokens"))
            if cumulative <= last_total_tokens:
                continue

        ts_raw = obj.get("timestamp")
        if not isinstance(ts_raw, str) or not ts_raw.strip():
            continue
        try:
            # Older `fromisoformat` versions reject a trailing "Z".
            ts = dt.datetime.fromisoformat(ts_raw.strip().replace("Z", "+00:00"))
        except ValueError:
            continue
        if ts.tzinfo is None:
            ts = ts.replace(tzinfo=dt.timezone.utc)

        session_id = state.session_id
        if session_id is None:
            session_id = filename_uuid
            if session_id is not None and not filename_session_id_warned:
                _eprint(
                    f"[codex] session_meta not seen in {path_str}; "
                    f"falling back to filename UUID {session_id}"
                )
                filename_session_id_warned = True
        if session_id is None:
            # No session_meta and no parseable filename UUID: skip the row.
            continue

        model = state.model or "unknown"

        yield (
            offset,
            CodexEntry(
                timestamp=ts,
                session_id=session_id,
                model=model,
                input_tokens=_as_int(ltu.get("input_tokens")),
                cached_input_tokens=_as_int(ltu.get("cached_input_tokens")),
                output_tokens=_as_int(ltu.get("output_tokens")),
                reasoning_output_tokens=_as_int(ltu.get("reasoning_output_tokens")),
                total_tokens=_as_int(ltu.get("total_tokens")),
                source_path=path_str,
            ),
        )
        # Advance the cumulative watermark only after a successful yield so
        # later events keep deduping against the last counted turn.
        if cumulative is not None:
            last_total_tokens = cumulative
|