cctally 1.7.0 → 1.7.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +7 -0
- package/bin/_cctally_alerts.py +231 -0
- package/bin/_cctally_cache.py +1432 -0
- package/bin/_cctally_config.py +560 -0
- package/bin/_cctally_dashboard.py +5218 -0
- package/bin/_cctally_db.py +1729 -0
- package/bin/_cctally_record.py +2120 -0
- package/bin/_cctally_refresh.py +812 -0
- package/bin/_cctally_release.py +751 -0
- package/bin/_cctally_setup.py +1571 -0
- package/bin/_cctally_sync_week.py +110 -0
- package/bin/_cctally_tui.py +4381 -0
- package/bin/_cctally_update.py +2132 -0
- package/bin/_lib_aggregators.py +712 -0
- package/bin/_lib_alerts_payload.py +194 -0
- package/bin/_lib_blocks.py +414 -0
- package/bin/_lib_diff_kernel.py +1618 -0
- package/bin/_lib_display_tz.py +361 -0
- package/bin/_lib_doctor.py +58 -0
- package/bin/_lib_five_hour.py +82 -0
- package/bin/_lib_jsonl.py +403 -0
- package/bin/_lib_pricing.py +520 -0
- package/bin/_lib_render.py +2785 -0
- package/bin/_lib_semver.py +105 -0
- package/bin/_lib_subscription_weeks.py +492 -0
- package/bin/cctally +11034 -35415
- package/package.json +24 -1
|
@@ -0,0 +1,712 @@
|
|
|
1
|
+
"""Daily / monthly / weekly / session aggregators for Claude + Codex.
|
|
2
|
+
|
|
3
|
+
Pure-fn layer (no I/O at import time): holds every helper that groups a
|
|
4
|
+
list of session entries into per-bucket or per-session records for the
|
|
5
|
+
`daily`, `monthly`, `weekly`, `session`, `codex-daily`, `codex-monthly`,
|
|
6
|
+
`codex-weekly`, and `codex-session` subcommands, plus the four
|
|
7
|
+
dataclasses they produce (`BucketUsage`, `CodexBucketUsage`,
|
|
8
|
+
`CodexSessionUsage`, `ClaudeSessionUsage`) and the Codex
|
|
9
|
+
session-path-parsing helper (`_session_path_parts`).
|
|
10
|
+
|
|
11
|
+
Sibling dependencies (loaded at module-load time via `_load_lib`):
|
|
12
|
+
- `_lib_jsonl.UsageEntry`, `_lib_jsonl.CodexEntry` — the dataclasses
|
|
13
|
+
the aggregators iterate over.
|
|
14
|
+
- `_lib_pricing._calculate_entry_cost`, `_calculate_codex_entry_cost`,
|
|
15
|
+
`_is_codex_fallback` — per-entry cost computation.
|
|
16
|
+
- `_lib_display_tz._resolve_tz` — IANA tz resolution for codex date
|
|
17
|
+
bucketing (Claude aggregators take a `ZoneInfo` directly).
|
|
18
|
+
- `_lib_subscription_weeks.SubWeek` — typing for `_aggregate_weekly`'s
|
|
19
|
+
`weeks` parameter.
|
|
20
|
+
|
|
21
|
+
bin/cctally back-references via `_cctally()` (spec §5.5 pattern, same as
|
|
22
|
+
`bin/_lib_subscription_weeks.py`):
|
|
23
|
+
- `CODEX_SESSIONS_DIR` — base path used by `_session_path_parts` for
|
|
24
|
+
upstream-compatible relative-path computation.
|
|
25
|
+
- `_decode_escaped_cwd` — Claude `project_path` fallback when
|
|
26
|
+
`session_files.project_path` is NULL.
- `parse_iso_datetime` — parses each `SubWeek` start/end timestamp in
`_aggregate_weekly`.
|
|
27
|
+
|
|
28
|
+
`_JoinedClaudeEntry` (the input type for `_aggregate_claude_sessions`)
|
|
29
|
+
is referenced only as a string annotation — no runtime import needed.
|
|
30
|
+
|
|
31
|
+
`bin/cctally` re-exports every public symbol below so the ~30 internal
|
|
32
|
+
call sites + SourceFileLoader-based tests
|
|
33
|
+
(`tests/test_lib_share`, `tests/test_dashboard_daily_panel`) resolve
|
|
34
|
+
unchanged.
|
|
35
|
+
|
|
36
|
+
Spec: docs/superpowers/specs/2026-05-13-bin-cctally-split-design.md
|
|
37
|
+
"""
|
|
38
|
+
from __future__ import annotations
|
|
39
|
+
|
|
40
|
+
import datetime as dt
|
|
41
|
+
import os
|
|
42
|
+
import pathlib
|
|
43
|
+
import sys
|
|
44
|
+
from dataclasses import dataclass
|
|
45
|
+
from typing import Any, Callable
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def _cctally():
|
|
49
|
+
"""Resolve the current `cctally` module at call-time (spec §5.5)."""
|
|
50
|
+
return sys.modules["cctally"]
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def _load_lib(name: str):
|
|
54
|
+
cached = sys.modules.get(name)
|
|
55
|
+
if cached is not None:
|
|
56
|
+
return cached
|
|
57
|
+
import importlib.util as _ilu
|
|
58
|
+
p = pathlib.Path(__file__).resolve().parent / f"{name}.py"
|
|
59
|
+
spec = _ilu.spec_from_file_location(name, p)
|
|
60
|
+
mod = _ilu.module_from_spec(spec)
|
|
61
|
+
sys.modules[name] = mod
|
|
62
|
+
spec.loader.exec_module(mod)
|
|
63
|
+
return mod
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
# Sibling modules are loaded here, at import time, through `_load_lib`;
# the names the aggregators need are then bound as module-level aliases.

# Entry dataclasses the aggregators iterate over.
_lib_jsonl = _load_lib("_lib_jsonl")
UsageEntry = _lib_jsonl.UsageEntry
CodexEntry = _lib_jsonl.CodexEntry

# Per-entry cost computation (Claude and Codex) plus the Codex
# fallback check used for the `isFallback` breakdown field.
_lib_pricing = _load_lib("_lib_pricing")
_calculate_entry_cost = _lib_pricing._calculate_entry_cost
_calculate_codex_entry_cost = _lib_pricing._calculate_codex_entry_cost
_is_codex_fallback = _lib_pricing._is_codex_fallback

# Timezone-name resolution used by the codex daily/monthly/weekly groupers.
_lib_display_tz = _load_lib("_lib_display_tz")
_resolve_tz = _lib_display_tz._resolve_tz

# `SubWeek` is used only in the annotation of `_aggregate_weekly`'s
# `weeks` parameter.
_lib_subscription_weeks = _load_lib("_lib_subscription_weeks")
SubWeek = _lib_subscription_weeks.SubWeek
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
@dataclass
class BucketUsage:
    """Aggregated usage for one time bucket.

    `bucket` holds the bucket identifier in a format chosen by the caller
    (e.g., "YYYY-MM-DD" for daily, "YYYY-MM" for monthly).

    Field order is part of the positional constructor signature.
    """
    bucket: str  # Bucket key exactly as produced by the caller's key function
    input_tokens: int
    output_tokens: int
    cache_creation_tokens: int
    cache_read_tokens: int
    # As filled by `_aggregate_buckets`: input + output + cache creation
    # + cache read.
    total_tokens: int
    cost_usd: float
    models: list[str]  # Distinct full model names seen (first-seen order)
    # One dict per model, sorted by cost desc. `_aggregate_buckets` emits
    # the keys modelName, inputTokens, outputTokens, cacheCreationTokens,
    # cacheReadTokens and cost.
    model_breakdowns: list[dict[str, Any]]
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def _aggregate_buckets(
|
|
101
|
+
entries: list[UsageEntry],
|
|
102
|
+
key_fn: Callable[[UsageEntry], str],
|
|
103
|
+
mode: str = "auto",
|
|
104
|
+
) -> list[BucketUsage]:
|
|
105
|
+
"""Group UsageEntry list into per-bucket records.
|
|
106
|
+
|
|
107
|
+
`key_fn(entry)` returns the bucket key (e.g. "2026-04-17" or "2026-04").
|
|
108
|
+
The returned list is sorted by bucket key ascending — callers reverse
|
|
109
|
+
for --order desc. Model breakdowns within each bucket are sorted by
|
|
110
|
+
descending cost, matching upstream ccusage.
|
|
111
|
+
"""
|
|
112
|
+
by_bucket: dict[str, dict[str, Any]] = {}
|
|
113
|
+
models_order: dict[str, list[str]] = {}
|
|
114
|
+
|
|
115
|
+
for entry in entries:
|
|
116
|
+
if entry.model == "<synthetic>":
|
|
117
|
+
continue
|
|
118
|
+
usage = entry.usage
|
|
119
|
+
display_model = f"{entry.model}-fast" if usage.get("speed") == "fast" else entry.model
|
|
120
|
+
key = key_fn(entry)
|
|
121
|
+
bucket = by_bucket.setdefault(key, {
|
|
122
|
+
"input": 0,
|
|
123
|
+
"output": 0,
|
|
124
|
+
"cache_create": 0,
|
|
125
|
+
"cache_read": 0,
|
|
126
|
+
"cost": 0.0,
|
|
127
|
+
"models": {},
|
|
128
|
+
})
|
|
129
|
+
order = models_order.setdefault(key, [])
|
|
130
|
+
|
|
131
|
+
inp = int(usage.get("input_tokens", 0) or 0)
|
|
132
|
+
out = int(usage.get("output_tokens", 0) or 0)
|
|
133
|
+
cc = int(usage.get("cache_creation_input_tokens", 0) or 0)
|
|
134
|
+
cr = int(usage.get("cache_read_input_tokens", 0) or 0)
|
|
135
|
+
cost = _calculate_entry_cost(
|
|
136
|
+
entry.model, usage, mode=mode, cost_usd=entry.cost_usd,
|
|
137
|
+
)
|
|
138
|
+
|
|
139
|
+
bucket["input"] += inp
|
|
140
|
+
bucket["output"] += out
|
|
141
|
+
bucket["cache_create"] += cc
|
|
142
|
+
bucket["cache_read"] += cr
|
|
143
|
+
bucket["cost"] += cost
|
|
144
|
+
|
|
145
|
+
model_bucket = bucket["models"].setdefault(display_model, {
|
|
146
|
+
"input": 0,
|
|
147
|
+
"output": 0,
|
|
148
|
+
"cache_create": 0,
|
|
149
|
+
"cache_read": 0,
|
|
150
|
+
"cost": 0.0,
|
|
151
|
+
})
|
|
152
|
+
model_bucket["input"] += inp
|
|
153
|
+
model_bucket["output"] += out
|
|
154
|
+
model_bucket["cache_create"] += cc
|
|
155
|
+
model_bucket["cache_read"] += cr
|
|
156
|
+
model_bucket["cost"] += cost
|
|
157
|
+
|
|
158
|
+
if display_model not in order:
|
|
159
|
+
order.append(display_model)
|
|
160
|
+
|
|
161
|
+
result: list[BucketUsage] = []
|
|
162
|
+
for key in sorted(by_bucket.keys()):
|
|
163
|
+
b = by_bucket[key]
|
|
164
|
+
model_breakdowns = [
|
|
165
|
+
{
|
|
166
|
+
"modelName": model,
|
|
167
|
+
"inputTokens": mb["input"],
|
|
168
|
+
"outputTokens": mb["output"],
|
|
169
|
+
"cacheCreationTokens": mb["cache_create"],
|
|
170
|
+
"cacheReadTokens": mb["cache_read"],
|
|
171
|
+
"cost": mb["cost"],
|
|
172
|
+
}
|
|
173
|
+
for model, mb in b["models"].items()
|
|
174
|
+
]
|
|
175
|
+
model_breakdowns.sort(key=lambda m: m["cost"], reverse=True)
|
|
176
|
+
total_tokens = b["input"] + b["output"] + b["cache_create"] + b["cache_read"]
|
|
177
|
+
result.append(BucketUsage(
|
|
178
|
+
bucket=key,
|
|
179
|
+
input_tokens=b["input"],
|
|
180
|
+
output_tokens=b["output"],
|
|
181
|
+
cache_creation_tokens=b["cache_create"],
|
|
182
|
+
cache_read_tokens=b["cache_read"],
|
|
183
|
+
total_tokens=total_tokens,
|
|
184
|
+
cost_usd=b["cost"],
|
|
185
|
+
models=models_order[key],
|
|
186
|
+
model_breakdowns=model_breakdowns,
|
|
187
|
+
))
|
|
188
|
+
return result
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
def _aggregate_daily(
    entries: list[UsageEntry],
    mode: str = "auto",
    *,
    tz: "Any | None" = None,
) -> list[BucketUsage]:
    """Bucket Claude usage entries by tz-localized calendar day (YYYY-MM-DD).

    Every entry timestamp is converted with ``astimezone(tz)`` before the
    date is formatted, so day boundaries follow the resolved display tz:
    ``tz=None`` means host-local time, an explicit ZoneInfo means that
    zone. Per spec Q5/F6 this is intentional — with `display.tz=utc` the
    daily buckets cut at UTC midnight even on a host in another zone.

    `mode` is forwarded to `_aggregate_buckets`.
    """

    def _day_key(entry):
        localized = entry.timestamp.astimezone(tz)
        return localized.strftime("%Y-%m-%d")

    return _aggregate_buckets(entries, key_fn=_day_key, mode=mode)
|
|
209
|
+
|
|
210
|
+
|
|
211
|
+
def _aggregate_monthly(
    entries: list[UsageEntry],
    mode: str = "auto",
    *,
    tz: "Any | None" = None,
) -> list[BucketUsage]:
    """Bucket Claude usage entries by tz-localized calendar month (YYYY-MM).

    Timestamps are converted with ``astimezone(tz)`` first, so the month
    boundary follows the same rule as the daily grouping: ``tz=None``
    means host-local time, an explicit zone means that zone.

    `mode` is forwarded to `_aggregate_buckets`.
    """

    def _month_key(entry):
        localized = entry.timestamp.astimezone(tz)
        return localized.strftime("%Y-%m")

    return _aggregate_buckets(entries, key_fn=_month_key, mode=mode)
|
|
226
|
+
|
|
227
|
+
|
|
228
|
+
def _aggregate_weekly(
    entries: list[UsageEntry],
    weeks: list[SubWeek],
    mode: str = "auto",
) -> list[BucketUsage]:
    """Bucket Claude usage entries into the subscription weeks in `weeks`.

    An entry belongs to a week when ``start <= timestamp < end``. Entries
    that fall inside no week are discarded here, before the hand-off to
    `_aggregate_buckets`, because that function cannot cope with a `None`
    key (it would store it and then fail when sorting the keys).

    Each returned `BucketUsage.bucket` is the week's
    `start_date.isoformat()`. Where weeks overlap (possible at Anthropic
    reset-day-drift boundaries — see `_compute_subscription_weeks`) the
    earliest of the adjacent overlapping weeks wins.

    The lookup bisects on week start times, so `weeks` must already be
    ordered by start (the producer, `_compute_subscription_weeks`, is
    expected to guarantee this).
    """
    import bisect

    to_datetime = _cctally().parse_iso_datetime

    # (start, end, bucket key) per week, parsed once up front.
    windows: list[tuple[dt.datetime, dt.datetime, str]] = [
        (
            to_datetime(week.start_ts, "week.start_ts"),
            to_datetime(week.end_ts, "week.end_ts"),
            week.start_date.isoformat(),
        )
        for week in weeks
    ]
    window_starts = [opens for opens, _closes, _label in windows]

    def _locate(stamp):
        """Return the bucket key of the week containing `stamp`, else None."""
        # Index of the last week that starts at or before `stamp`.
        pos = bisect.bisect_right(window_starts, stamp) - 1
        if pos < 0:
            return None
        # Step back through directly preceding weeks that also contain
        # `stamp`, so an overlap resolves to the earlier week. Without an
        # overlap the loop stops on its first check.
        while pos > 0:
            earlier_open, earlier_close, _ = windows[pos - 1]
            if not (earlier_open <= stamp < earlier_close):
                break
            pos -= 1
        opens, closes, label = windows[pos]
        return label if opens <= stamp < closes else None

    # Resolve every entry exactly once; remember the key by object
    # identity so the key function handed to `_aggregate_buckets` is a
    # plain dict lookup rather than a second search.
    kept: list[UsageEntry] = []
    label_by_identity: dict[int, str] = {}
    for item in entries:
        label = _locate(item.timestamp)
        if label is None:
            continue
        kept.append(item)
        label_by_identity[id(item)] = label

    return _aggregate_buckets(
        kept,
        key_fn=lambda item: label_by_identity[id(item)],
        mode=mode,
    )
|
|
297
|
+
|
|
298
|
+
|
|
299
|
+
@dataclass
class CodexBucketUsage:
    """Aggregated Codex usage for one time bucket (date or month).

    Field order is part of the positional constructor signature.
    """
    bucket: str  # Bucket key exactly as produced by the caller's key function
    input_tokens: int
    cached_input_tokens: int
    output_tokens: int
    reasoning_output_tokens: int
    # As filled by `_aggregate_codex_buckets`: input + output only.
    total_tokens: int
    cost_usd: float
    models: list[str]  # Distinct full model names (first-seen order)
    # One dict per model, sorted by cost desc; `_aggregate_codex_buckets`
    # also sets an `isFallback` flag on each.
    model_breakdowns: list[dict[str, Any]]
|
|
311
|
+
|
|
312
|
+
|
|
313
|
+
@dataclass
class CodexSessionUsage:
    """Aggregated Codex usage for one session.

    `session_id_path` is the upstream-compatible identifier: relative path
    under ~/.codex/sessions/ WITHOUT the .jsonl extension
    (e.g. "2025/12/25/rollout-..."). `session_file` is the basename without
    .jsonl. `directory` is the relative parent path. `session_id` is the
    inner UUID (from JSONL session_meta), retained for debug/display but
    not used as a grouping key.

    Field order is part of the positional constructor signature.
    """
    session_id: str  # Inner UUID; informational only
    session_id_path: str  # Grouping key used by `_aggregate_codex_sessions`
    session_file: str
    directory: str
    input_tokens: int
    cached_input_tokens: int
    output_tokens: int
    reasoning_output_tokens: int
    # As filled by `_aggregate_codex_sessions`: input + output only.
    total_tokens: int
    cost_usd: float
    models: list[str]  # Distinct model names, first-seen order
    model_breakdowns: list[dict[str, Any]]  # One dict per model, sorted by cost desc
    last_activity: dt.datetime  # Latest entry timestamp seen for the session
|
|
337
|
+
|
|
338
|
+
|
|
339
|
+
@dataclass
class ClaudeSessionUsage:
    """Aggregated Claude usage for one sessionId (may span multiple JSONL files).

    Field order is part of the positional constructor signature.
    """
    session_id: str
    project_path: str
    source_paths: list[str]  # Every JSONL path that contributed entries
    first_activity: dt.datetime  # Earliest entry timestamp in the session
    last_activity: dt.datetime  # Latest entry timestamp in the session
    input_tokens: int
    cache_creation_tokens: int
    cache_read_tokens: int
    output_tokens: int
    # As filled by `_aggregate_claude_sessions`: input + output only;
    # cache tokens are reported separately and not included.
    total_tokens: int
    cost_usd: float
    models: list[str]  # first-seen order
    model_breakdowns: list[dict[str, Any]]  # sorted by cost desc
|
|
355
|
+
|
|
356
|
+
|
|
357
|
+
def _aggregate_codex_buckets(
|
|
358
|
+
entries: list[CodexEntry],
|
|
359
|
+
key_fn: Callable[[CodexEntry], str],
|
|
360
|
+
) -> list[CodexBucketUsage]:
|
|
361
|
+
"""Group CodexEntry list into per-bucket records sorted by key ascending.
|
|
362
|
+
|
|
363
|
+
Model breakdowns within each bucket are sorted by descending cost —
|
|
364
|
+
matches upstream ccusage-codex.
|
|
365
|
+
"""
|
|
366
|
+
by_bucket: dict[str, dict[str, Any]] = {}
|
|
367
|
+
models_order: dict[str, list[str]] = {}
|
|
368
|
+
|
|
369
|
+
for entry in entries:
|
|
370
|
+
key = key_fn(entry)
|
|
371
|
+
bucket = by_bucket.setdefault(key, {
|
|
372
|
+
"input": 0, "cached_input": 0, "output": 0,
|
|
373
|
+
"reasoning": 0, "total": 0, "cost": 0.0, "models": {},
|
|
374
|
+
})
|
|
375
|
+
order = models_order.setdefault(key, [])
|
|
376
|
+
|
|
377
|
+
cost = _calculate_codex_entry_cost(
|
|
378
|
+
entry.model,
|
|
379
|
+
entry.input_tokens,
|
|
380
|
+
entry.cached_input_tokens,
|
|
381
|
+
entry.output_tokens,
|
|
382
|
+
entry.reasoning_output_tokens,
|
|
383
|
+
)
|
|
384
|
+
|
|
385
|
+
bucket["input"] += entry.input_tokens
|
|
386
|
+
bucket["cached_input"] += entry.cached_input_tokens
|
|
387
|
+
bucket["output"] += entry.output_tokens
|
|
388
|
+
bucket["reasoning"] += entry.reasoning_output_tokens
|
|
389
|
+
bucket["total"] += entry.total_tokens
|
|
390
|
+
bucket["cost"] += cost
|
|
391
|
+
|
|
392
|
+
mb = bucket["models"].setdefault(entry.model, {
|
|
393
|
+
"input": 0, "cached_input": 0, "output": 0,
|
|
394
|
+
"reasoning": 0, "cost": 0.0,
|
|
395
|
+
})
|
|
396
|
+
mb["input"] += entry.input_tokens
|
|
397
|
+
mb["cached_input"] += entry.cached_input_tokens
|
|
398
|
+
mb["output"] += entry.output_tokens
|
|
399
|
+
mb["reasoning"] += entry.reasoning_output_tokens
|
|
400
|
+
mb["cost"] += cost
|
|
401
|
+
|
|
402
|
+
if entry.model not in order:
|
|
403
|
+
order.append(entry.model)
|
|
404
|
+
|
|
405
|
+
result: list[CodexBucketUsage] = []
|
|
406
|
+
for key in sorted(by_bucket.keys()):
|
|
407
|
+
b = by_bucket[key]
|
|
408
|
+
model_breakdowns = [
|
|
409
|
+
{
|
|
410
|
+
"modelName": model,
|
|
411
|
+
"inputTokens": mb["input"],
|
|
412
|
+
"cachedInputTokens": mb["cached_input"],
|
|
413
|
+
"outputTokens": mb["output"],
|
|
414
|
+
"reasoningOutputTokens": mb["reasoning"],
|
|
415
|
+
"totalTokens": mb["input"] + mb["output"],
|
|
416
|
+
"cost": mb["cost"],
|
|
417
|
+
"isFallback": _is_codex_fallback(model),
|
|
418
|
+
}
|
|
419
|
+
for model, mb in b["models"].items()
|
|
420
|
+
]
|
|
421
|
+
model_breakdowns.sort(key=lambda m: m["cost"], reverse=True)
|
|
422
|
+
result.append(CodexBucketUsage(
|
|
423
|
+
bucket=key,
|
|
424
|
+
input_tokens=b["input"],
|
|
425
|
+
cached_input_tokens=b["cached_input"],
|
|
426
|
+
output_tokens=b["output"],
|
|
427
|
+
reasoning_output_tokens=b["reasoning"],
|
|
428
|
+
total_tokens=b["input"] + b["output"],
|
|
429
|
+
cost_usd=b["cost"],
|
|
430
|
+
models=models_order[key],
|
|
431
|
+
model_breakdowns=model_breakdowns,
|
|
432
|
+
))
|
|
433
|
+
return result
|
|
434
|
+
|
|
435
|
+
|
|
436
|
+
def _aggregate_codex_daily(
    entries: list[CodexEntry], *, tz_name: str | None = None,
) -> list[CodexBucketUsage]:
    """Bucket Codex entries by calendar day (YYYY-MM-DD).

    `tz_name` is resolved through `_resolve_tz`. When that yields a zone,
    days are cut in that zone; when it yields None, days are cut in the
    host's local zone.
    """
    zone = _resolve_tz(tz_name)

    def _day_key(entry):
        # astimezone(None) is the documented spelling of "convert to the
        # system local zone", so one call covers both cases.
        return entry.timestamp.astimezone(zone).strftime("%Y-%m-%d")

    return _aggregate_codex_buckets(entries, key_fn=_day_key)
|
|
446
|
+
|
|
447
|
+
|
|
448
|
+
def _aggregate_codex_monthly(
    entries: list[CodexEntry], *, tz_name: str | None = None,
) -> list[CodexBucketUsage]:
    """Bucket Codex entries by calendar month (YYYY-MM).

    `tz_name` is resolved through `_resolve_tz`. When that yields a zone,
    months are cut in that zone; when it yields None, months are cut in
    the host's local zone.
    """
    zone = _resolve_tz(tz_name)

    def _month_key(entry):
        # astimezone(None) is the documented spelling of "convert to the
        # system local zone", so one call covers both cases.
        return entry.timestamp.astimezone(zone).strftime("%Y-%m")

    return _aggregate_codex_buckets(entries, key_fn=_month_key)
|
|
458
|
+
|
|
459
|
+
|
|
460
|
+
def _aggregate_codex_weekly(
    entries: list[CodexEntry],
    tz_name: str | None,
    week_start_idx: int,
) -> list[CodexBucketUsage]:
    """Bucket Codex entries by calendar week.

    ``week_start_idx`` selects the first day of the week (0=Mon..6=Sun);
    the caller resolves it from config.json via ``get_week_start_name`` +
    ``WEEKDAY_MAP``. The bucket key is the ISO date of that first day,
    computed in the display timezone — the zone `_resolve_tz(tz_name)`
    yields, or the host's local zone when it yields None.
    """
    zone = _resolve_tz(tz_name)

    def _week_key(entry):
        # A None zone means host-local time; that fallback is intentional.
        day = entry.timestamp.astimezone(zone).date()
        # Days elapsed since the most recent configured week-start day.
        days_into_week = (day.weekday() - week_start_idx) % 7
        first_day = day - dt.timedelta(days=days_into_week)
        return first_day.isoformat()

    return _aggregate_codex_buckets(entries, key_fn=_week_key)
|
|
483
|
+
|
|
484
|
+
|
|
485
|
+
def _session_path_parts(source_path: str) -> tuple[str, str, str]:
    """Split a Codex session file path into its upstream-compatible parts.

    Returns:
        ``(session_id_path, session_file, directory)`` where
        session_id_path is the path relative to CODEX_SESSIONS_DIR with
        the file extension removed (e.g. "2025/12/25/rollout-..."),
        session_file is that path's final component, and directory is its
        parent.

    Three input shapes are handled, tried in this order:
      1. A path located under CODEX_SESSIONS_DIR (the runtime sync path).
      2. A bare-relative path beginning ".codex/sessions/..." — the form
         written by build-codex-fixtures.py so committed fixture cache.db
         files carry no maintainer absolute paths (public-mirror safe).
      3. Anything else, which is reduced to its basename.

    Note that the final suffix is removed whatever it is; in practice it
    is expected to be ".jsonl".
    """
    sessions_root = _cctally().CODEX_SESSIONS_DIR
    full = pathlib.Path(source_path)
    try:
        relative = full.relative_to(sessions_root)
    except ValueError:
        # Not under the sessions root. Inspect the components as a pure
        # POSIX path — fixture-emitted paths are always POSIX, and this
        # sidesteps Windows-style drive parsing on unusual inputs.
        posix_parts = pathlib.PurePosixPath(source_path).parts
        looks_like_fixture = (
            len(posix_parts) >= 3
            and posix_parts[:2] == (".codex", "sessions")
        )
        if looks_like_fixture:
            relative = pathlib.PurePosixPath(*posix_parts[2:])
        else:
            relative = pathlib.Path(full.name)

    without_ext = relative.with_suffix("")
    return str(without_ext), without_ext.name, str(without_ext.parent)
|
|
515
|
+
|
|
516
|
+
|
|
517
|
+
def _aggregate_codex_sessions(entries: list[CodexEntry]) -> list[CodexSessionUsage]:
|
|
518
|
+
"""Group by session file path (upstream-compatible).
|
|
519
|
+
|
|
520
|
+
Sessions are keyed by the full relative-path-without-.jsonl rather than
|
|
521
|
+
the inner UUID. Result is sorted by last_activity descending (most
|
|
522
|
+
recent first), matching upstream's default view.
|
|
523
|
+
|
|
524
|
+
Per-model breakdowns include `isFallback: bool` — true when the model is
|
|
525
|
+
absent from CODEX_MODEL_PRICING.
|
|
526
|
+
"""
|
|
527
|
+
by_session: dict[str, dict[str, Any]] = {}
|
|
528
|
+
for entry in entries:
|
|
529
|
+
id_path, file_name, directory = _session_path_parts(entry.source_path)
|
|
530
|
+
sess = by_session.setdefault(id_path, {
|
|
531
|
+
"session_id_uuid": entry.session_id,
|
|
532
|
+
"session_file": file_name,
|
|
533
|
+
"directory": directory,
|
|
534
|
+
"input": 0, "cached_input": 0, "output": 0, "reasoning": 0,
|
|
535
|
+
"cost": 0.0, "models": {}, "models_order": [],
|
|
536
|
+
"last": entry.timestamp,
|
|
537
|
+
})
|
|
538
|
+
cost = _calculate_codex_entry_cost(
|
|
539
|
+
entry.model, entry.input_tokens, entry.cached_input_tokens,
|
|
540
|
+
entry.output_tokens, entry.reasoning_output_tokens,
|
|
541
|
+
)
|
|
542
|
+
sess["input"] += entry.input_tokens
|
|
543
|
+
sess["cached_input"] += entry.cached_input_tokens
|
|
544
|
+
sess["output"] += entry.output_tokens
|
|
545
|
+
sess["reasoning"] += entry.reasoning_output_tokens
|
|
546
|
+
sess["cost"] += cost
|
|
547
|
+
|
|
548
|
+
mb = sess["models"].setdefault(entry.model, {
|
|
549
|
+
"input": 0, "cached_input": 0, "output": 0, "reasoning": 0, "cost": 0.0,
|
|
550
|
+
})
|
|
551
|
+
mb["input"] += entry.input_tokens
|
|
552
|
+
mb["cached_input"] += entry.cached_input_tokens
|
|
553
|
+
mb["output"] += entry.output_tokens
|
|
554
|
+
mb["reasoning"] += entry.reasoning_output_tokens
|
|
555
|
+
mb["cost"] += cost
|
|
556
|
+
|
|
557
|
+
if entry.model not in sess["models_order"]:
|
|
558
|
+
sess["models_order"].append(entry.model)
|
|
559
|
+
if entry.timestamp > sess["last"]:
|
|
560
|
+
sess["last"] = entry.timestamp
|
|
561
|
+
|
|
562
|
+
result: list[CodexSessionUsage] = []
|
|
563
|
+
for id_path, s in by_session.items():
|
|
564
|
+
model_breakdowns = [
|
|
565
|
+
{
|
|
566
|
+
"modelName": model,
|
|
567
|
+
"inputTokens": mb["input"],
|
|
568
|
+
"cachedInputTokens": mb["cached_input"],
|
|
569
|
+
"outputTokens": mb["output"],
|
|
570
|
+
"reasoningOutputTokens": mb["reasoning"],
|
|
571
|
+
"totalTokens": mb["input"] + mb["output"],
|
|
572
|
+
"cost": mb["cost"],
|
|
573
|
+
"isFallback": _is_codex_fallback(model),
|
|
574
|
+
}
|
|
575
|
+
for model, mb in s["models"].items()
|
|
576
|
+
]
|
|
577
|
+
model_breakdowns.sort(key=lambda m: m["cost"], reverse=True)
|
|
578
|
+
result.append(CodexSessionUsage(
|
|
579
|
+
session_id=s["session_id_uuid"],
|
|
580
|
+
session_id_path=id_path,
|
|
581
|
+
session_file=s["session_file"],
|
|
582
|
+
directory=s["directory"],
|
|
583
|
+
input_tokens=s["input"],
|
|
584
|
+
cached_input_tokens=s["cached_input"],
|
|
585
|
+
output_tokens=s["output"],
|
|
586
|
+
reasoning_output_tokens=s["reasoning"],
|
|
587
|
+
total_tokens=s["input"] + s["output"], # derived, matches upstream
|
|
588
|
+
cost_usd=s["cost"],
|
|
589
|
+
models=list(s["models_order"]),
|
|
590
|
+
model_breakdowns=model_breakdowns,
|
|
591
|
+
last_activity=s["last"],
|
|
592
|
+
))
|
|
593
|
+
result.sort(key=lambda x: x.last_activity, reverse=True)
|
|
594
|
+
return result
|
|
595
|
+
|
|
596
|
+
|
|
597
|
+
def _aggregate_claude_sessions(
    entries: list["_JoinedClaudeEntry"],
) -> list[ClaudeSessionUsage]:
    """Group entries by session_id, collapsing resumed-across-files sessions.

    Args:
        entries: Joined Claude entries. Entries whose model is
            "<synthetic>" are ignored entirely. An entry with
            ``session_id=None`` is grouped under its source file's
            basename without extension, and counted towards a single
            warning printed to stderr after the pass.

    Returns:
        One `ClaudeSessionUsage` per session, ordered by `last_activity`
        descending; the caller reverses for --order asc.

    Details:
        - Cost is computed fresh per entry via `_calculate_entry_cost`.
        - `project_path` starts as the first entry's `project_path`, or,
          when that is empty, `_decode_escaped_cwd` applied to the name of
          the source file's parent directory. It is then overwritten by
          any entry at least as recent as the latest one seen so far that
          carries a non-empty `project_path`, so a session resumed from a
          different cwd ends up with its most recent project.
        - `total_tokens` is input + output only; cache tokens are reported
          in their own fields.
    """
    _decode_escaped_cwd = _cctally()._decode_escaped_cwd
    by_session: dict[str, dict[str, Any]] = {}
    warn_count = 0

    for entry in entries:
        # Skip synthetic entries (Claude Code internal markers, not real
        # model calls), mirroring `_aggregate_buckets`. This must happen
        # before the session_id fallback so synthetic entries don't
        # inflate warn_count either.
        if entry.model == "<synthetic>":
            continue

        sid = entry.session_id
        if sid is None:
            sid = os.path.splitext(os.path.basename(entry.source_path))[0]
            warn_count += 1

        ts = entry.timestamp

        # Build per-session state only the first time a session is seen.
        # (Passing it as a `setdefault` default would evaluate it — and
        # call `_decode_escaped_cwd` — for every single entry.)
        sess = by_session.get(sid)
        if sess is None:
            sess = by_session[sid] = {
                "project_path": entry.project_path or _decode_escaped_cwd(
                    os.path.basename(os.path.dirname(entry.source_path))
                ),
                "source_paths": set(),
                "first": ts,
                "last": ts,
                "input": 0, "cache_create": 0, "cache_read": 0, "output": 0,
                "cost": 0.0,
                # Insertion order of this dict is the first-seen model order.
                "models": {},
            }

        sess["source_paths"].add(entry.source_path)
        if ts < sess["first"]:
            sess["first"] = ts
        # Ties count as "latest" for the project_path refresh, so decide
        # that before `last` is advanced.
        is_latest = ts >= sess["last"]
        if ts > sess["last"]:
            sess["last"] = ts
        if is_latest and entry.project_path:
            sess["project_path"] = entry.project_path

        usage = {
            "input_tokens": entry.input_tokens,
            "output_tokens": entry.output_tokens,
            "cache_creation_input_tokens": entry.cache_creation_tokens,
            "cache_read_input_tokens": entry.cache_read_tokens,
        }
        cost = _calculate_entry_cost(entry.model, usage)

        mb = sess["models"].setdefault(entry.model, {
            "model": entry.model,
            "input": 0, "cache_create": 0, "cache_read": 0, "output": 0,
            "cost": 0.0,
        })
        for target in (sess, mb):
            target["input"] += entry.input_tokens
            target["cache_create"] += entry.cache_creation_tokens
            target["cache_read"] += entry.cache_read_tokens
            target["output"] += entry.output_tokens
            target["cost"] += cost

    if warn_count:
        print(
            f"Warning: {warn_count} entries lacked session_files rows "
            f"(cache may be catching up).",
            file=sys.stderr,
        )

    # Materialize and sort.
    results: list[ClaudeSessionUsage] = []
    for sid, sess in by_session.items():
        breakdowns = sorted(
            sess["models"].values(),
            key=lambda mb: -mb["cost"],
        )
        results.append(ClaudeSessionUsage(
            session_id=sid,
            project_path=sess["project_path"],
            source_paths=sorted(sess["source_paths"]),
            first_activity=sess["first"],
            last_activity=sess["last"],
            input_tokens=sess["input"],
            cache_creation_tokens=sess["cache_create"],
            cache_read_tokens=sess["cache_read"],
            output_tokens=sess["output"],
            # Spec A2.8: Total Tokens = input + output only; cache tokens
            # are shown separately but not summed — parallels
            # `codex-session`.
            total_tokens=sess["input"] + sess["output"],
            cost_usd=sess["cost"],
            models=list(sess["models"]),
            model_breakdowns=breakdowns,
        ))
    results.sort(key=lambda s: s.last_activity, reverse=True)
    return results
|