cctally 1.7.0 → 1.7.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,712 @@
1
+ """Daily / monthly / weekly / session aggregators for Claude + Codex.
2
+
3
+ Pure-fn layer (no I/O at import time): holds every helper that groups a
4
+ list of session entries into per-bucket or per-session records for the
5
+ `daily`, `monthly`, `weekly`, `session`, `codex-daily`, `codex-monthly`,
6
+ `codex-weekly`, and `codex-session` subcommands, plus the four
7
+ dataclasses they produce (`BucketUsage`, `CodexBucketUsage`,
8
+ `CodexSessionUsage`, `ClaudeSessionUsage`) and the Codex
9
+ session-path-parsing helper (`_session_path_parts`).
10
+
11
+ Sibling dependencies (loaded at module-load time via `_load_lib`):
12
+ - `_lib_jsonl.UsageEntry`, `_lib_jsonl.CodexEntry` — the dataclasses
13
+ the aggregators iterate over.
14
+ - `_lib_pricing._calculate_entry_cost`, `_calculate_codex_entry_cost`,
15
+ `_is_codex_fallback` — per-entry cost computation.
16
+ - `_lib_display_tz._resolve_tz` — IANA tz resolution for codex date
17
+ bucketing (Claude aggregators take a `ZoneInfo` directly).
18
+ - `_lib_subscription_weeks.SubWeek` — typing for `_aggregate_weekly`'s
19
+ `weeks` parameter.
20
+
21
+ bin/cctally back-references via `_cctally()` (spec §5.5 pattern, same as
22
+ `bin/_lib_subscription_weeks.py`):
23
+ - `CODEX_SESSIONS_DIR` — base path used by `_session_path_parts` for
24
+ upstream-compatible relative-path computation.
25
+ - `_decode_escaped_cwd` — Claude `project_path` fallback when
26
+ `session_files.project_path` is NULL.
27
+
28
+ `_JoinedClaudeEntry` (the input type for `_aggregate_claude_sessions`)
29
+ is referenced only as a string annotation — no runtime import needed.
30
+
31
+ `bin/cctally` re-exports every public symbol below so the ~30 internal
32
+ call sites + SourceFileLoader-based tests
33
+ (`tests/test_lib_share`, `tests/test_dashboard_daily_panel`) resolve
34
+ unchanged.
35
+
36
+ Spec: docs/superpowers/specs/2026-05-13-bin-cctally-split-design.md
37
+ """
38
+ from __future__ import annotations
39
+
40
+ import datetime as dt
41
+ import os
42
+ import pathlib
43
+ import sys
44
+ from dataclasses import dataclass
45
+ from typing import Any, Callable
46
+
47
+
48
def _cctally():
    """Look up the live `cctally` module on every call (spec §5.5).

    The lookup goes through `sys.modules` at call time instead of a
    module-level import, so whichever module object is registered under
    the name "cctally" when the caller runs is the one returned. Raises
    `KeyError` if nothing is registered under that name.
    """
    registry = sys.modules
    return registry["cctally"]
51
+
52
+
53
def _load_lib(name: str):
    """Load the sibling module `<name>.py` from this file's directory.

    The module is registered in `sys.modules` under `name`, so repeated
    calls (from this file or any other loader using the same name) return
    the same module object.

    Args:
        name: Bare module name; the file `<name>.py` is looked up next to
            this source file.

    Returns:
        The already-cached module if `sys.modules` has one, otherwise the
        freshly executed module.

    Raises:
        ImportError: if no import spec/loader can be built for the path.
        Exception: whatever executing the module raises (including
            `FileNotFoundError` when the file is missing). In that case the
            partially initialised module is removed from `sys.modules`
            again so a later call retries instead of returning a broken
            module from the cache.
    """
    cached = sys.modules.get(name)
    if cached is not None:
        return cached
    import importlib.util as _ilu
    path = pathlib.Path(__file__).resolve().parent / f"{name}.py"
    spec = _ilu.spec_from_file_location(name, path)
    if spec is None or spec.loader is None:
        raise ImportError(
            f"cannot load sibling module {name!r} from {path}", name=name,
        )
    mod = _ilu.module_from_spec(spec)
    # Register before executing so circular sibling loads see the module.
    sys.modules[name] = mod
    try:
        spec.loader.exec_module(mod)
    except BaseException:
        # Mirror the regular import machinery: never leave a
        # half-initialised module behind in the cache.
        sys.modules.pop(name, None)
        raise
    return mod
64
+
65
+
66
# Sibling libraries are loaded by file path via `_load_lib` and the few
# names this module needs are re-bound at module level, so the aggregators
# below can reference them as plain globals.

# Entry dataclasses the aggregators iterate over.
_lib_jsonl = _load_lib("_lib_jsonl")
UsageEntry = _lib_jsonl.UsageEntry
CodexEntry = _lib_jsonl.CodexEntry

# Per-entry cost computation (Claude and Codex) plus the Codex
# fallback check used for the `isFallback` breakdown flag.
_lib_pricing = _load_lib("_lib_pricing")
_calculate_entry_cost = _lib_pricing._calculate_entry_cost
_calculate_codex_entry_cost = _lib_pricing._calculate_codex_entry_cost
_is_codex_fallback = _lib_pricing._is_codex_fallback

# Timezone-name resolution used by the Codex date bucketing helpers.
_lib_display_tz = _load_lib("_lib_display_tz")
_resolve_tz = _lib_display_tz._resolve_tz

# `SubWeek` is only needed as the annotation of `_aggregate_weekly`'s
# `weeks` parameter.
_lib_subscription_weeks = _load_lib("_lib_subscription_weeks")
SubWeek = _lib_subscription_weeks.SubWeek
80
+
81
+
82
@dataclass
class BucketUsage:
    """Aggregated usage for one time bucket.

    `bucket` holds the bucket identifier in a format chosen by the caller
    (e.g., "YYYY-MM-DD" for daily, "YYYY-MM" for monthly).

    Instances are produced by `_aggregate_buckets`, one per distinct
    bucket key.
    """
    bucket: str  # Bucket key as returned by the caller's key function
    input_tokens: int
    output_tokens: int
    cache_creation_tokens: int
    cache_read_tokens: int
    total_tokens: int  # input + output + cache creation + cache read
    cost_usd: float
    models: list[str]  # Distinct full model names seen (first-seen order)
    model_breakdowns: list[dict[str, Any]]  # Sorted by cost desc
98
+
99
+
100
def _aggregate_buckets(
    entries: list[UsageEntry],
    key_fn: Callable[[UsageEntry], str],
    mode: str = "auto",
) -> list[BucketUsage]:
    """Roll `entries` up into one `BucketUsage` per distinct bucket key.

    Args:
        entries: Usage entries to aggregate. Entries whose model is the
            literal "<synthetic>" are ignored.
        key_fn: Maps an entry to its bucket key (e.g. "2026-04-17" or
            "2026-04"). Keys must be mutually sortable.
        mode: Forwarded unchanged to `_calculate_entry_cost`.

    Returns:
        Buckets sorted by key ascending (callers reverse for --order
        desc). Inside each bucket, `models` lists model display names in
        first-seen order and `model_breakdowns` is sorted by cost
        descending, matching upstream ccusage. Entries whose usage has
        `speed == "fast"` are tracked under "<model>-fast".
    """

    def _zeroed() -> dict[str, Any]:
        # Fresh accumulator; the same shape is used per bucket and per model.
        return {
            "input": 0,
            "output": 0,
            "cache_create": 0,
            "cache_read": 0,
            "cost": 0.0,
        }

    totals: dict[str, dict[str, Any]] = {}
    # bucket key -> {display model -> accumulator}; dict insertion order
    # doubles as the first-seen model order.
    per_model: dict[str, dict[str, dict[str, Any]]] = {}

    for entry in entries:
        if entry.model == "<synthetic>":
            continue
        usage = entry.usage
        if usage.get("speed") == "fast":
            label = f"{entry.model}-fast"
        else:
            label = entry.model
        bucket_key = key_fn(entry)

        deltas = {
            "input": int(usage.get("input_tokens", 0) or 0),
            "output": int(usage.get("output_tokens", 0) or 0),
            "cache_create": int(usage.get("cache_creation_input_tokens", 0) or 0),
            "cache_read": int(usage.get("cache_read_input_tokens", 0) or 0),
            # Pricing is keyed by the raw model name, not the display label.
            "cost": _calculate_entry_cost(
                entry.model, usage, mode=mode, cost_usd=entry.cost_usd,
            ),
        }

        bucket_totals = totals.setdefault(bucket_key, _zeroed())
        model_totals = per_model.setdefault(bucket_key, {}).setdefault(
            label, _zeroed(),
        )
        for field, amount in deltas.items():
            bucket_totals[field] += amount
            model_totals[field] += amount

    result: list[BucketUsage] = []
    for bucket_key in sorted(totals):
        agg = totals[bucket_key]
        models = per_model[bucket_key]
        breakdowns = sorted(
            (
                {
                    "modelName": name,
                    "inputTokens": acc["input"],
                    "outputTokens": acc["output"],
                    "cacheCreationTokens": acc["cache_create"],
                    "cacheReadTokens": acc["cache_read"],
                    "cost": acc["cost"],
                }
                for name, acc in models.items()
            ),
            key=lambda row: row["cost"],
            reverse=True,
        )
        result.append(BucketUsage(
            bucket=bucket_key,
            input_tokens=agg["input"],
            output_tokens=agg["output"],
            cache_creation_tokens=agg["cache_create"],
            cache_read_tokens=agg["cache_read"],
            total_tokens=(
                agg["input"] + agg["output"]
                + agg["cache_create"] + agg["cache_read"]
            ),
            cost_usd=agg["cost"],
            models=list(models),
            model_breakdowns=breakdowns,
        ))
    return result
189
+
190
+
191
def _aggregate_daily(
    entries: list[UsageEntry],
    mode: str = "auto",
    *,
    tz: "Any | None" = None,
) -> list[BucketUsage]:
    """Bucket entries per calendar day ("YYYY-MM-DD") in the display tz.

    Each entry's timestamp is converted with `astimezone(tz)` before the
    date is taken, so `tz=None` means host-local time and an explicit zone
    means that zone. Per spec Q5/F6 this is intentional: with
    `display.tz=utc` the daily buckets are cut at UTC midnight even when
    the host lives in another zone.
    """

    def _day_of(entry: UsageEntry) -> str:
        localized = entry.timestamp.astimezone(tz)
        return localized.strftime("%Y-%m-%d")

    return _aggregate_buckets(entries, key_fn=_day_of, mode=mode)
209
+
210
+
211
def _aggregate_monthly(
    entries: list[UsageEntry],
    mode: str = "auto",
    *,
    tz: "Any | None" = None,
) -> list[BucketUsage]:
    """Bucket entries per calendar month ("YYYY-MM") in the display tz.

    Month boundaries follow the same rule as ``_aggregate_daily``: the
    timestamp is converted with `astimezone(tz)` first, so `tz=None` means
    host-local time.
    """

    def _month_of(entry: UsageEntry) -> str:
        localized = entry.timestamp.astimezone(tz)
        return localized.strftime("%Y-%m")

    return _aggregate_buckets(entries, key_fn=_month_of, mode=mode)
226
+
227
+
228
def _aggregate_weekly(
    entries: list[UsageEntry],
    weeks: list[SubWeek],
    mode: str = "auto",
) -> list[BucketUsage]:
    """Bucket entries into the subscription weeks described by `weeks`.

    Each resulting `BucketUsage.bucket` is the matching week's
    `start_date.isoformat()`. Entries that fall into no week's
    `[start_ts, end_ts)` interval are filtered out here, before
    `_aggregate_buckets` runs, because that function cannot cope with a
    `None` key (sorting the bucket keys would fail).

    When weeks overlap (can occur at Anthropic reset-day-drift boundaries
    — see `_compute_subscription_weeks`) the earliest matching week wins.

    NOTE: the lookup bisects on the week start times, so `weeks` must
    already be ordered by start ascending.
    """
    import bisect

    parse_ts = _cctally().parse_iso_datetime
    # (start, end, bucket label) per week, parsed once up front.
    bounds: list[tuple[dt.datetime, dt.datetime, str]] = [
        (
            parse_ts(week.start_ts, "week.start_ts"),
            parse_ts(week.end_ts, "week.end_ts"),
            week.start_date.isoformat(),
        )
        for week in weeks
    ]
    week_starts = [lower for lower, _upper, _label in bounds]

    def _locate(ts: dt.datetime) -> str | None:
        # Rightmost week that starts at or before `ts`.
        pos = bisect.bisect_right(week_starts, ts) - 1
        if pos < 0:
            return None
        # First-match-wins: step back while the preceding week also
        # contains `ts`. Without overlap this stops immediately.
        while pos > 0:
            lower, upper, _label = bounds[pos - 1]
            if not (lower <= ts < upper):
                break
            pos -= 1
        lower, upper, label = bounds[pos]
        return label if lower <= ts < upper else None

    # Resolve each entry's week once; the key function handed to
    # `_aggregate_buckets` is then a plain lookup by object identity.
    week_of: dict[int, str] = {}
    kept: list[UsageEntry] = []
    for entry in entries:
        label = _locate(entry.timestamp)
        if label is None:
            continue
        week_of[id(entry)] = label
        kept.append(entry)

    return _aggregate_buckets(
        kept,
        key_fn=lambda e: week_of[id(e)],
        mode=mode,
    )
297
+
298
+
299
@dataclass
class CodexBucketUsage:
    """Aggregated Codex usage for one time bucket (date or month).

    Instances are produced by `_aggregate_codex_buckets`, one per distinct
    bucket key.
    """
    bucket: str  # Bucket key as returned by the caller's key function
    input_tokens: int
    cached_input_tokens: int
    output_tokens: int
    reasoning_output_tokens: int
    total_tokens: int  # input + output, as computed by the aggregator
    cost_usd: float
    models: list[str]  # Distinct full model names (first-seen order)
    model_breakdowns: list[dict[str, Any]]  # Sorted by cost desc
311
+
312
+
313
@dataclass
class CodexSessionUsage:
    """Aggregated Codex usage for one session.

    `session_id_path` is the upstream-compatible identifier: relative path
    under ~/.codex/sessions/ WITHOUT the .jsonl extension
    (e.g. "2025/12/25/rollout-..."). `session_file` is the basename without
    .jsonl. `directory` is the relative parent path. `session_id` is the
    inner UUID (from JSONL session_meta), retained for debug/display but
    not used as a grouping key.

    Instances are produced by `_aggregate_codex_sessions`.
    """
    session_id: str
    session_id_path: str  # Grouping key
    session_file: str
    directory: str
    input_tokens: int
    cached_input_tokens: int
    output_tokens: int
    reasoning_output_tokens: int
    total_tokens: int  # input + output, as computed by the aggregator
    cost_usd: float
    models: list[str]  # First-seen order
    model_breakdowns: list[dict[str, Any]]  # Sorted by cost desc
    last_activity: dt.datetime  # Latest entry timestamp in the session
337
+
338
+
339
@dataclass
class ClaudeSessionUsage:
    """Aggregated Claude usage for one sessionId (may span multiple JSONL files).

    Instances are produced by `_aggregate_claude_sessions`.
    """
    session_id: str
    project_path: str
    source_paths: list[str]  # Every contributing file path, sorted
    first_activity: dt.datetime  # Earliest entry timestamp in the session
    last_activity: dt.datetime  # Latest entry timestamp in the session
    input_tokens: int
    cache_creation_tokens: int
    cache_read_tokens: int
    output_tokens: int
    total_tokens: int  # input + output only; cache tokens are not summed in
    cost_usd: float
    models: list[str]  # first-seen order
    model_breakdowns: list[dict[str, Any]]  # sorted by cost desc
355
+
356
+
357
def _aggregate_codex_buckets(
    entries: list[CodexEntry],
    key_fn: Callable[[CodexEntry], str],
) -> list[CodexBucketUsage]:
    """Group a CodexEntry list into per-bucket records.

    Args:
        entries: Codex entries to aggregate.
        key_fn: Maps an entry to its bucket key; keys must be mutually
            sortable.

    Returns:
        One `CodexBucketUsage` per distinct key, sorted by key ascending.
        `models` lists model names in first-seen order; `model_breakdowns`
        is sorted by descending cost (matches upstream ccusage-codex) and
        carries an `isFallback` flag from `_is_codex_fallback`.

    `total_tokens` (bucket level and per model) is derived as
    `input + output`; the entries' own `total_tokens` field is not
    consulted.
    """

    def _zeroed() -> dict[str, Any]:
        # Same accumulator shape for bucket totals and per-model totals.
        return {
            "input": 0, "cached_input": 0, "output": 0,
            "reasoning": 0, "cost": 0.0,
        }

    totals: dict[str, dict[str, Any]] = {}
    # bucket key -> {model -> accumulator}; dict insertion order doubles as
    # the first-seen model order.
    per_model: dict[str, dict[str, dict[str, Any]]] = {}

    for entry in entries:
        key = key_fn(entry)
        bucket_totals = totals.setdefault(key, _zeroed())
        bucket_models = per_model.setdefault(key, {})

        cost = _calculate_codex_entry_cost(
            entry.model,
            entry.input_tokens,
            entry.cached_input_tokens,
            entry.output_tokens,
            entry.reasoning_output_tokens,
        )

        model_totals = bucket_models.setdefault(entry.model, _zeroed())
        for acc in (bucket_totals, model_totals):
            acc["input"] += entry.input_tokens
            acc["cached_input"] += entry.cached_input_tokens
            acc["output"] += entry.output_tokens
            acc["reasoning"] += entry.reasoning_output_tokens
            acc["cost"] += cost

    result: list[CodexBucketUsage] = []
    for key in sorted(totals):
        agg = totals[key]
        models = per_model[key]
        model_breakdowns = [
            {
                "modelName": model,
                "inputTokens": mb["input"],
                "cachedInputTokens": mb["cached_input"],
                "outputTokens": mb["output"],
                "reasoningOutputTokens": mb["reasoning"],
                "totalTokens": mb["input"] + mb["output"],
                "cost": mb["cost"],
                "isFallback": _is_codex_fallback(model),
            }
            for model, mb in models.items()
        ]
        model_breakdowns.sort(key=lambda m: m["cost"], reverse=True)
        result.append(CodexBucketUsage(
            bucket=key,
            input_tokens=agg["input"],
            cached_input_tokens=agg["cached_input"],
            output_tokens=agg["output"],
            reasoning_output_tokens=agg["reasoning"],
            total_tokens=agg["input"] + agg["output"],
            cost_usd=agg["cost"],
            models=list(models),
            model_breakdowns=model_breakdowns,
        ))
    return result
434
+
435
+
436
def _aggregate_codex_daily(
    entries: list[CodexEntry], *, tz_name: str | None = None,
) -> list[CodexBucketUsage]:
    """Bucket Codex entries per calendar day ("YYYY-MM-DD").

    `tz_name` is resolved through `_resolve_tz`. When that yields `None`
    the host-local zone is used; otherwise day boundaries follow the
    resolved zone.
    """
    zone = _resolve_tz(tz_name)

    def _day_of(entry: CodexEntry) -> str:
        # astimezone(None) is the host-local conversion, so one code path
        # covers both the explicit-zone and the default case.
        return entry.timestamp.astimezone(zone).strftime("%Y-%m-%d")

    return _aggregate_codex_buckets(entries, key_fn=_day_of)
446
+
447
+
448
def _aggregate_codex_monthly(
    entries: list[CodexEntry], *, tz_name: str | None = None,
) -> list[CodexBucketUsage]:
    """Bucket Codex entries per calendar month ("YYYY-MM").

    `tz_name` is resolved through `_resolve_tz`. When that yields `None`
    the host-local zone is used; otherwise month boundaries follow the
    resolved zone.
    """
    zone = _resolve_tz(tz_name)

    def _month_of(entry: CodexEntry) -> str:
        # astimezone(None) is the host-local conversion, so one code path
        # covers both the explicit-zone and the default case.
        return entry.timestamp.astimezone(zone).strftime("%Y-%m")

    return _aggregate_codex_buckets(entries, key_fn=_month_of)
458
+
459
+
460
def _aggregate_codex_weekly(
    entries: list[CodexEntry],
    tz_name: str | None,
    week_start_idx: int,
) -> list[CodexBucketUsage]:
    """Bucket Codex entries per calendar week.

    Args:
        entries: Codex entries to aggregate.
        tz_name: Display timezone name, resolved through `_resolve_tz`;
            host-local time is used when that yields `None`.
        week_start_idx: First day of the week, 0=Mon..6=Sun. The caller
            resolves it from config.json via ``get_week_start_name`` +
            ``WEEKDAY_MAP``.

    Returns:
        Buckets keyed by the ISO date of each week's first day in the
        display timezone.
    """
    zone = _resolve_tz(tz_name)

    def _week_key(entry: CodexEntry) -> str:
        # astimezone(None) falls back to host-local time, which is the
        # intended behaviour when no display zone is configured.
        day = entry.timestamp.astimezone(zone).date()
        days_into_week = (day.weekday() - week_start_idx) % 7
        first_day = day - dt.timedelta(days=days_into_week)
        return first_day.isoformat()

    return _aggregate_codex_buckets(entries, key_fn=_week_key)
483
+
484
+
485
def _session_path_parts(source_path: str) -> tuple[str, str, str]:
    """Split a session file path into (session_id_path, session_file, directory).

    - session_id_path: path relative to CODEX_SESSIONS_DIR with the final
      suffix (expected to be .jsonl) removed, e.g. "2025/12/25/rollout-...".
    - session_file: last component of that relative path, suffix removed.
    - directory: parent of that relative path.

    Three input shapes are recognised, tried in this order:
      1. A path located under CODEX_SESSIONS_DIR (the runtime sync path).
      2. A bare-relative path starting with ".codex/sessions/..." — the
         form emitted by build-codex-fixtures.py so committed fixture
         cache.db files stay free of maintainer absolute paths
         (public-mirror safe).
      3. Anything else: only the basename is kept.
    """
    sessions_root = _cctally().CODEX_SESSIONS_DIR
    full = pathlib.Path(source_path)
    try:
        relative = full.relative_to(sessions_root)
    except ValueError:
        # Not under the sessions root. PurePosixPath avoids Windows-style
        # drive parsing on unusual inputs; fixture paths are always POSIX.
        pieces = pathlib.PurePosixPath(source_path).parts
        fixture_form = len(pieces) >= 3 and pieces[:2] == (".codex", "sessions")
        if fixture_form:
            relative = pathlib.PurePosixPath(*pieces[2:])
        else:
            relative = pathlib.Path(full.name)
    without_ext = relative.with_suffix("")  # strip .jsonl
    return str(without_ext), without_ext.name, str(without_ext.parent)
515
+
516
+
517
def _aggregate_codex_sessions(entries: list[CodexEntry]) -> list[CodexSessionUsage]:
    """Roll Codex entries up per session file (upstream-compatible).

    The grouping key is the relative session path without ".jsonl" (first
    element returned by `_session_path_parts`), not the inner session
    UUID; the UUID recorded is the one carried by the first entry seen for
    that path.

    Returns:
        One `CodexSessionUsage` per session path, ordered by
        `last_activity` descending (most recent first, matching upstream's
        default view). Per-model breakdowns are sorted by cost descending
        and include `isFallback: bool` as reported by `_is_codex_fallback`.
    """

    def _zeroed() -> dict[str, Any]:
        return {"input": 0, "cached_input": 0, "output": 0, "reasoning": 0, "cost": 0.0}

    sessions: dict[str, dict[str, Any]] = {}
    for entry in entries:
        id_path, file_name, directory = _session_path_parts(entry.source_path)
        state = sessions.get(id_path)
        if state is None:
            state = sessions[id_path] = {
                "uuid": entry.session_id,
                "file": file_name,
                "dir": directory,
                "totals": _zeroed(),
                # model -> accumulator; insertion order = first-seen order
                "models": {},
                "last": entry.timestamp,
            }

        cost = _calculate_codex_entry_cost(
            entry.model, entry.input_tokens, entry.cached_input_tokens,
            entry.output_tokens, entry.reasoning_output_tokens,
        )
        model_totals = state["models"].setdefault(entry.model, _zeroed())
        for acc in (state["totals"], model_totals):
            acc["input"] += entry.input_tokens
            acc["cached_input"] += entry.cached_input_tokens
            acc["output"] += entry.output_tokens
            acc["reasoning"] += entry.reasoning_output_tokens
            acc["cost"] += cost

        if entry.timestamp > state["last"]:
            state["last"] = entry.timestamp

    result: list[CodexSessionUsage] = []
    for id_path, state in sessions.items():
        totals = state["totals"]
        breakdowns = sorted(
            (
                {
                    "modelName": model,
                    "inputTokens": acc["input"],
                    "cachedInputTokens": acc["cached_input"],
                    "outputTokens": acc["output"],
                    "reasoningOutputTokens": acc["reasoning"],
                    "totalTokens": acc["input"] + acc["output"],
                    "cost": acc["cost"],
                    "isFallback": _is_codex_fallback(model),
                }
                for model, acc in state["models"].items()
            ),
            key=lambda row: row["cost"],
            reverse=True,
        )
        result.append(CodexSessionUsage(
            session_id=state["uuid"],
            session_id_path=id_path,
            session_file=state["file"],
            directory=state["dir"],
            input_tokens=totals["input"],
            cached_input_tokens=totals["cached_input"],
            output_tokens=totals["output"],
            reasoning_output_tokens=totals["reasoning"],
            # Derived rather than summed from the entries; matches upstream.
            total_tokens=totals["input"] + totals["output"],
            cost_usd=totals["cost"],
            models=list(state["models"]),
            model_breakdowns=breakdowns,
            last_activity=state["last"],
        ))
    return sorted(result, key=lambda usage: usage.last_activity, reverse=True)
595
+
596
+
597
def _aggregate_claude_sessions(
    entries: list["_JoinedClaudeEntry"],
) -> list[ClaudeSessionUsage]:
    """Group entries by session_id, collapsing resumed-across-files sessions.

    Args:
        entries: Joined Claude entries. Entries whose model is the literal
            "<synthetic>" are skipped entirely.

    Returns:
        One `ClaudeSessionUsage` per session, sorted by `last_activity`
        descending; the caller reverses for --order asc.

    Details:
        - An entry with `session_id=None` is grouped under the stem of its
          `source_path` file name. The number of such entries is reported
          in a single warning on stderr.
        - `project_path` starts as the first entry's `project_path`, or,
          when that is empty, `_decode_escaped_cwd` applied to the name of
          the source file's parent directory. It is then overwritten by
          any entry at least as recent as everything seen so far that
          carries a non-empty `project_path`, so the most recent cwd wins
          when a resume crosses directories.
        - Cost is recomputed per entry via `_calculate_entry_cost` with
          its default mode.
        - `total_tokens` is input + output only (spec A2.8); cache tokens
          are reported separately, parallel to the `codex-session` view.
    """
    _decode_escaped_cwd = _cctally()._decode_escaped_cwd
    by_session: dict[str, dict[str, Any]] = {}
    warn_count = 0

    for entry in entries:
        # Skip synthetic entries (Claude Code internal markers, not real
        # model calls), mirroring `_aggregate_buckets`. This must happen
        # before the session_id fallback so synthetic entries don't
        # inflate warn_count either.
        if entry.model == "<synthetic>":
            continue

        sid = entry.session_id
        if sid is None:
            sid = os.path.splitext(os.path.basename(entry.source_path))[0]
            warn_count += 1

        ts = entry.timestamp
        sess = by_session.get(sid)
        if sess is None:
            # Build the state only on first sight of a session, so the
            # cwd-decoding fallback runs once per session instead of once
            # per entry.
            sess = by_session[sid] = {
                "session_id": sid,
                "project_path": entry.project_path or _decode_escaped_cwd(
                    os.path.basename(os.path.dirname(entry.source_path))
                ),
                "source_paths": set(),
                "first": ts,
                "last": ts,
                "input": 0, "cache_create": 0, "cache_read": 0, "output": 0,
                "cost": 0.0,
                # model -> accumulator; insertion order = first-seen order
                "models": {},
            }

        sess["source_paths"].add(entry.source_path)
        if ts < sess["first"]:
            sess["first"] = ts
        # Ties count as "latest" for the project_path tie-breaker, but
        # only a strictly newer timestamp replaces `last`.
        is_latest = ts >= sess["last"]
        if ts > sess["last"]:
            sess["last"] = ts
        if is_latest and entry.project_path:
            sess["project_path"] = entry.project_path

        usage = {
            "input_tokens": entry.input_tokens,
            "output_tokens": entry.output_tokens,
            "cache_creation_input_tokens": entry.cache_creation_tokens,
            "cache_read_input_tokens": entry.cache_read_tokens,
        }
        cost = _calculate_entry_cost(entry.model, usage)

        mb = sess["models"].setdefault(entry.model, {
            "model": entry.model,
            "input": 0, "cache_create": 0, "cache_read": 0, "output": 0,
            "cost": 0.0,
        })
        for acc in (sess, mb):
            acc["input"] += entry.input_tokens
            acc["cache_create"] += entry.cache_creation_tokens
            acc["cache_read"] += entry.cache_read_tokens
            acc["output"] += entry.output_tokens
            acc["cost"] += cost

    if warn_count:
        print(
            f"Warning: {warn_count} entries lacked session_files rows "
            f"(cache may be catching up).",
            file=sys.stderr,
        )

    # Materialize and sort.
    results: list[ClaudeSessionUsage] = []
    for sess in by_session.values():
        breakdowns = sorted(
            sess["models"].values(),
            key=lambda mb: -mb["cost"],
        )
        results.append(ClaudeSessionUsage(
            session_id=sess["session_id"],
            project_path=sess["project_path"],
            source_paths=sorted(sess["source_paths"]),
            first_activity=sess["first"],
            last_activity=sess["last"],
            input_tokens=sess["input"],
            cache_creation_tokens=sess["cache_create"],
            cache_read_tokens=sess["cache_read"],
            output_tokens=sess["output"],
            # Spec A2.8: Total Tokens = input + output only; cache tokens
            # are shown separately but not summed.
            total_tokens=sess["input"] + sess["output"],
            cost_usd=sess["cost"],
            models=list(sess["models"]),
            model_breakdowns=breakdowns,
        ))
    results.sort(key=lambda s: s.last_activity, reverse=True)
    return results