cctally 1.10.3 → 1.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,938 @@
1
+ """Pure-function kernel for cctally cache-report.
2
+
3
+ This module owns the day/session bucketing, financial computation, and
4
+ anomaly classification logic that previously lived inline in
5
+ ``bin/cctally``. The CLI command ``cctally cache-report`` and the
6
+ dashboard sync builder both consume this kernel; the kernel itself is
7
+ pure (no I/O, no logging, no environment reads, no SQLite connection).
8
+
9
+ Display-tz threading: bucketing functions accept ``display_tz``
10
+ explicitly. ``None`` means host-local fallback (legacy behavior).
11
+ Callers pass the resolved IANA zone from ``resolve_display_tz``.
12
+
13
+ See ``docs/superpowers/specs/2026-05-21-cache-report-panel-design.md``
14
+ §5 for the full contract.
15
+ """
16
+ from __future__ import annotations
17
+
18
+ import datetime as dt
19
+ from dataclasses import dataclass, field
20
+ from typing import Any, Callable, Iterable, Literal, Optional
21
+ from zoneinfo import ZoneInfo
22
+
23
+
24
# Anthropic's per-call >200K-tokens tier — kept in sync with bin/_lib_pricing.
# Used as the default for the ``tiered_threshold`` kwarg of
# ``_compute_entry_cache_dollars``; callers may override it there.
DEFAULT_TIERED_THRESHOLD = 200_000


# Minimum baseline samples for the per-row anomaly classifier.
# Daily mode: >=5 trailing days. Session mode: >=10 trailing sessions
# (richer signal per sample so a higher minimum keeps thin-baseline
# false positives down). ``_classify_anomalies`` picks one of the two
# based on whether the first row carries a ``session_id``.
CACHE_REPORT_MIN_BASELINE_DAYS = 5
CACHE_REPORT_MIN_BASELINE_SESSIONS = 10


# Literal alias mirroring TS `CacheAnomalyReason` at
# dashboard/web/src/types/envelope.ts:71 — keeps the two surfaces in
# lockstep so a typo on either side fails type-check.
CacheAnomalyReason = Literal["net_negative", "cache_drop"]
41
+
42
+
43
@dataclass
class CacheModelBreakdown:
    """Per-model slice of a ``CacheRow``: token counters plus cache dollars.

    The day-mode and session-mode aggregators build one instance per model
    seen in a row's bucket and append it to ``CacheRow.model_breakdowns``.
    """
    model_name: str
    # Token counters summed over this model's entries in the parent bucket.
    input_tokens: int
    output_tokens: int
    cache_creation_tokens: int
    cache_read_tokens: int
    # cache_read / (input + cache_creation + cache_read) * 100; 0.0 when
    # the denominator is zero.
    cache_hit_percent: float
    # Sum of the injected cost calculator's per-entry results.
    cost: float
    # Cache-dollar figures summed per entry; net_usd = saved_usd - wasted_usd.
    saved_usd: float = 0.0
    wasted_usd: float = 0.0
    net_usd: float = 0.0
55
+
56
+
57
@dataclass
class CacheBreakdownRow:
    """One row of the panel/modal by-project / by-model breakdown.

    Carried by the kernel so by-project and by-model share a single
    aggregation path. The dashboard wraps each into the SSE-side frozen
    ``CacheReportBreakdownRow`` (same field shape — only ``key`` /
    ``cache_hit_percent`` / ``net_usd`` cross the envelope boundary)
    without further transformation. The token fields stay internal:
    they're populated so the "(other)" row's hit-% can be summed
    directly from the collapsed tail rows rather than re-walking the
    raw bucket map (EFF-4).
    """
    # Bucket label: the value returned by the caller's key function, or the
    # ``other_label`` for the collapsed tail row.
    key: str
    cache_hit_percent: float
    net_usd: float
    # Internal token totals backing ``cache_hit_percent``.
    input_tokens: int = 0
    cache_creation_tokens: int = 0
    cache_read_tokens: int = 0
76
+
77
+
78
@dataclass
class CacheRow:
    """One cache-report row: a calendar day or a (resume-merged) session.

    Identity fields come in two mutually exclusive flavours. Day-mode rows
    set ``date``; session-mode rows set ``session_id``, ``project_path``,
    ``last_activity`` and ``source_paths``. The remaining fields are the
    same in both modes.
    """
    # --- identity (day mode) ---
    date: str | None = None
    # --- identity (session mode) ---
    session_id: str | None = None
    project_path: str | None = None
    last_activity: dt.datetime | None = None
    source_paths: list[str] = field(default_factory=list)

    # --- token counters (sum of the per-model children) ---
    input_tokens: int = 0
    output_tokens: int = 0
    cache_creation_tokens: int = 0
    cache_read_tokens: int = 0

    # --- financials (sum of the per-model children) ---
    cost: float = 0.0
    saved_usd: float = 0.0
    wasted_usd: float = 0.0
    net_usd: float = 0.0

    # --- per-model children, appended by the aggregators ---
    model_breakdowns: list[CacheModelBreakdown] = field(default_factory=list)

    # --- anomaly verdict, written by _classify_anomalies ---
    anomaly_triggered: bool = False
    anomaly_reasons: list[CacheAnomalyReason] = field(default_factory=list)

    @property
    def total_tokens(self) -> int:
        """All four token counters added together."""
        counters = (
            self.input_tokens,
            self.output_tokens,
            self.cache_creation_tokens,
            self.cache_read_tokens,
        )
        return sum(counters)

    @property
    def cache_hit_percent(self) -> float:
        """Cache hit % derived on demand from this row's own counters."""
        return _compute_cache_hit_percent(
            input_tokens=self.input_tokens,
            cache_creation_tokens=self.cache_creation_tokens,
            cache_read_tokens=self.cache_read_tokens,
        )
120
+
121
+
122
@dataclass
class _Bucket:
    """Per-(day,model) / per-session / per-breakdown-key aggregation accumulator.

    Used by ``_aggregate_cache_by_day``, ``_aggregate_cache_by_session``,
    and ``_aggregate_cache_breakdown`` so all three aggregators share one
    set of field names — typos become type errors, not silent runtime
    zero. The breakdown aggregator only populates the token + cache-$
    fields (``output_tokens`` / ``cost`` stay zero); that's fine — the
    by-project / by-model paths don't surface them.
    """
    # Every field starts at zero and is incremented with ``+=`` as entries
    # are folded in.
    input_tokens: int = 0
    output_tokens: int = 0
    cache_creation_tokens: int = 0
    cache_read_tokens: int = 0
    cost: float = 0.0
    saved_usd: float = 0.0
    wasted_usd: float = 0.0
    net_usd: float = 0.0
141
+
142
+
143
def _compute_cache_hit_percent(
    input_tokens: int,
    cache_creation_tokens: int,
    cache_read_tokens: int,
) -> float:
    """Return the share of input-side tokens that were served from cache.

    The result is ``cache_read / (input + cache_creation + cache_read)``
    expressed as a percentage. Output tokens play no part. A zero
    denominator yields ``0.0`` instead of dividing.
    """
    denominator = input_tokens + cache_creation_tokens + cache_read_tokens
    if denominator != 0:
        return (cache_read_tokens / denominator) * 100
    return 0.0
157
+
158
+
159
def _lookup_pricing(model: str, pricing: dict) -> dict | None:
    """Find the pricing entry for ``model``, tolerating provider prefixes.

    An exact key match wins. Otherwise a leading ``anthropic/`` or
    ``anthropic.`` is removed and the bare name is looked up. Returns
    ``None`` when nothing matches. Unlike
    ``_lib_pricing._resolve_model_pricing`` this never writes a warning —
    the kernel stays side-effect free.
    """
    direct = pricing.get(model)
    if direct is not None:
        return direct

    # Bare names for whichever provider prefix the model actually carries.
    bare_names = (
        model[len(alias):]
        for alias in ("anthropic/", "anthropic.")
        if model.startswith(alias)
    )
    for bare_name in bare_names:
        hit = pricing.get(bare_name)
        if hit is not None:
            return hit
    return None
174
+
175
+
176
def _compute_entry_cache_dollars(
    model: str,
    cache_creation_tokens: int,
    cache_read_tokens: int,
    *,
    pricing: dict,
    tiered_threshold: int = DEFAULT_TIERED_THRESHOLD,
) -> tuple[float, float, float]:
    """Price one entry's cache traffic as ``(saved_usd, wasted_usd, net_usd)``.

    * ``saved_usd``  — ``cache_read_tokens × (input rate − cache-read rate)``:
      what the reads would have cost as plain input.
    * ``wasted_usd`` — ``cache_creation_tokens × (cache-write rate − input
      rate)``: the premium paid to populate the cache.
    * ``net_usd``    — ``saved_usd − wasted_usd``; positive means caching
      paid off.

    Both rate differences are clamped at zero before multiplying. Each
    rate is the blended per-token rate for that single call's token count:
    tokens up to ``tiered_threshold`` are priced at the base rate and the
    excess at the ``*_above_200k_tokens`` rate when the pricing entry has
    one. Pricing is resolved through ``_lookup_pricing`` (so ``anthropic/``
    and ``anthropic.`` aliases work); a model with no pricing entry
    silently yields ``(0.0, 0.0, 0.0)``.

    This must be called per entry: pricing pre-summed token totals would
    apply the tier to the aggregate rather than to each call.
    """
    rates = _lookup_pricing(model, pricing)
    if not rates:
        return (0.0, 0.0, 0.0)

    def _blended(count: int, flat_key: str, long_key: str) -> float:
        """Blended $/token for ``count`` tokens of one kind in one call."""
        flat = rates.get(flat_key, 0.0)
        premium = rates.get(long_key)
        if count <= 0:
            return 0.0
        if premium is None or count <= tiered_threshold:
            return flat
        overflow = count - tiered_threshold
        return (tiered_threshold * flat + overflow * premium) / count

    input_key = "input_cost_per_token"
    input_long_key = "input_cost_per_token_above_200k_tokens"

    # Reads: plain-input rate minus the discounted cache-read rate.
    read_discount = (
        _blended(cache_read_tokens, input_key, input_long_key)
        - _blended(
            cache_read_tokens,
            "cache_read_input_token_cost",
            "cache_read_input_token_cost_above_200k_tokens",
        )
    )
    # Writes: cache-write rate minus the plain-input rate.
    plain_for_write = _blended(cache_creation_tokens, input_key, input_long_key)
    write_rate = _blended(
        cache_creation_tokens,
        "cache_creation_input_token_cost",
        "cache_creation_input_token_cost_above_200k_tokens",
    )

    saved = cache_read_tokens * max(0.0, read_discount)
    wasted = cache_creation_tokens * max(0.0, write_rate - plain_for_write)
    return (saved, wasted, saved - wasted)
244
+
245
+
246
+ # ---------------------------------------------------------------------------
247
+ # Day-mode aggregator with explicit display_tz threading
248
+ # ---------------------------------------------------------------------------
249
+
250
def _resolve_bucket_tz(display_tz: ZoneInfo | None) -> dt.tzinfo:
    """Pick the timezone used to turn entry timestamps into calendar days.

    A non-``None`` ``display_tz`` (the zone the CLI / dashboard resolved
    via ``resolve_display_tz``) is returned unchanged. ``None`` selects
    the legacy behaviour: the host's current local timezone, taken from
    ``datetime.now().astimezone()``.
    """
    if display_tz is None:
        # internal fallback: host-local intentional
        host_now = dt.datetime.now().astimezone()
        return host_now.tzinfo  # type: ignore[return-value]
    return display_tz
264
+
265
+
266
def _aggregate_cache_by_day(
    entries: Iterable,
    *,
    display_tz: ZoneInfo | None,
    pricing: dict,
    cost_calculator: Callable[[str, dict, str, Optional[float]], float],
) -> list[CacheRow]:
    """Fold entries into one ``CacheRow`` per local calendar day.

    Each entry's ``timestamp`` is converted into the bucketing timezone
    (``display_tz``, or host-local when ``None`` — see
    ``_resolve_bucket_tz``) and keyed by its ``YYYY-MM-DD`` date. Callers
    should pass the same zone they used to parse their date window so
    window edges and day buckets agree.

    Within a day, entries are accumulated per model. Token counts come
    from ``entry.usage``; cost comes from
    ``cost_calculator(entry.model, entry.usage, "auto", entry.cost_usd)``;
    cache dollars come from ``_compute_entry_cache_dollars`` evaluated per
    entry. ``cost_calculator`` is mandatory — there is deliberately no
    default.

    Returns rows sorted by date ascending, each with its
    ``model_breakdowns`` sorted by model name and its own totals equal to
    the sum of those children. The kernel does not filter by window;
    callers pre-filter the entries they pass in.
    """
    bucket_tz = _resolve_bucket_tz(display_tz)

    # local-date -> model -> accumulator
    per_day: dict[str, dict[str, _Bucket]] = {}
    for entry in entries:
        # Timestamps are expected to be timezone-aware; astimezone() moves
        # them into the bucketing zone before the date is taken.
        local_day = entry.timestamp.astimezone(bucket_tz).strftime("%Y-%m-%d")
        entry_cost = cost_calculator(
            entry.model, entry.usage, "auto", entry.cost_usd
        )
        written = entry.usage.get("cache_creation_input_tokens", 0)
        reread = entry.usage.get("cache_read_input_tokens", 0)
        saved, wasted, net = _compute_entry_cache_dollars(
            entry.model, written, reread, pricing=pricing,
        )

        acc = per_day.setdefault(local_day, {}).setdefault(entry.model, _Bucket())
        acc.input_tokens += entry.usage.get("input_tokens", 0)
        acc.output_tokens += entry.usage.get("output_tokens", 0)
        acc.cache_creation_tokens += written
        acc.cache_read_tokens += reread
        acc.cost += entry_cost
        acc.saved_usd += saved
        acc.wasted_usd += wasted
        acc.net_usd += net

    rows: list[CacheRow] = []
    for local_day, by_model in sorted(per_day.items()):
        day_row = CacheRow(date=local_day)
        for model_name, acc in sorted(by_model.items()):
            child = CacheModelBreakdown(
                model_name=model_name,
                input_tokens=acc.input_tokens,
                output_tokens=acc.output_tokens,
                cache_creation_tokens=acc.cache_creation_tokens,
                cache_read_tokens=acc.cache_read_tokens,
                cache_hit_percent=_compute_cache_hit_percent(
                    acc.input_tokens,
                    acc.cache_creation_tokens,
                    acc.cache_read_tokens,
                ),
                cost=acc.cost,
                saved_usd=acc.saved_usd,
                wasted_usd=acc.wasted_usd,
                net_usd=acc.net_usd,
            )
            day_row.model_breakdowns.append(child)
            # Roll the child into the day totals.
            day_row.input_tokens += child.input_tokens
            day_row.output_tokens += child.output_tokens
            day_row.cache_creation_tokens += child.cache_creation_tokens
            day_row.cache_read_tokens += child.cache_read_tokens
            day_row.cost += child.cost
            day_row.saved_usd += child.saved_usd
            day_row.wasted_usd += child.wasted_usd
            day_row.net_usd += child.net_usd
        rows.append(day_row)
    return rows
355
+
356
+
357
+ # ---------------------------------------------------------------------------
358
+ # Session-mode aggregator (resume-merged across JSONL files)
359
+ # ---------------------------------------------------------------------------
360
+
361
def _filename_uuid_stem(path: str) -> str:
    """Derive a session-id stand-in from a JSONL path.

    Takes the final ``/``-separated component of ``path`` and returns
    everything before its first ``.`` (so ``<uuid>.jsonl`` gives
    ``<uuid>``). A component with no dot is returned whole. No UUID
    validation is performed. Implemented with plain string operations —
    no ``os.path`` and no syscalls — to keep the kernel pure.
    """
    filename = path.rsplit("/", 1)[-1]
    return filename.split(".", 1)[0]
377
+
378
+
379
@dataclass
class _SessionAggregationResult:
    """Bundles session rows + the fallback warning count.

    Returned by ``_aggregate_cache_by_session`` so callers can choose
    whether to emit the "N entries lacked session_files rows" one-shot
    warning. The CLI adapter consumes ``fallback_count`` to emit the
    legacy stderr line; the dashboard snapshot builder ignores it (the
    panel surfaces freshness via the doctor chip instead).
    """
    # One row per session id, ordered by ``last_activity`` descending.
    rows: list[CacheRow]
    # Number of (non-synthetic) entries whose ``session_id`` was ``None`` and
    # were keyed by the filename stem instead.
    fallback_count: int
391
+
392
+
393
def _aggregate_cache_by_session(
    entries: Iterable,
    *,
    pricing: dict,
    cost_calculator: Callable[[str, dict, str, Optional[float]], float],
    project_decoder: Callable[[str], str],
) -> _SessionAggregationResult:
    """Group Claude entries by sessionId (resume-merged).

    Resume-merging: entries from multiple JSONL files sharing a sessionId
    collapse into one row. ``project_path`` is taken from the entry with
    the latest ``timestamp`` among those that carry a non-empty
    ``project_path`` (ties go to the entry seen last), independent of the
    order in which ``entries`` is iterated. When no entry carries one, the
    row falls back to ``project_decoder(source_path)`` of the first entry
    seen for that session; the decoder is invoked once per session.

    Synthetic entries (``model == '<synthetic>'``) are dropped — they're
    Claude Code's internal markers, not real model calls — before any
    bucketing, so they don't inflate the fallback count either.

    Entries with ``session_id is None`` fall back to the filename stem
    (``_filename_uuid_stem``); the count of such fallback entries rides
    back on ``_SessionAggregationResult.fallback_count`` so the caller can
    emit the legacy one-shot stderr warning.

    ``cost_calculator`` / ``pricing`` / ``project_decoder`` are required
    keyword-only so the kernel stays free of pricing globals and
    cost-dispatch I/O. ``cost_calculator`` is called per entry as
    ``cost_calculator(model, usage_dict, "auto", cost_usd)`` with a usage
    dict rebuilt from the entry's token attributes.

    Callers pre-filter entries to the desired window; the kernel does not
    re-filter.

    Returns:
        ``_SessionAggregationResult`` whose ``rows`` are sorted by
        ``last_activity`` descending, each with ``model_breakdowns``
        sorted by model name.
    """
    # buckets[sid] = {"entries": [...], "project_path": str|None,
    #                 "project_ts": dt|None, "last_activity": dt|None,
    #                 "source_paths": set[str]}
    buckets: dict[str, dict[str, Any]] = {}
    fallback_count = 0
    for entry in entries:
        if entry.model == "<synthetic>":
            continue
        sid = entry.session_id
        if sid is None:
            sid = _filename_uuid_stem(entry.source_path)
            fallback_count += 1

        b = buckets.get(sid)
        if b is None:
            # Create the bucket lazily so the decoder (and the dict
            # literal) is only evaluated for the first entry of a session,
            # not for every entry.
            b = buckets[sid] = {
                "entries": [],
                # Seed with decoded-cwd fallback so rows still resolve a
                # Project cell while session_files backfill is incomplete.
                # A real project_path on any entry overrides it below.
                "project_path": project_decoder(entry.source_path),
                # Timestamp of the entry that supplied project_path;
                # None while only the decoded fallback is in place.
                "project_ts": None,
                "last_activity": None,
                "source_paths": set(),
            }
        b["entries"].append(entry)
        b["source_paths"].add(entry.source_path)
        if b["last_activity"] is None or entry.timestamp > b["last_activity"]:
            b["last_activity"] = entry.timestamp
        # Project path from the most-recent entry that has one. Comparing
        # timestamps (rather than trusting iteration order) keeps this
        # correct for unsorted input; ``>=`` preserves last-seen-wins for
        # equal timestamps, matching ascending-sorted input exactly.
        if entry.project_path and (
            b["project_ts"] is None or entry.timestamp >= b["project_ts"]
        ):
            b["project_path"] = entry.project_path
            b["project_ts"] = entry.timestamp

    result: list[CacheRow] = []
    for sid, b in buckets.items():
        # Per-model sub-buckets scoped to this session's entries.
        model_buckets: dict[str, _Bucket] = {}
        for entry in b["entries"]:
            mb_raw = model_buckets.setdefault(entry.model, _Bucket())
            mb_raw.input_tokens += entry.input_tokens
            mb_raw.output_tokens += entry.output_tokens
            mb_raw.cache_creation_tokens += entry.cache_creation_tokens
            mb_raw.cache_read_tokens += entry.cache_read_tokens
            mb_raw.cost += cost_calculator(
                entry.model,
                {
                    "input_tokens": entry.input_tokens,
                    "output_tokens": entry.output_tokens,
                    "cache_creation_input_tokens": entry.cache_creation_tokens,
                    "cache_read_input_tokens": entry.cache_read_tokens,
                },
                "auto",
                entry.cost_usd,
            )
            # Cache dollars are priced per entry so the >200K tier is
            # applied to each call rather than to the session total.
            saved, wasted, net = _compute_entry_cache_dollars(
                entry.model,
                entry.cache_creation_tokens,
                entry.cache_read_tokens,
                pricing=pricing,
            )
            mb_raw.saved_usd += saved
            mb_raw.wasted_usd += wasted
            mb_raw.net_usd += net

        row = CacheRow(
            session_id=sid,
            project_path=b["project_path"],
            last_activity=b["last_activity"],
            source_paths=sorted(b["source_paths"]),
        )
        for model_name in sorted(model_buckets):
            mb_raw = model_buckets[model_name]
            mb = CacheModelBreakdown(
                model_name=model_name,
                input_tokens=mb_raw.input_tokens,
                output_tokens=mb_raw.output_tokens,
                cache_creation_tokens=mb_raw.cache_creation_tokens,
                cache_read_tokens=mb_raw.cache_read_tokens,
                cache_hit_percent=_compute_cache_hit_percent(
                    mb_raw.input_tokens,
                    mb_raw.cache_creation_tokens,
                    mb_raw.cache_read_tokens,
                ),
                cost=mb_raw.cost,
                saved_usd=mb_raw.saved_usd,
                wasted_usd=mb_raw.wasted_usd,
                net_usd=mb_raw.net_usd,
            )
            row.model_breakdowns.append(mb)
            row.input_tokens += mb.input_tokens
            row.output_tokens += mb.output_tokens
            row.cache_creation_tokens += mb.cache_creation_tokens
            row.cache_read_tokens += mb.cache_read_tokens
            row.cost += mb.cost
            row.saved_usd += mb.saved_usd
            row.wasted_usd += mb.wasted_usd
            row.net_usd += mb.net_usd
        result.append(row)

    # Initial ordering descending by last_activity; the CLI's
    # ``_sort_cache_rows`` may resort under ``--sort``. Use a tz-aware
    # sentinel to avoid naive-vs-aware comparison errors on rows missing
    # last_activity.
    _min_dt = dt.datetime.min.replace(tzinfo=dt.timezone.utc)
    result.sort(key=lambda r: r.last_activity or _min_dt, reverse=True)
    return _SessionAggregationResult(rows=result, fallback_count=fallback_count)
526
+
527
+
528
+ # ---------------------------------------------------------------------------
529
+ # Anomaly classification + baseline median
530
+ # ---------------------------------------------------------------------------
531
+
532
def _row_anchor(r: CacheRow) -> dt.datetime | None:
    """Return the instant used to place a row on the baseline timeline.

    Session rows answer with their ``last_activity`` as-is. Daily rows
    (no ``last_activity``, non-empty ``date``) answer with midnight of
    that ``YYYY-MM-DD`` date, interpreted as host-local time via a bare
    ``.astimezone()`` call so the offset in force on that date is used.
    Rows with neither yield ``None``.
    """
    anchor = r.last_activity
    if anchor is None and r.date:
        # internal fallback: host-local intentional
        naive_midnight = dt.datetime.strptime(r.date, "%Y-%m-%d")
        anchor = naive_midnight.astimezone()
    return anchor
548
+
549
+
550
def _compute_baseline_median(
    rows: list[CacheRow],
    *,
    anchor: dt.datetime,
    window_days: int,
    min_samples: int,
    exclude_row: CacheRow | None = None,
    is_session_mode: bool = False,
) -> float | None:
    """Median ``cache_hit_percent`` of the rows trailing ``anchor``.

    A row qualifies when its ``_row_anchor`` lies in the closed interval
    ``[anchor − window_days days, anchor − gap]``. The gap is one second
    in session mode and one day in daily mode, so in daily mode the day
    before ``anchor`` is the newest eligible sample and the anchor's own
    day never is. Rows without an anchor are ignored.

    ``exclude_row`` (compared by identity) is left out of the sample so a
    row being classified does not contribute to its own baseline; pass
    ``None`` to take the median over every qualifying row.

    Returns ``None`` when fewer than ``min_samples`` rows qualify.
    """
    import statistics

    gap = dt.timedelta(seconds=1) if is_session_mode else dt.timedelta(days=1)
    window_start = anchor - dt.timedelta(days=window_days)
    window_end = anchor - gap

    def _qualifies(candidate: CacheRow) -> bool:
        """True when ``candidate`` is not excluded and sits in the window."""
        if exclude_row is not None and candidate is exclude_row:
            return False
        moment = _row_anchor(candidate)
        return moment is not None and window_start <= moment <= window_end

    samples: list[float] = [
        candidate.cache_hit_percent for candidate in rows if _qualifies(candidate)
    ]
    if len(samples) >= min_samples:
        return statistics.median(samples)
    return None
592
+
593
+
594
def _classify_anomalies(
    rows: list[CacheRow],
    *,
    threshold_pp: int,
    window_days: int,
    enabled: bool = True,
) -> None:
    """Set ``anomaly_reasons`` / ``anomaly_triggered`` on every row, in place.

    With ``enabled=False`` every row is reset to "no anomaly" and nothing
    else happens. An empty ``rows`` list is a no-op.

    Two independent triggers are evaluated per row, and their reasons are
    recorded in this fixed order:

    1. ``net_negative`` — the row has some cache activity
       (``cache_creation_tokens + cache_read_tokens > 0``) and
       ``net_usd`` is strictly negative.
    2. ``cache_drop`` — the row's ``cache_hit_percent`` sits at least
       ``threshold_pp`` percentage points under the median of the OTHER
       rows in its trailing ``window_days`` window
       (``_compute_baseline_median``). Skipped silently when the row has
       no anchor or the baseline has too few samples.

    The mode is decided by the first row alone: a non-``None``
    ``session_id`` means session mode (minimum
    ``CACHE_REPORT_MIN_BASELINE_SESSIONS`` samples, window ends one second
    before the row); otherwise daily mode (minimum
    ``CACHE_REPORT_MIN_BASELINE_DAYS`` samples, window ends one day before
    the row).
    """
    if not enabled:
        for row in rows:
            row.anomaly_triggered = False
            row.anomaly_reasons = []
        return
    if not rows:
        return

    is_session_mode = rows[0].session_id is not None
    if is_session_mode:
        min_baseline = CACHE_REPORT_MIN_BASELINE_SESSIONS
    else:
        min_baseline = CACHE_REPORT_MIN_BASELINE_DAYS

    # Resolve every row's own anchor up front, before any row is mutated.
    anchors: list[dt.datetime | None] = [_row_anchor(r) for r in rows]

    for row, anchor in zip(rows, anchors):
        reasons: list[CacheAnomalyReason] = []

        # Trigger 1: net_negative — needs no baseline, only cache activity.
        has_cache_activity = row.cache_creation_tokens + row.cache_read_tokens > 0
        if has_cache_activity and row.net_usd < 0:
            reasons.append("net_negative")

        # Trigger 2: cache_drop — needs an anchor and a sufficient baseline.
        if anchor is not None:
            median = _compute_baseline_median(
                rows,
                anchor=anchor,
                window_days=window_days,
                min_samples=min_baseline,
                exclude_row=row,
                is_session_mode=is_session_mode,
            )
            if median is not None and (median - row.cache_hit_percent) >= threshold_pp:
                reasons.append("cache_drop")

        row.anomaly_reasons = reasons
        row.anomaly_triggered = bool(reasons)
658
+
659
+
660
+ # ---------------------------------------------------------------------------
661
+ # Window-wide breakdown aggregator (by-project / by-model dedup)
662
+ # ---------------------------------------------------------------------------
663
+
664
def _aggregate_cache_breakdown(
    entries: Iterable,
    *,
    key_fn: Callable[[Any], str],
    pricing: dict,
    skip_synthetic: bool = True,
    top_n: int = 5,
    other_label: str = "(other)",
) -> tuple[CacheBreakdownRow, ...]:
    """Roll entries up per ``key_fn`` label; keep the top ``top_n`` + "(other)".

    Shared by the dashboard's by-project and by-model breakdowns
    (spec §4.2); the axis is chosen by the injected ``key_fn``, e.g.

    - by-project: ``lambda e: getattr(e, "project_path", None) or "(unknown)"``
    - by-model:   ``lambda e: e.model``

    With ``skip_synthetic`` (the default) entries whose ``model`` is
    ``"<synthetic>"`` are discarded before bucketing, so both axes count
    exactly the same entries.

    Token attributes are read with ``getattr(..., 0)`` and cache dollars
    are priced per entry via ``_compute_entry_cache_dollars``. Rows are
    ordered by ``abs(net_usd)`` descending. If more than ``top_n`` buckets
    exist, everything past the first ``top_n`` is merged into one row
    labelled ``other_label`` whose ``net_usd`` is the tail's sum and whose
    ``cache_hit_percent`` is the true aggregate over the tail's summed
    tokens (not an average of per-bucket percentages). The tail totals
    are summed from the already-built tail rows rather than by walking the
    bucket map again (EFF-4).
    """
    buckets: dict[str, _Bucket] = {}
    for e in entries:
        model = getattr(e, "model", "")
        if skip_synthetic and model == "<synthetic>":
            continue
        acc = buckets.setdefault(key_fn(e), _Bucket())
        created = getattr(e, "cache_creation_tokens", 0)
        reread = getattr(e, "cache_read_tokens", 0)
        acc.input_tokens += getattr(e, "input_tokens", 0)
        acc.cache_creation_tokens += created
        acc.cache_read_tokens += reread
        saved, wasted, net = _compute_entry_cache_dollars(
            model, created, reread, pricing=pricing,
        )
        acc.saved_usd += saved
        acc.wasted_usd += wasted
        acc.net_usd += net

    # Largest absolute net effect first; ties keep bucket insertion order.
    ranked = sorted(
        (
            CacheBreakdownRow(
                key=label,
                cache_hit_percent=_compute_cache_hit_percent(
                    acc.input_tokens,
                    acc.cache_creation_tokens,
                    acc.cache_read_tokens,
                ),
                net_usd=acc.net_usd,
                input_tokens=acc.input_tokens,
                cache_creation_tokens=acc.cache_creation_tokens,
                cache_read_tokens=acc.cache_read_tokens,
            )
            for label, acc in buckets.items()
        ),
        key=lambda r: abs(r.net_usd),
        reverse=True,
    )
    if len(ranked) <= top_n:
        return tuple(ranked)

    head, tail = ranked[:top_n], ranked[top_n:]
    tail_net = sum(r.net_usd for r in tail)
    tail_input = sum(r.input_tokens for r in tail)
    tail_creation = sum(r.cache_creation_tokens for r in tail)
    tail_read = sum(r.cache_read_tokens for r in tail)
    other = CacheBreakdownRow(
        key=other_label,
        cache_hit_percent=_compute_cache_hit_percent(
            tail_input, tail_creation, tail_read
        ),
        net_usd=tail_net,
        input_tokens=tail_input,
        cache_creation_tokens=tail_creation,
        cache_read_tokens=tail_read,
    )
    return (*head, other)
750
+
751
+
752
def _aggregate_cache_breakdown_from_rows(
    rows: Iterable["CacheRow"],
    *,
    skip_synthetic: bool = True,
    top_n: int = 5,
    other_label: str = "(other)",
) -> tuple[CacheBreakdownRow, ...]:
    """Build the by-model breakdown from already-aggregated rows.

    Instead of re-pricing raw entries, this folds each row's
    ``model_breakdowns`` children — whose token counts and ``net_usd``
    were already computed per entry by the row aggregator — into one
    bucket per model name. The work is proportional to rows × models per
    row. The result is intended to equal
    ``_aggregate_cache_breakdown(entries, key_fn=lambda e: e.model)`` up
    to float-addition ordering.

    With ``skip_synthetic`` (the default) the ``"<synthetic>"`` model is
    left out, matching ``_aggregate_cache_breakdown``.

    Ordering and tail handling: rows are sorted by ``abs(net_usd)``
    descending; when more than ``top_n`` models exist, the remainder is
    merged into a single ``other_label`` row carrying the tail's summed
    ``net_usd`` and the aggregate hit % over the tail's summed tokens.
    """
    per_model: dict[str, _Bucket] = {}
    for row in rows:
        for child in row.model_breakdowns:
            name = child.model_name
            if skip_synthetic and name == "<synthetic>":
                continue
            acc = per_model.get(name)
            if acc is None:
                acc = per_model[name] = _Bucket()
            acc.input_tokens += child.input_tokens
            acc.cache_creation_tokens += child.cache_creation_tokens
            acc.cache_read_tokens += child.cache_read_tokens
            acc.net_usd += child.net_usd

    ranked: list[CacheBreakdownRow] = [
        CacheBreakdownRow(
            key=name,
            cache_hit_percent=_compute_cache_hit_percent(
                acc.input_tokens, acc.cache_creation_tokens, acc.cache_read_tokens,
            ),
            net_usd=acc.net_usd,
            input_tokens=acc.input_tokens,
            cache_creation_tokens=acc.cache_creation_tokens,
            cache_read_tokens=acc.cache_read_tokens,
        )
        for name, acc in per_model.items()
    ]
    # Largest absolute net effect first; ties keep bucket insertion order.
    ranked.sort(key=lambda r: abs(r.net_usd), reverse=True)
    if len(ranked) <= top_n:
        return tuple(ranked)

    kept = ranked[:top_n]
    collapsed = ranked[top_n:]
    collapsed_net = sum(r.net_usd for r in collapsed)
    collapsed_input = sum(r.input_tokens for r in collapsed)
    collapsed_creation = sum(r.cache_creation_tokens for r in collapsed)
    collapsed_read = sum(r.cache_read_tokens for r in collapsed)
    kept.append(CacheBreakdownRow(
        key=other_label,
        cache_hit_percent=_compute_cache_hit_percent(
            collapsed_input, collapsed_creation, collapsed_read
        ),
        net_usd=collapsed_net,
        input_tokens=collapsed_input,
        cache_creation_tokens=collapsed_creation,
        cache_read_tokens=collapsed_read,
    ))
    return tuple(kept)
815
+
816
+
817
+ # ---------------------------------------------------------------------------
818
+ # Top-level orchestrator
819
+ # ---------------------------------------------------------------------------
820
+
821
@dataclass
class _CacheReportResult:
    """Value object handed back by ``_build_cache_report``.

    Two consumers read it: the CLI renderer (table or JSON output) and
    the dashboard snapshot builder (which reshapes it into
    ``CacheReportSnapshot`` for the SSE envelope).

    ``today_baseline_median`` is filled in day mode only, because session
    mode has no "today" anchor. It is the median ``cache_hit_percent`` of
    the rows other than today's over the trailing ``anomaly_window_days``,
    carried on the result so the dashboard builder does not have to call
    ``_compute_baseline_median`` again on the same rows (EFF-3).
    """

    # Aggregated rows, one per day or per session depending on ``mode``.
    rows: list[CacheRow]
    # Which bucketing produced ``rows``.
    mode: Literal["day", "session"]
    # Report window, echoed back from the caller.
    window_days: int
    # Anomaly threshold in percentage points, echoed back from the caller.
    anomaly_threshold_pp: int
    # Trailing window used for the anomaly baseline.
    anomaly_window_days: int
    # Resolved IANA zone name; ``None`` when the caller passed
    # ``display_tz=None``.
    display_tz_key: str | None
    # Day-mode baseline median; stays ``None`` in session mode.
    today_baseline_median: float | None = None
845
+
846
+
847
def _build_cache_report(
    entries: Iterable,
    *,
    now_utc: dt.datetime,
    window_days: int,
    anomaly_threshold_pp: int,
    anomaly_window_days: int,
    display_tz: ZoneInfo | None,
    pricing: dict,
    cost_calculator: Callable[[str, dict, str, Optional[float]], float],
    mode: Literal["day", "session"] = "day",
    project_decoder: Callable[[str], str] | None = None,
    anomaly_enabled: bool = True,
) -> _CacheReportResult:
    """Aggregate ``entries`` into cache rows and classify anomalies.

    This is the single entry point shared by the CLI renderer and the
    dashboard snapshot builder. Callers supply the data through
    ``entries`` and the ``cost_calculator`` / ``project_decoder``
    injections.

    Args:
        entries: Usage entries. The kernel does not apply the
            ``now_utc - window_days`` cut-off itself; callers are
            expected to have pre-filtered via their own query
            (``get_entries`` / ``get_claude_session_entries``).
        now_utc: Current instant, used to derive "today" in day mode.
        window_days: Report window; echoed onto the result unchanged.
        anomaly_threshold_pp: Threshold forwarded to the anomaly
            classifier.
        anomaly_window_days: Trailing window for the classifier and for
            the day-mode baseline median.
        display_tz: Zone used for day bucketing, or ``None``.
        pricing: Pricing table forwarded to the aggregators.
        cost_calculator: Cost function forwarded to the aggregators.
        mode: ``"day"`` buckets via ``_aggregate_cache_by_day``;
            ``"session"`` buckets via ``_aggregate_cache_by_session``.
        project_decoder: Required in session mode, ignored in day mode.
        anomaly_enabled: Forwarded to the classifier as ``enabled``.

    Returns:
        A ``_CacheReportResult``. ``today_baseline_median`` is computed
        in day mode only.

    Raises:
        ValueError: If ``mode`` is neither ``"day"`` nor ``"session"``,
            or if session mode is requested without ``project_decoder``.
    """
    if mode == "session":
        if project_decoder is None:
            raise ValueError("session mode requires project_decoder")
        session_result = _aggregate_cache_by_session(
            entries,
            pricing=pricing,
            cost_calculator=cost_calculator,
            project_decoder=project_decoder,
        )
        rows = session_result.rows
    elif mode == "day":
        rows = _aggregate_cache_by_day(
            entries,
            display_tz=display_tz,
            pricing=pricing,
            cost_calculator=cost_calculator,
        )
    else:
        raise ValueError(f"unknown mode: {mode!r}")

    _classify_anomalies(
        rows,
        threshold_pp=anomaly_threshold_pp,
        window_days=anomaly_window_days,
        enabled=anomaly_enabled,
    )

    # EFF-3: compute today's baseline median once here so the dashboard
    # snapshot builder can read it off the result instead of re-running
    # _compute_baseline_median on the same rows. Session mode has no
    # "today" anchor, so the value stays None there.
    baseline_median: float | None = None
    if mode == "day":
        anchor_tz = dt.timezone.utc if display_tz is None else display_tz
        today_iso = now_utc.astimezone(anchor_tz).strftime("%Y-%m-%d")
        # Deliberate quirk, kept byte-for-byte from the pre-EFF-3
        # adapter: the parsed datetime is naive, so astimezone() reads
        # it as host-local time before shifting it to anchor_tz. Do not
        # "fix" this without re-verifying that the dashboard envelope's
        # today.baseline_median_percent stays stable against the
        # existing golden fixtures.
        naive_midnight = dt.datetime.strptime(today_iso, "%Y-%m-%d")
        today_anchor = naive_midnight.astimezone(anchor_tz)
        rows_excluding_today = [r for r in rows if r.date != today_iso]
        baseline_median = _compute_baseline_median(
            rows_excluding_today,
            anchor=today_anchor,
            window_days=anomaly_window_days,
            min_samples=CACHE_REPORT_MIN_BASELINE_DAYS,
        )

    tz_key = None if display_tz is None else display_tz.key
    return _CacheReportResult(
        rows=rows,
        mode=mode,
        window_days=window_days,
        anomaly_threshold_pp=anomaly_threshold_pp,
        anomaly_window_days=anomaly_window_days,
        display_tz_key=tz_key,
        today_baseline_median=baseline_median,
    )