cctally 1.22.1 → 1.22.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/bin/cctally CHANGED
@@ -603,6 +603,13 @@ cmd_alerts_test = _cctally_alerts.cmd_alerts_test
603
603
  _cctally_sync_week = _load_sibling("_cctally_sync_week")
604
604
  cmd_sync_week = _cctally_sync_week.cmd_sync_week
605
605
 
606
+ # Eager re-export of bin/_cctally_project.py — `cmd_project` is invoked
607
+ # only via the parser's `set_defaults(func=c.cmd_project)`, which resolves
608
+ # through cctally's namespace, so it must live here. The 4 helpers stay
609
+ # module-private (no external caller).
610
+ _cctally_project = _load_sibling("_cctally_project")
611
+ cmd_project = _cctally_project.cmd_project
612
+
606
613
 
607
614
  # config.json reader/writer/lock + validators + `cctally config` entry point.
608
615
  # Eager-loaded with per-symbol re-export so bare-name callers in cctally
@@ -6207,684 +6214,6 @@ def cmd_codex_session(args: argparse.Namespace) -> int:
6207
6214
  return 0
6208
6215
 
6209
6216
 
6210
def _load_week_snapshots(
    since: dt.datetime, until: dt.datetime
) -> dict[dt.datetime, float]:
    """Map each week start (as a UTC datetime) to its highest `weekly_percent`.

    Selects rows of `weekly_usage_snapshots` whose week overlaps the
    [since, until] range: `week_start_at` earlier than `until` and
    `week_end_at` later than `since`. Rows where either bound is NULL are
    excluded by the query. The percentage comes from the `weekly_percent`
    column (not `used_7d_percent`).

    The maximum is folded in Python rather than SQL, keyed on the parsed
    timestamp converted to UTC, so rows that spell the same instant with
    different offsets (e.g. `+00:00` vs `+03:00`) share one bucket instead
    of splitting and losing the higher value.

    Returns an empty dict when no row matches.
    """
    query = (
        "SELECT week_start_at, weekly_percent FROM weekly_usage_snapshots "
        "WHERE week_start_at IS NOT NULL "
        "AND week_end_at IS NOT NULL "
        "AND datetime(week_start_at) < datetime(?) "
        "AND datetime(week_end_at) > datetime(?)"
    )
    db = open_db()
    try:
        fetched = db.execute(
            query, (until.isoformat(), since.isoformat())
        ).fetchall()
        best_by_week: dict[dt.datetime, float] = {}
        for raw_start, raw_pct in fetched:
            if raw_start is None or raw_pct is None:
                continue
            # A trailing "Z" is rewritten to an explicit offset before parsing.
            normalized = str(raw_start).replace("Z", "+00:00")
            week_key = dt.datetime.fromisoformat(normalized).astimezone(
                dt.timezone.utc
            )
            value = float(raw_pct)
            if week_key not in best_by_week or value > best_by_week[week_key]:
                best_by_week[week_key] = value
        return best_by_week
    finally:
        db.close()
6255
-
6256
-
6257
- def _accumulate_entry_into_bucket(
6258
- b: dict,
6259
- entry: "_JoinedClaudeEntry",
6260
- pre_computed_cost: float | None = None,
6261
- ) -> None:
6262
- """Add one joined-Claude entry's tokens, cost, session-id, and timestamps
6263
- into a project×week bucket dict.
6264
-
6265
- Cost is computed via the same `_calculate_entry_cost(model, usage_dict,
6266
- mode="auto", cost_usd=...)` path used by `_aggregate_cache_by_session`
6267
- (the other `_JoinedClaudeEntry` consumer) so pricing updates flow through
6268
- uniformly. Per-model sub-buckets mirror the parent bucket's shape.
6269
-
6270
- `pre_computed_cost`: if callers have already invoked `_calculate_entry_cost`
6271
- for this entry (e.g. to also feed the attribution denominator in
6272
- `cmd_project`), pass it in to avoid double work.
6273
- """
6274
- # Mirror `_aggregate_claude_sessions`: NULL session_id falls back to the
6275
- # source-file basename so distinct files don't collapse into one bucket.
6276
- if entry.session_id:
6277
- sid = entry.session_id
6278
- else:
6279
- sid = os.path.splitext(os.path.basename(entry.source_path))[0]
6280
- b["sessions"].add(sid)
6281
- if entry.timestamp < b["first_seen"]:
6282
- b["first_seen"] = entry.timestamp
6283
- if entry.timestamp > b["last_seen"]:
6284
- b["last_seen"] = entry.timestamp
6285
- b["input"] += entry.input_tokens
6286
- b["output"] += entry.output_tokens
6287
- b["cache_write"] += entry.cache_creation_tokens
6288
- b["cache_read"] += entry.cache_read_tokens
6289
- if pre_computed_cost is not None:
6290
- cost = pre_computed_cost
6291
- else:
6292
- cost = _calculate_entry_cost(
6293
- entry.model,
6294
- {
6295
- "input_tokens": entry.input_tokens,
6296
- "output_tokens": entry.output_tokens,
6297
- "cache_creation_input_tokens": entry.cache_creation_tokens,
6298
- "cache_read_input_tokens": entry.cache_read_tokens,
6299
- },
6300
- mode="auto",
6301
- cost_usd=entry.cost_usd,
6302
- )
6303
- b["cost_usd"] += cost
6304
- model = entry.model or "(unknown-model)"
6305
- mb = b["models"].get(model)
6306
- if mb is None:
6307
- mb = {
6308
- "cost_usd": 0.0,
6309
- "input": 0, "output": 0,
6310
- "cache_write": 0, "cache_read": 0,
6311
- "first_seen": entry.timestamp, "last_seen": entry.timestamp,
6312
- }
6313
- b["models"][model] = mb
6314
- if entry.timestamp < mb["first_seen"]:
6315
- mb["first_seen"] = entry.timestamp
6316
- if entry.timestamp > mb["last_seen"]:
6317
- mb["last_seen"] = entry.timestamp
6318
- mb["cost_usd"] += cost
6319
- mb["input"] += entry.input_tokens
6320
- mb["output"] += entry.output_tokens
6321
- mb["cache_write"] += entry.cache_creation_tokens
6322
- mb["cache_read"] += entry.cache_read_tokens
6323
-
6324
-
6325
def _project_json_output(
    *,
    since: dt.datetime,
    until: dt.datetime,
    weeks_in_range: int,
    group_mode: str,
    rows: list[dict],
    weeks_missing_snapshot: set[dt.datetime],
    warnings: list[str],
    include_breakdown: bool,
    week_snapshots: dict[dt.datetime, float],
) -> str:
    """Serialize the project report as the `--json` payload (spec §4).

    `rows` are emitted in the order given, so the caller's sort applies to
    JSON exactly as it does to the terminal table. `totals.costUsd` is the
    sum of the rows' costs; `totals.usedPercent` is the sum of every value
    in `week_snapshots`, or null when that mapping is empty.
    `totals.weeklyAttributionAvailable` is true only when
    `weeks_missing_snapshot` is empty. Each project carries a `models`
    list (sorted by model name) only when `include_breakdown` is set.

    Returns the payload as a JSON string indented by two spaces.
    """

    def _iso_z(moment: dt.datetime) -> str:
        # Timestamps are rendered in UTC with a literal "Z" suffix.
        return moment.astimezone(dt.timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")

    def _rounded_or_none(value):
        return None if value is None else round(value, 4)

    def _model_entry(name: str, stats: dict) -> dict:
        return {
            "model": name,
            "firstSeen": _iso_z(stats["first_seen"]),
            "lastSeen": _iso_z(stats["last_seen"]),
            "inputTokens": stats["input"],
            "outputTokens": stats["output"],
            "cacheWriteTokens": stats["cache_write"],
            "cacheReadTokens": stats["cache_read"],
            "costUsd": round(stats["cost_usd"], 4),
        }

    def _project_entry(row: dict) -> dict:
        project_key = row["key"]
        item = {
            "displayKey": project_key.display_key,
            "projectPath": project_key.bucket_path,
            "gitRoot": project_key.git_root,
            "sessions": len(row["sessions"]),
            "firstSeen": _iso_z(row["first_seen"]),
            "lastSeen": _iso_z(row["last_seen"]),
            "inputTokens": row["input"],
            "outputTokens": row["output"],
            "cacheWriteTokens": row["cache_write"],
            "cacheReadTokens": row["cache_read"],
            "costUsd": round(row["cost_usd"], 4),
            "attributedUsedPercent": _rounded_or_none(row["attributed_pct"]),
            "costPerPercent": _rounded_or_none(row["cost_per_pct"]),
        }
        if include_breakdown:
            item["models"] = [
                _model_entry(name, stats)
                for name, stats in sorted(row["models"].items())
            ]
        return item

    cost_total = sum(row["cost_usd"] for row in rows)
    used_total = sum(week_snapshots.values()) if week_snapshots else None

    return json.dumps(
        {
            "rangeStart": since.date().isoformat(),
            "rangeEnd": until.date().isoformat(),
            "weeksInRange": weeks_in_range,
            "groupMode": group_mode,
            "totals": {
                "costUsd": round(cost_total, 4),
                "usedPercent": _rounded_or_none(used_total),
                "weeklyAttributionAvailable": len(weeks_missing_snapshot) == 0,
            },
            "projects": [_project_entry(row) for row in rows],
            "warnings": warnings,
        },
        indent=2,
    )
6412
-
6413
-
6414
def _project_sort_key(row: dict, sort_by: str, order: str):
    """Build the sort key for one project row.

    For the numeric criteria (``cost``, ``used``, ``last-seen``) the result
    is ``(primary, name)`` where ``primary`` is negated when ``order`` is
    ``"desc"``, so a plain ascending sort yields the requested direction
    while the lower-cased display name breaks ties in ascending order
    either way. A row whose ``attributed_pct`` is None is treated as -1
    for the ``used`` criterion.

    For ``name`` the result is the one-element tuple ``(name,)``; the
    caller handles descending order via ``sorted(reverse=...)``.

    Any other ``sort_by`` value falls back to the cost ordering.
    """
    name_key = row["key"].display_key.lower()
    if sort_by == "name":
        return (name_key,)

    direction = -1 if order == "desc" else 1
    if sort_by == "used":
        attributed = row["attributed_pct"]
        magnitude = -1 if attributed is None else attributed
    elif sort_by == "last-seen":
        magnitude = row["last_seen"].timestamp()
    else:
        # Covers "cost" and, defensively, any unexpected value.
        magnitude = row["cost_usd"]
    return (direction * magnitude, name_key)
6434
-
6435
-
6436
def cmd_project(args: argparse.Namespace) -> int:
    """Roll entries up by project (git-root) with per-project usage attribution.

    Flow:
      1. Validate flag combinations (`--weeks`, `--since`, `--until`).
      2. Resolve the UTC window [since_dt, until_dt] and the subscription
         weeks that cover it.
      3. Scan joined Claude entries over the full subscription-week span,
         feeding every entry into the per-week cost denominator and only
         the in-window, filter-matching entries into visible buckets.
      4. Collapse (project, week) buckets into one row per project and
         attribute each week's `weekly_percent` by cost share.
      5. Emit via the share renderer (`--format`), JSON (`--json`), or the
         terminal table.

    Returns 1 on a flag-validation error, the integer produced by
    `_parse_cli_date_range` when it reports a failure, and 0 otherwise.
    """
    _share_validate_args(args)
    config = _load_claude_config_for_args(args)
    # Session A (spec §7.2): bridge -z/--timezone into args.tz so the
    # existing resolve_display_tz precedence absorbs the new alias.
    _bridge_z_into_tz(args, config)
    args._resolved_tz = resolve_display_tz(args, config)

    # Flag-combination validation (must run before any expensive work).
    if args.weeks is not None and args.weeks < 1:
        eprint("Error: --weeks must be >= 1")
        return 1
    if args.weeks is not None and (args.since or args.until):
        eprint("Error: --weeks cannot be combined with --since/--until")
        return 1
    if args.since and args.until:
        # Parse both as dates so YYYY-MM-DD and YYYYMMDD compare correctly
        # (string compare alone breaks across mixed formats).
        def _parse(raw: str) -> dt.date | None:
            for fmt in ("%Y-%m-%d", "%Y%m%d"):
                try:
                    return dt.datetime.strptime(raw, fmt).date()
                except ValueError:
                    continue
            return None

        since_parsed = _parse(args.since)
        until_parsed = _parse(args.until)
        # Silent-skip if either date failed to parse: the "range order"
        # error is only surfaced when both inputs are well-formed. Format
        # errors are reported downstream by _parse_cli_date_range() so the
        # user sees the parse problem first, not a misleading order
        # complaint triggered by garbage input.
        if (
            since_parsed is not None
            and until_parsed is not None
            and since_parsed > until_parsed
        ):
            eprint("Error: --since must be <= --until")
            return 1

    now = _command_as_of()

    # The connection is only needed to resolve subscription weeks. It is
    # closed in `finally` so every exit path, including the early return on
    # a date-range parse failure, releases it (same ownership convention as
    # `_load_week_snapshots`).
    conn = open_db()
    try:
        # Resolve [since_dt, until_dt] in UTC.
        if args.since or args.until:
            parsed = _parse_cli_date_range(args, now_utc=now)
            if isinstance(parsed, int):
                return parsed
            since_dt, until_dt = parsed
            since_dt = since_dt.astimezone(dt.timezone.utc)
            until_dt = until_dt.astimezone(dt.timezone.utc)
        else:
            # Default to the current subscription week; --weeks N extends
            # backwards. Widen by 1us so the emit loop fires when `now` is
            # exactly at a reset boundary (zero-width [now, now] makes
            # Case A's `current < range_end` false, which would otherwise
            # wrongly fall through to the Monday fallback for
            # non-Monday-reset accounts).
            current_weeks = _compute_subscription_weeks(
                conn, now, now + dt.timedelta(microseconds=1), config=config,
            )
            if current_weeks:
                cw_start = parse_iso_datetime(
                    current_weeks[0].start_ts, "week.start_ts"
                ).astimezone(dt.timezone.utc)
            else:
                # No snapshots available: fall back to a Monday-anchored week.
                cw_start = (now - dt.timedelta(days=now.weekday())).replace(
                    hour=0, minute=0, second=0, microsecond=0
                )
            if args.weeks is not None:
                since_dt = cw_start - dt.timedelta(days=7 * (args.weeks - 1))
            else:
                since_dt = cw_start
            until_dt = now

        # Pre-compute subscription-week bounds for the query window so each
        # entry can be bucketed onto a canonical subscription-week start_ts.
        # Mirrors `_aggregate_weekly`'s bisect pattern (first-match-wins on
        # overlap).
        subweeks = _compute_subscription_weeks(
            conn, since_dt, until_dt, config=config,
        )
        parsed_bounds: list[tuple[dt.datetime, dt.datetime]] = []
        for sw in subweeks:
            s_dt = parse_iso_datetime(
                sw.start_ts, "week.start_ts"
            ).astimezone(dt.timezone.utc)
            e_dt = parse_iso_datetime(
                sw.end_ts, "week.end_ts"
            ).astimezone(dt.timezone.utc)
            parsed_bounds.append((s_dt, e_dt))
    finally:
        conn.close()

    week_starts = [bounds[0] for bounds in parsed_bounds]

    def _week_start_for(ts: dt.datetime) -> dt.datetime | None:
        """Return the canonical subscription-week start_dt for `ts`, or None
        if `ts` falls outside every SubWeek interval (may happen near the
        boundaries of the requested [since_dt, until_dt] window)."""
        ts_utc = ts.astimezone(dt.timezone.utc)
        idx = bisect.bisect_right(week_starts, ts_utc) - 1
        if idx < 0:
            return None
        # First-match-wins on Anthropic reset-day-drift overlap (same
        # walk-back as `_aggregate_weekly`).
        while idx > 0:
            prev_start, prev_end = parsed_bounds[idx - 1]
            if prev_start <= ts_utc < prev_end:
                idx -= 1
            else:
                break
        s_dt, e_dt = parsed_bounds[idx]
        if s_dt <= ts_utc < e_dt:
            return s_dt
        return None

    # Pre-lower filter patterns (substring, OR semantics, repeatable).
    project_patterns = [p.lower() for p in (args.project or [])]
    model_patterns = [m.lower() for m in (args.model or [])]

    # Widen scan to full subscription-week bounds so the attribution
    # denominator includes ALL week cost, even entries outside the
    # user's [since_dt, until_dt] slice. Visible buckets are still
    # gated on the user slice below. Without this, a partial-week
    # --since/--until slice understates the denominator and inflates
    # every row's Used %.
    if parsed_bounds:
        scan_start = min(since_dt, parsed_bounds[0][0])
        scan_end = max(until_dt, parsed_bounds[-1][1])
    else:
        scan_start, scan_end = since_dt, until_dt

    resolver_cache: dict[str, ProjectKey] = {}
    buckets: dict[tuple[ProjectKey, dt.datetime], dict] = {}
    total_cost_by_week: dict[dt.datetime, float] = {}
    unknown_entry_count = 0
    missing_sid_count = 0

    # Issue #89: materialize the joined-entry iterator once so we can
    # (a) pre-compute the --debug report's scope (entries passing all
    # rendered-row filters — user slice + --model + --project) BEFORE
    # the aggregation loop runs and (b) preserve the existing
    # aggregation semantics (denominator widened to ALL entries; visible
    # rows only the post-filter subset).
    joined_entries_all = list(get_claude_session_entries(scan_start, scan_end))

    # Build the --debug report dataset: skip synthetic + out-of-window
    # entries, then apply --model and --project filters (mirroring the
    # predicate at the aggregation loop below). This must match the
    # rendered scope, NOT the denominator scope.
    if getattr(args, "debug", False):
        filtered_for_report = []
        for je in joined_entries_all:
            if je.model == "<synthetic>":
                continue
            if _week_start_for(je.timestamp) is None:
                continue
            if je.timestamp < since_dt or je.timestamp > until_dt:
                continue
            if model_patterns:
                mname = (je.model or "").lower()
                if not any(p in mname for p in model_patterns):
                    continue
            key_for_filter = _resolve_project_key(
                je.project_path, args.group, resolver_cache,
            )
            if not _project_filter_matches(key_for_filter, project_patterns):
                continue
            filtered_for_report.append(je)
        _emit_debug_samples_if_set(
            args,
            [_usage_entry_from_joined(je) for je in filtered_for_report],
            command_label="project",
        )

    for entry in joined_entries_all:
        # Skip synthetic entries (Claude Code internal markers) to match
        # `_aggregate_cache_by_session` / `_aggregate_claude_sessions`.
        if entry.model == "<synthetic>":
            continue

        week_start = _week_start_for(entry.timestamp)
        if week_start is None:
            continue

        entry_cost = _calculate_entry_cost(
            entry.model,
            {
                "input_tokens": entry.input_tokens,
                "output_tokens": entry.output_tokens,
                "cache_creation_input_tokens": entry.cache_creation_tokens,
                "cache_read_input_tokens": entry.cache_read_tokens,
            },
            mode="auto",
            cost_usd=entry.cost_usd,
        )

        # Denominator: always contribute (whole-week attribution) so
        # `--model`/`--project`/partial-slice do NOT rescale it.
        total_cost_by_week[week_start] = (
            total_cost_by_week.get(week_start, 0.0) + entry_cost
        )

        # User-slice gate: visible rows only include entries within
        # [since_dt, until_dt]. Entries outside the slice still
        # contributed to the denominator above.
        if entry.timestamp < since_dt or entry.timestamp > until_dt:
            continue

        if model_patterns:
            mname = (entry.model or "").lower()
            if not any(p in mname for p in model_patterns):
                continue

        key = _resolve_project_key(entry.project_path, args.group, resolver_cache)
        if key.is_unknown:
            unknown_entry_count += 1

        # --project filter: match against display_key OR the underlying
        # path (git_root / bucket_path). Matching only display_key makes
        # basename-collision suffixes (e.g. `foo (repos)`) impossible to
        # select by their path segment.
        if project_patterns:
            dname = key.display_key.lower()
            pname = (key.git_root or key.bucket_path or "").lower()
            if not any((p in dname) or (p in pname) for p in project_patterns):
                continue

        if entry.session_id is None:
            missing_sid_count += 1

        bkey = (key, week_start)
        b = buckets.get(bkey)
        if b is None:
            b = {
                "key": key,
                "week_start": week_start,
                "sessions": set(),
                "first_seen": entry.timestamp,
                "last_seen": entry.timestamp,
                "input": 0, "output": 0,
                "cache_write": 0, "cache_read": 0,
                "cost_usd": 0.0,
                "models": {},
            }
            buckets[bkey] = b
        _accumulate_entry_into_bucket(b, entry, pre_computed_cost=entry_cost)

    # Data-quality warnings: built once, echoed to stderr here (in every
    # output mode) and also carried in the JSON payload's `warnings` list.
    warnings: list[str] = []
    if unknown_entry_count > 0:
        warnings.append(
            f"{unknown_entry_count} entries lacked project_path — "
            f"run `cache-sync` to backfill."
        )
    if missing_sid_count > 0:
        warnings.append(
            f"{missing_sid_count} entries lacked session_files session_id — "
            f"run `cache-sync` to backfill."
        )
    for message in warnings:
        eprint(f"Warning: {message}")

    # --- Attribution math (Task 5) -----------------------------------------
    # Load per-week `weekly_percent` (max within window) for every week that
    # intersects [since_dt, until_dt]. Missing snapshots are tracked so the
    # output can report them — those weeks can't contribute to attributed %.
    week_snapshots: dict[dt.datetime, float] = _load_week_snapshots(
        since_dt, until_dt
    )

    # Set of every week the user asked about (from the computed SubWeek
    # bounds), used to report `weeksInRange` and the missing-snapshot count
    # independent of whether that week had any project activity.
    weeks_in_range: set[dt.datetime] = set(week_starts)
    weeks_missing_snapshot: set[dt.datetime] = {
        ws for ws in weeks_in_range if ws not in week_snapshots
    }

    # Collapse (project_key, week) buckets into one row per project, summing
    # tokens / cost / sessions / first_seen / last_seen / models across the
    # weeks the project appears in.
    #
    # Attribution: for each (project P, week W) bucket,
    #   attributed_pct[P,W] = (cost[P,W] / total_cost[W]) * weekly_percent[W]
    # iff a snapshot exists for W. Weeks without a snapshot contribute
    # nothing (they are already counted in `weeks_missing_snapshot`).
    project_rows: dict[str, dict] = {}
    for (key, wstart), b in buckets.items():
        row = project_rows.get(key.bucket_path)
        if row is None:
            row = {
                "key": key,
                "sessions": set(),
                "first_seen": b["first_seen"],
                "last_seen": b["last_seen"],
                "input": 0, "output": 0,
                "cache_write": 0, "cache_read": 0,
                "cost_usd": 0.0,
                # `None` until the first week with a snapshot contributes —
                # preserves the distinction between "every contributing week
                # lacked a snapshot" (→ None) and "genuine zero attribution"
                # (→ 0.0 after a real contribution). Spec §3.
                "attributed_pct": None,
                "models": {},
            }
            project_rows[key.bucket_path] = row
        row["sessions"] |= b["sessions"]
        if b["first_seen"] < row["first_seen"]:
            row["first_seen"] = b["first_seen"]
        if b["last_seen"] > row["last_seen"]:
            row["last_seen"] = b["last_seen"]
        row["input"] += b["input"]
        row["output"] += b["output"]
        row["cache_write"] += b["cache_write"]
        row["cache_read"] += b["cache_read"]
        row["cost_usd"] += b["cost_usd"]

        # Merge per-model sub-buckets.
        for model, mb in b["models"].items():
            rm = row["models"].get(model)
            if rm is None:
                rm = {
                    "cost_usd": 0.0,
                    "input": 0, "output": 0,
                    "cache_write": 0, "cache_read": 0,
                    "first_seen": mb["first_seen"],
                    "last_seen": mb["last_seen"],
                }
                row["models"][model] = rm
            if mb["first_seen"] < rm["first_seen"]:
                rm["first_seen"] = mb["first_seen"]
            if mb["last_seen"] > rm["last_seen"]:
                rm["last_seen"] = mb["last_seen"]
            rm["cost_usd"] += mb["cost_usd"]
            rm["input"] += mb["input"]
            rm["output"] += mb["output"]
            rm["cache_write"] += mb["cache_write"]
            rm["cache_read"] += mb["cache_read"]

        # Attribution contribution (only if this week has a snapshot and
        # the week has nonzero total cost — a zero denominator would make
        # the ratio meaningless). `attributed_pct` stays `None` until the
        # first real contribution; subsequent contributions accumulate.
        week_pct = week_snapshots.get(wstart)
        week_total = total_cost_by_week.get(wstart, 0.0)
        if week_pct is not None and week_total > 0:
            contribution = (b["cost_usd"] / week_total) * week_pct
            row["attributed_pct"] = (
                (row["attributed_pct"] or 0.0) + contribution
            )

    # Compute $/1% per project: `cost_per_pct = cost_usd / attributed_pct`
    # when attribution is positive; None otherwise (e.g. every contributing
    # week lacked a snapshot — `attributed_pct` still None — or attribution
    # came out to zero).
    for row in project_rows.values():
        ap = row["attributed_pct"]
        if ap is not None and ap > 0:
            row["cost_per_pct"] = row["cost_usd"] / ap
        else:
            row["cost_per_pct"] = None

    # Honor --sort / --order. For numeric keys, `_project_sort_key` flips the
    # primary-key sign to match the requested direction so natural `sorted()`
    # ordering already produces the right answer; the dname tie-break stays
    # ascending in both directions (ties never invert alphabetically). For
    # `name`, the key is asc-natural (a-z) and `reverse=` is used for desc.
    if args.sort == "name":
        sorted_rows = sorted(
            project_rows.values(),
            key=lambda r: _project_sort_key(r, args.sort, args.order),
            reverse=(args.order == "desc"),
        )
    else:
        sorted_rows = sorted(
            project_rows.values(),
            key=lambda r: _project_sort_key(r, args.sort, args.order),
        )

    # Shareable-reports gate: --format short-circuits the JSON / table
    # dispatch via `_share_render_and_emit`. The mutex in
    # `_add_share_args` keeps `--format` and `--json` from coexisting.
    # Privacy invariant (Section 8.4 / 5.3): the wrapper runs `_lib_share._scrub`
    # before rendering, so default output anonymizes project labels to
    # `project-1` / `project-2` / ...; `--reveal-projects` opts back in.
    # The builder populates `ProjectCell.label` / `ChartPoint.project_label`
    # / `ChartPoint.x_label` with REAL names; the wrapper-level scrubber is
    # the single chokepoint that rewrites them.
    if getattr(args, "format", None):
        # Note: --breakdown is a no-op under --format (snapshot focuses on
        # the headline per-project usage table + HBar chart; per-model
        # sub-rows aren't in the share spec scope). Same convention as
        # cmd_daily / cmd_weekly / cmd_report.
        display_tz_str = _share_display_tz_label(args._resolved_tz)
        snap = _build_project_snapshot(
            list(sorted_rows),
            period_start=since_dt,
            period_end=until_dt,
            display_tz=display_tz_str,
            version=_share_resolve_version(),
            theme=args.theme,
            reveal_projects=args.reveal_projects,
        )
        _share_render_and_emit(snap, args)
        return 0

    if args.json:
        print(_project_json_output(
            since=since_dt,
            until=until_dt,
            weeks_in_range=len(weeks_in_range),
            group_mode=args.group,
            rows=sorted_rows,
            weeks_missing_snapshot=weeks_missing_snapshot,
            warnings=warnings,
            include_breakdown=args.breakdown,
            week_snapshots=week_snapshots,
        ))
        return 0

    # Terminal path
    range_label = f"{since_dt.date().isoformat()} \u2014 {until_dt.date().isoformat()}"
    title = f"Claude Token Usage Report - Projects ({range_label})"

    if not sorted_rows:
        eprint("No project usage found in range.")
        return 0

    # Session A (spec §7.3): the new --color flag overrides NO_COLOR
    # env; --no-color overrides FORCE_COLOR env; deny-wins on the
    # --color + --no-color clash. _resolve_color_enabled returns the
    # effective bool; pass it as ``color=`` so the renderer skips its
    # internal _supports_color_stdout() auto-detect (which would
    # re-consult NO_COLOR and incorrectly disable color when the user
    # passed --color under NO_COLOR=1).
    print(_render_project_table(
        sorted_rows,
        title=title,
        breakdown=args.breakdown,
        weeks_missing_snapshot=len(weeks_missing_snapshot),
        weeks_in_range=len(weeks_in_range),
        color=_resolve_color_enabled(args),
        compact=args.compact,
    ))
    return 0
6886
-
6887
-
6888
6217
  # ─────────────────────────────────────────────────────────────────────
6889
6218
  # diff subcommand
6890
6219
  # ─────────────────────────────────────────────────────────────────────