cctally 1.22.1 → 1.22.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,714 @@
1
+ """`cctally project` subcommand entry point.
2
+
3
+ Lazy I/O sibling: holds `cmd_project` + its 4 dedicated helpers
4
+ (`_load_week_snapshots`, `_accumulate_entry_into_bucket`,
5
+ `_project_json_output`, `_project_sort_key`). Aggregates session entries
6
+ by git-root project with per-project weekly usage attribution.
7
+
8
+ Honest imports are KERNEL-ONLY (`_cctally_core`). Every other symbol the
9
+ command calls is reached via the call-time `_cctally()` accessor so test
10
+ monkeypatches through `cctally`'s namespace are preserved — see the spec
11
+ §3.1 disposition table (the cache reads, the share builders/dispatch,
12
+ `_share_validate_args`, `_render_project_table`, `resolve_display_tz`,
13
+ and the `bin/cctally`-resident helpers all route through `c.`).
14
+
15
+ bin/cctally re-exports `cmd_project` (eager) so the parser's
16
+ `set_defaults(func=c.cmd_project)` resolves unchanged.
17
+
18
+ Spec: docs/superpowers/specs/2026-05-30-extract-project-cmd-design.md
19
+ """
20
+ from __future__ import annotations
21
+
22
+ import argparse
23
+ import bisect
24
+ import datetime as dt
25
+ import json
26
+ import os
27
+ import sys
28
+
29
+ from _cctally_core import _command_as_of, eprint, open_db, parse_iso_datetime
30
+
31
+
32
def _cctally():
    """Look up the live `cctally` module each time it is needed (spec §3.1).

    The lookup goes through `sys.modules` at call-time rather than binding
    the module at import-time, so attributes patched onto `cctally` after
    this file was imported are the ones callers observe.

    Raises:
        KeyError: if no module is registered under the name `cctally`.
    """
    loaded_modules = sys.modules
    return loaded_modules["cctally"]
35
+
36
+
37
def _load_week_snapshots(
    since: dt.datetime, until: dt.datetime
) -> dict[dt.datetime, float]:
    """Map each week start (UTC) to the highest `weekly_percent` recorded.

    Only weeks that intersect the [since, until] range are considered: a
    row qualifies when its `week_start_at` is before `until` and its
    `week_end_at` is after `since`.

    The value comes from the `weekly_percent` column of
    `weekly_usage_snapshots` (the authoritative column name — NOT
    `used_7d_percent`). A week's "used %" is the maximum snapshot captured
    within that week (the monotonic-within-window invariant: weekly_percent
    only increases across the life of a week). Rows whose `week_start_at`
    or `week_end_at` is NULL (pre-migration legacy rows that only carried
    date granularity) are excluded by the query.

    The maximum is taken in Python, keyed on the parsed UTC datetime, so
    rows that spell the same instant differently (e.g. `+00:00` vs `+03:00`
    from pre-UTC-cast canonicalizer history) land in one bucket instead of
    splitting and silently dropping the higher value.

    Returns:
        `{week_start_utc: max_weekly_percent}`; empty when the stats DB has
        no relevant rows.
    """
    query = (
        "SELECT week_start_at, weekly_percent FROM weekly_usage_snapshots "
        "WHERE week_start_at IS NOT NULL "
        "AND week_end_at IS NOT NULL "
        "AND datetime(week_start_at) < datetime(?) "
        "AND datetime(week_end_at) > datetime(?)"
    )
    peak_by_week: dict[dt.datetime, float] = {}
    conn = open_db()
    try:
        # Bind order follows the placeholders: `until` bounds the week
        # start, `since` bounds the week end.
        bounds = (until.isoformat(), since.isoformat())
        fetched = conn.execute(query, bounds).fetchall()
        for raw_start, raw_pct in fetched:
            if raw_start is None or raw_pct is None:
                continue
            # `fromisoformat` on older Pythons rejects a trailing "Z".
            iso_text = str(raw_start).replace("Z", "+00:00")
            week_key = dt.datetime.fromisoformat(iso_text).astimezone(
                dt.timezone.utc
            )
            value = float(raw_pct)
            if week_key not in peak_by_week or value > peak_by_week[week_key]:
                peak_by_week[week_key] = value
        return peak_by_week
    finally:
        conn.close()
82
+
83
+
84
def _accumulate_entry_into_bucket(
    b: dict,
    entry: "_JoinedClaudeEntry",
    pre_computed_cost: float | None = None,
) -> None:
    """Add one joined-Claude entry's tokens, cost, session-id, and timestamps
    into a project×week bucket dict.

    Cost is computed via the same `_calculate_entry_cost(model, usage_dict,
    mode="auto", cost_usd=...)` path used by `_aggregate_cache_by_session`
    (the other `_JoinedClaudeEntry` consumer) so pricing updates flow through
    uniformly. Per-model sub-buckets mirror the parent bucket's shape
    (minus `sessions` / `models`).

    Args:
        b: Bucket to mutate in place. Must already hold `sessions` (set),
            `first_seen`, `last_seen`, `input`, `output`, `cache_write`,
            `cache_read`, `cost_usd`, and `models` (dict).
        entry: Entry providing `session_id`, `source_path`, `timestamp`,
            `model`, `cost_usd`, and the four token counters.
        pre_computed_cost: If the caller has already invoked
            `_calculate_entry_cost` for this entry (e.g. to also feed the
            attribution denominator in `cmd_project`), pass it in to avoid
            double work. When given, the `cctally` module is not consulted
            at all.
    """
    # Resolve the cost before touching the bucket so a failing cost
    # calculation cannot leave `b` half-updated, and only reach for the
    # call-time `cctally` accessor when a cost actually has to be computed.
    if pre_computed_cost is not None:
        cost = pre_computed_cost
    else:
        cost = _cctally()._calculate_entry_cost(
            entry.model,
            {
                "input_tokens": entry.input_tokens,
                "output_tokens": entry.output_tokens,
                "cache_creation_input_tokens": entry.cache_creation_tokens,
                "cache_read_input_tokens": entry.cache_read_tokens,
            },
            mode="auto",
            cost_usd=entry.cost_usd,
        )

    # Mirror `_aggregate_claude_sessions`: NULL session_id falls back to the
    # source-file basename so distinct files don't collapse into one bucket.
    sid = entry.session_id or os.path.splitext(
        os.path.basename(entry.source_path)
    )[0]
    b["sessions"].add(sid)

    ts = entry.timestamp
    model = entry.model or "(unknown-model)"
    mb = b["models"].get(model)
    if mb is None:
        mb = {
            "cost_usd": 0.0,
            "input": 0, "output": 0,
            "cache_write": 0, "cache_read": 0,
            "first_seen": ts, "last_seen": ts,
        }
        b["models"][model] = mb

    # The parent bucket and the per-model sub-bucket share the same counter
    # keys, so one loop keeps the two accumulations from drifting apart.
    for target in (b, mb):
        if ts < target["first_seen"]:
            target["first_seen"] = ts
        if ts > target["last_seen"]:
            target["last_seen"] = ts
        target["input"] += entry.input_tokens
        target["output"] += entry.output_tokens
        target["cache_write"] += entry.cache_creation_tokens
        target["cache_read"] += entry.cache_read_tokens
        target["cost_usd"] += cost
151
+
152
+
153
def _project_json_output(
    *,
    since: dt.datetime,
    until: dt.datetime,
    weeks_in_range: int,
    group_mode: str,
    rows: list[dict],
    weeks_missing_snapshot: set[dt.datetime],
    warnings: list[str],
    include_breakdown: bool,
    week_snapshots: dict[dt.datetime, float],
) -> str:
    """Build the `--json` payload for the project subcommand (spec §4).

    `rows` must arrive already ordered by the caller so that the ordering
    flags behave the same in terminal and JSON modes; this function never
    re-sorts projects. `totals.costUsd` is the sum of the rows' costs and
    `totals.usedPercent` is the sum of every value in `week_snapshots`
    (all weeks with snapshots in the range — matching the conservation-law
    denominator used by per-project attribution), or null when there are
    no snapshots. A per-project `models` array, ordered by model name, is
    emitted only when `include_breakdown` is true, to avoid payload bloat.

    Returns:
        The payload serialized with `json.dumps(..., indent=2)`.
    """

    def _utc_stamp(moment: dt.datetime) -> str:
        # Timestamps are rendered as UTC with a literal "Z" suffix.
        as_utc = moment.astimezone(dt.timezone.utc)
        return as_utc.strftime("%Y-%m-%dT%H:%M:%SZ")

    def _rounded_or_null(value):
        # Optional metrics stay null rather than being coerced to 0.
        if value is None:
            return None
        return round(value, 4)

    def _model_entry(name: str, stats: dict) -> dict:
        return {
            "model": name,
            "firstSeen": _utc_stamp(stats["first_seen"]),
            "lastSeen": _utc_stamp(stats["last_seen"]),
            "inputTokens": stats["input"],
            "outputTokens": stats["output"],
            "cacheWriteTokens": stats["cache_write"],
            "cacheReadTokens": stats["cache_read"],
            "costUsd": round(stats["cost_usd"], 4),
        }

    grand_cost = sum(item["cost_usd"] for item in rows)
    used_total = sum(week_snapshots.values()) if week_snapshots else None

    projects = []
    for item in rows:  # caller-defined order is preserved
        project_key = item["key"]
        entry = {
            "displayKey": project_key.display_key,
            "projectPath": project_key.bucket_path,
            "gitRoot": project_key.git_root,
            "sessions": len(item["sessions"]),
            "firstSeen": _utc_stamp(item["first_seen"]),
            "lastSeen": _utc_stamp(item["last_seen"]),
            "inputTokens": item["input"],
            "outputTokens": item["output"],
            "cacheWriteTokens": item["cache_write"],
            "cacheReadTokens": item["cache_read"],
            "costUsd": round(item["cost_usd"], 4),
            "attributedUsedPercent": _rounded_or_null(item["attributed_pct"]),
            "costPerPercent": _rounded_or_null(item["cost_per_pct"]),
        }
        if include_breakdown:
            per_model = item["models"]
            entry["models"] = [
                _model_entry(name, per_model[name]) for name in sorted(per_model)
            ]
        projects.append(entry)

    totals = {
        "costUsd": round(grand_cost, 4),
        "usedPercent": _rounded_or_null(used_total),
        "weeklyAttributionAvailable": len(weeks_missing_snapshot) == 0,
    }
    payload = {
        "rangeStart": since.date().isoformat(),
        "rangeEnd": until.date().isoformat(),
        "weeksInRange": weeks_in_range,
        "groupMode": group_mode,
        "totals": totals,
        "projects": projects,
        "warnings": warnings,
    }
    return json.dumps(payload, indent=2)
240
+
241
+
242
def _project_sort_key(row: dict, sort_by: str, order: str):
    """Build the sort key for one project row.

    For the numeric modes the result is ``(primary, dname)`` with the
    primary value negated when ``order`` is ``"desc"``, so a plain ascending
    sort yields the requested direction while ties still break on the
    lower-cased display name in ascending order. For ``"name"`` the result
    is the one-tuple ``(dname,)``; direction is left to the caller, which
    uses ``sorted(reverse=order == 'desc')``.

    ``sort_by`` values align with argparse choices: cost|used|name|last-seen.
    Any other value falls back to cost ordering. A row whose
    ``attributed_pct`` is ``None`` is ranked as ``-1`` under ``"used"``.
    """
    label = row["key"].display_key.lower()
    if sort_by == "name":
        return (label,)

    direction = 1 if order != "desc" else -1
    if sort_by == "used":
        pct = row["attributed_pct"]
        primary = -1 if pct is None else pct
    elif sort_by == "last-seen":
        primary = row["last_seen"].timestamp()
    else:
        # "cost", plus the (argparse-unreachable) safe default.
        primary = row["cost_usd"]
    return (direction * primary, label)
262
+
263
+
264
def cmd_project(args: argparse.Namespace) -> int:
    """Roll entries up by project (git-root) with per-project usage attribution.

    Flow: validate flag combinations, resolve the UTC query window
    (`--since/--until`, or the current subscription week optionally extended
    backwards by `--weeks`), bucket joined Claude entries by
    (project, subscription week), attribute each week's `weekly_percent` to
    projects by cost share, then emit a share snapshot (`--format`), JSON
    (`--json`), or a terminal table.

    Args:
        args: Parsed CLI namespace. Reads `weeks`, `since`, `until`,
            `project`, `model`, `group`, `sort`, `order`, `json`,
            `breakdown`, `compact`, and (when present) `debug` / `format`;
            `theme` / `reveal_projects` are read only under `--format`.
            `args._resolved_tz` is set as a side effect.

    Returns:
        0 on success (including the "no project usage found" case), 1 on a
        flag-validation error, or whatever integer `_parse_cli_date_range`
        returns in place of a (since, until) pair.
    """
    c = _cctally()
    c._share_validate_args(args)
    config = c._load_claude_config_for_args(args)
    # Session A (spec §7.2): bridge -z/--timezone into args.tz so the
    # existing resolve_display_tz precedence absorbs the new alias.
    c._bridge_z_into_tz(args, config)
    args._resolved_tz = c.resolve_display_tz(args, config)

    # Flag-combination validation (must run before any expensive work).
    if args.weeks is not None and args.weeks < 1:
        eprint("Error: --weeks must be >= 1")
        return 1
    if args.weeks is not None and (args.since or args.until):
        eprint("Error: --weeks cannot be combined with --since/--until")
        return 1
    if args.since and args.until:
        # Parse both as dates using the same multi-format helper shape used
        # elsewhere in the codebase so YYYY-MM-DD and YYYYMMDD both compare
        # correctly (string compare alone breaks across mixed formats).
        def _parse(raw: str) -> dt.date | None:
            for fmt in ("%Y-%m-%d", "%Y%m%d"):
                try:
                    return dt.datetime.strptime(raw, fmt).date()
                except ValueError:
                    continue
            return None

        since_parsed = _parse(args.since)
        until_parsed = _parse(args.until)
        # Silent-skip if either date failed to parse: we only want to surface
        # a "range order" error here when both inputs are well-formed. Any
        # format error will be reported downstream by _parse_cli_date_range()
        # so the user sees the parse problem first (not a misleading order
        # complaint triggered by garbage input).
        if (
            since_parsed is not None
            and until_parsed is not None
            and since_parsed > until_parsed
        ):
            eprint("Error: --since must be <= --until")
            return 1

    now = _command_as_of()

    # The stats DB handle is only needed to compute subscription-week
    # bounds, so it is scoped to this block and always closed — including
    # on the early `return parsed` path — instead of staying open for the
    # remainder of the command (same try/finally shape as
    # `_load_week_snapshots`).
    conn = open_db()
    try:
        # Resolve [since_dt, until_dt] in UTC.
        if args.since or args.until:
            parsed = c._parse_cli_date_range(args, now_utc=now)
            if isinstance(parsed, int):
                return parsed
            since_dt, until_dt = parsed
            since_dt = since_dt.astimezone(dt.timezone.utc)
            until_dt = until_dt.astimezone(dt.timezone.utc)
        else:
            # Default to the current subscription week; --weeks N extends
            # backwards. Widen by 1us so the emit loop fires when `now` is
            # exactly at a reset boundary (zero-width [now, now] makes Case
            # A's `current < range_end` false, which would otherwise wrongly
            # fall through to the Monday fallback for non-Monday-reset
            # accounts).
            current_weeks = c._compute_subscription_weeks(
                conn, now, now + dt.timedelta(microseconds=1), config=config,
            )
            if current_weeks:
                cw_start = parse_iso_datetime(
                    current_weeks[0].start_ts, "week.start_ts"
                ).astimezone(dt.timezone.utc)
            else:
                # No snapshots available: fall back to a Monday-anchored week.
                cw_start = (now - dt.timedelta(days=now.weekday())).replace(
                    hour=0, minute=0, second=0, microsecond=0
                )
            if args.weeks is not None:
                since_dt = cw_start - dt.timedelta(days=7 * (args.weeks - 1))
            else:
                since_dt = cw_start
            until_dt = now

        # Pre-compute subscription-week bounds for the query window so each
        # entry can be bucketed onto a canonical subscription-week start_ts.
        # Mirrors `_aggregate_weekly`'s bisect pattern (first-match-wins on
        # overlap). Materialized before the connection closes so nothing
        # lazy can touch a closed handle later.
        subweeks = list(c._compute_subscription_weeks(
            conn, since_dt, until_dt, config=config,
        ))
    finally:
        conn.close()

    parsed_bounds: list[tuple[dt.datetime, dt.datetime]] = []
    for sw in subweeks:
        s_dt = parse_iso_datetime(sw.start_ts, "week.start_ts").astimezone(dt.timezone.utc)
        e_dt = parse_iso_datetime(sw.end_ts, "week.end_ts").astimezone(dt.timezone.utc)
        parsed_bounds.append((s_dt, e_dt))
    week_starts = [b[0] for b in parsed_bounds]

    def _week_start_for(ts: dt.datetime) -> dt.datetime | None:
        """Return the canonical subscription-week start_dt for `ts`, or None
        if `ts` falls outside every SubWeek interval (may happen near the
        boundaries of the requested [since_dt, until_dt] window)."""
        ts_utc = ts.astimezone(dt.timezone.utc)
        idx = bisect.bisect_right(week_starts, ts_utc) - 1
        if idx < 0:
            return None
        # First-match-wins on Anthropic reset-day-drift overlap (same
        # walk-back as `_aggregate_weekly`).
        while idx > 0:
            prev_start, prev_end = parsed_bounds[idx - 1]
            if prev_start <= ts_utc < prev_end:
                idx -= 1
            else:
                break
        s_dt, e_dt = parsed_bounds[idx]
        if s_dt <= ts_utc < e_dt:
            return s_dt
        return None

    # Pre-lower filter patterns (substring, OR semantics, repeatable).
    project_patterns = [p.lower() for p in (args.project or [])]
    model_patterns = [m.lower() for m in (args.model or [])]

    # Widen scan to full subscription-week bounds so the attribution
    # denominator includes ALL week cost, even entries outside the
    # user's [since_dt, until_dt] slice. Visible buckets are still
    # gated on the user slice below. Without this, a partial-week
    # --since/--until slice understates the denominator and inflates
    # every row's Used %.
    if parsed_bounds:
        scan_start = min(since_dt, parsed_bounds[0][0])
        scan_end = max(until_dt, parsed_bounds[-1][1])
    else:
        scan_start, scan_end = since_dt, until_dt

    resolver_cache: dict[str, ProjectKey] = {}
    buckets: dict[tuple[ProjectKey, dt.datetime], dict] = {}
    total_cost_by_week: dict[dt.datetime, float] = {}
    unknown_entry_count = 0
    missing_sid_count = 0

    # Issue #89: materialize the joined-entry iterator once so we can
    # (a) pre-compute the --debug report's scope (entries passing all
    # rendered-row filters — user slice + --model + --project) BEFORE
    # the aggregation loop runs and (b) preserve the existing
    # aggregation semantics (denominator widened to ALL entries; visible
    # rows only the post-filter subset). The list is small enough to
    # hold (entries already in memory via the cache row factory).
    joined_entries_all = list(c.get_claude_session_entries(scan_start, scan_end))

    # Build the --debug report dataset: skip synthetic + out-of-window
    # entries, then apply --model and --project filters (mirroring the
    # exact predicate at the aggregation loop below). This must match
    # the rendered scope, NOT the denominator scope.
    if getattr(args, "debug", False):
        filtered_for_report = []
        for je in joined_entries_all:
            if je.model == "<synthetic>":
                continue
            if _week_start_for(je.timestamp) is None:
                continue
            if je.timestamp < since_dt or je.timestamp > until_dt:
                continue
            if model_patterns:
                mname = (je.model or "").lower()
                if not any(p in mname for p in model_patterns):
                    continue
            key_for_filter = c._resolve_project_key(
                je.project_path, args.group, resolver_cache,
            )
            if not c._project_filter_matches(key_for_filter, project_patterns):
                continue
            filtered_for_report.append(je)
        c._emit_debug_samples_if_set(
            args,
            [c._usage_entry_from_joined(je) for je in filtered_for_report],
            command_label="project",
        )

    for entry in joined_entries_all:
        # Skip synthetic entries (Claude Code internal markers) to match
        # `_aggregate_cache_by_session` / `_aggregate_claude_sessions`.
        if entry.model == "<synthetic>":
            continue

        week_start = _week_start_for(entry.timestamp)
        if week_start is None:
            continue

        entry_cost = c._calculate_entry_cost(
            entry.model,
            {
                "input_tokens": entry.input_tokens,
                "output_tokens": entry.output_tokens,
                "cache_creation_input_tokens": entry.cache_creation_tokens,
                "cache_read_input_tokens": entry.cache_read_tokens,
            },
            mode="auto",
            cost_usd=entry.cost_usd,
        )

        # Denominator: always contribute (whole-week attribution) so
        # `--model`/`--project`/partial-slice do NOT rescale it.
        total_cost_by_week[week_start] = (
            total_cost_by_week.get(week_start, 0.0) + entry_cost
        )

        # User-slice gate: visible rows only include entries within
        # [since_dt, until_dt]. Entries outside the slice still
        # contributed to the denominator above.
        if entry.timestamp < since_dt or entry.timestamp > until_dt:
            continue

        if model_patterns:
            mname = (entry.model or "").lower()
            if not any(p in mname for p in model_patterns):
                continue

        key = c._resolve_project_key(entry.project_path, args.group, resolver_cache)
        if key.is_unknown:
            unknown_entry_count += 1

        # --project filter: match against display_key OR the underlying
        # path (git_root / bucket_path). Matching only display_key makes
        # basename-collision suffixes (e.g. `foo (repos)`) impossible to
        # select by their path segment.
        if project_patterns:
            dname = key.display_key.lower()
            pname = (key.git_root or key.bucket_path or "").lower()
            if not any((p in dname) or (p in pname) for p in project_patterns):
                continue

        if entry.session_id is None:
            missing_sid_count += 1

        bkey = (key, week_start)
        b = buckets.get(bkey)
        if b is None:
            b = {
                "key": key,
                "week_start": week_start,
                "sessions": set(),
                "first_seen": entry.timestamp,
                "last_seen": entry.timestamp,
                "input": 0, "output": 0,
                "cache_write": 0, "cache_read": 0,
                "cost_usd": 0.0,
                "models": {},
            }
            buckets[bkey] = b
        _accumulate_entry_into_bucket(b, entry, pre_computed_cost=entry_cost)

    if unknown_entry_count > 0:
        eprint(
            f"Warning: {unknown_entry_count} entries lacked project_path — "
            f"run `cache-sync` to backfill."
        )
    if missing_sid_count > 0:
        eprint(
            f"Warning: {missing_sid_count} entries lacked session_files "
            f"session_id — run `cache-sync` to backfill."
        )

    # --- Attribution math (Task 5) -----------------------------------------
    # Load per-week `weekly_percent` (max within window) for every week that
    # intersects [since_dt, until_dt]. Missing snapshots are tracked so we
    # can surface `weeksMissingSnapshot` in the output — those weeks can't
    # contribute to attributed %.
    week_snapshots: dict[dt.datetime, float] = _load_week_snapshots(
        since_dt, until_dt
    )

    # Set of every week the user asked about (from the computed SubWeek
    # bounds), used to report `weeksInRange` and `weeksMissingSnapshot`
    # independent of whether that week had any project activity.
    weeks_in_range: set[dt.datetime] = set(week_starts)
    weeks_missing_snapshot: set[dt.datetime] = {
        ws for ws in weeks_in_range if ws not in week_snapshots
    }

    # Collapse (project_key, week) buckets into one row per project, summing
    # tokens / cost / sessions / first_seen / last_seen / models across the
    # weeks the project appears in.
    #
    # Attribution: for each (project P, week W) bucket,
    #   attributed_pct[P,W] = (cost[P,W] / total_cost[W]) * weekly_percent[W]
    # iff a snapshot exists for W. Weeks without a snapshot contribute None
    # (their weeks are already counted in `weeks_missing_snapshot`).
    project_rows: dict[str, dict] = {}
    for (key, wstart), b in buckets.items():
        row = project_rows.get(key.bucket_path)
        if row is None:
            row = {
                "key": key,
                "sessions": set(),
                "first_seen": b["first_seen"],
                "last_seen": b["last_seen"],
                "input": 0, "output": 0,
                "cache_write": 0, "cache_read": 0,
                "cost_usd": 0.0,
                # `None` until the first week with a snapshot contributes —
                # preserves the distinction between "every contributing week
                # lacked a snapshot" (→ None) and "genuine zero attribution"
                # (→ 0.0 after a real contribution). Spec §3.
                "attributed_pct": None,
                "models": {},
            }
            project_rows[key.bucket_path] = row
        row["sessions"] |= b["sessions"]
        if b["first_seen"] < row["first_seen"]:
            row["first_seen"] = b["first_seen"]
        if b["last_seen"] > row["last_seen"]:
            row["last_seen"] = b["last_seen"]
        row["input"] += b["input"]
        row["output"] += b["output"]
        row["cache_write"] += b["cache_write"]
        row["cache_read"] += b["cache_read"]
        row["cost_usd"] += b["cost_usd"]

        # Merge per-model sub-buckets.
        for model, mb in b["models"].items():
            rm = row["models"].get(model)
            if rm is None:
                rm = {
                    "cost_usd": 0.0,
                    "input": 0, "output": 0,
                    "cache_write": 0, "cache_read": 0,
                    "first_seen": mb["first_seen"],
                    "last_seen": mb["last_seen"],
                }
                row["models"][model] = rm
            if mb["first_seen"] < rm["first_seen"]:
                rm["first_seen"] = mb["first_seen"]
            if mb["last_seen"] > rm["last_seen"]:
                rm["last_seen"] = mb["last_seen"]
            rm["cost_usd"] += mb["cost_usd"]
            rm["input"] += mb["input"]
            rm["output"] += mb["output"]
            rm["cache_write"] += mb["cache_write"]
            rm["cache_read"] += mb["cache_read"]

        # Attribution contribution (only if this week has a snapshot and
        # the week has nonzero total cost — a zero denominator would make
        # the ratio meaningless). `attributed_pct` stays `None` until the
        # first real contribution; subsequent contributions accumulate.
        week_pct = week_snapshots.get(wstart)
        week_total = total_cost_by_week.get(wstart, 0.0)
        if week_pct is not None and week_total > 0:
            contribution = (b["cost_usd"] / week_total) * week_pct
            row["attributed_pct"] = (
                (row["attributed_pct"] or 0.0) + contribution
            )

    # Compute $/1% per project: `cost_per_pct = cost_usd / attributed_pct`
    # when attribution is positive; None otherwise (e.g. every contributing
    # week lacked a snapshot — `attributed_pct` still None — or attribution
    # came out to zero).
    for row in project_rows.values():
        ap = row["attributed_pct"]
        if ap is not None and ap > 0:
            row["cost_per_pct"] = row["cost_usd"] / ap
        else:
            row["cost_per_pct"] = None

    # Collect warnings to surface in the JSON payload (terminal path emits
    # them inline via eprint earlier, so this list stays JSON-specific).
    warnings: list[str] = []
    if unknown_entry_count > 0:
        warnings.append(
            f"{unknown_entry_count} entries lacked project_path — "
            f"run `cache-sync` to backfill."
        )
    if missing_sid_count > 0:
        warnings.append(
            f"{missing_sid_count} entries lacked session_files session_id — "
            f"run `cache-sync` to backfill."
        )

    # Honor --sort / --order. For numeric keys, `_project_sort_key` flips the
    # primary-key sign to match the requested direction so natural `sorted()`
    # ordering already produces the right answer; the dname tie-break stays
    # ascending in both directions (ties never invert alphabetically). For
    # `name`, the key is asc-natural (a-z) and `reverse=` is used for desc.
    if args.sort == "name":
        sorted_rows = sorted(
            project_rows.values(),
            key=lambda r: _project_sort_key(r, args.sort, args.order),
            reverse=(args.order == "desc"),
        )
    else:
        sorted_rows = sorted(
            project_rows.values(),
            key=lambda r: _project_sort_key(r, args.sort, args.order),
        )

    # Shareable-reports gate: --format short-circuits the JSON / table
    # dispatch via `_share_render_and_emit`. The mutex in
    # `_add_share_args` keeps `--format` and `--json` from coexisting.
    # Privacy invariant (Section 8.4 / 5.3): the wrapper runs `_lib_share._scrub`
    # before rendering, so default output anonymizes project labels to
    # `project-1` / `project-2` / ...; `--reveal-projects` opts back in.
    # The builder populates `ProjectCell.label` / `ChartPoint.project_label`
    # / `ChartPoint.x_label` with REAL names; the wrapper-level scrubber is
    # the single chokepoint that rewrites them.
    if getattr(args, "format", None):
        # Note: --breakdown is a no-op under --format (snapshot focuses on
        # the headline per-project usage table + HBar chart; per-model
        # sub-rows aren't in the share spec scope). Same convention as
        # cmd_daily / cmd_weekly / cmd_report.
        display_tz_str = c._share_display_tz_label(args._resolved_tz)
        snap = c._build_project_snapshot(
            list(sorted_rows),
            period_start=since_dt,
            period_end=until_dt,
            display_tz=display_tz_str,
            version=c._share_resolve_version(),
            theme=args.theme,
            reveal_projects=args.reveal_projects,
        )
        c._share_render_and_emit(snap, args)
        return 0

    if args.json:
        print(_project_json_output(
            since=since_dt,
            until=until_dt,
            weeks_in_range=len(weeks_in_range),
            group_mode=args.group,
            rows=sorted_rows,
            weeks_missing_snapshot=weeks_missing_snapshot,
            warnings=warnings,
            include_breakdown=args.breakdown,
            week_snapshots=week_snapshots,
        ))
        return 0

    # Terminal path
    range_label = f"{since_dt.date().isoformat()} \u2014 {until_dt.date().isoformat()}"
    title = f"Claude Token Usage Report - Projects ({range_label})"

    if not sorted_rows:
        eprint("No project usage found in range.")
        return 0

    # Session A (spec §7.3): the new --color flag overrides NO_COLOR
    # env; --no-color overrides FORCE_COLOR env; deny-wins on the
    # --color + --no-color clash. _resolve_color_enabled returns the
    # effective bool; pass it as ``color=`` so the renderer skips its
    # internal _supports_color_stdout() auto-detect (which would
    # re-consult NO_COLOR and incorrectly disable color when the user
    # passed --color under NO_COLOR=1).
    print(c._render_project_table(
        sorted_rows,
        title=title,
        breakdown=args.breakdown,
        weeks_missing_snapshot=len(weeks_missing_snapshot),
        weeks_in_range=len(weeks_in_range),
        color=c._resolve_color_enabled(args),
        compact=args.compact,
    ))
    return 0