cctally 1.22.1 → 1.22.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +18 -0
- package/bin/_cctally_cache_report.py +1133 -880
- package/bin/_cctally_codex.py +518 -0
- package/bin/_cctally_dashboard.py +3 -3
- package/bin/_cctally_diff.py +240 -0
- package/bin/_cctally_doctor.py +479 -0
- package/bin/_cctally_five_hour.py +1688 -0
- package/bin/_cctally_forecast.py +1979 -0
- package/bin/_cctally_milestones.py +433 -0
- package/bin/_cctally_percent_breakdown.py +199 -0
- package/bin/_cctally_pricing_check.py +393 -0
- package/bin/_cctally_project.py +714 -0
- package/bin/_cctally_record.py +5 -3
- package/bin/_cctally_reporting.py +749 -0
- package/bin/_cctally_setup.py +172 -13
- package/bin/_cctally_statusline.py +630 -0
- package/bin/_cctally_sync_week.py +5 -4
- package/bin/_cctally_weekrefs.py +450 -0
- package/bin/_lib_cache_report.py +938 -0
- package/bin/_lib_pricing_debug.py +182 -0
- package/bin/_lib_subscription_weeks.py +2 -2
- package/bin/cctally +426 -9569
- package/package.json +15 -1
|
@@ -0,0 +1,714 @@
|
|
|
1
|
+
"""`cctally project` subcommand entry point.
|
|
2
|
+
|
|
3
|
+
Lazy I/O sibling: holds `cmd_project` + its 4 dedicated helpers
|
|
4
|
+
(`_load_week_snapshots`, `_accumulate_entry_into_bucket`,
|
|
5
|
+
`_project_json_output`, `_project_sort_key`). Aggregates session entries
|
|
6
|
+
by git-root project with per-project weekly usage attribution.
|
|
7
|
+
|
|
8
|
+
Honest imports are KERNEL-ONLY (`_cctally_core`). Every other symbol the
|
|
9
|
+
command calls is reached via the call-time `_cctally()` accessor so test
|
|
10
|
+
monkeypatches through `cctally`'s namespace are preserved — see the spec
|
|
11
|
+
§3.1 disposition table (the cache reads, the share builders/dispatch,
|
|
12
|
+
`_share_validate_args`, `_render_project_table`, `resolve_display_tz`,
|
|
13
|
+
and the `bin/cctally`-resident helpers all route through `c.`).
|
|
14
|
+
|
|
15
|
+
bin/cctally re-exports `cmd_project` (eager) so the parser's
|
|
16
|
+
`set_defaults(func=c.cmd_project)` resolves unchanged.
|
|
17
|
+
|
|
18
|
+
Spec: docs/superpowers/specs/2026-05-30-extract-project-cmd-design.md
|
|
19
|
+
"""
|
|
20
|
+
from __future__ import annotations
|
|
21
|
+
|
|
22
|
+
import argparse
|
|
23
|
+
import bisect
|
|
24
|
+
import datetime as dt
|
|
25
|
+
import json
|
|
26
|
+
import os
|
|
27
|
+
import sys
|
|
28
|
+
|
|
29
|
+
from _cctally_core import _command_as_of, eprint, open_db, parse_iso_datetime
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def _cctally():
    """Look up the live `cctally` module object every time it is needed.

    Reading `sys.modules` at call time (rather than binding the module once
    at import) keeps monkeypatches applied through `cctally`'s namespace
    visible to this file's callers (spec §3.1).

    Raises:
        KeyError: if no module is registered under the name ``cctally``.
    """
    loaded_modules = sys.modules
    return loaded_modules["cctally"]
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def _load_week_snapshots(
    since: dt.datetime, until: dt.datetime
) -> dict[dt.datetime, float]:
    """Map each overlapping week's UTC start to its highest `weekly_percent`.

    Queries `weekly_usage_snapshots` for rows whose week overlaps the
    [since, until] range (week starts before `until` and ends after
    `since`), ignoring rows where `week_start_at` or `week_end_at` is NULL.

    The per-week maximum is taken in Python, keyed on the parsed start
    converted to UTC, so rows that spell the same instant differently
    (`Z`, `+00:00`, `+03:00`, ...) land in a single bucket and the larger
    value wins instead of being split across string-distinct keys.

    Returns:
        ``{week_start_utc: max_weekly_percent}``; empty when nothing matches.
    """
    overlap_sql = (
        "SELECT week_start_at, weekly_percent FROM weekly_usage_snapshots "
        "WHERE week_start_at IS NOT NULL "
        "AND week_end_at IS NOT NULL "
        "AND datetime(week_start_at) < datetime(?) "
        "AND datetime(week_end_at) > datetime(?)"
    )
    conn = open_db()
    try:
        fetched = conn.execute(
            overlap_sql, (until.isoformat(), since.isoformat())
        ).fetchall()
        peaks: dict[dt.datetime, float] = {}
        for raw_start, raw_pct in fetched:
            # Defensive: a row missing either value cannot contribute.
            if raw_start is None or raw_pct is None:
                continue
            # `fromisoformat` on older Pythons rejects a trailing "Z".
            normalized = str(raw_start).replace("Z", "+00:00")
            week_key = dt.datetime.fromisoformat(normalized).astimezone(
                dt.timezone.utc
            )
            value = float(raw_pct)
            if week_key not in peaks or value > peaks[week_key]:
                peaks[week_key] = value
        return peaks
    finally:
        conn.close()
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def _accumulate_entry_into_bucket(
    b: dict,
    entry: "_JoinedClaudeEntry",
    pre_computed_cost: float | None = None,
) -> None:
    """Fold one joined-Claude entry into a project×week bucket dict, in place.

    Updates the bucket's session-id set, first/last-seen timestamps, token
    counters and cost, and applies the same update to the per-model
    sub-bucket under ``b["models"]`` (created on first sight of the model;
    a falsy model is filed under ``"(unknown-model)"``).

    Args:
        b: Bucket dict holding ``sessions`` (set), ``first_seen``,
            ``last_seen``, ``input``, ``output``, ``cache_write``,
            ``cache_read``, ``cost_usd`` and ``models`` (dict).
        entry: Joined entry; read for ``session_id``, ``source_path``,
            ``timestamp``, the four token counts, ``model`` and ``cost_usd``.
        pre_computed_cost: Cost already computed by the caller for this
            entry. When given it is used as-is; otherwise the cost comes
            from ``cctally._calculate_entry_cost(..., mode="auto")``.

    The `cctally` module is only resolved when the cost actually has to be
    computed, and everything that can fail (session-id derivation, pricing)
    runs before the first mutation so an exception cannot leave the bucket
    half-updated.
    """
    # A falsy session_id falls back to the source-file basename (extension
    # stripped) so entries from distinct files don't share one session.
    if entry.session_id:
        sid = entry.session_id
    else:
        sid = os.path.splitext(os.path.basename(entry.source_path))[0]

    if pre_computed_cost is not None:
        cost = pre_computed_cost
    else:
        # Resolved lazily: callers that pass `pre_computed_cost` skip the
        # module lookup entirely.
        cost = _cctally()._calculate_entry_cost(
            entry.model,
            {
                "input_tokens": entry.input_tokens,
                "output_tokens": entry.output_tokens,
                "cache_creation_input_tokens": entry.cache_creation_tokens,
                "cache_read_input_tokens": entry.cache_read_tokens,
            },
            mode="auto",
            cost_usd=entry.cost_usd,
        )

    model = entry.model or "(unknown-model)"
    mb = b["models"].get(model)
    if mb is None:
        mb = {
            "cost_usd": 0.0,
            "input": 0, "output": 0,
            "cache_write": 0, "cache_read": 0,
            "first_seen": entry.timestamp, "last_seen": entry.timestamp,
        }
        b["models"][model] = mb

    b["sessions"].add(sid)
    # The parent bucket and its per-model sub-bucket share the same shape
    # for these fields, so apply one update to both.
    for target in (b, mb):
        if entry.timestamp < target["first_seen"]:
            target["first_seen"] = entry.timestamp
        if entry.timestamp > target["last_seen"]:
            target["last_seen"] = entry.timestamp
        target["input"] += entry.input_tokens
        target["output"] += entry.output_tokens
        target["cache_write"] += entry.cache_creation_tokens
        target["cache_read"] += entry.cache_read_tokens
        target["cost_usd"] += cost
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
def _project_json_output(
    *,
    since: dt.datetime,
    until: dt.datetime,
    weeks_in_range: int,
    group_mode: str,
    rows: list[dict],
    weeks_missing_snapshot: set[dt.datetime],
    warnings: list[str],
    include_breakdown: bool,
    week_snapshots: dict[dt.datetime, float],
) -> str:
    """Serialize the project report as the `--json` payload (spec §4).

    `rows` is emitted in the order received, so the caller's sort applies
    to JSON exactly as it does to the terminal table. `totals.costUsd` is
    the sum of the rows' costs; `totals.usedPercent` is the sum of every
    value in `week_snapshots`, or null when that mapping is empty.
    Per-project `models[]` (sorted by model name) is only attached when
    `include_breakdown` is true.

    Returns:
        The payload as a 2-space-indented JSON string.
    """

    def _utc_stamp(moment: dt.datetime) -> str:
        # Timestamps are always rendered in UTC with a literal "Z" suffix.
        return moment.astimezone(dt.timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")

    def _round4_or_none(value):
        return None if value is None else round(value, 4)

    def _usage_fields(src: dict) -> dict:
        # Shared by project rows and per-model sub-rows (same shape).
        return {
            "firstSeen": _utc_stamp(src["first_seen"]),
            "lastSeen": _utc_stamp(src["last_seen"]),
            "inputTokens": src["input"],
            "outputTokens": src["output"],
            "cacheWriteTokens": src["cache_write"],
            "cacheReadTokens": src["cache_read"],
            "costUsd": round(src["cost_usd"], 4),
        }

    cost_sum = sum(r["cost_usd"] for r in rows)
    used_sum = sum(week_snapshots.values()) if week_snapshots else None

    project_items = []
    for record in rows:
        ident = record["key"]
        item = {
            "displayKey": ident.display_key,
            "projectPath": ident.bucket_path,
            "gitRoot": ident.git_root,
            "sessions": len(record["sessions"]),
            **_usage_fields(record),
            "attributedUsedPercent": _round4_or_none(record["attributed_pct"]),
            "costPerPercent": _round4_or_none(record["cost_per_pct"]),
        }
        if include_breakdown:
            per_model = record["models"]
            item["models"] = [
                {"model": label, **_usage_fields(per_model[label])}
                for label in sorted(per_model)
            ]
        project_items.append(item)

    totals = {
        "costUsd": round(cost_sum, 4),
        "usedPercent": _round4_or_none(used_sum),
        "weeklyAttributionAvailable": len(weeks_missing_snapshot) == 0,
    }
    document = {
        "rangeStart": since.date().isoformat(),
        "rangeEnd": until.date().isoformat(),
        "weeksInRange": weeks_in_range,
        "groupMode": group_mode,
        "totals": totals,
        "projects": project_items,
        "warnings": warnings,
    }
    return json.dumps(document, indent=2)
|
|
240
|
+
|
|
241
|
+
|
|
242
|
+
def _project_sort_key(row: dict, sort_by: str, order: str):
    """Build the sort key for one project row.

    For ``cost``, ``used`` and ``last-seen`` the key is
    ``(signed_value, lowercase_display_name)``: the value is negated when
    ``order`` is ``"desc"`` so a plain ascending sort yields the requested
    direction while name ties always break a→z. A missing attribution
    (``None``) is treated as ``-1`` before the sign is applied.

    For ``name`` the key is the 1-tuple ``(lowercase_display_name,)`` and
    the caller is expected to handle direction itself. Any other
    ``sort_by`` value is keyed like ``cost``.
    """
    label = row["key"].display_key.lower()
    if sort_by == "name":
        # Direction for names is applied by the caller, not encoded here.
        return (label,)

    direction = 1 if order != "desc" else -1
    if sort_by == "used":
        pct = row["attributed_pct"]
        magnitude = -1 if pct is None else pct
    elif sort_by == "last-seen":
        magnitude = row["last_seen"].timestamp()
    else:
        # "cost", plus the defensive default for unexpected values.
        magnitude = row["cost_usd"]
    return (direction * magnitude, label)
|
|
262
|
+
|
|
263
|
+
|
|
264
|
+
def cmd_project(args: argparse.Namespace) -> int:
    """Roll entries up by project (git-root) with per-project usage attribution.

    Steps:
      1. Validate flag combinations (`--weeks` vs `--since/--until`, range
         order) before doing any expensive work.
      2. Resolve the UTC window `[since_dt, until_dt]` and the
         subscription-week bounds covering it.
      3. Scan joined session entries over the full week bounds: every
         non-synthetic entry feeds the per-week cost denominator, while
         only entries inside the user slice that pass `--model` /
         `--project` land in visible (project, week) buckets.
      4. Collapse buckets into one row per project, attributing
         `cost[P,W] / total_cost[W] * weekly_percent[W]` for each week that
         has a snapshot.
      5. Sort, then emit via the share renderer (`--format`), JSON
         (`--json`) or the terminal table.

    Returns:
        0 on success (including "no project usage found"), 1 on an invalid
        flag combination, or the int status handed back by
        `_parse_cli_date_range` when date parsing fails.
    """
    c = _cctally()
    c._share_validate_args(args)
    config = c._load_claude_config_for_args(args)
    # Session A (spec §7.2): bridge -z/--timezone into args.tz so the
    # existing resolve_display_tz precedence absorbs the new alias.
    c._bridge_z_into_tz(args, config)
    args._resolved_tz = c.resolve_display_tz(args, config)

    # Flag-combination validation (must run before any expensive work).
    if args.weeks is not None and args.weeks < 1:
        eprint("Error: --weeks must be >= 1")
        return 1
    if args.weeks is not None and (args.since or args.until):
        eprint("Error: --weeks cannot be combined with --since/--until")
        return 1
    if args.since and args.until:
        # Parse both as dates using the same multi-format helper shape used
        # elsewhere in the codebase so YYYY-MM-DD and YYYYMMDD both compare
        # correctly (string compare alone breaks across mixed formats).
        def _parse(raw: str) -> dt.date | None:
            for fmt in ("%Y-%m-%d", "%Y%m%d"):
                try:
                    return dt.datetime.strptime(raw, fmt).date()
                except ValueError:
                    continue
            return None

        since_parsed = _parse(args.since)
        until_parsed = _parse(args.until)
        # Silent-skip if either date failed to parse: we only want to surface
        # a "range order" error here when both inputs are well-formed. Any
        # format error will be reported downstream by _parse_cli_date_range()
        # so the user sees the parse problem first (not a misleading order
        # complaint triggered by garbage input).
        if (
            since_parsed is not None
            and until_parsed is not None
            and since_parsed > until_parsed
        ):
            eprint("Error: --since must be <= --until")
            return 1

    now = _command_as_of()
    conn = open_db()
    # The connection is only needed to resolve subscription-week bounds;
    # try/finally guarantees it is closed on every path, including the early
    # return when date parsing fails.
    try:
        # Resolve [since_dt, until_dt] in UTC.
        if args.since or args.until:
            parsed = c._parse_cli_date_range(args, now_utc=now)
            if isinstance(parsed, int):
                return parsed
            since_dt, until_dt = parsed
            since_dt = since_dt.astimezone(dt.timezone.utc)
            until_dt = until_dt.astimezone(dt.timezone.utc)
        else:
            # Default to the current subscription week; --weeks N extends backwards.
            # Widen by 1us so the emit loop fires when `now` is exactly at a reset
            # boundary (zero-width [now, now] makes Case A's `current < range_end`
            # false, which would otherwise wrongly fall through to the Monday
            # fallback for non-Monday-reset accounts).
            current_weeks = c._compute_subscription_weeks(
                conn, now, now + dt.timedelta(microseconds=1), config=config,
            )
            if current_weeks:
                cw_start = parse_iso_datetime(
                    current_weeks[0].start_ts, "week.start_ts"
                ).astimezone(dt.timezone.utc)
            else:
                # No snapshots available: fall back to a Monday-anchored week.
                cw_start = (now - dt.timedelta(days=now.weekday())).replace(
                    hour=0, minute=0, second=0, microsecond=0
                )
            if args.weeks is not None:
                since_dt = cw_start - dt.timedelta(days=7 * (args.weeks - 1))
            else:
                since_dt = cw_start
            until_dt = now

        # Pre-compute subscription-week bounds for the query window so each entry
        # can be bucketed onto a canonical subscription-week start_ts. Mirrors
        # `_aggregate_weekly`'s bisect pattern (first-match-wins on overlap).
        subweeks = c._compute_subscription_weeks(
            conn, since_dt, until_dt, config=config,
        )
        parsed_bounds: list[tuple[dt.datetime, dt.datetime]] = []
        for sw in subweeks:
            s_dt = parse_iso_datetime(sw.start_ts, "week.start_ts").astimezone(dt.timezone.utc)
            e_dt = parse_iso_datetime(sw.end_ts, "week.end_ts").astimezone(dt.timezone.utc)
            parsed_bounds.append((s_dt, e_dt))
    finally:
        conn.close()
    week_starts = [b[0] for b in parsed_bounds]

    def _week_start_for(ts: dt.datetime) -> dt.datetime | None:
        """Return the canonical subscription-week start_dt for `ts`, or None
        if `ts` falls outside every SubWeek interval (may happen near the
        boundaries of the requested [since_dt, until_dt] window)."""
        ts_utc = ts.astimezone(dt.timezone.utc)
        idx = bisect.bisect_right(week_starts, ts_utc) - 1
        if idx < 0:
            return None
        # First-match-wins on Anthropic reset-day-drift overlap (same
        # walk-back as `_aggregate_weekly`).
        while idx > 0:
            prev_start, prev_end = parsed_bounds[idx - 1]
            if prev_start <= ts_utc < prev_end:
                idx -= 1
            else:
                break
        s_dt, e_dt = parsed_bounds[idx]
        if s_dt <= ts_utc < e_dt:
            return s_dt
        return None

    # Pre-lower filter patterns (substring, OR semantics, repeatable).
    project_patterns = [p.lower() for p in (args.project or [])]
    model_patterns = [m.lower() for m in (args.model or [])]

    # Widen scan to full subscription-week bounds so the attribution
    # denominator includes ALL week cost, even entries outside the
    # user's [since_dt, until_dt] slice. Visible buckets are still
    # gated on the user slice below. Without this, a partial-week
    # --since/--until slice understates the denominator and inflates
    # every row's Used %.
    if parsed_bounds:
        scan_start = min(since_dt, parsed_bounds[0][0])
        scan_end = max(until_dt, parsed_bounds[-1][1])
    else:
        scan_start, scan_end = since_dt, until_dt

    resolver_cache: dict[str, ProjectKey] = {}
    buckets: dict[tuple[ProjectKey, dt.datetime], dict] = {}
    total_cost_by_week: dict[dt.datetime, float] = {}
    unknown_entry_count = 0
    missing_sid_count = 0

    # Issue #89: materialize the joined-entry iterator once so we can
    # (a) pre-compute the --debug report's scope (entries passing all
    # rendered-row filters — user slice + --model + --project) BEFORE
    # the aggregation loop runs and (b) preserve the existing
    # aggregation semantics (denominator widened to ALL entries; visible
    # rows only the post-filter subset). The list is small enough to
    # hold (entries already in memory via the cache row factory).
    joined_entries_all = list(c.get_claude_session_entries(scan_start, scan_end))

    # Build the --debug report dataset: skip synthetic + out-of-window
    # entries, then apply --model and --project filters (mirroring the
    # exact predicate at the aggregation loop below). This must match
    # the rendered scope, NOT the denominator scope.
    if getattr(args, "debug", False):
        filtered_for_report = []
        for je in joined_entries_all:
            if je.model == "<synthetic>":
                continue
            if _week_start_for(je.timestamp) is None:
                continue
            if je.timestamp < since_dt or je.timestamp > until_dt:
                continue
            if model_patterns:
                mname = (je.model or "").lower()
                if not any(p in mname for p in model_patterns):
                    continue
            key_for_filter = c._resolve_project_key(
                je.project_path, args.group, resolver_cache,
            )
            if not c._project_filter_matches(key_for_filter, project_patterns):
                continue
            filtered_for_report.append(je)
        c._emit_debug_samples_if_set(
            args,
            [c._usage_entry_from_joined(je) for je in filtered_for_report],
            command_label="project",
        )

    for entry in joined_entries_all:
        # Skip synthetic entries (Claude Code internal markers) to match
        # `_aggregate_cache_by_session` / `_aggregate_claude_sessions`.
        if entry.model == "<synthetic>":
            continue

        week_start = _week_start_for(entry.timestamp)
        if week_start is None:
            continue

        entry_cost = c._calculate_entry_cost(
            entry.model,
            {
                "input_tokens": entry.input_tokens,
                "output_tokens": entry.output_tokens,
                "cache_creation_input_tokens": entry.cache_creation_tokens,
                "cache_read_input_tokens": entry.cache_read_tokens,
            },
            mode="auto",
            cost_usd=entry.cost_usd,
        )

        # Denominator: always contribute (whole-week attribution) so
        # `--model`/`--project`/partial-slice do NOT rescale it.
        total_cost_by_week[week_start] = (
            total_cost_by_week.get(week_start, 0.0) + entry_cost
        )

        # User-slice gate: visible rows only include entries within
        # [since_dt, until_dt]. Entries outside the slice still
        # contributed to the denominator above.
        if entry.timestamp < since_dt or entry.timestamp > until_dt:
            continue

        if model_patterns:
            mname = (entry.model or "").lower()
            if not any(p in mname for p in model_patterns):
                continue

        key = c._resolve_project_key(entry.project_path, args.group, resolver_cache)
        if key.is_unknown:
            unknown_entry_count += 1

        # --project filter: match against display_key OR the underlying
        # path (git_root / bucket_path). Matching only display_key makes
        # basename-collision suffixes (e.g. `foo (repos)`) impossible to
        # select by their path segment.
        if project_patterns:
            dname = key.display_key.lower()
            pname = (key.git_root or key.bucket_path or "").lower()
            if not any((p in dname) or (p in pname) for p in project_patterns):
                continue

        if entry.session_id is None:
            missing_sid_count += 1

        bkey = (key, week_start)
        b = buckets.get(bkey)
        if b is None:
            b = {
                "key": key,
                "week_start": week_start,
                "sessions": set(),
                "first_seen": entry.timestamp,
                "last_seen": entry.timestamp,
                "input": 0, "output": 0,
                "cache_write": 0, "cache_read": 0,
                "cost_usd": 0.0,
                "models": {},
            }
            buckets[bkey] = b
        _accumulate_entry_into_bucket(b, entry, pre_computed_cost=entry_cost)

    # Build each warning once: it is echoed to stderr right away (prefixed
    # with "Warning: ") and the same text is surfaced in the JSON payload.
    warnings: list[str] = []
    if unknown_entry_count > 0:
        warnings.append(
            f"{unknown_entry_count} entries lacked project_path — "
            f"run `cache-sync` to backfill."
        )
    if missing_sid_count > 0:
        warnings.append(
            f"{missing_sid_count} entries lacked session_files session_id — "
            f"run `cache-sync` to backfill."
        )
    for message in warnings:
        eprint(f"Warning: {message}")

    # --- Attribution math (Task 5) -----------------------------------------
    # Load per-week `weekly_percent` (max within window) for every week that
    # intersects [since_dt, until_dt]. Missing snapshots are tracked so we
    # can surface `weeksMissingSnapshot` in the output — those weeks can't
    # contribute to attributed %.
    week_snapshots: dict[dt.datetime, float] = _load_week_snapshots(
        since_dt, until_dt
    )

    # Set of every week the user asked about (from the computed SubWeek
    # bounds), used to report `weeksInRange` and `weeksMissingSnapshot`
    # independent of whether that week had any project activity.
    weeks_in_range: set[dt.datetime] = set(week_starts)
    weeks_missing_snapshot: set[dt.datetime] = {
        ws for ws in weeks_in_range if ws not in week_snapshots
    }

    # Collapse (project_key, week) buckets into one row per project, summing
    # tokens / cost / sessions / first_seen / last_seen / models across the
    # weeks the project appears in.
    #
    # Attribution: for each (project P, week W) bucket,
    #   attributed_pct[P,W] = (cost[P,W] / total_cost[W]) * weekly_percent[W]
    # iff a snapshot exists for W. Weeks without a snapshot contribute None
    # (their weeks are already counted in `weeks_missing_snapshot`).
    project_rows: dict[str, dict] = {}
    for (key, wstart), b in buckets.items():
        row = project_rows.get(key.bucket_path)
        if row is None:
            row = {
                "key": key,
                "sessions": set(),
                "first_seen": b["first_seen"],
                "last_seen": b["last_seen"],
                "input": 0, "output": 0,
                "cache_write": 0, "cache_read": 0,
                "cost_usd": 0.0,
                # `None` until the first week with a snapshot contributes —
                # preserves the distinction between "every contributing week
                # lacked a snapshot" (→ None) and "genuine zero attribution"
                # (→ 0.0 after a real contribution). Spec §3.
                "attributed_pct": None,
                "models": {},
            }
            project_rows[key.bucket_path] = row
        row["sessions"] |= b["sessions"]
        if b["first_seen"] < row["first_seen"]:
            row["first_seen"] = b["first_seen"]
        if b["last_seen"] > row["last_seen"]:
            row["last_seen"] = b["last_seen"]
        row["input"] += b["input"]
        row["output"] += b["output"]
        row["cache_write"] += b["cache_write"]
        row["cache_read"] += b["cache_read"]
        row["cost_usd"] += b["cost_usd"]

        # Merge per-model sub-buckets.
        for model, mb in b["models"].items():
            rm = row["models"].get(model)
            if rm is None:
                rm = {
                    "cost_usd": 0.0,
                    "input": 0, "output": 0,
                    "cache_write": 0, "cache_read": 0,
                    "first_seen": mb["first_seen"],
                    "last_seen": mb["last_seen"],
                }
                row["models"][model] = rm
            if mb["first_seen"] < rm["first_seen"]:
                rm["first_seen"] = mb["first_seen"]
            if mb["last_seen"] > rm["last_seen"]:
                rm["last_seen"] = mb["last_seen"]
            rm["cost_usd"] += mb["cost_usd"]
            rm["input"] += mb["input"]
            rm["output"] += mb["output"]
            rm["cache_write"] += mb["cache_write"]
            rm["cache_read"] += mb["cache_read"]

        # Attribution contribution (only if this week has a snapshot and
        # the week has nonzero total cost — a zero denominator would make
        # the ratio meaningless). `attributed_pct` stays `None` until the
        # first real contribution; subsequent contributions accumulate.
        week_pct = week_snapshots.get(wstart)
        week_total = total_cost_by_week.get(wstart, 0.0)
        if week_pct is not None and week_total > 0:
            contribution = (b["cost_usd"] / week_total) * week_pct
            row["attributed_pct"] = (
                (row["attributed_pct"] or 0.0) + contribution
            )

    # Compute $/1% per project: `cost_per_pct = cost_usd / attributed_pct`
    # when attribution is positive; None otherwise (e.g. every contributing
    # week lacked a snapshot — `attributed_pct` still None — or attribution
    # came out to zero).
    for row in project_rows.values():
        ap = row["attributed_pct"]
        if ap is not None and ap > 0:
            row["cost_per_pct"] = row["cost_usd"] / ap
        else:
            row["cost_per_pct"] = None

    # Honor --sort / --order. For numeric keys, `_project_sort_key` flips the
    # primary-key sign to match the requested direction so natural `sorted()`
    # ordering already produces the right answer; the dname tie-break stays
    # ascending in both directions (ties never invert alphabetically). For
    # `name`, the key is asc-natural (a-z) and `reverse=` is used for desc.
    sorted_rows = sorted(
        project_rows.values(),
        key=lambda r: _project_sort_key(r, args.sort, args.order),
        reverse=(args.sort == "name" and args.order == "desc"),
    )

    # Shareable-reports gate: --format short-circuits the JSON / table
    # dispatch via `_share_render_and_emit`. The mutex in
    # `_add_share_args` keeps `--format` and `--json` from coexisting.
    # Privacy invariant (Section 8.4 / 5.3): the wrapper runs `_lib_share._scrub`
    # before rendering, so default output anonymizes project labels to
    # `project-1` / `project-2` / ...; `--reveal-projects` opts back in.
    # The builder populates `ProjectCell.label` / `ChartPoint.project_label`
    # / `ChartPoint.x_label` with REAL names; the wrapper-level scrubber is
    # the single chokepoint that rewrites them.
    if getattr(args, "format", None):
        # Note: --breakdown is a no-op under --format (snapshot focuses on
        # the headline per-project usage table + HBar chart; per-model
        # sub-rows aren't in the share spec scope). Same convention as
        # cmd_daily / cmd_weekly / cmd_report.
        display_tz_str = c._share_display_tz_label(args._resolved_tz)
        snap = c._build_project_snapshot(
            list(sorted_rows),
            period_start=since_dt,
            period_end=until_dt,
            display_tz=display_tz_str,
            version=c._share_resolve_version(),
            theme=args.theme,
            reveal_projects=args.reveal_projects,
        )
        c._share_render_and_emit(snap, args)
        return 0

    if args.json:
        print(_project_json_output(
            since=since_dt,
            until=until_dt,
            weeks_in_range=len(weeks_in_range),
            group_mode=args.group,
            rows=sorted_rows,
            weeks_missing_snapshot=weeks_missing_snapshot,
            warnings=warnings,
            include_breakdown=args.breakdown,
            week_snapshots=week_snapshots,
        ))
        return 0

    # Terminal path
    range_label = f"{since_dt.date().isoformat()} \u2014 {until_dt.date().isoformat()}"
    title = f"Claude Token Usage Report - Projects ({range_label})"

    if not sorted_rows:
        eprint("No project usage found in range.")
        return 0

    # Session A (spec §7.3): the new --color flag overrides NO_COLOR
    # env; --no-color overrides FORCE_COLOR env; deny-wins on the
    # --color + --no-color clash. _resolve_color_enabled returns the
    # effective bool; pass it as ``color=`` so the renderer skips its
    # internal _supports_color_stdout() auto-detect (which would
    # re-consult NO_COLOR and incorrectly disable color when the user
    # passed --color under NO_COLOR=1).
    print(c._render_project_table(
        sorted_rows,
        title=title,
        breakdown=args.breakdown,
        weeks_missing_snapshot=len(weeks_missing_snapshot),
        weeks_in_range=len(weeks_in_range),
        color=c._resolve_color_enabled(args),
        compact=args.compact,
    ))
    return 0
|