@da-trollefsen/claude-wrapped 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,520 @@
1
+ """Aggregate statistics from Claude Code conversation history for Wrapped."""
2
+
3
+ from collections import Counter, defaultdict
4
+ from dataclasses import dataclass, field
5
+ from datetime import datetime, timedelta
6
+
7
+ from .reader import Message, TokenUsage
8
+
9
+
10
@dataclass
class DailyStats:
    """Activity bucket for one calendar day.

    Only ``date`` is required. Every counter starts at zero and every
    container starts empty, so a bucket can be created the first time a day
    is seen and filled in incrementally.
    """

    # Datetime identifying the day this bucket belongs to.
    date: datetime

    # Message counters: overall, then split by who sent the message.
    message_count: int = 0
    user_messages: int = 0
    assistant_messages: int = 0

    # Token usage for the day (TokenUsage comes from the reader module).
    tokens: TokenUsage = field(default_factory=TokenUsage)

    # Occurrence counters keyed by tool name / model name.
    tool_calls: Counter = field(default_factory=Counter)
    models_used: Counter = field(default_factory=Counter)

    # Projects touched and number of sessions on this day.
    projects: set = field(default_factory=set)
    session_count: int = 0
22
+
23
+
24
@dataclass
class WrappedStats:
    """Complete wrapped statistics for a single year or for all time.

    Only ``year`` is required; every other field defaults to an empty or
    zero value so an instance can be created up front and filled in while
    messages are aggregated.
    """

    year: int | None  # None for all-time stats

    # Overall counts
    total_messages: int = 0
    total_user_messages: int = 0
    total_assistant_messages: int = 0
    total_sessions: int = 0
    total_projects: int = 0

    # Token usage
    total_input_tokens: int = 0
    total_output_tokens: int = 0
    total_cache_creation_tokens: int = 0
    total_cache_read_tokens: int = 0

    # Time patterns
    first_message_date: datetime | None = None
    last_message_date: datetime | None = None
    most_active_day: tuple[datetime, int] | None = None  # (day, message count)
    most_active_hour: int | None = None
    streak_longest: int = 0
    streak_current: int = 0
    streak_longest_start: datetime | None = None
    streak_longest_end: datetime | None = None
    active_days: int = 0

    # Tool usage
    tool_calls: Counter = field(default_factory=Counter)
    top_tools: list[tuple[str, int]] = field(default_factory=list)

    # MCP server usage (extracted from mcp__server__tool format)
    mcp_servers: Counter = field(default_factory=Counter)
    top_mcps: list[tuple[str, int]] = field(default_factory=list)

    # Model usage
    models_used: Counter = field(default_factory=Counter)
    primary_model: str | None = None

    # Projects
    projects: Counter = field(default_factory=Counter)
    top_projects: list[tuple[str, int]] = field(default_factory=list)

    # Daily breakdown, keyed by "YYYY-MM-DD"
    daily_stats: dict[str, DailyStats] = field(default_factory=dict)

    # Hour distribution (0-23 -> count)
    hourly_distribution: list[int] = field(default_factory=lambda: [0] * 24)

    # Day of week distribution (0=Monday, 6=Sunday)
    weekday_distribution: list[int] = field(default_factory=lambda: [0] * 7)

    # Late night coding days (unique dates with activity between midnight and 5am)
    late_night_days: int = 0

    # Fun stats
    # NOTE: longest_conversation_tokens is declared here, and only here. It
    # used to be repeated in the "longest conversation" group further down;
    # keeping this occurrence preserves the field's position in __init__.
    longest_conversation_tokens: int = 0
    avg_tokens_per_message: float = 0.0

    # Averages (messages)
    avg_messages_per_day: float = 0.0
    avg_messages_per_week: float = 0.0
    avg_messages_per_month: float = 0.0

    # Averages (cost)
    avg_cost_per_day: float = 0.0
    avg_cost_per_week: float = 0.0
    avg_cost_per_month: float = 0.0

    # Code activity (from Edit/Write tools)
    total_edits: int = 0
    total_writes: int = 0
    avg_edits_per_day: float = 0.0
    avg_edits_per_week: float = 0.0

    # Cost tracking (per model)
    model_token_usage: dict[str, dict[str, int]] = field(default_factory=dict)
    estimated_cost: float | None = None
    cost_by_model: dict[str, float] = field(default_factory=dict)

    # Monthly breakdown for cost table
    monthly_costs: dict[str, float] = field(default_factory=dict)  # "YYYY-MM" -> cost
    monthly_tokens: dict[str, dict[str, int]] = field(default_factory=dict)  # "YYYY-MM" -> {input, output, ...}

    # Longest conversation tracking (its token total is the
    # longest_conversation_tokens field declared under "Fun stats")
    longest_conversation_messages: int = 0
    longest_conversation_session: str | None = None
    longest_conversation_date: datetime | None = None

    @property
    def total_tokens(self) -> int:
        """Return the sum of input, output, cache-creation and cache-read tokens."""
        return (
            self.total_input_tokens
            + self.total_output_tokens
            + self.total_cache_creation_tokens
            + self.total_cache_read_tokens
        )
124
+
125
+
126
+ def extract_project_name(project_path: str | None) -> str:
127
+ """Extract a readable project name from a path.
128
+
129
+ Handles common subdirectories by using the parent directory name
130
+ to aggregate related paths (e.g., /path/to/project/app and /path/to/project/src
131
+ both become 'project').
132
+ """
133
+ if not project_path:
134
+ return "Unknown"
135
+
136
+ # Common subdirectory names that should be ignored in favor of parent
137
+ common_subdirs = {
138
+ 'app', 'src', 'lib', 'dist', 'build', 'out', 'bin', 'target',
139
+ 'test', 'tests', '__tests__', 'spec', 'specs',
140
+ 'public', 'static', 'assets', 'resources',
141
+ 'frontend', 'backend', 'api', 'server', 'client',
142
+ 'packages', 'modules', 'components', 'utils', 'helpers',
143
+ 'scripts', 'tools', 'config', 'configs',
144
+ 'docs', 'documentation', 'examples',
145
+ }
146
+
147
+ # Split path and remove empty parts
148
+ parts = [p for p in project_path.rstrip('/').split('/') if p]
149
+
150
+ if not parts:
151
+ return "Unknown"
152
+
153
+ # If last part is a common subdirectory and we have a parent, use parent
154
+ if len(parts) >= 2 and parts[-1].lower() in common_subdirs:
155
+ return parts[-2]
156
+
157
+ # Otherwise use the last part
158
+ return parts[-1]
159
+
160
+
161
def calculate_streaks(daily_stats: dict[str, DailyStats], year: int | None) -> tuple[int, int, datetime | None, datetime | None]:
    """Work out the longest and the still-running streak of active days.

    A day is active when its entry has ``message_count > 0``. Keys that do
    not parse as "YYYY-MM-DD" are ignored.

    Args:
        daily_stats: Per-day statistics keyed by "YYYY-MM-DD".
        year: Year being analysed, or None for all-time.

    Returns:
        Tuple of (longest_streak, current_streak, longest_start_date,
        longest_end_date). The two dates are midnight datetimes, or None
        when there is no active day at all. When several runs tie for
        longest, the earliest one is reported. The current streak is 0 for
        a year earlier than the current calendar year.
    """
    # Gather the calendar days that saw at least one message.
    days_with_activity = set()
    for key, day in daily_stats.items():
        if day.message_count <= 0:
            continue
        try:
            parsed = datetime.strptime(key, "%Y-%m-%d")
        except ValueError:
            continue
        days_with_activity.add(parsed.date())

    if not days_with_activity:
        return 0, 0, None, None

    ordered = sorted(days_with_activity)
    one_day = timedelta(days=1)

    # Walk adjacent pairs, tracking the running run and the best run so far.
    best_len, best_first, best_last = 1, 0, 0
    run_len, run_first = 1, 0
    for idx, (previous, this_day) in enumerate(zip(ordered, ordered[1:]), start=1):
        if this_day - previous != one_day:
            # Gap: a new run starts at this day.
            run_len, run_first = 1, idx
            continue
        run_len += 1
        if run_len > best_len:
            best_len, best_first, best_last = run_len, run_first, idx

    midnight = datetime.min.time()
    longest_start = datetime.combine(ordered[best_first], midnight)
    longest_end = datetime.combine(ordered[best_last], midnight)

    today = datetime.now().date()

    # A "current" streak is meaningless for a year that is already over.
    if year is not None and year < today.year:
        return best_len, 0, longest_start, longest_end

    # Never look further back than the start of the requested year, or the
    # first active day when analysing all-time.
    earliest = datetime(year, 1, 1).date() if year else ordered[0]

    ongoing = 0
    cursor = today
    while cursor >= earliest:
        if cursor in days_with_activity:
            ongoing += 1
        elif cursor != today:
            # First inactive day before today ends the streak; an inactive
            # today is tolerated because the day is not over yet.
            break
        cursor -= one_day

    return best_len, ongoing, longest_start, longest_end
235
+
236
+
237
def aggregate_stats(messages: list[Message], year: int | None) -> WrappedStats:
    """Fold a list of messages into a single WrappedStats summary.

    Args:
        messages: Messages to aggregate.
        year: Year being analysed, or None for all-time stats.

    Returns:
        A WrappedStats instance. When ``messages`` is empty it is returned
        with every field at its default.
    """
    stats = WrappedStats(year=year)
    if not messages:
        return stats

    def new_token_bucket() -> dict[str, int]:
        # Fresh zeroed counter for the four token categories.
        return {"input": 0, "output": 0, "cache_create": 0, "cache_read": 0}

    def add_usage(bucket: dict[str, int], usage) -> None:
        # Add one message's token usage onto a bucket in place.
        bucket["input"] += usage.input_tokens
        bucket["output"] += usage.output_tokens
        bucket["cache_create"] += usage.cache_creation_tokens
        bucket["cache_read"] += usage.cache_read_tokens

    seen_sessions = set()
    project_counts = Counter()
    per_day: dict[str, DailyStats] = {}

    # Token usage per "YYYY-MM", overall and split by raw model id.
    monthly_totals: dict[str, dict[str, int]] = defaultdict(new_token_bucket)
    monthly_by_model: dict[str, dict[str, dict[str, int]]] = defaultdict(
        lambda: defaultdict(new_token_bucket)
    )

    # Per-session tallies used to pick the longest conversation.
    messages_per_session: dict[str, int] = Counter()
    tokens_per_session: dict[str, int] = Counter()
    session_started: dict[str, datetime] = {}

    # Dates ("YYYY-MM-DD") with activity between midnight and 5am.
    late_nights: set[str] = set()

    model_families = (('opus', 'Opus'), ('sonnet', 'Sonnet'), ('haiku', 'Haiku'))

    for msg in messages:
        session_id = msg.session_id
        timestamp = msg.timestamp
        sent_by_user = msg.role == 'user'

        stats.total_messages += 1
        if sent_by_user:
            stats.total_user_messages += 1
        else:
            stats.total_assistant_messages += 1

        # Sessions: membership, message tally and first timestamp seen.
        if session_id:
            seen_sessions.add(session_id)
            messages_per_session[session_id] += 1
            if timestamp and session_id not in session_started:
                session_started[session_id] = timestamp

        # Projects
        project = extract_project_name(msg.project)
        if project != "Unknown":
            project_counts[project] += 1

        # Models: the raw id is kept for pricing, a short label for display.
        model_id = msg.model
        label = None
        if model_id:
            lowered = model_id.lower()
            for needle, family in model_families:
                if needle in lowered:
                    label = family
                    break
            else:
                # Synthetic messages are not attributed to any model.
                if model_id != '<synthetic>':
                    label = model_id
        if label:
            stats.models_used[label] += 1

        # Tokens: grand totals, per model, per month, per session.
        usage = msg.usage
        if usage:
            stats.total_input_tokens += usage.input_tokens
            stats.total_output_tokens += usage.output_tokens
            stats.total_cache_creation_tokens += usage.cache_creation_tokens
            stats.total_cache_read_tokens += usage.cache_read_tokens

            priced = bool(model_id) and model_id != '<synthetic>'
            if priced:
                if model_id not in stats.model_token_usage:
                    stats.model_token_usage[model_id] = new_token_bucket()
                add_usage(stats.model_token_usage[model_id], usage)

            if timestamp:
                month = timestamp.strftime("%Y-%m")
                add_usage(monthly_totals[month], usage)
                if priced:
                    add_usage(monthly_by_model[month][model_id], usage)

            if session_id:
                tokens_per_session[session_id] += usage.total_tokens

        # Tools: names of the form mcp__server__tool count towards the MCP
        # server, everything else towards the tool itself.
        for tool in msg.tool_calls:
            if tool.startswith("mcp__"):
                # The prefix guarantees at least two "__"-separated segments.
                server = tool.split("__")[1]
                stats.mcp_servers[server] += 1
            else:
                stats.tool_calls[tool] += 1

        # Everything below needs a timestamp.
        if not timestamp:
            continue

        if stats.first_message_date is None or timestamp < stats.first_message_date:
            stats.first_message_date = timestamp
        if stats.last_message_date is None or timestamp > stats.last_message_date:
            stats.last_message_date = timestamp

        hour = timestamp.hour
        day_key = timestamp.strftime("%Y-%m-%d")

        stats.hourly_distribution[hour] += 1
        if 0 <= hour < 5:
            late_nights.add(day_key)
        stats.weekday_distribution[timestamp.weekday()] += 1

        bucket = per_day.get(day_key)
        if bucket is None:
            bucket = per_day[day_key] = DailyStats(date=timestamp)
        bucket.message_count += 1
        if sent_by_user:
            bucket.user_messages += 1
        else:
            bucket.assistant_messages += 1

    # ---- Finalize ----
    stats.total_sessions = len(seen_sessions)
    stats.projects = project_counts
    stats.total_projects = len(project_counts)
    stats.daily_stats = dict(per_day)
    stats.active_days = sum(1 for bucket in per_day.values() if bucket.message_count > 0)

    # Most active day
    if per_day:
        busiest_key, busiest = max(per_day.items(), key=lambda item: item[1].message_count)
        stats.most_active_day = (
            datetime.strptime(busiest_key, "%Y-%m-%d"),
            busiest.message_count,
        )

    # Most active hour (earliest hour wins a tie)
    if any(stats.hourly_distribution):
        peak = max(stats.hourly_distribution)
        stats.most_active_hour = stats.hourly_distribution.index(peak)

    stats.late_night_days = len(late_nights)

    # Rankings
    stats.top_tools = stats.tool_calls.most_common(10)
    stats.top_mcps = stats.mcp_servers.most_common(5)
    stats.top_projects = project_counts.most_common(5)
    if stats.models_used:
        stats.primary_model = stats.models_used.most_common(1)[0][0]

    # Streaks
    (
        stats.streak_longest,
        stats.streak_current,
        stats.streak_longest_start,
        stats.streak_longest_end,
    ) = calculate_streaks(per_day, year)

    # Estimated cost comes first because the averages below depend on it.
    from .pricing import calculate_total_cost_by_model
    if stats.model_token_usage:
        stats.estimated_cost, stats.cost_by_model = calculate_total_cost_by_model(
            stats.model_token_usage
        )

    # Monthly token totals and monthly cost
    stats.monthly_tokens = dict(monthly_totals)
    for month, usage_by_model in monthly_by_model.items():
        month_cost, _ = calculate_total_cost_by_model(dict(usage_by_model))
        stats.monthly_costs[month] = month_cost

    # Longest conversation = session with the most messages
    if messages_per_session:
        top_session, top_count = max(messages_per_session.items(), key=lambda item: item[1])
        stats.longest_conversation_session = top_session
        stats.longest_conversation_messages = top_count
        if top_session in tokens_per_session:
            stats.longest_conversation_tokens = tokens_per_session[top_session]
        if top_session in session_started:
            stats.longest_conversation_date = session_started[top_session]

    # Averages are taken over ACTIVE days (like ccusage); the divisors are
    # clamped so they can never be zero.
    day_count = stats.active_days or 1
    week_count = max(1, day_count / 7)
    month_count = max(1, day_count / 30.44)

    stats.avg_messages_per_day = stats.total_messages / day_count
    stats.avg_messages_per_week = stats.total_messages / week_count
    stats.avg_messages_per_month = stats.total_messages / month_count

    if stats.estimated_cost is not None:
        stats.avg_cost_per_day = stats.estimated_cost / day_count
        stats.avg_cost_per_week = stats.estimated_cost / week_count
        stats.avg_cost_per_month = stats.estimated_cost / month_count

    # Token average is per assistant message
    if stats.total_assistant_messages > 0:
        stats.avg_tokens_per_message = stats.total_tokens / stats.total_assistant_messages

    # Code activity from Edit/Write tools; day_count is always >= 1 here.
    stats.total_edits = stats.tool_calls.get("Edit", 0)
    stats.total_writes = stats.tool_calls.get("Write", 0)
    code_changes = stats.total_edits + stats.total_writes
    stats.avg_edits_per_day = code_changes / day_count
    stats.avg_edits_per_week = code_changes / week_count

    return stats
479
+
480
+
481
def format_tokens(tokens: int) -> str:
    """Format a token count for display with a K / M / B suffix.

    Counts below 1,000 (including negative values) are returned unchanged.
    Larger counts are scaled and shown with one decimal place. The unit is
    chosen after rounding, so a value that would round up to "1000.0" of a
    unit is shown in the next unit instead (999_999 -> "1.0M", not
    "1000.0K"). Billions is the largest unit.

    >>> format_tokens(1_500)
    '1.5K'
    >>> format_tokens(999_999)
    '1.0M'
    """
    if tokens < 1_000:
        return str(tokens)

    scaled_text, suffix = "", ""
    for divisor, suffix in ((1_000, "K"), (1_000_000, "M"), (1_000_000_000, "B")):
        scaled_text = f"{tokens / divisor:.1f}"
        # Test the ROUNDED text so the check agrees with what gets printed.
        if float(scaled_text) < 1_000:
            break
    return f"{scaled_text}{suffix}"
490
+
491
+
492
if __name__ == "__main__":
    # Manual smoke test: load one year of messages and print a summary.
    # The relative import below only resolves when this file is executed as
    # part of its package (python -m ...), not as a standalone script path.
    from .reader import load_all_messages

    # Single source of truth for the year being reported.
    report_year = 2025

    print("Loading messages...")
    messages = load_all_messages(year=report_year)
    print(f"Loaded {len(messages)} messages")

    print("\nCalculating stats...")
    stats = aggregate_stats(messages, report_year)

    print(f"\n=== Claude Code Wrapped {report_year} ===")
    print(f"Total messages: {stats.total_messages:,}")
    print(f" User: {stats.total_user_messages:,}")
    print(f" Assistant: {stats.total_assistant_messages:,}")
    print(f"Total sessions: {stats.total_sessions}")
    print(f"Total projects: {stats.total_projects}")
    print(f"Active days: {stats.active_days}")
    print(f"\nTokens: {format_tokens(stats.total_tokens)}")
    print(f" Input: {format_tokens(stats.total_input_tokens)}")
    print(f" Output: {format_tokens(stats.total_output_tokens)}")
    print(f" Cache created: {format_tokens(stats.total_cache_creation_tokens)}")
    print(f" Cache read: {format_tokens(stats.total_cache_read_tokens)}")
    print(f"\nPrimary model: {stats.primary_model}")
    print(f"Longest streak: {stats.streak_longest} days")
    # most_active_hour stays None when no message has a timestamp; skip the
    # line rather than print "None:00".
    if stats.most_active_hour is not None:
        print(f"Most active hour: {stats.most_active_hour}:00")
    if stats.most_active_day:
        print(f"Most active day: {stats.most_active_day[0].strftime('%B %d')} ({stats.most_active_day[1]} messages)")
    print(f"\nTop tools: {stats.top_tools[:5]}")
    print(f"Top projects: {stats.top_projects}")