claude-code-wrapped 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,267 @@
+"""Read and parse Claude Code conversation history from local JSONL files."""
+
+import json
+import os
+from dataclasses import dataclass, field
+from datetime import datetime, timezone
+from pathlib import Path
+from typing import Iterator
+
+
+@dataclass
+class TokenUsage:
+    """Token usage for a single message."""
+    input_tokens: int = 0
+    output_tokens: int = 0
+    cache_creation_tokens: int = 0
+    cache_read_tokens: int = 0
+
+    @property
+    def total_tokens(self) -> int:
+        return self.input_tokens + self.output_tokens + self.cache_creation_tokens + self.cache_read_tokens
+
+
+@dataclass
+class Message:
+    """A single message from a conversation."""
+    role: str # 'user' or 'assistant'
+    content: str
+    timestamp: datetime | None = None
+    model: str | None = None
+    usage: TokenUsage | None = None
+    session_id: str | None = None
+    project: str | None = None
+    git_branch: str | None = None
+    tool_calls: list[str] = field(default_factory=list)
+    message_id: str | None = None # For deduplication
+
+
+@dataclass
+class Session:
+    """A conversation session."""
+    session_id: str
+    project: str
+    messages: list[Message] = field(default_factory=list)
+    start_time: datetime | None = None
+    end_time: datetime | None = None
+
+
+def get_claude_dir() -> Path:
+    """Get the Claude Code data directory."""
+    claude_dir = Path.home() / ".claude"
+    if not claude_dir.exists():
+        raise FileNotFoundError(f"Claude Code directory not found: {claude_dir}")
+    return claude_dir
+
+
+def parse_timestamp(ts: int | str | None) -> datetime | None:
+    """Parse a timestamp from various formats and convert to local time."""
+    if ts is None:
+        return None
+    if isinstance(ts, int):
+        # Milliseconds since epoch - fromtimestamp returns local time
+        return datetime.fromtimestamp(ts / 1000)
+    if isinstance(ts, str):
+        # ISO format with Z (UTC)
+        try:
+            # Parse as UTC
+            utc_dt = datetime.fromisoformat(ts.replace('Z', '+00:00'))
+            # Convert to local time (removes timezone info but shifts the time)
+            local_dt = utc_dt.astimezone().replace(tzinfo=None)
+            return local_dt
+        except ValueError:
+            return None
+    return None
+
+
+def extract_tool_calls(content: list | str) -> list[str]:
+    """Extract tool call names from message content."""
+    tool_calls = []
+    if isinstance(content, list):
+        for item in content:
+            if isinstance(item, dict):
+                if item.get('type') == 'tool_use':
+                    tool_calls.append(item.get('name', 'unknown'))
+    return tool_calls
+
+
+def parse_jsonl_record(record: dict) -> Message | None:
+    """Parse a single JSONL record into a Message."""
+    record_type = record.get('type')
+
+    if record_type not in ('user', 'assistant'):
+        return None
+
+    message_data = record.get('message', {})
+    if not message_data:
+        return None
+
+    content = message_data.get('content', '')
+    if isinstance(content, list):
+        # Extract text from content blocks
+        text_parts = []
+        for item in content:
+            if isinstance(item, dict) and item.get('type') == 'text':
+                text_parts.append(item.get('text', ''))
+            elif isinstance(item, str):
+                text_parts.append(item)
+        content = '\n'.join(text_parts)
+
+    usage = None
+    usage_data = message_data.get('usage')
+    if usage_data:
+        usage = TokenUsage(
+            input_tokens=usage_data.get('input_tokens', 0),
+            output_tokens=usage_data.get('output_tokens', 0),
+            cache_creation_tokens=usage_data.get('cache_creation_input_tokens', 0),
+            cache_read_tokens=usage_data.get('cache_read_input_tokens', 0),
+        )
+
+    return Message(
+        role=message_data.get('role', record_type),
+        content=content,
+        timestamp=parse_timestamp(record.get('timestamp')),
+        model=message_data.get('model'),
+        usage=usage,
+        session_id=record.get('sessionId'),
+        project=record.get('cwd'),
+        git_branch=record.get('gitBranch'),
+        tool_calls=extract_tool_calls(message_data.get('content', [])),
+        message_id=message_data.get('id'), # Used for deduplication
+    )
+
+
+def iter_project_sessions(claude_dir: Path) -> Iterator[tuple[str, Path]]:
+    """Iterate over all project session JSONL files."""
+    projects_dir = claude_dir / "projects"
+    if not projects_dir.exists():
+        return
+
+    for project_dir in projects_dir.iterdir():
+        if not project_dir.is_dir():
+            continue
+        for jsonl_file in project_dir.glob("*.jsonl"):
+            yield project_dir.name, jsonl_file
+
+
+def read_session_file(jsonl_path: Path) -> list[Message]:
+    """Read all messages from a session JSONL file."""
+    messages = []
+    try:
+        with open(jsonl_path, 'r', encoding='utf-8') as f:
+            for line in f:
+                line = line.strip()
+                if not line:
+                    continue
+                try:
+                    record = json.loads(line)
+                    message = parse_jsonl_record(record)
+                    if message:
+                        messages.append(message)
+                except json.JSONDecodeError:
+                    continue
+    except (IOError, OSError):
+        pass
+    return messages
+
+
+def read_history_file(claude_dir: Path) -> list[Message]:
+    """Read the main history.jsonl file (user prompts only)."""
+    history_file = claude_dir / "history.jsonl"
+    messages = []
+    if not history_file.exists():
+        return messages
+
+    try:
+        with open(history_file, 'r', encoding='utf-8') as f:
+            for line in f:
+                line = line.strip()
+                if not line:
+                    continue
+                try:
+                    record = json.loads(line)
+                    # History file has different format
+                    messages.append(Message(
+                        role='user',
+                        content=record.get('display', ''),
+                        timestamp=parse_timestamp(record.get('timestamp')),
+                        project=record.get('project'),
+                    ))
+                except json.JSONDecodeError:
+                    continue
+    except (IOError, OSError):
+        pass
+    return messages
+
+
+def read_stats_cache(claude_dir: Path) -> dict | None:
+    """Read the pre-computed stats cache if available."""
+    stats_file = claude_dir / "stats-cache.json"
+    if not stats_file.exists():
+        return None
+    try:
+        with open(stats_file, 'r', encoding='utf-8') as f:
+            return json.load(f)
+    except (json.JSONDecodeError, IOError):
+        return None
+
+
+def load_all_messages(claude_dir: Path | None = None, year: int | None = None) -> list[Message]:
+    """Load all messages from all sessions, optionally filtered by year.
+
+    Deduplicates messages by message_id to avoid counting duplicate entries
+    that can occur from streaming or retries.
+    """
+    if claude_dir is None:
+        claude_dir = get_claude_dir()
+
+    all_messages = []
+
+    # Read from project session files
+    for project_name, jsonl_path in iter_project_sessions(claude_dir):
+        messages = read_session_file(jsonl_path)
+        all_messages.extend(messages)
+
+    # Deduplicate by message_id (keep the last occurrence which has final token counts)
+    seen_ids: dict[str, Message] = {}
+    unique_messages = []
+    for msg in all_messages:
+        if msg.message_id:
+            # Keep latest version (overwrite previous)
+            seen_ids[msg.message_id] = msg
+        else:
+            # Messages without ID (user messages) - keep all
+            unique_messages.append(msg)
+
+    # Add deduplicated messages
+    unique_messages.extend(seen_ids.values())
+
+    # Filter by year if specified
+    if year:
+        unique_messages = [
+            m for m in unique_messages
+            if m.timestamp and m.timestamp.year == year
+        ]
+
+    # Sort by timestamp
+    unique_messages.sort(key=lambda m: m.timestamp or datetime.min)
+
+    return unique_messages
+
+
+if __name__ == "__main__":
+    # Quick test
+    claude_dir = get_claude_dir()
+    print(f"Claude dir: {claude_dir}")
+
+    messages = load_all_messages(year=2025)
+    print(f"Total messages in 2025: {len(messages)}")
+
+    user_messages = [m for m in messages if m.role == 'user']
+    assistant_messages = [m for m in messages if m.role == 'assistant']
+    print(f"User messages: {len(user_messages)}")
+    print(f"Assistant messages: {len(assistant_messages)}")
+
+    # Token usage
+    total_tokens = sum(m.usage.total_tokens for m in messages if m.usage)
+    print(f"Total tokens: {total_tokens:,}")
@@ -0,0 +1,339 @@
+"""Aggregate statistics from Claude Code conversation history for Wrapped."""
+
+from collections import Counter, defaultdict
+from dataclasses import dataclass, field
+from datetime import datetime, timedelta
+
+from .reader import Message, TokenUsage
+
+
+@dataclass
+class DailyStats:
+    """Statistics for a single day."""
+    date: datetime
+    message_count: int = 0
+    user_messages: int = 0
+    assistant_messages: int = 0
+    tokens: TokenUsage = field(default_factory=TokenUsage)
+    tool_calls: Counter = field(default_factory=Counter)
+    models_used: Counter = field(default_factory=Counter)
+    projects: set = field(default_factory=set)
+    session_count: int = 0
+
+
+@dataclass
+class WrappedStats:
+    """Complete wrapped statistics for a year."""
+    year: int
+
+    # Overall counts
+    total_messages: int = 0
+    total_user_messages: int = 0
+    total_assistant_messages: int = 0
+    total_sessions: int = 0
+    total_projects: int = 0
+
+    # Token usage
+    total_input_tokens: int = 0
+    total_output_tokens: int = 0
+    total_cache_creation_tokens: int = 0
+    total_cache_read_tokens: int = 0
+
+    # Time patterns
+    first_message_date: datetime | None = None
+    last_message_date: datetime | None = None
+    most_active_day: tuple[datetime, int] | None = None
+    most_active_hour: int | None = None
+    streak_longest: int = 0
+    streak_current: int = 0
+    active_days: int = 0
+
+    # Tool usage
+    tool_calls: Counter = field(default_factory=Counter)
+    top_tools: list[tuple[str, int]] = field(default_factory=list)
+
+    # Model usage
+    models_used: Counter = field(default_factory=Counter)
+    primary_model: str | None = None
+
+    # Projects
+    projects: Counter = field(default_factory=Counter)
+    top_projects: list[tuple[str, int]] = field(default_factory=list)
+
+    # Daily breakdown
+    daily_stats: dict[str, DailyStats] = field(default_factory=dict)
+
+    # Hour distribution (0-23 -> count)
+    hourly_distribution: list[int] = field(default_factory=lambda: [0] * 24)
+
+    # Day of week distribution (0=Monday, 6=Sunday)
+    weekday_distribution: list[int] = field(default_factory=lambda: [0] * 7)
+
+    # Fun stats
+    longest_conversation_tokens: int = 0
+    avg_messages_per_day: float = 0.0
+    avg_tokens_per_message: float = 0.0
+
+    # Cost tracking (per model)
+    model_token_usage: dict[str, dict[str, int]] = field(default_factory=dict)
+    estimated_cost: float | None = None
+    cost_by_model: dict[str, float] = field(default_factory=dict)
+
+    @property
+    def total_tokens(self) -> int:
+        return (
+            self.total_input_tokens +
+            self.total_output_tokens +
+            self.total_cache_creation_tokens +
+            self.total_cache_read_tokens
+        )
+
+
+def extract_project_name(project_path: str | None) -> str:
+    """Extract a readable project name from a path."""
+    if not project_path:
+        return "Unknown"
+    # Get the last part of the path
+    parts = project_path.rstrip('/').split('/')
+    return parts[-1] if parts else "Unknown"
+
+
+def calculate_streaks(daily_stats: dict[str, DailyStats], year: int) -> tuple[int, int]:
+    """Calculate longest and current coding streaks."""
+    # Get all active dates in the year
+    active_dates = set()
+    for date_str, stats in daily_stats.items():
+        if stats.message_count > 0:
+            try:
+                active_dates.add(datetime.strptime(date_str, "%Y-%m-%d").date())
+            except ValueError:
+                continue
+
+    if not active_dates:
+        return 0, 0
+
+    # Sort dates
+    sorted_dates = sorted(active_dates)
+
+    # Calculate longest streak
+    longest_streak = 1
+    current_streak = 1
+
+    for i in range(1, len(sorted_dates)):
+        if sorted_dates[i] - sorted_dates[i-1] == timedelta(days=1):
+            current_streak += 1
+            longest_streak = max(longest_streak, current_streak)
+        else:
+            current_streak = 1
+
+    # Calculate current streak
+    today = datetime.now().date()
+    current = 0
+
+    # For past years, current streak is meaningless, so return 0
+    # For current year, count back from today
+    if year < today.year:
+        return longest_streak, 0
+
+    # Start from today for current year
+    check_date = today
+
+    while check_date >= datetime(year, 1, 1).date():
+        if check_date in active_dates:
+            current += 1
+            check_date -= timedelta(days=1)
+        elif check_date == today:
+            # Today doesn't count against streak if we haven't coded yet
+            check_date -= timedelta(days=1)
+        else:
+            break
+
+    return longest_streak, current
+
+
+def aggregate_stats(messages: list[Message], year: int) -> WrappedStats:
+    """Aggregate all messages into wrapped statistics."""
+    stats = WrappedStats(year=year)
+
+    if not messages:
+        return stats
+
+    # Track unique sessions and projects
+    sessions = set()
+    projects = Counter()
+    daily = defaultdict(lambda: DailyStats(date=datetime.now()))
+
+    # Process each message
+    for msg in messages:
+        stats.total_messages += 1
+
+        if msg.role == 'user':
+            stats.total_user_messages += 1
+        else:
+            stats.total_assistant_messages += 1
+
+        # Session tracking
+        if msg.session_id:
+            sessions.add(msg.session_id)
+
+        # Project tracking
+        project_name = extract_project_name(msg.project)
+        if project_name != "Unknown":
+            projects[project_name] += 1
+
+        # Model usage and token tracking
+        raw_model = msg.model # Full model ID for accurate cost calculation
+        display_model = None # Simplified name for display
+        if msg.model:
+            model_lower = msg.model.lower()
+            if 'opus' in model_lower:
+                display_model = 'Opus'
+            elif 'sonnet' in model_lower:
+                display_model = 'Sonnet'
+            elif 'haiku' in model_lower:
+                display_model = 'Haiku'
+            elif msg.model == '<synthetic>':
+                display_model = None # Skip synthetic messages
+            else:
+                display_model = msg.model
+
+            if display_model:
+                stats.models_used[display_model] += 1
+
+        # Token usage (aggregate and per-model with FULL model name for accurate pricing)
+        if msg.usage:
+            stats.total_input_tokens += msg.usage.input_tokens
+            stats.total_output_tokens += msg.usage.output_tokens
+            stats.total_cache_creation_tokens += msg.usage.cache_creation_tokens
+            stats.total_cache_read_tokens += msg.usage.cache_read_tokens
+
+            # Track per-model token usage for cost calculation (use raw model ID)
+            if raw_model and raw_model != '<synthetic>':
+                if raw_model not in stats.model_token_usage:
+                    stats.model_token_usage[raw_model] = {
+                        "input": 0, "output": 0, "cache_create": 0, "cache_read": 0
+                    }
+                stats.model_token_usage[raw_model]["input"] += msg.usage.input_tokens
+                stats.model_token_usage[raw_model]["output"] += msg.usage.output_tokens
+                stats.model_token_usage[raw_model]["cache_create"] += msg.usage.cache_creation_tokens
+                stats.model_token_usage[raw_model]["cache_read"] += msg.usage.cache_read_tokens
+
+        # Tool usage
+        for tool in msg.tool_calls:
+            stats.tool_calls[tool] += 1
+
+        # Time-based stats
+        if msg.timestamp:
+            # Track first and last
+            if stats.first_message_date is None or msg.timestamp < stats.first_message_date:
+                stats.first_message_date = msg.timestamp
+            if stats.last_message_date is None or msg.timestamp > stats.last_message_date:
+                stats.last_message_date = msg.timestamp
+
+            # Hourly distribution
+            stats.hourly_distribution[msg.timestamp.hour] += 1
+
+            # Weekday distribution
+            stats.weekday_distribution[msg.timestamp.weekday()] += 1
+
+            # Daily stats
+            date_str = msg.timestamp.strftime("%Y-%m-%d")
+            if date_str not in daily:
+                daily[date_str] = DailyStats(date=msg.timestamp)
+
+            daily_stat = daily[date_str]
+            daily_stat.message_count += 1
+            if msg.role == 'user':
+                daily_stat.user_messages += 1
+            else:
+                daily_stat.assistant_messages += 1
+
+    # Finalize stats
+    stats.total_sessions = len(sessions)
+    stats.projects = projects
+    stats.total_projects = len(projects)
+    stats.daily_stats = dict(daily)
+    stats.active_days = len([d for d in daily.values() if d.message_count > 0])
+
+    # Most active day
+    if daily:
+        most_active = max(daily.items(), key=lambda x: x[1].message_count)
+        stats.most_active_day = (
+            datetime.strptime(most_active[0], "%Y-%m-%d"),
+            most_active[1].message_count
+        )
+
+    # Most active hour
+    if any(stats.hourly_distribution):
+        stats.most_active_hour = stats.hourly_distribution.index(max(stats.hourly_distribution))
+
+    # Top tools
+    stats.top_tools = stats.tool_calls.most_common(10)
+
+    # Top projects
+    stats.top_projects = projects.most_common(5)
+
+    # Primary model
+    if stats.models_used:
+        stats.primary_model = stats.models_used.most_common(1)[0][0]
+
+    # Streaks
+    stats.streak_longest, stats.streak_current = calculate_streaks(daily, year)
+
+    # Averages
+    if stats.active_days > 0:
+        stats.avg_messages_per_day = stats.total_messages / stats.active_days
+
+    if stats.total_assistant_messages > 0:
+        stats.avg_tokens_per_message = stats.total_tokens / stats.total_assistant_messages
+
+    # Calculate estimated cost
+    from .pricing import calculate_total_cost_by_model
+    if stats.model_token_usage:
+        stats.estimated_cost, stats.cost_by_model = calculate_total_cost_by_model(
+            stats.model_token_usage
+        )
+
+    return stats
+
+
+def format_tokens(tokens: int) -> str:
+    """Format token count for display."""
+    if tokens >= 1_000_000_000:
+        return f"{tokens / 1_000_000_000:.1f}B"
+    if tokens >= 1_000_000:
+        return f"{tokens / 1_000_000:.1f}M"
+    if tokens >= 1_000:
+        return f"{tokens / 1_000:.1f}K"
+    return str(tokens)
+
+
+if __name__ == "__main__":
+    from .reader import load_all_messages, get_claude_dir
+
+    print("Loading messages...")
+    messages = load_all_messages(year=2025)
+    print(f"Loaded {len(messages)} messages")
+
+    print("\nCalculating stats...")
+    stats = aggregate_stats(messages, 2025)
+
+    print(f"\n=== Claude Code Wrapped 2025 ===")
+    print(f"Total messages: {stats.total_messages:,}")
+    print(f" User: {stats.total_user_messages:,}")
+    print(f" Assistant: {stats.total_assistant_messages:,}")
+    print(f"Total sessions: {stats.total_sessions}")
+    print(f"Total projects: {stats.total_projects}")
+    print(f"Active days: {stats.active_days}")
+    print(f"\nTokens: {format_tokens(stats.total_tokens)}")
+    print(f" Input: {format_tokens(stats.total_input_tokens)}")
+    print(f" Output: {format_tokens(stats.total_output_tokens)}")
+    print(f" Cache created: {format_tokens(stats.total_cache_creation_tokens)}")
+    print(f" Cache read: {format_tokens(stats.total_cache_read_tokens)}")
+    print(f"\nPrimary model: {stats.primary_model}")
+    print(f"Longest streak: {stats.streak_longest} days")
+    print(f"Most active hour: {stats.most_active_hour}:00")
+    if stats.most_active_day:
+        print(f"Most active day: {stats.most_active_day[0].strftime('%B %d')} ({stats.most_active_day[1]} messages)")
+    print(f"\nTop tools: {stats.top_tools[:5]}")
+    print(f"Top projects: {stats.top_projects}")