claude-code-wrapped 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,267 @@
+"""Read and parse Claude Code conversation history from local JSONL files."""
+
+import json
+import os
+from dataclasses import dataclass, field
+from datetime import datetime, timezone
+from pathlib import Path
+from typing import Iterator
+
+
+@dataclass
+class TokenUsage:
+    """Token usage for a single message."""
+    input_tokens: int = 0
+    output_tokens: int = 0
+    cache_creation_tokens: int = 0
+    cache_read_tokens: int = 0
+
+    @property
+    def total_tokens(self) -> int:
+        return self.input_tokens + self.output_tokens + self.cache_creation_tokens + self.cache_read_tokens
+
+
+@dataclass
+class Message:
+    """A single message from a conversation."""
+    role: str # 'user' or 'assistant'
+    content: str
+    timestamp: datetime | None = None
+    model: str | None = None
+    usage: TokenUsage | None = None
+    session_id: str | None = None
+    project: str | None = None
+    git_branch: str | None = None
+    tool_calls: list[str] = field(default_factory=list)
+    message_id: str | None = None # For deduplication
+
+
+@dataclass
+class Session:
+    """A conversation session."""
+    session_id: str
+    project: str
+    messages: list[Message] = field(default_factory=list)
+    start_time: datetime | None = None
+    end_time: datetime | None = None
+
+
+def get_claude_dir() -> Path:
+    """Get the Claude Code data directory."""
+    claude_dir = Path.home() / ".claude"
+    if not claude_dir.exists():
+        raise FileNotFoundError(f"Claude Code directory not found: {claude_dir}")
+    return claude_dir
+
+
+def parse_timestamp(ts: int | str | None) -> datetime | None:
+    """Parse a timestamp from various formats and convert to local time."""
+    if ts is None:
+        return None
+    if isinstance(ts, int):
+        # Milliseconds since epoch - fromtimestamp returns local time
+        return datetime.fromtimestamp(ts / 1000)
+    if isinstance(ts, str):
+        # ISO format with Z (UTC)
+        try:
+            # Parse as UTC
+            utc_dt = datetime.fromisoformat(ts.replace('Z', '+00:00'))
+            # Convert to local time (removes timezone info but shifts the time)
+            local_dt = utc_dt.astimezone().replace(tzinfo=None)
+            return local_dt
+        except ValueError:
+            return None
+    return None
+
+
+def extract_tool_calls(content: list | str) -> list[str]:
+    """Extract tool call names from message content."""
+    tool_calls = []
+    if isinstance(content, list):
+        for item in content:
+            if isinstance(item, dict):
+                if item.get('type') == 'tool_use':
+                    tool_calls.append(item.get('name', 'unknown'))
+    return tool_calls
+
+
+def parse_jsonl_record(record: dict) -> Message | None:
+    """Parse a single JSONL record into a Message."""
+    record_type = record.get('type')
+
+    if record_type not in ('user', 'assistant'):
+        return None
+
+    message_data = record.get('message', {})
+    if not message_data:
+        return None
+
+    content = message_data.get('content', '')
+    if isinstance(content, list):
+        # Extract text from content blocks
+        text_parts = []
+        for item in content:
+            if isinstance(item, dict) and item.get('type') == 'text':
+                text_parts.append(item.get('text', ''))
+            elif isinstance(item, str):
+                text_parts.append(item)
+        content = '\n'.join(text_parts)
+
+    usage = None
+    usage_data = message_data.get('usage')
+    if usage_data:
+        usage = TokenUsage(
+            input_tokens=usage_data.get('input_tokens', 0),
+            output_tokens=usage_data.get('output_tokens', 0),
+            cache_creation_tokens=usage_data.get('cache_creation_input_tokens', 0),
+            cache_read_tokens=usage_data.get('cache_read_input_tokens', 0),
+        )
+
+    return Message(
+        role=message_data.get('role', record_type),
+        content=content,
+        timestamp=parse_timestamp(record.get('timestamp')),
+        model=message_data.get('model'),
+        usage=usage,
+        session_id=record.get('sessionId'),
+        project=record.get('cwd'),
+        git_branch=record.get('gitBranch'),
+        tool_calls=extract_tool_calls(message_data.get('content', [])),
+        message_id=message_data.get('id'), # Used for deduplication
+    )
+
+
+def iter_project_sessions(claude_dir: Path) -> Iterator[tuple[str, Path]]:
+    """Iterate over all project session JSONL files."""
+    projects_dir = claude_dir / "projects"
+    if not projects_dir.exists():
+        return
+
+    for project_dir in projects_dir.iterdir():
+        if not project_dir.is_dir():
+            continue
+        for jsonl_file in project_dir.glob("*.jsonl"):
+            yield project_dir.name, jsonl_file
+
+
+def read_session_file(jsonl_path: Path) -> list[Message]:
+    """Read all messages from a session JSONL file."""
+    messages = []
+    try:
+        with open(jsonl_path, 'r', encoding='utf-8') as f:
+            for line in f:
+                line = line.strip()
+                if not line:
+                    continue
+                try:
+                    record = json.loads(line)
+                    message = parse_jsonl_record(record)
+                    if message:
+                        messages.append(message)
+                except json.JSONDecodeError:
+                    continue
+    except (IOError, OSError):
+        pass
+    return messages
+
+
+def read_history_file(claude_dir: Path) -> list[Message]:
+    """Read the main history.jsonl file (user prompts only)."""
+    history_file = claude_dir / "history.jsonl"
+    messages = []
+    if not history_file.exists():
+        return messages
+
+    try:
+        with open(history_file, 'r', encoding='utf-8') as f:
+            for line in f:
+                line = line.strip()
+                if not line:
+                    continue
+                try:
+                    record = json.loads(line)
+                    # History file has different format
+                    messages.append(Message(
+                        role='user',
+                        content=record.get('display', ''),
+                        timestamp=parse_timestamp(record.get('timestamp')),
+                        project=record.get('project'),
+                    ))
+                except json.JSONDecodeError:
+                    continue
+    except (IOError, OSError):
+        pass
+    return messages
+
+
+def read_stats_cache(claude_dir: Path) -> dict | None:
+    """Read the pre-computed stats cache if available."""
+    stats_file = claude_dir / "stats-cache.json"
+    if not stats_file.exists():
+        return None
+    try:
+        with open(stats_file, 'r', encoding='utf-8') as f:
+            return json.load(f)
+    except (json.JSONDecodeError, IOError):
+        return None
+
+
+def load_all_messages(claude_dir: Path | None = None, year: int | None = None) -> list[Message]:
+    """Load all messages from all sessions, optionally filtered by year.
+
+    Deduplicates messages by message_id to avoid counting duplicate entries
+    that can occur from streaming or retries.
+    """
+    if claude_dir is None:
+        claude_dir = get_claude_dir()
+
+    all_messages = []
+
+    # Read from project session files
+    for project_name, jsonl_path in iter_project_sessions(claude_dir):
+        messages = read_session_file(jsonl_path)
+        all_messages.extend(messages)
+
+    # Deduplicate by message_id (keep the last occurrence which has final token counts)
+    seen_ids: dict[str, Message] = {}
+    unique_messages = []
+    for msg in all_messages:
+        if msg.message_id:
+            # Keep latest version (overwrite previous)
+            seen_ids[msg.message_id] = msg
+        else:
+            # Messages without ID (user messages) - keep all
+            unique_messages.append(msg)
+
+    # Add deduplicated messages
+    unique_messages.extend(seen_ids.values())
+
+    # Filter by year if specified
+    if year:
+        unique_messages = [
+            m for m in unique_messages
+            if m.timestamp and m.timestamp.year == year
+        ]
+
+    # Sort by timestamp
+    unique_messages.sort(key=lambda m: m.timestamp or datetime.min)
+
+    return unique_messages
+
+
+if __name__ == "__main__":
+    # Quick test
+    claude_dir = get_claude_dir()
+    print(f"Claude dir: {claude_dir}")
+
+    messages = load_all_messages(year=2025)
+    print(f"Total messages in 2025: {len(messages)}")
+
+    user_messages = [m for m in messages if m.role == 'user']
+    assistant_messages = [m for m in messages if m.role == 'assistant']
+    print(f"User messages: {len(user_messages)}")
+    print(f"Assistant messages: {len(assistant_messages)}")
+
+    # Token usage
+    total_tokens = sum(m.usage.total_tokens for m in messages if m.usage)
+    print(f"Total tokens: {total_tokens:,}")
@@ -0,0 +1,339 @@
+"""Aggregate statistics from Claude Code conversation history for Wrapped."""
+
+from collections import Counter, defaultdict
+from dataclasses import dataclass, field
+from datetime import datetime, timedelta
+
+from .reader import Message, TokenUsage
+
+
+@dataclass
+class DailyStats:
+    """Statistics for a single day."""
+    date: datetime
+    message_count: int = 0
+    user_messages: int = 0
+    assistant_messages: int = 0
+    tokens: TokenUsage = field(default_factory=TokenUsage)
+    tool_calls: Counter = field(default_factory=Counter)
+    models_used: Counter = field(default_factory=Counter)
+    projects: set = field(default_factory=set)
+    session_count: int = 0
+
+
+@dataclass
+class WrappedStats:
+    """Complete wrapped statistics for a year."""
+    year: int
+
+    # Overall counts
+    total_messages: int = 0
+    total_user_messages: int = 0
+    total_assistant_messages: int = 0
+    total_sessions: int = 0
+    total_projects: int = 0
+
+    # Token usage
+    total_input_tokens: int = 0
+    total_output_tokens: int = 0
+    total_cache_creation_tokens: int = 0
+    total_cache_read_tokens: int = 0
+
+    # Time patterns
+    first_message_date: datetime | None = None
+    last_message_date: datetime | None = None
+    most_active_day: tuple[datetime, int] | None = None
+    most_active_hour: int | None = None
+    streak_longest: int = 0
+    streak_current: int = 0
+    active_days: int = 0
+
+    # Tool usage
+    tool_calls: Counter = field(default_factory=Counter)
+    top_tools: list[tuple[str, int]] = field(default_factory=list)
+
+    # Model usage
+    models_used: Counter = field(default_factory=Counter)
+    primary_model: str | None = None
+
+    # Projects
+    projects: Counter = field(default_factory=Counter)
+    top_projects: list[tuple[str, int]] = field(default_factory=list)
+
+    # Daily breakdown
+    daily_stats: dict[str, DailyStats] = field(default_factory=dict)
+
+    # Hour distribution (0-23 -> count)
+    hourly_distribution: list[int] = field(default_factory=lambda: [0] * 24)
+
+    # Day of week distribution (0=Monday, 6=Sunday)
+    weekday_distribution: list[int] = field(default_factory=lambda: [0] * 7)
+
+    # Fun stats
+    longest_conversation_tokens: int = 0
+    avg_messages_per_day: float = 0.0
+    avg_tokens_per_message: float = 0.0
+
+    # Cost tracking (per model)
+    model_token_usage: dict[str, dict[str, int]] = field(default_factory=dict)
+    estimated_cost: float | None = None
+    cost_by_model: dict[str, float] = field(default_factory=dict)
+
+    @property
+    def total_tokens(self) -> int:
+        return (
+            self.total_input_tokens +
+            self.total_output_tokens +
+            self.total_cache_creation_tokens +
+            self.total_cache_read_tokens
+        )
+
+
+def extract_project_name(project_path: str | None) -> str:
+    """Extract a readable project name from a path."""
+    if not project_path:
+        return "Unknown"
+    # Get the last part of the path
+    parts = project_path.rstrip('/').split('/')
+    return parts[-1] if parts else "Unknown"
+
+
+def calculate_streaks(daily_stats: dict[str, DailyStats], year: int) -> tuple[int, int]:
+    """Calculate longest and current coding streaks."""
+    # Get all active dates in the year
+    active_dates = set()
+    for date_str, stats in daily_stats.items():
+        if stats.message_count > 0:
+            try:
+                active_dates.add(datetime.strptime(date_str, "%Y-%m-%d").date())
+            except ValueError:
+                continue
+
+    if not active_dates:
+        return 0, 0
+
+    # Sort dates
+    sorted_dates = sorted(active_dates)
+
+    # Calculate longest streak
+    longest_streak = 1
+    current_streak = 1
+
+    for i in range(1, len(sorted_dates)):
+        if sorted_dates[i] - sorted_dates[i-1] == timedelta(days=1):
+            current_streak += 1
+            longest_streak = max(longest_streak, current_streak)
+        else:
+            current_streak = 1
+
+    # Calculate current streak
+    today = datetime.now().date()
+    current = 0
+
+    # For past years, current streak is meaningless, so return 0
+    # For current year, count back from today
+    if year < today.year:
+        return longest_streak, 0
+
+    # Start from today for current year
+    check_date = today
+
+    while check_date >= datetime(year, 1, 1).date():
+        if check_date in active_dates:
+            current += 1
+            check_date -= timedelta(days=1)
+        elif check_date == today:
+            # Today doesn't count against streak if we haven't coded yet
+            check_date -= timedelta(days=1)
+        else:
+            break
+
+    return longest_streak, current
+
+
+def aggregate_stats(messages: list[Message], year: int) -> WrappedStats:
+    """Aggregate all messages into wrapped statistics."""
+    stats = WrappedStats(year=year)
+
+    if not messages:
+        return stats
+
+    # Track unique sessions and projects
+    sessions = set()
+    projects = Counter()
+    daily = defaultdict(lambda: DailyStats(date=datetime.now()))
+
+    # Process each message
+    for msg in messages:
+        stats.total_messages += 1
+
+        if msg.role == 'user':
+            stats.total_user_messages += 1
+        else:
+            stats.total_assistant_messages += 1
+
+        # Session tracking
+        if msg.session_id:
+            sessions.add(msg.session_id)
+
+        # Project tracking
+        project_name = extract_project_name(msg.project)
+        if project_name != "Unknown":
+            projects[project_name] += 1
+
+        # Model usage and token tracking
+        raw_model = msg.model # Full model ID for accurate cost calculation
+        display_model = None # Simplified name for display
+        if msg.model:
+            model_lower = msg.model.lower()
+            if 'opus' in model_lower:
+                display_model = 'Opus'
+            elif 'sonnet' in model_lower:
+                display_model = 'Sonnet'
+            elif 'haiku' in model_lower:
+                display_model = 'Haiku'
+            elif msg.model == '<synthetic>':
+                display_model = None # Skip synthetic messages
+            else:
+                display_model = msg.model
+
+            if display_model:
+                stats.models_used[display_model] += 1
+
+        # Token usage (aggregate and per-model with FULL model name for accurate pricing)
+        if msg.usage:
+            stats.total_input_tokens += msg.usage.input_tokens
+            stats.total_output_tokens += msg.usage.output_tokens
+            stats.total_cache_creation_tokens += msg.usage.cache_creation_tokens
+            stats.total_cache_read_tokens += msg.usage.cache_read_tokens
+
+            # Track per-model token usage for cost calculation (use raw model ID)
+            if raw_model and raw_model != '<synthetic>':
+                if raw_model not in stats.model_token_usage:
+                    stats.model_token_usage[raw_model] = {
+                        "input": 0, "output": 0, "cache_create": 0, "cache_read": 0
+                    }
+                stats.model_token_usage[raw_model]["input"] += msg.usage.input_tokens
+                stats.model_token_usage[raw_model]["output"] += msg.usage.output_tokens
+                stats.model_token_usage[raw_model]["cache_create"] += msg.usage.cache_creation_tokens
+                stats.model_token_usage[raw_model]["cache_read"] += msg.usage.cache_read_tokens
+
+        # Tool usage
+        for tool in msg.tool_calls:
+            stats.tool_calls[tool] += 1
+
+        # Time-based stats
+        if msg.timestamp:
+            # Track first and last
+            if stats.first_message_date is None or msg.timestamp < stats.first_message_date:
+                stats.first_message_date = msg.timestamp
+            if stats.last_message_date is None or msg.timestamp > stats.last_message_date:
+                stats.last_message_date = msg.timestamp
+
+            # Hourly distribution
+            stats.hourly_distribution[msg.timestamp.hour] += 1
+
+            # Weekday distribution
+            stats.weekday_distribution[msg.timestamp.weekday()] += 1
+
+            # Daily stats
+            date_str = msg.timestamp.strftime("%Y-%m-%d")
+            if date_str not in daily:
+                daily[date_str] = DailyStats(date=msg.timestamp)
+
+            daily_stat = daily[date_str]
+            daily_stat.message_count += 1
+            if msg.role == 'user':
+                daily_stat.user_messages += 1
+            else:
+                daily_stat.assistant_messages += 1
+
+    # Finalize stats
+    stats.total_sessions = len(sessions)
+    stats.projects = projects
+    stats.total_projects = len(projects)
+    stats.daily_stats = dict(daily)
+    stats.active_days = len([d for d in daily.values() if d.message_count > 0])
+
+    # Most active day
+    if daily:
+        most_active = max(daily.items(), key=lambda x: x[1].message_count)
+        stats.most_active_day = (
+            datetime.strptime(most_active[0], "%Y-%m-%d"),
+            most_active[1].message_count
+        )
+
+    # Most active hour
+    if any(stats.hourly_distribution):
+        stats.most_active_hour = stats.hourly_distribution.index(max(stats.hourly_distribution))
+
+    # Top tools
+    stats.top_tools = stats.tool_calls.most_common(10)
+
+    # Top projects
+    stats.top_projects = projects.most_common(5)
+
+    # Primary model
+    if stats.models_used:
+        stats.primary_model = stats.models_used.most_common(1)[0][0]
+
+    # Streaks
+    stats.streak_longest, stats.streak_current = calculate_streaks(daily, year)
+
+    # Averages
+    if stats.active_days > 0:
+        stats.avg_messages_per_day = stats.total_messages / stats.active_days
+
+    if stats.total_assistant_messages > 0:
+        stats.avg_tokens_per_message = stats.total_tokens / stats.total_assistant_messages
+
+    # Calculate estimated cost
+    from .pricing import calculate_total_cost_by_model
+    if stats.model_token_usage:
+        stats.estimated_cost, stats.cost_by_model = calculate_total_cost_by_model(
+            stats.model_token_usage
+        )
+
+    return stats
+
+
+def format_tokens(tokens: int) -> str:
+    """Format token count for display."""
+    if tokens >= 1_000_000_000:
+        return f"{tokens / 1_000_000_000:.1f}B"
+    if tokens >= 1_000_000:
+        return f"{tokens / 1_000_000:.1f}M"
+    if tokens >= 1_000:
+        return f"{tokens / 1_000:.1f}K"
+    return str(tokens)
+
+
+if __name__ == "__main__":
+    from .reader import load_all_messages, get_claude_dir
+
+    print("Loading messages...")
+    messages = load_all_messages(year=2025)
+    print(f"Loaded {len(messages)} messages")
+
+    print("\nCalculating stats...")
+    stats = aggregate_stats(messages, 2025)
+
+    print(f"\n=== Claude Code Wrapped 2025 ===")
+    print(f"Total messages: {stats.total_messages:,}")
+    print(f" User: {stats.total_user_messages:,}")
+    print(f" Assistant: {stats.total_assistant_messages:,}")
+    print(f"Total sessions: {stats.total_sessions}")
+    print(f"Total projects: {stats.total_projects}")
+    print(f"Active days: {stats.active_days}")
+    print(f"\nTokens: {format_tokens(stats.total_tokens)}")
+    print(f" Input: {format_tokens(stats.total_input_tokens)}")
+    print(f" Output: {format_tokens(stats.total_output_tokens)}")
+    print(f" Cache created: {format_tokens(stats.total_cache_creation_tokens)}")
+    print(f" Cache read: {format_tokens(stats.total_cache_read_tokens)}")
+    print(f"\nPrimary model: {stats.primary_model}")
+    print(f"Longest streak: {stats.streak_longest} days")
+    print(f"Most active hour: {stats.most_active_hour}:00")
+    if stats.most_active_day:
+        print(f"Most active day: {stats.most_active_day[0].strftime('%B %d')} ({stats.most_active_day[1]} messages)")
+    print(f"\nTop tools: {stats.top_tools[:5]}")
+    print(f"Top projects: {stats.top_projects}")