claude-cache-analyzer 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,89 @@
1
+ Metadata-Version: 2.4
2
+ Name: claude-cache-analyzer
3
+ Version: 0.1.0
4
+ Summary: Analyze Claude Code session cache efficiency
5
+ Author-email: Eugene Smith <easmith@mail.ru>
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://github.com/AgiMateIo/claude-cache-analyzer
8
+ Project-URL: Repository, https://github.com/AgiMateIo/claude-cache-analyzer
9
+ Project-URL: Issues, https://github.com/AgiMateIo/claude-cache-analyzer/issues
10
+ Keywords: claude,cache,anthropic,cli,prompt-caching
11
+ Classifier: Development Status :: 3 - Alpha
12
+ Classifier: Environment :: Console
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: Programming Language :: Python :: 3.11
15
+ Classifier: Programming Language :: Python :: 3.12
16
+ Classifier: Programming Language :: Python :: 3.13
17
+ Classifier: Topic :: Software Development :: Testing
18
+ Requires-Python: >=3.11
19
+ Description-Content-Type: text/markdown
20
+ Requires-Dist: typer>=0.12
21
+ Requires-Dist: rich>=13
22
+
23
+ # Claude Code Cache Efficiency Analyzer
24
+
25
+ Based on https://habr.com/ru/companies/bitrix/articles/1008320/
26
+
27
+ It helps you analyze your Claude Agent SDK usage.
28
+
29
+ CLI tool that reads Claude Code JSONL session files, computes prompt cache efficiency metrics using the Bitrix24/Habr formula, and displays results as rich terminal tables.
30
+
31
+ ## Formula
32
+
33
+ ```
34
+ C = S × [(1−h) × P_miss + h × P_hit] + D × P_miss + O × P_out
35
+ ```
36
+
37
+ Where: **S** = cacheable tokens, **h** = hit rate, **D** = dynamic input tokens, **O** = output tokens.
38
+
39
+ ## Install
40
+
41
+ ```bash
42
+ uv sync
43
+ ```
44
+
45
+ ## Usage
46
+
47
+ ```bash
48
+ # Analyze all sessions in ~/.claude
49
+ uv run python cli.py
50
+
51
+ # Last 5 sessions
52
+ uv run python cli.py --top 5
53
+
54
+ # Specific project
55
+ uv run python cli.py --project-name my-project
56
+
57
+ # Export metrics to JSON
58
+ uv run python cli.py --export-json metrics.json
59
+
60
+ # Analyze a specific path
61
+ uv run python cli.py ~/.claude/projects/abc123
62
+ ```
63
+
64
+ ## Metrics
65
+
66
+ | Metric | Description |
67
+ |--------|-------------|
68
+ | Cache hit rate | `cache_read / (cache_creation + cache_read)` |
69
+ | Actual cost | Real cost with cache pricing applied |
70
+ | Cost without cache | Hypothetical cost if all tokens were at input price |
71
+ | Savings | `cost_no_cache - actual_cost` |
72
+ | Net savings | Savings minus cache write overhead |
73
+ | Efficiency score | `hit_rate × (cacheable / (input + cacheable))` — range [0..1] |
74
+
75
+ ## Grades
76
+
77
+ | Grade | Efficiency Score |
78
+ |-------|-----------------|
79
+ | **A** | ≥ 0.70 |
80
+ | **B** | ≥ 0.50 |
81
+ | **C** | ≥ 0.30 |
82
+ | **D** | ≥ 0.10 |
83
+ | **F** | < 0.10 |
84
+
85
+ ## Tests
86
+
87
+ ```bash
88
+ uv run pytest -v
89
+ ```
@@ -0,0 +1,67 @@
1
+ # Claude Code Cache Efficiency Analyzer
2
+
3
+ Based on https://habr.com/ru/companies/bitrix/articles/1008320/
4
+
5
+ It helps you analyze your Claude Agent SDK usage.
6
+
7
+ CLI tool that reads Claude Code JSONL session files, computes prompt cache efficiency metrics using the Bitrix24/Habr formula, and displays results as rich terminal tables.
8
+
9
+ ## Formula
10
+
11
+ ```
12
+ C = S × [(1−h) × P_miss + h × P_hit] + D × P_miss + O × P_out
13
+ ```
14
+
15
+ Where: **S** = cacheable tokens, **h** = hit rate, **D** = dynamic input tokens, **O** = output tokens.
16
+
17
+ ## Install
18
+
19
+ ```bash
20
+ uv sync
21
+ ```
22
+
23
+ ## Usage
24
+
25
+ ```bash
26
+ # Analyze all sessions in ~/.claude
27
+ uv run python cli.py
28
+
29
+ # Last 5 sessions
30
+ uv run python cli.py --top 5
31
+
32
+ # Specific project
33
+ uv run python cli.py --project-name my-project
34
+
35
+ # Export metrics to JSON
36
+ uv run python cli.py --export-json metrics.json
37
+
38
+ # Analyze a specific path
39
+ uv run python cli.py ~/.claude/projects/abc123
40
+ ```
41
+
42
+ ## Metrics
43
+
44
+ | Metric | Description |
45
+ |--------|-------------|
46
+ | Cache hit rate | `cache_read / (cache_creation + cache_read)` |
47
+ | Actual cost | Real cost with cache pricing applied |
48
+ | Cost without cache | Hypothetical cost if all tokens were at input price |
49
+ | Savings | `cost_no_cache - actual_cost` |
50
+ | Net savings | Savings minus cache write overhead |
51
+ | Efficiency score | `hit_rate × (cacheable / (input + cacheable))` — range [0..1] |
52
+
53
+ ## Grades
54
+
55
+ | Grade | Efficiency Score |
56
+ |-------|-----------------|
57
+ | **A** | ≥ 0.70 |
58
+ | **B** | ≥ 0.50 |
59
+ | **C** | ≥ 0.30 |
60
+ | **D** | ≥ 0.10 |
61
+ | **F** | < 0.10 |
62
+
63
+ ## Tests
64
+
65
+ ```bash
66
+ uv run pytest -v
67
+ ```
@@ -0,0 +1,3 @@
1
"""Claude Code Cache Efficiency Analyzer."""

# Package version; keep in sync with the version declared in pyproject.toml.
__version__ = "0.1.0"
@@ -0,0 +1,219 @@
1
+ """CLI entry point for Claude Code Cache Efficiency Analyzer."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ from dataclasses import asdict
7
+ from datetime import datetime
8
+ from pathlib import Path
9
+ from typing import Optional
10
+
11
+ import typer
12
+ from rich.console import Console
13
+
14
+ from claude_cache_analyzer import __version__
15
+ from claude_cache_analyzer.metrics import aggregate, compute_session_metrics
16
+ from claude_cache_analyzer.parser import discover_sessions, find_session_by_id, parse_session_file
17
+ from claude_cache_analyzer.report import (
18
+ print_grouped_report,
19
+ print_no_sessions_message,
20
+ print_project_report,
21
+ print_session_detail,
22
+ )
23
+
24
# Typer application object for the single-command CLI; `-h` is aliased
# to `--help` for convenience.
app = typer.Typer(
    help="Analyze Claude Code session cache efficiency.",
    context_settings={"help_option_names": ["-h", "--help"]},
)
# Shared Rich console used for all terminal output in this module.
console = Console()
29
+
30
+
31
def version_callback(value: bool) -> None:
    """Eager `--version` callback: print the tool version and exit.

    Typer invokes this before normal option processing (is_eager=True on
    the option); raising typer.Exit() aborts the rest of the command.
    """
    if value:
        console.print(f"claude-cache-analyzer {__version__}")
        raise typer.Exit()
35
+
36
+
37
@app.command()
def main(
    project_path: Optional[Path] = typer.Argument(
        None,
        help="Root of Claude data (~/.claude) or a specific project directory.",
    ),
    project_name: Optional[str] = typer.Option(
        None, "--project-name", "-p", help="Filter by project directory name."
    ),
    top: Optional[int] = typer.Option(
        None, "--top", "-n", help="Show only the N most recent sessions."
    ),
    min_turns: int = typer.Option(
        1, "--min-turns", help="Minimum number of turns to include a session."
    ),
    session: Optional[str] = typer.Option(
        None, "--session", "-s", help="Show detailed view for a specific session (full or partial ID)."
    ),
    group_by_project: bool = typer.Option(
        False, "--group-by-project", "-g", help="Group results by project."
    ),
    export_json: Optional[Path] = typer.Option(
        None, "--export-json", help="Export raw metrics to a JSON file."
    ),
    version: Optional[bool] = typer.Option(
        None, "--version", callback=version_callback, is_eager=True, help="Show version."
    ),
) -> None:
    """Analyze Claude Code prompt cache efficiency.

    Discovers JSONL session files under *project_path* (defaults to
    ~/.claude), computes per-session cache metrics, and prints a Rich
    report. With --session, shows a single-session detail view instead;
    with --export-json, additionally writes the metrics to a JSON file.

    Exits with code 1 when the path does not exist or a --session lookup
    fails/is ambiguous; exits cleanly when no sessions match the filters.
    """
    # Default to the standard Claude data directory.
    if project_path is None:
        project_path = Path.home() / ".claude"

    project_path = project_path.expanduser().resolve()

    # Determine if path points to a specific project or the root
    if (project_path / "projects").is_dir():
        # Root claude dir
        sessions = discover_sessions(project_path)
    elif project_path.is_dir():
        # Might be a specific project directory — look for JSONL files directly
        jsonl_files = list(project_path.glob("*.jsonl"))
        if jsonl_files:
            sessions = []
            for f in jsonl_files:
                s = parse_session_file(f)
                # Skip sessions that parsed to zero turns.
                if s.turns:
                    sessions.append(s)
            # Newest first; sessions without a start time sort last.
            # NOTE(review): datetime.min is naive — if started_at is ever
            # timezone-aware this comparison raises TypeError; confirm the
            # parser emits naive datetimes.
            sessions.sort(
                key=lambda s: s.started_at or datetime.min, reverse=True
            )
        else:
            # Maybe it's a projects/ parent
            sessions = discover_sessions(project_path)
    else:
        console.print(f"[red]Path does not exist: {project_path}[/red]")
        raise typer.Exit(1)

    if not sessions:
        print_no_sessions_message()
        raise typer.Exit()

    # Session detail mode
    if session:
        # Partial-ID lookup: `match` is set only on an unambiguous hit;
        # `candidates` lists all sessions whose ID matched the fragment.
        match, candidates = find_session_by_id(sessions, session)
        if match is None and not candidates:
            console.print(f"[red]No session found matching '{session}'[/red]")
            raise typer.Exit(1)
        if match is None:
            # Ambiguous fragment: show the candidates and bail out.
            console.print(f"[yellow]Ambiguous session ID '{session}'. Candidates:[/yellow]")
            for c in candidates:
                date_str = c.started_at.strftime("%Y-%m-%d %H:%M") if c.started_at else "—"
                console.print(f" {c.session_id} ({date_str}, {c.num_turns} turns)")
            raise typer.Exit(1)

        sm = compute_session_metrics(match)
        print_session_detail(sm)

        # Per-turn JSON export for the single-session view.
        if export_json:
            export_data = {
                "session_id": match.session_id,
                "project": match.project,
                "model": match.model,
                "started_at": match.started_at.isoformat() if match.started_at else None,
                "num_turns": match.num_turns,
                "hit_rate": sm.hit_rate,
                "efficiency_score": sm.cache_efficiency_score,
                "grade": sm.grade(),
                "actual_cost": sm.actual_cost,
                "cost_no_cache": sm.cost_no_cache,
                "savings": sm.savings,
                "net_savings": sm.net_savings,
                "savings_pct": sm.savings_pct,
                "turns": [
                    {
                        "timestamp": tm.turn.timestamp.isoformat() if tm.turn.timestamp else None,
                        "model": tm.turn.model,
                        "input_tokens": tm.turn.input_tokens,
                        "output_tokens": tm.turn.output_tokens,
                        "cache_creation_tokens": tm.turn.cache_creation_tokens,
                        "cache_read_tokens": tm.turn.cache_read_tokens,
                        "hit_rate": tm.turn.hit_rate,
                        "actual_cost": tm.actual_cost,
                        "cost_no_cache": tm.cost_no_cache,
                        "savings": tm.savings,
                        "savings_pct": tm.savings_pct,
                    }
                    for tm in sm.turns
                ],
            }
            export_json.write_text(json.dumps(export_data, indent=2))
            console.print(f"\n[green]Metrics exported to {export_json}[/green]")
        # Detail mode never falls through to the multi-session report.
        raise typer.Exit()

    # Filter by project name
    if project_name:
        # Substring match against the project directory name.
        sessions = [s for s in sessions if project_name in s.project]

    # Filter by min turns
    sessions = [s for s in sessions if s.num_turns >= min_turns]

    if not sessions:
        print_no_sessions_message()
        raise typer.Exit()

    # Limit to top N
    # Sessions are already sorted most-recent-first, so a slice keeps the
    # N most recent.
    if top is not None:
        sessions = sessions[:top]

    # Compute metrics
    sessions_metrics = [compute_session_metrics(s) for s in sessions]

    # Print report
    if group_by_project:
        print_grouped_report(sessions_metrics)
    else:
        # Use the project name when it is unambiguous, else a generic label.
        display_name = project_name or (
            sessions[0].project
            if len(set(s.project for s in sessions)) == 1
            else "all projects"
        )
        print_project_report(sessions_metrics, display_name)

    # Export JSON
    if export_json:
        agg = aggregate(sessions_metrics)
        export_data = {
            # best/worst are SessionMetrics objects — not JSON-serializable,
            # so they are excluded from the aggregate section.
            "aggregate": {
                k: v
                for k, v in agg.items()
                if k not in ("best_session", "worst_session")
            },
            "sessions": [],
        }
        for sm in sessions_metrics:
            sess = sm.session
            export_data["sessions"].append(
                {
                    "session_id": sess.session_id,
                    "project": sess.project,
                    "model": sess.model,
                    "started_at": (
                        sess.started_at.isoformat() if sess.started_at else None
                    ),
                    "num_turns": sess.num_turns,
                    "total_input": sess.total_input,
                    "total_output": sess.total_output,
                    "total_cacheable": sess.total_cacheable,
                    "hit_rate": sm.hit_rate,
                    "efficiency_score": sm.cache_efficiency_score,
                    "grade": sm.grade(),
                    "actual_cost": sm.actual_cost,
                    "cost_no_cache": sm.cost_no_cache,
                    "savings": sm.savings,
                    "net_savings": sm.net_savings,
                    "savings_pct": sm.savings_pct,
                }
            )
        export_json.write_text(json.dumps(export_data, indent=2))
        console.print(f"\n[green]Metrics exported to {export_json}[/green]")
216
+
217
+
218
# Allow running the CLI directly (`python cli.py`) as well as via the
# installed entry point.
if __name__ == "__main__":
    app()
@@ -0,0 +1,163 @@
1
+ """Compute cost metrics and efficiency scores for sessions."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass, field
6
+
7
+ from .parser import Session, TurnUsage
8
+ from .pricing import cost_per_token, get_pricing
9
+
10
+
11
+ @dataclass
12
+ class TurnMetrics:
13
+ turn: TurnUsage
14
+ actual_cost: float
15
+ cost_no_cache: float
16
+ cache_write_overhead: float
17
+
18
+ @property
19
+ def savings(self) -> float:
20
+ return self.cost_no_cache - self.actual_cost
21
+
22
+ @property
23
+ def net_savings(self) -> float:
24
+ return self.savings - self.cache_write_overhead
25
+
26
+ @property
27
+ def savings_pct(self) -> float:
28
+ if self.cost_no_cache == 0:
29
+ return 0.0
30
+ return self.savings / self.cost_no_cache * 100
31
+
32
+
33
+ @dataclass
34
+ class SessionMetrics:
35
+ session: Session
36
+ turns: list[TurnMetrics] = field(default_factory=list)
37
+
38
+ @property
39
+ def actual_cost(self) -> float:
40
+ return sum(t.actual_cost for t in self.turns)
41
+
42
+ @property
43
+ def cost_no_cache(self) -> float:
44
+ return sum(t.cost_no_cache for t in self.turns)
45
+
46
+ @property
47
+ def cache_write_overhead(self) -> float:
48
+ return sum(t.cache_write_overhead for t in self.turns)
49
+
50
+ @property
51
+ def savings(self) -> float:
52
+ return self.cost_no_cache - self.actual_cost
53
+
54
+ @property
55
+ def net_savings(self) -> float:
56
+ return self.savings - self.cache_write_overhead
57
+
58
+ @property
59
+ def savings_pct(self) -> float:
60
+ if self.cost_no_cache == 0:
61
+ return 0.0
62
+ return self.savings / self.cost_no_cache * 100
63
+
64
+ @property
65
+ def hit_rate(self) -> float:
66
+ return self.session.hit_rate
67
+
68
+ @property
69
+ def cache_efficiency_score(self) -> float:
70
+ total_input = self.session.total_input
71
+ total_cacheable = self.session.total_cacheable
72
+ denom = total_input + total_cacheable
73
+ if denom == 0:
74
+ return 0.0
75
+ return self.hit_rate * (total_cacheable / denom)
76
+
77
+ def grade(self) -> str:
78
+ score = self.cache_efficiency_score
79
+ if score >= 0.70:
80
+ return "A"
81
+ if score >= 0.50:
82
+ return "B"
83
+ if score >= 0.30:
84
+ return "C"
85
+ if score >= 0.10:
86
+ return "D"
87
+ return "F"
88
+
89
+
90
def _compute_turn_metrics(turn: TurnUsage) -> TurnMetrics:
    """Price a single turn under both cache-aware and no-cache pricing.

    Cost model (Bitrix24/Habr): C = S*[(1-h)*P_miss + h*P_hit] + D*P_miss + O*P_out.
    The actual charge bills each token class at its real rate (cache writes
    at the cache-write rate, cache reads at the cheaper cache-read rate,
    uncached input and output at their normal rates); the baseline prices
    every input-side token at the plain input rate.

    Fix: dropped the unused local `h = turn.hit_rate` — it was computed but
    never referenced in any expression.
    """
    pricing = get_pricing(turn.model)
    ppt = cost_per_token(pricing)

    s = turn.cacheable_tokens
    d = turn.input_tokens
    o = turn.output_tokens

    # What was actually billed, per token class.
    actual_cost = (
        turn.cache_creation_tokens * ppt["cache_write"]
        + turn.cache_read_tokens * ppt["cache_read"]
        + d * ppt["input"]
        + o * ppt["output"]
    )

    # Baseline without cache: all input tokens at normal input price
    cost_no_cache = (s + d) * ppt["input"] + o * ppt["output"]

    # Overhead: cache_write is more expensive than regular input
    cache_write_overhead = turn.cache_creation_tokens * (ppt["cache_write"] - ppt["input"])

    return TurnMetrics(
        turn=turn,
        actual_cost=actual_cost,
        cost_no_cache=cost_no_cache,
        cache_write_overhead=cache_write_overhead,
    )
+ )
122
+
123
+
124
def compute_session_metrics(session: Session) -> SessionMetrics:
    """Build a SessionMetrics by pricing each turn in *session*."""
    return SessionMetrics(
        session=session,
        turns=[_compute_turn_metrics(turn) for turn in session.turns],
    )
127
+
128
+
129
def aggregate(sessions_metrics: list[SessionMetrics]) -> dict:
    """Roll session-level metrics up into a single summary dict.

    Returns totals for actual/no-cache cost and (net) savings, the mean
    hit rate and efficiency score, and the best/worst-scoring sessions.
    An empty input yields all-zero totals with None best/worst.
    """
    if not sessions_metrics:
        return {
            "total_actual_cost": 0.0,
            "total_cost_no_cache": 0.0,
            "total_savings": 0.0,
            "total_net_savings": 0.0,
            "avg_hit_rate": 0.0,
            "avg_efficiency_score": 0.0,
            "best_session": None,
            "worst_session": None,
        }

    count = len(sessions_metrics)
    spent = sum(sm.actual_cost for sm in sessions_metrics)
    baseline = sum(sm.cost_no_cache for sm in sessions_metrics)
    overhead = sum(sm.cache_write_overhead for sm in sessions_metrics)
    saved = baseline - spent

    score_of = lambda sm: sm.cache_efficiency_score

    return {
        "total_actual_cost": spent,
        "total_cost_no_cache": baseline,
        "total_savings": saved,
        "total_net_savings": saved - overhead,
        "avg_hit_rate": sum(sm.hit_rate for sm in sessions_metrics) / count,
        "avg_efficiency_score": sum(score_of(sm) for sm in sessions_metrics) / count,
        "best_session": max(sessions_metrics, key=score_of),
        "worst_session": min(sessions_metrics, key=score_of),
    }
+ }