claude-code-tracker 1.1.7 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/install.sh CHANGED
@@ -1,6 +1,14 @@
 #!/usr/bin/env bash
 set -euo pipefail
 
+# Windows detection — native Windows shells (Git Bash, MSYS, Cygwin) won't work correctly
+if [[ "$OSTYPE" == msys* || "$OSTYPE" == cygwin* || -n "${WINDIR:-}" ]]; then
+  echo "Error: claude-code-tracker requires a Unix shell (macOS, Linux, or WSL)." >&2
+  echo "On Windows, install WSL and run this from a WSL terminal:" >&2
+  echo "  https://learn.microsoft.com/windows/wsl/install" >&2
+  exit 1
+fi
+
 SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 INSTALL_DIR="$HOME/.claude/tracking"
 SETTINGS="$HOME/.claude/settings.json"
@@ -47,14 +55,31 @@ hook_entry = {"type": "command", "command": hook_cmd, "timeout": 30, "async": True}
 hooks = data.setdefault("hooks", {})
 stop_hooks = hooks.setdefault("Stop", [])
 
-# Check if already registered
-for group in stop_hooks:
-    for h in group.get("hooks", []):
-        if h.get("command") == hook_cmd:
-            print("Hook already registered.")
-            sys.exit(0)
+already_stop = any(
+    h.get("command") == hook_cmd
+    for group in stop_hooks for h in group.get("hooks", [])
+)
+if already_stop:
+    print("Hook already registered.")
+else:
+    stop_hooks.append({"hooks": [hook_entry]})
+
+# SessionStart hook
+backfill_cmd = hook_cmd + " --backfill-only"
+session_hooks = hooks.setdefault("SessionStart", [])
+already_session = any(
+    h.get("command") == backfill_cmd
+    for group in session_hooks for h in group.get("hooks", [])
+)
+if not already_session:
+    session_hooks.append({"hooks": [{"type": "command", "command": backfill_cmd, "timeout": 60, "async": True}]})
 
-stop_hooks.append({"hooks": [hook_entry]})
+# permissions.allow
+allow_entry = f"Bash({hook_cmd}*)"
+perms = data.setdefault("permissions", {})
+allow_list = perms.setdefault("allow", [])
+if allow_entry not in allow_list:
+    allow_list.append(allow_entry)
 
 os.makedirs(os.path.dirname(os.path.abspath(settings_file)), exist_ok=True)
 with open(settings_file, 'w') as f:
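
Taken together, these hunks register two hooks and one permission entry. On a fresh install, ~/.claude/settings.json should come out shaped roughly like the sketch below; the stop-hook.sh path is an assumption (the real value is hook_cmd from install.sh), while the timeouts, async flags, and the Bash(...) allow pattern are taken from the diff.

```python
# Hypothetical resulting ~/.claude/settings.json, shown as a Python dict.
# The command path is assumed; the structure mirrors install.sh above.
expected_settings = {
    "hooks": {
        "Stop": [
            {"hooks": [{"type": "command",
                        "command": "/home/me/.claude/tracking/stop-hook.sh",  # assumed path
                        "timeout": 30, "async": True}]}
        ],
        "SessionStart": [
            {"hooks": [{"type": "command",
                        "command": "/home/me/.claude/tracking/stop-hook.sh --backfill-only",
                        "timeout": 60, "async": True}]}
        ],
    },
    "permissions": {
        "allow": ["Bash(/home/me/.claude/tracking/stop-hook.sh*)"]
    },
}
```
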
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "claude-code-tracker",
-  "version": "1.1.7",
+  "version": "1.2.0",
   "description": "Automatic token, cost, and prompt tracking for Claude Code sessions",
   "keywords": [
     "claude",
package/src/backfill.py CHANGED
@@ -6,9 +6,11 @@ Usage:
     python3 backfill.py <project_root>
 
 Scans ~/.claude/projects/<slug>/*.jsonl for transcripts belonging to the
-given project, parses token usage from each, and appends entries to
-<project_root>/.claude/tracking/tokens.json. Sessions already present
-are skipped.
+given project, parses token usage from each turn, and upserts entries to
+<project_root>/.claude/tracking/tokens.json. Sessions where all turns are
+already present are skipped.
+
+Old-format entries (no turn_index field) are replaced with per-turn entries.
 """
 import sys, json, os, glob
 from datetime import datetime
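
As context for the docstring change: pre-1.2.0 entries carry one aggregate record per session, while 1.2.0 writes one record per turn, distinguished by the new turn_index and turn_timestamp fields. A sketch of both shapes with made-up values:

```python
# Made-up values; field names match the entries built later in this file.
old_style = {   # 1.1.x: one record per session, no turn_index
    "date": "2025-01-15", "project": "myproj", "session_id": "6f2a91c0",
    "input_tokens": 120, "cache_creation_tokens": 4_000,
    "cache_read_tokens": 90_000, "output_tokens": 5_500,
    "total_tokens": 99_620, "estimated_cost_usd": 0.124,
    "model": "unknown", "duration_seconds": 310,
}
per_turn = {    # 1.2.0: one record per user/assistant turn
    **old_style,
    "turn_index": 0,                           # ordinal of the turn in the session
    "turn_timestamp": "2025-01-15T09:12:44Z",  # user message time, normalized to Z
}
```
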
@@ -26,7 +28,7 @@ if not os.path.isdir(transcripts_dir):
     print("No transcript directory found, nothing to backfill.")
     sys.exit(0)
 
-# Load existing data and build set of known session IDs
+# Load existing data
 data = []
 if os.path.exists(tokens_file):
     try:
@@ -35,22 +37,25 @@ if os.path.exists(tokens_file):
     except Exception:
         data = []
 
-known_ids = {e.get("session_id") for e in data}
+# Remove old-format entries (no turn_index) — they will be re-processed
+old_sessions = {e.get("session_id") for e in data if "turn_index" not in e}
+data = [e for e in data if "turn_index" in e]
 
-# Find all JSONL transcripts
-jsonl_files = sorted(glob.glob(os.path.join(transcripts_dir, "*.jsonl")))
-backfilled = 0
+# Build index of existing (session_id, turn_index) pairs
+existing_turns = {(e.get("session_id"), e.get("turn_index")) for e in data}
 
-for jf in jsonl_files:
-    session_id = os.path.splitext(os.path.basename(jf))[0]
-    if session_id in known_ids:
-        continue
+# Count turns per known session
+turns_per_session = {}
+for e in data:
+    sid = e.get("session_id")
+    turns_per_session[sid] = turns_per_session.get(sid, 0) + 1
 
-    # Parse token usage — same logic as stop-hook.sh
-    inp = out = cache_create = cache_read = 0
+def parse_turns(jf):
+    """Parse a JSONL transcript into per-turn entries. Returns list of dicts."""
+    msgs = []    # (role, timestamp)
+    usages = []  # usage dicts from assistant messages, in order
     model = "unknown"
     first_ts = None
-    msgs = []
 
     try:
         with open(jf) as f:
@@ -69,23 +74,25 @@ for jf in jsonl_files:
                     if isinstance(msg, dict) and msg.get("role") == "assistant":
                         usage = msg.get("usage", {})
                         if usage:
-                            inp += usage.get("input_tokens", 0)
-                            out += usage.get("output_tokens", 0)
-                            cache_create += usage.get("cache_creation_input_tokens", 0)
-                            cache_read += usage.get("cache_read_input_tokens", 0)
+                            usages.append(usage)
                         m = msg.get("model", "")
                         if m:
                             model = m
                 except Exception:
                     pass
     except Exception:
-        continue
+        return [], None, "unknown"
 
-    total = inp + cache_create + cache_read + out
-    if total == 0:
-        continue
+    return msgs, first_ts, model, usages
+
+def compute_turns(msgs, usages, first_ts, model, session_id, project_name):
+    """Convert message list + usages into per-turn entry dicts."""
+    entries = []
+    turn_index = 0
+    usage_index = 0
+    i = 0
 
-    # Date from first timestamp in the transcript
+    # Date from first timestamp
     session_date = None
     if first_ts:
         try:
@@ -94,60 +101,132 @@ for jf in jsonl_files:
             ).strftime("%Y-%m-%d")
         except Exception:
             pass
-    if not session_date:
-        session_date = datetime.fromtimestamp(os.path.getmtime(jf)).strftime("%Y-%m-%d")
 
-    # Duration: sum of per-turn active thinking time (user -> first assistant reply)
-    duration = 0
-    i = 0
     while i < len(msgs):
         if msgs[i][0] == "user":
+            user_ts = msgs[i][1]
             j = i + 1
             while j < len(msgs) and msgs[j][0] != "assistant":
                 j += 1
             if j < len(msgs):
+                asst_ts = msgs[j][1]
+                # Consume next usage block for this turn
+                usage = {}
+                if usage_index < len(usages):
+                    usage = usages[usage_index]
+                    usage_index += 1
+
+                inp = usage.get("input_tokens", 0)
+                out = usage.get("output_tokens", 0)
+                cache_create = usage.get("cache_creation_input_tokens", 0)
+                cache_read = usage.get("cache_read_input_tokens", 0)
+                total = inp + cache_create + cache_read + out
+
+                if total == 0:
+                    # Skip turns with no token data
+                    i = j + 1
+                    turn_index += 1
+                    continue
+
+                duration = 0
+                try:
+                    t0 = datetime.fromisoformat(user_ts.replace("Z", "+00:00"))
+                    t1 = datetime.fromisoformat(asst_ts.replace("Z", "+00:00"))
+                    duration = max(0, int((t1 - t0).total_seconds()))
+                except Exception:
+                    pass
+
+                if "opus" in model:
+                    cost = inp * 15 / 1e6 + cache_create * 18.75 / 1e6 + cache_read * 1.50 / 1e6 + out * 75 / 1e6
+                else:
+                    cost = inp * 3 / 1e6 + cache_create * 3.75 / 1e6 + cache_read * 0.30 / 1e6 + out * 15 / 1e6
+
+                # Turn timestamp = user message timestamp
+                turn_ts = user_ts
+                # Normalize to Z format
+                try:
+                    turn_ts = datetime.fromisoformat(user_ts.replace("Z", "+00:00")).strftime("%Y-%m-%dT%H:%M:%SZ")
+                except Exception:
+                    pass
+
+                # Use date from this turn's timestamp if possible
+                turn_date = session_date
                 try:
-                    t0 = datetime.fromisoformat(msgs[i][1].replace("Z", "+00:00"))
-                    t1 = datetime.fromisoformat(msgs[j][1].replace("Z", "+00:00"))
-                    duration += max(0, int((t1 - t0).total_seconds()))
+                    turn_date = datetime.fromisoformat(user_ts.replace("Z", "+00:00")).strftime("%Y-%m-%d")
                 except Exception:
                     pass
-        i += 1
 
-    # Cost
-    if "opus" in model:
-        cost = inp * 15 / 1e6 + cache_create * 18.75 / 1e6 + cache_read * 1.50 / 1e6 + out * 75 / 1e6
+                entries.append({
+                    "date": turn_date or session_date,
+                    "project": project_name,
+                    "session_id": session_id,
+                    "turn_index": turn_index,
+                    "turn_timestamp": turn_ts,
+                    "input_tokens": inp,
+                    "cache_creation_tokens": cache_create,
+                    "cache_read_tokens": cache_read,
+                    "output_tokens": out,
+                    "total_tokens": total,
+                    "estimated_cost_usd": round(cost, 4),
+                    "model": model,
+                    "duration_seconds": duration,
+                })
+                turn_index += 1
+                i = j + 1
+            else:
+                i += 1
+        else:
+            i += 1
+
+    return entries
+
+# Find all JSONL transcripts
+jsonl_files = sorted(glob.glob(os.path.join(transcripts_dir, "*.jsonl")))
+new_entries = []
+sessions_processed = 0
+
+for jf in jsonl_files:
+    session_id = os.path.splitext(os.path.basename(jf))[0]
+
+    result = parse_turns(jf)
+    if len(result) == 4:
+        msgs, first_ts, model, usages = result
     else:
-        cost = inp * 3 / 1e6 + cache_create * 3.75 / 1e6 + cache_read * 0.30 / 1e6 + out * 15 / 1e6
-
-    entry = {
-        "date": session_date,
-        "project": project_name,
-        "session_id": session_id,
-        "input_tokens": inp,
-        "cache_creation_tokens": cache_create,
-        "cache_read_tokens": cache_read,
-        "output_tokens": out,
-        "total_tokens": total,
-        "estimated_cost_usd": round(cost, 4),
-        "model": model,
-        "duration_seconds": duration,
-    }
-
-    data.append(entry)
-    backfilled += 1
+        continue
+
+    turn_entries = compute_turns(msgs, usages, first_ts, model, session_id, project_name)
+
+    if not turn_entries:
+        continue
+
+    expected_count = len(turn_entries)
+    existing_count = turns_per_session.get(session_id, 0)
+
+    # If all turns already present and session not in old-format set, skip
+    if existing_count >= expected_count and session_id not in old_sessions:
+        continue
+
+    # Upsert: replace any existing turns for this session with fresh data
+    data = [e for e in data if e.get("session_id") != session_id]
+    data.extend(turn_entries)
+    new_entries.extend(turn_entries)
+    sessions_processed += 1
+
+# Sort by (date, session_id, turn_index)
+data.sort(key=lambda x: (x.get("date", ""), x.get("session_id", ""), x.get("turn_index", 0)))
 
 # Write updated tokens.json
-if backfilled > 0:
+if new_entries:
     os.makedirs(os.path.dirname(tokens_file), exist_ok=True)
     with open(tokens_file, "w") as f:
         json.dump(data, f, indent=2)
         f.write("\n")
 
-    print(f"{backfilled} session{'s' if backfilled != 1 else ''} backfilled.")
+    total_turns = len(new_entries)
+    print(f"{sessions_processed} session{'s' if sessions_processed != 1 else ''} processed, {total_turns} turn{'s' if total_turns != 1 else ''} written.")
 
 # Regenerate charts if we added anything
-if backfilled > 0:
+if new_entries:
     script_dir = os.path.dirname(os.path.abspath(__file__))
     charts_html = os.path.join(tracking_dir, "charts.html")
     os.system(f'python3 "{script_dir}/generate-charts.py" "{tokens_file}" "{charts_html}" 2>/dev/null')
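
The pairing rule both parse_turns and compute_turns rely on: each turn starts at a user message and ends at the first assistant message after it. A minimal standalone sketch of that walk with toy timestamps:

```python
# Toy (role, timestamp) list; real data comes from the JSONL transcript.
msgs = [("user", "t0"), ("assistant", "t1"), ("assistant", "t2"),
        ("user", "t3"), ("user", "t4"), ("assistant", "t5")]

turns, i = [], 0
while i < len(msgs):
    if msgs[i][0] == "user":
        j = i + 1
        while j < len(msgs) and msgs[j][0] != "assistant":
            j += 1                 # scan forward to the first assistant reply
        if j < len(msgs):
            turns.append((msgs[i][1], msgs[j][1]))
            i = j + 1              # resume after the reply
        else:
            i += 1                 # unanswered user message: no turn emitted
    else:
        i += 1                     # extra assistant messages belong to the prior turn

print(turns)  # [('t0', 't1'), ('t3', 't5')]: consecutive user messages merge into one turn
```
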
@@ -54,11 +54,13 @@ if not data:
     sys.exit(0)
 
 # --- Aggregate ---
-by_date = defaultdict(lambda: {"cost": 0, "sessions": 0, "output": 0, "cache_read": 0, "cache_create": 0, "input": 0, "duration": 0})
-by_model = defaultdict(lambda: {"cost": 0, "sessions": 0})
+# Each entry is a turn. Sessions = unique session_ids. Prompts = total entries.
+by_date = defaultdict(lambda: {"cost": 0, "prompts": 0, "output": 0, "cache_read": 0, "cache_create": 0, "input": 0, "duration": 0})
+by_model = defaultdict(lambda: {"cost": 0, "prompts": 0})
 total_cost = 0
-total_sessions = len(data)
-sessions_with_tokens = 0
+total_turns = len(data)
+total_sessions = len({e.get("session_id") for e in data})
+sessions_with_tokens = len({e.get("session_id") for e in data if e.get("total_tokens", 0) > 0})
 
 for e in data:
     d = e.get("date", "unknown")
@@ -67,7 +69,7 @@ for e in data:
     short_model = model.split("-20")[0] if "-20" in model else model
 
     by_date[d]["cost"] += cost
-    by_date[d]["sessions"] += 1
+    by_date[d]["prompts"] += 1
     by_date[d]["output"] += e.get("output_tokens", 0)
     by_date[d]["cache_read"] += e.get("cache_read_tokens", 0)
     by_date[d]["cache_create"] += e.get("cache_creation_tokens", 0)
@@ -75,11 +77,9 @@ for e in data:
     by_date[d]["duration"] += e.get("duration_seconds", 0)
 
     by_model[short_model]["cost"] += cost
-    by_model[short_model]["sessions"] += 1
+    by_model[short_model]["prompts"] += 1
 
     total_cost += cost
-    if e.get("total_tokens", 0) > 0:
-        sessions_with_tokens += 1
 
 total_output = sum(e.get("output_tokens", 0) for e in data)
 total_cache_read = sum(e.get("cache_read_tokens", 0) for e in data)
@@ -93,27 +93,28 @@ print(f" Cost Summary — {os.path.basename(os.path.dirname(os.path.dirname(tok
 print("=" * W)
 
 print(f"\nBy date:")
-print(f" {'Date':<12} {'Sessions':>8} {'Output':>10} {'Cache Read':>12} {'Duration':>10} {'Cost':>10}")
+print(f" {'Date':<12} {'Prompts':>8} {'Output':>10} {'Cache Read':>12} {'Duration':>10} {'Cost':>10}")
 print(f" {'-'*12} {'-'*8} {'-'*10} {'-'*12} {'-'*10} {'-'*10}")
 for d in sorted(by_date):
     r = by_date[d]
-    print(f" {d:<12} {r['sessions']:>8} {r['output']:>10,} {r['cache_read']:>12,} {format_duration(r['duration']):>10} ${r['cost']:>9.2f}")
+    print(f" {d:<12} {r['prompts']:>8} {r['output']:>10,} {r['cache_read']:>12,} {format_duration(r['duration']):>10} ${r['cost']:>9.2f}")
 
 print(f"\nBy model:")
-print(f" {'Model':<30} {'Sessions':>8} {'Cost':>10}")
+print(f" {'Model':<30} {'Prompts':>8} {'Cost':>10}")
 print(f" {'-'*30} {'-'*8} {'-'*10}")
 for m in sorted(by_model, key=lambda x: -by_model[x]["cost"]):
     r = by_model[m]
-    print(f" {m:<30} {r['sessions']:>8} ${r['cost']:>9.2f}")
+    print(f" {m:<30} {r['prompts']:>8} ${r['cost']:>9.2f}")
 
 print(f"\nTotals:")
 print(f" Sessions: {total_sessions:>8} ({sessions_with_tokens} with token data)")
+print(f" Prompts: {total_turns:>8}")
 print(f" Input tokens: {total_input:>12,}")
 print(f" Cache write: {total_cache_create:>12,}")
 print(f" Cache read: {total_cache_read:>12,}")
 print(f" Output tokens: {total_output:>12,}")
 total_duration = sum(e.get("duration_seconds", 0) for e in data)
-print(f" Session time: {format_duration(total_duration):>12}")
+print(f" Active time: {format_duration(total_duration):>12}")
 print(f" Estimated cost: ${total_cost:>11.2f}")
 
 if total_output > 0:
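
Because each entry is now a turn, the summary separates prompt counts from session counts by collapsing on session_id. A quick sketch of the three totals computed above:

```python
# Toy per-turn entries illustrating total_turns vs. total_sessions.
data = [
    {"session_id": "abc", "turn_index": 0, "total_tokens": 900},
    {"session_id": "abc", "turn_index": 1, "total_tokens": 1_200},
    {"session_id": "def", "turn_index": 0, "total_tokens": 0},
]
total_turns = len(data)                                    # 3 prompts
total_sessions = len({e.get("session_id") for e in data})  # 2 sessions
sessions_with_tokens = len({e.get("session_id")
                            for e in data
                            if e.get("total_tokens", 0) > 0})  # 1
print(total_turns, total_sessions, sessions_with_tokens)   # 3 2 1
```
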
package/src/generate-charts.py CHANGED
@@ -28,22 +28,24 @@ if not data:
     sys.exit(0)
 
 # --- Aggregate by date ---
-by_date = defaultdict(lambda: {"cost": 0, "sessions": 0, "output": 0,
+# Each entry is a turn; group by date for bar charts, session_id for unique session count
+by_date = defaultdict(lambda: {"cost": 0, "turns": 0, "output": 0,
                                "cache_read": 0, "cache_create": 0, "input": 0,
                                "opus_cost": 0, "sonnet_cost": 0, "duration": 0})
-by_model = defaultdict(lambda: {"cost": 0, "sessions": 0})
+by_model = defaultdict(lambda: {"cost": 0, "turns": 0})
 cumulative = []
 
 running_cost = 0
 running_duration = 0
-for e in sorted(data, key=lambda x: (x.get("date", ""), x.get("session_id", ""))):
+sort_key = lambda x: (x.get("date", ""), x.get("session_id", ""), x.get("turn_index", 0))
+for e in sorted(data, key=sort_key):
     d = e.get("date", "unknown")
     cost = e.get("estimated_cost_usd", 0)
     model = e.get("model", "unknown")
     short = model.split("-20")[0] if "-20" in model else model
 
     by_date[d]["cost"] += cost
-    by_date[d]["sessions"] += 1
+    by_date[d]["turns"] += 1
     by_date[d]["output"] += e.get("output_tokens", 0)
     by_date[d]["cache_read"] += e.get("cache_read_tokens", 0)
     by_date[d]["cache_create"] += e.get("cache_creation_tokens", 0)
@@ -55,24 +57,26 @@ for e in sorted(data, key=lambda x: (x.get("date", ""), x.get("session_id", ""))
     by_date[d]["duration"] += e.get("duration_seconds", 0)
 
     by_model[short]["cost"] += cost
-    by_model[short]["sessions"] += 1
+    by_model[short]["turns"] += 1
 
     running_cost += cost
     running_duration += e.get("duration_seconds", 0)
     cumulative.append({"date": d, "cumulative_cost": round(running_cost, 4),
                        "cumulative_duration": round(running_duration),
-                       "session_id": e.get("session_id", "")[:8]})
+                       "session_id": e.get("session_id", "")[:8],
+                       "turn_index": e.get("turn_index", 0)})
 
 dates = sorted(by_date.keys())
 total_cost = sum(e.get("estimated_cost_usd", 0) for e in data)
-total_sessions = len(data)
-sessions_with_data = sum(1 for e in data if e.get("total_tokens", 0) > 0)
+total_turns = len(data)
+total_sessions = len({e.get("session_id") for e in data})
+sessions_with_data = len({e.get("session_id") for e in data if e.get("total_tokens", 0) > 0})
 total_output = sum(e.get("output_tokens", 0) for e in data)
 total_cache_read = sum(e.get("cache_read_tokens", 0) for e in data)
 total_all_tokens = sum(e.get("total_tokens", 0) for e in data)
 cache_pct = round(total_cache_read / total_all_tokens * 100, 1) if total_all_tokens > 0 else 0
 total_duration = sum(e.get("duration_seconds", 0) for e in data)
-avg_duration = total_duration // total_sessions if total_sessions > 0 else 0
+avg_duration = total_duration // total_turns if total_turns > 0 else 0
 
 project_name = data[0].get("project", "Project") if data else "Project"
 
@@ -166,43 +170,43 @@ total_prompts = sum(v["total"] for v in prompt_by_date.values())
 # Build JS data structures
 dates_js = json.dumps(dates)
 cost_by_date_js = json.dumps([round(by_date[d]["cost"], 4) for d in dates])
-sessions_by_date_js = json.dumps([by_date[d]["sessions"] for d in dates])
+sessions_by_date_js = json.dumps([by_date[d]["turns"] for d in dates])
 output_by_date_js = json.dumps([by_date[d]["output"] for d in dates])
 cache_read_by_date_js = json.dumps([by_date[d]["cache_read"] for d in dates])
 opus_by_date_js = json.dumps([round(by_date[d]["opus_cost"], 4) for d in dates])
 sonnet_by_date_js = json.dumps([round(by_date[d]["sonnet_cost"], 4) for d in dates])
 duration_by_date_js = json.dumps([by_date[d]["duration"] for d in dates])
 
-cumul_labels_js = json.dumps([f"{c['date']} #{i+1}" for i, c in enumerate(cumulative)])
+cumul_labels_js = json.dumps([f"{c['date']} {c['session_id']}#{c['turn_index']}" for c in cumulative])
 cumul_values_js = json.dumps([c["cumulative_cost"] for c in cumulative])
 cumul_duration_js = json.dumps([c["cumulative_duration"] for c in cumulative])
 
 avg_duration_by_date_js = json.dumps([
-    round(by_date[d]["duration"] / by_date[d]["sessions"])
-    if by_date[d]["sessions"] > 0 else 0
+    round(by_date[d]["duration"] / by_date[d]["turns"])
+    if by_date[d]["turns"] > 0 else 0
     for d in dates
 ])
 
 scatter_data_js = json.dumps([
     {"x": e.get("duration_seconds", 0),
      "y": round(e.get("estimated_cost_usd", 0), 4),
-     "label": f"{e.get('date', '')} {e.get('session_id', '')[:6]}"}
-    for e in sorted(data, key=lambda x: x.get("date", ""))
+     "label": f"{e.get('date', '')} {e.get('session_id', '')[:6]}#{e.get('turn_index', 0)}"}
+    for e in sorted(data, key=sort_key)
     if e.get("duration_seconds", 0) > 0
 ])
 
-# Tokens per minute per session (output tokens / duration in minutes)
+# Tokens per minute per turn (output tokens / duration in minutes)
 tpm_data_js = json.dumps([
     {"x": e.get("duration_seconds", 0),
      "y": round(e.get("output_tokens", 0) / (e["duration_seconds"] / 60), 1),
-     "label": f"{e.get('date', '')} {e.get('session_id', '')[:6]}"}
-    for e in sorted(data, key=lambda x: x.get("date", ""))
+     "label": f"{e.get('date', '')} {e.get('session_id', '')[:6]}#{e.get('turn_index', 0)}"}
+    for e in sorted(data, key=sort_key)
     if e.get("duration_seconds", 0) > 0 and e.get("output_tokens", 0) > 0
 ])
 
-# Duration histogram: bucket sessions into ranges
-_dur_buckets = [("0–2m", 0, 120), ("2–5m", 120, 300), ("5–15m", 300, 900),
-                ("15–30m", 900, 1800), ("30m+", 1800, None)]
+# Duration histogram: bucket turns into ranges
+_dur_buckets = [("<5s", 0, 5), ("5–15s", 5, 15), ("15–30s", 15, 30),
+                ("30s–2m", 30, 120), ("2m+", 120, None)]
 _dur_counts = {label: 0 for label, _, _ in _dur_buckets}
 for e in data:
     d = e.get("duration_seconds", 0)
@@ -217,7 +221,7 @@ dur_hist_values_js = json.dumps([_dur_counts[b[0]] for b in _dur_buckets])
 
 model_labels_js = json.dumps(list(by_model.keys()))
 model_costs_js = json.dumps([round(by_model[m]["cost"], 4) for m in by_model])
-model_sessions_js = json.dumps([by_model[m]["sessions"] for m in by_model])
+model_sessions_js = json.dumps([by_model[m]["turns"] for m in by_model])
 
 # All dates union for prompts vs total chart
 all_prompt_dates = sorted(set(list(prompt_by_date.keys()) + list(human_by_date.keys())))
@@ -326,7 +330,7 @@ html = f"""<!DOCTYPE html>
 <div class="stat">
   <div class="stat-label">Sessions</div>
   <div class="stat-value">{total_sessions}</div>
-  <div class="stat-sub">{sessions_with_data} with token data</div>
+  <div class="stat-sub">{total_turns} prompts total</div>
 </div>
 <div class="stat">
   <div class="stat-label">Output tokens</div>
@@ -339,9 +343,9 @@ html = f"""<!DOCTYPE html>
   <div class="stat-sub">of all tokens</div>
 </div>
 <div class="stat">
-  <div class="stat-label">Session time</div>
+  <div class="stat-label">Active time</div>
   <div class="stat-value">{format_duration(total_duration)}</div>
-  <div class="stat-sub">avg {format_duration(avg_duration)} / session</div>
+  <div class="stat-sub">avg {format_duration(avg_duration)} / prompt</div>
 </div>
 <div class="stat">
   <div class="stat-label">Key prompts captured</div>
@@ -370,7 +374,7 @@ html = f"""<!DOCTYPE html>
 </div>
 
 <div class="card">
-  <h2>Sessions per day</h2>
+  <h2>Prompts per day</h2>
   <canvas id="sessDay"></canvas>
 </div>
 
@@ -429,7 +433,7 @@ html = f"""<!DOCTYPE html>
 </div>
 
 <div class="card">
-  <h2>Session length distribution</h2>
+  <h2>Prompt length distribution</h2>
   <canvas id="durationDist"></canvas>
 </div>
 
@@ -524,12 +528,12 @@ new Chart(document.getElementById('costDay'), {{
     tooltip: {{ callbacks: {{ label: ctx => ' $' + ctx.parsed.y.toFixed(2) }} }} }} }}
 }});
 
-// Sessions per day
+// Prompts per day
 new Chart(document.getElementById('sessDay'), {{
   type: 'bar',
   data: {{
     labels: DATES,
-    datasets: [{{ label: 'Sessions', data: SESSIONS_BY_DATE,
+    datasets: [{{ label: 'Prompts', data: SESSIONS_BY_DATE,
       backgroundColor: '#22d3ee', borderRadius: 4 }}]
   }},
   options: baseOpts
@@ -605,7 +609,7 @@ new Chart(document.getElementById('cumulTime'), {{
 new Chart(document.getElementById('timeVsCost'), {{
   type: 'scatter',
   data: {{
-    datasets: [{{ label: 'Session', data: SCATTER_DATA,
+    datasets: [{{ label: 'Prompt', data: SCATTER_DATA,
       backgroundColor: '#34d399', pointRadius: 5, pointHoverRadius: 7 }}]
   }},
   options: {{ ...baseOpts,
@@ -629,7 +633,7 @@ new Chart(document.getElementById('timeVsCost'), {{
 new Chart(document.getElementById('tokensPerMin'), {{
   type: 'scatter',
   data: {{
-    datasets: [{{ label: 'Session', data: TPM_DATA,
+    datasets: [{{ label: 'Prompt', data: TPM_DATA,
       backgroundColor: '#818cf8', pointRadius: 5, pointHoverRadius: 7 }}]
   }},
   options: {{ ...baseOpts,
@@ -648,12 +652,12 @@ new Chart(document.getElementById('tokensPerMin'), {{
   }} }} }} }}
 }});
 
-// Session length distribution histogram
+// Prompt length distribution histogram
 new Chart(document.getElementById('durationDist'), {{
   type: 'bar',
   data: {{
     labels: DUR_HIST_LABELS,
-    datasets: [{{ label: 'Sessions', data: DUR_HIST_VALUES,
+    datasets: [{{ label: 'Prompts', data: DUR_HIST_VALUES,
       backgroundColor: '#34d399', borderRadius: 4 }}]
   }},
   options: {{ ...baseOpts,
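
A side effect of switching from sessions to turns is that the histogram buckets shrink from minutes to seconds. The lookup below sketches how a duration lands in a bucket, assuming half-open ranges (lower bound inclusive, None meaning unbounded); the bucketing loop itself falls outside this hunk, so that reading is an assumption:

```python
# New per-turn buckets from generate-charts; half-open ranges are assumed.
buckets = [("<5s", 0, 5), ("5–15s", 5, 15), ("15–30s", 15, 30),
           ("30s–2m", 30, 120), ("2m+", 120, None)]

def bucket_for(seconds):
    for label, lo, hi in buckets:
        if seconds >= lo and (hi is None or seconds < hi):
            return label

print(bucket_for(42))   # 30s–2m
print(bucket_for(300))  # 2m+
```
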
package/src/patch-durations.py CHANGED
@@ -1,6 +1,7 @@
 #!/usr/bin/env python3
 """
-Patch duration_seconds for existing tokens.json entries that have duration 0.
+Patch duration_seconds for per-turn entries that have duration 0,
+and migrate old single-entry-per-session entries to per-turn format.
 
 Usage:
     python3 patch-durations.py <project_root>
@@ -14,63 +15,187 @@ tokens_file = os.path.join(tracking_dir, "tokens.json")
 
 slug = project_root.replace("/", "-")
 transcripts_dir = os.path.expanduser("~/.claude/projects/" + slug)
+project_name = os.path.basename(project_root)
 
 with open(tokens_file) as f:
     data = json.load(f)
 
-patched = 0
-for entry in data:
-    sid = entry.get("session_id")
-    if not sid:
-        continue
-    jf = os.path.join(transcripts_dir, sid + ".jsonl")
-    if not os.path.exists(jf):
-        continue
-
+def parse_transcript(jf):
     msgs = []
+    usages = []
+    model = "unknown"
+    first_ts = None
     try:
         with open(jf) as f:
             for line in f:
                 try:
                     obj = json.loads(line)
-                    t = obj.get("type")
                     ts = obj.get("timestamp")
+                    if ts and first_ts is None:
+                        first_ts = ts
+                    t = obj.get("type")
                     if t == "user" and not obj.get("isSidechain") and ts:
                         msgs.append(("user", ts))
                     elif t == "assistant" and ts:
                         msgs.append(("assistant", ts))
+                        msg = obj.get("message", {})
+                        if isinstance(msg, dict) and msg.get("role") == "assistant":
+                            usage = msg.get("usage", {})
+                            if usage:
+                                usages.append(usage)
+                            m = msg.get("model", "")
+                            if m:
+                                model = m
                 except Exception:
                     pass
     except Exception:
+        pass
+    return msgs, usages, model, first_ts
+
+# Separate old-format (no turn_index) from new-format entries
+old_entries = [e for e in data if "turn_index" not in e]
+new_entries = [e for e in data if "turn_index" in e]
+
+# For new-format entries with duration 0, patch from transcript
+existing_keys = {(e.get("session_id"), e.get("turn_index")): i for i, e in enumerate(new_entries)}
+patched = 0
+
+for entry in new_entries:
+    if entry.get("duration_seconds", 0) > 0:
         continue
+    sid = entry.get("session_id")
+    turn_index = entry.get("turn_index", 0)
+    jf = os.path.join(transcripts_dir, sid + ".jsonl")
+    if not os.path.exists(jf):
+        continue
+
+    msgs, usages, model, first_ts = parse_transcript(jf)
 
-    duration = 0
+    # Walk to the target turn
+    ti = 0
     i = 0
     while i < len(msgs):
         if msgs[i][0] == "user":
             j = i + 1
             while j < len(msgs) and msgs[j][0] != "assistant":
                 j += 1
-            if j < len(msgs):
+            if j < len(msgs) and ti == turn_index:
                 try:
                     t0 = datetime.fromisoformat(msgs[i][1].replace("Z", "+00:00"))
                     t1 = datetime.fromisoformat(msgs[j][1].replace("Z", "+00:00"))
-                    duration += max(0, int((t1 - t0).total_seconds()))
+                    duration = max(0, int((t1 - t0).total_seconds()))
+                    if duration > 0:
+                        entry["duration_seconds"] = duration
+                        patched += 1
+                        print(f"  patched {sid[:8]}#{turn_index} {duration}s")
                 except Exception:
                     pass
-        i += 1
+                break
+            if j < len(msgs):
+                ti += 1
+                i = j + 1
+            else:
+                i += 1
+        else:
+            i += 1
+
+# Migrate old-format entries to per-turn
+migrated_sessions = 0
+new_turn_entries = []
+for old_entry in old_entries:
+    sid = old_entry.get("session_id")
+    if not sid:
+        continue
+    jf = os.path.join(transcripts_dir, sid + ".jsonl")
+    if not os.path.exists(jf):
+        # Keep old entry as-is if we can't reprocess
+        new_entries.append(old_entry)
+        continue
+
+    msgs, usages, model, first_ts = parse_transcript(jf)
+
+    turn_index = 0
+    usage_index = 0
+    i = 0
+    session_date = old_entry.get("date")
+
+    while i < len(msgs):
+        if msgs[i][0] == "user":
+            user_ts = msgs[i][1]
+            j = i + 1
+            while j < len(msgs) and msgs[j][0] != "assistant":
+                j += 1
+            if j < len(msgs):
+                asst_ts = msgs[j][1]
+                usage = {}
+                if usage_index < len(usages):
+                    usage = usages[usage_index]
+                    usage_index += 1
+
+                inp = usage.get("input_tokens", 0)
+                out = usage.get("output_tokens", 0)
+                cache_create = usage.get("cache_creation_input_tokens", 0)
+                cache_read = usage.get("cache_read_input_tokens", 0)
+                total = inp + cache_create + cache_read + out
+
+                if total > 0:
+                    duration = 0
+                    try:
+                        t0 = datetime.fromisoformat(user_ts.replace("Z", "+00:00"))
+                        t1 = datetime.fromisoformat(asst_ts.replace("Z", "+00:00"))
+                        duration = max(0, int((t1 - t0).total_seconds()))
+                    except Exception:
+                        pass
+
+                    if "opus" in model:
+                        cost = inp * 15 / 1e6 + cache_create * 18.75 / 1e6 + cache_read * 1.50 / 1e6 + out * 75 / 1e6
+                    else:
+                        cost = inp * 3 / 1e6 + cache_create * 3.75 / 1e6 + cache_read * 0.30 / 1e6 + out * 15 / 1e6
+
+                    try:
+                        turn_ts = datetime.fromisoformat(user_ts.replace("Z", "+00:00")).strftime("%Y-%m-%dT%H:%M:%SZ")
+                        turn_date = datetime.fromisoformat(user_ts.replace("Z", "+00:00")).strftime("%Y-%m-%d")
+                    except Exception:
+                        turn_ts = user_ts
+                        turn_date = session_date
+
+                    new_turn_entries.append({
+                        "date": turn_date,
+                        "project": project_name,
+                        "session_id": sid,
+                        "turn_index": turn_index,
+                        "turn_timestamp": turn_ts,
+                        "input_tokens": inp,
+                        "cache_creation_tokens": cache_create,
+                        "cache_read_tokens": cache_read,
+                        "output_tokens": out,
+                        "total_tokens": total,
+                        "estimated_cost_usd": round(cost, 4),
+                        "model": model,
+                        "duration_seconds": duration,
+                    })
+                    turn_index += 1
+                i = j + 1
+            else:
+                i += 1
+        else:
+            i += 1
+
+    if turn_index > 0:
+        migrated_sessions += 1
+        print(f"  migrated {sid[:8]} {turn_index} turn(s)")
+    else:
+        new_entries.append(old_entry)
 
-    if duration > 0:
-        entry["duration_seconds"] = duration
-        patched += 1
-        print(f"  {sid[:8]} {duration}s")
+new_entries.extend(new_turn_entries)
+new_entries.sort(key=lambda x: (x.get("date", ""), x.get("session_id", ""), x.get("turn_index", 0)))
 
-if patched > 0:
+if patched > 0 or migrated_sessions > 0:
     with open(tokens_file, "w") as f:
-        json.dump(data, f, indent=2)
+        json.dump(new_entries, f, indent=2)
         f.write("\n")
     script_dir = os.path.dirname(os.path.abspath(__file__))
     charts_html = os.path.join(tracking_dir, "charts.html")
     os.system(f'python3 "{script_dir}/generate-charts.py" "{tokens_file}" "{charts_html}" 2>/dev/null')
 
-print(f"{patched} session{'s' if patched != 1 else ''} patched.")
+print(f"{patched} turn(s) patched, {migrated_sessions} session(s) migrated to per-turn format.")
package/src/stop-hook.sh CHANGED
@@ -2,6 +2,25 @@
 set -euo pipefail
 
 SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+
+# --backfill-only: run backfill for current project and exit (used by SessionStart hook)
+if [[ "${1:-}" == "--backfill-only" ]]; then
+  INPUT="$(cat)"
+  CWD="$(echo "$INPUT" | python3 -c "import sys,json; print(json.load(sys.stdin).get('cwd',''))" 2>/dev/null || true)"
+  if [[ -z "$CWD" ]]; then exit 0; fi
+  PROJECT_ROOT="$CWD"
+  while [[ "$PROJECT_ROOT" != "/" ]]; do
+    [[ -d "$PROJECT_ROOT/.git" ]] && break
+    PROJECT_ROOT="$(dirname "$PROJECT_ROOT")"
+  done
+  if [[ "$PROJECT_ROOT" == "/" ]]; then exit 0; fi
+  TRACKING_DIR="$PROJECT_ROOT/.claude/tracking"
+  if [[ -d "$TRACKING_DIR" ]]; then
+    python3 "$SCRIPT_DIR/backfill.py" "$PROJECT_ROOT" 2>/dev/null || true
+  fi
+  exit 0
+fi
+
 INPUT="$(cat)"
 
 # Prevent loops
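
For reference, the inline python3 -c in the new branch just pulls cwd out of the JSON payload that gets piped to the hook on stdin; a sketch of that step (payload fields other than cwd are illustrative):

```python
import json

# Hypothetical SessionStart payload; only "cwd" is used by the branch above.
payload = '{"session_id": "6f2a91c0", "cwd": "/home/me/proj/subdir"}'
cwd = json.loads(payload).get("cwd", "")
print(cwd)  # /home/me/proj/subdir; the script then walks up from here to find .git
```
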
@@ -25,26 +44,26 @@ if [[ "$PROJECT_ROOT" == "/" ]]; then exit 0; fi
 
 TRACKING_DIR="$PROJECT_ROOT/.claude/tracking"
 
-# Auto-initialize if missing
+# Auto-initialize if missing, then backfill
 if [[ ! -d "$TRACKING_DIR" ]]; then
   bash "$SCRIPT_DIR/init-templates.sh" "$TRACKING_DIR"
+  python3 "$SCRIPT_DIR/backfill.py" "$PROJECT_ROOT" 2>/dev/null || true
 fi
 
-# Parse token usage from JSONL and update tokens.json
+# Parse token usage from JSONL — emit one entry per turn, upsert into tokens.json
 python3 - "$TRANSCRIPT" "$TRACKING_DIR/tokens.json" "$SESSION_ID" "$(basename "$PROJECT_ROOT")" <<'PYEOF'
 import sys, json, os
-from datetime import date, datetime
+from datetime import datetime, date
 
 transcript_path = sys.argv[1]
 tokens_file = sys.argv[2]
 session_id = sys.argv[3]
 project_name = sys.argv[4]
-today = date.today().isoformat()
 
-# Sum all token usage from assistant messages in this session
-inp = out = cache_create = cache_read = 0
+msgs = []    # (role, timestamp)
+usages = []  # usage dicts from assistant messages, in order
 model = "unknown"
-msgs = []
+
 with open(transcript_path) as f:
     for line in f:
         try:
@@ -59,40 +78,84 @@ with open(transcript_path) as f:
             if isinstance(msg, dict) and msg.get('role') == 'assistant':
                 usage = msg.get('usage', {})
                 if usage:
-                    inp += usage.get('input_tokens', 0)
-                    out += usage.get('output_tokens', 0)
-                    cache_create += usage.get('cache_creation_input_tokens', 0)
-                    cache_read += usage.get('cache_read_input_tokens', 0)
+                    usages.append(usage)
                 m = msg.get('model', '')
                 if m:
                     model = m
         except:
             pass
 
-# Compute active time: sum of (first assistant reply - user message) per turn
-duration = 0
+# Build per-turn entries
+turn_entries = []
+turn_index = 0
+usage_index = 0
 i = 0
 while i < len(msgs):
     if msgs[i][0] == 'user':
+        user_ts = msgs[i][1]
         j = i + 1
         while j < len(msgs) and msgs[j][0] != 'assistant':
             j += 1
         if j < len(msgs):
-            try:
-                t0 = datetime.fromisoformat(msgs[i][1].replace('Z', '+00:00'))
-                t1 = datetime.fromisoformat(msgs[j][1].replace('Z', '+00:00'))
-                duration += max(0, int((t1 - t0).total_seconds()))
-            except:
-                pass
-    i += 1
-
-total = inp + cache_create + cache_read + out
-if 'opus' in model:
-    cost = inp * 15 / 1e6 + cache_create * 18.75 / 1e6 + cache_read * 1.50 / 1e6 + out * 75 / 1e6
-else:
-    cost = inp * 3 / 1e6 + cache_create * 3.75 / 1e6 + cache_read * 0.30 / 1e6 + out * 15 / 1e6
-
-# Load or create tokens.json
+            asst_ts = msgs[j][1]
+            usage = {}
+            if usage_index < len(usages):
+                usage = usages[usage_index]
+                usage_index += 1
+
+            inp = usage.get('input_tokens', 0)
+            out = usage.get('output_tokens', 0)
+            cache_create = usage.get('cache_creation_input_tokens', 0)
+            cache_read = usage.get('cache_read_input_tokens', 0)
+            total = inp + cache_create + cache_read + out
+
+            if total > 0:
+                duration = 0
+                try:
+                    t0 = datetime.fromisoformat(user_ts.replace('Z', '+00:00'))
+                    t1 = datetime.fromisoformat(asst_ts.replace('Z', '+00:00'))
+                    duration = max(0, int((t1 - t0).total_seconds()))
+                except:
+                    pass
+
+                if 'opus' in model:
+                    cost = inp * 15 / 1e6 + cache_create * 18.75 / 1e6 + cache_read * 1.50 / 1e6 + out * 75 / 1e6
+                else:
+                    cost = inp * 3 / 1e6 + cache_create * 3.75 / 1e6 + cache_read * 0.30 / 1e6 + out * 15 / 1e6
+
+                try:
+                    turn_ts = datetime.fromisoformat(user_ts.replace('Z', '+00:00')).strftime('%Y-%m-%dT%H:%M:%SZ')
+                    turn_date = datetime.fromisoformat(user_ts.replace('Z', '+00:00')).strftime('%Y-%m-%d')
+                except:
+                    turn_ts = user_ts
+                    turn_date = date.today().isoformat()
+
+                turn_entries.append({
+                    'date': turn_date,
+                    'project': project_name,
+                    'session_id': session_id,
+                    'turn_index': turn_index,
+                    'turn_timestamp': turn_ts,
+                    'input_tokens': inp,
+                    'cache_creation_tokens': cache_create,
+                    'cache_read_tokens': cache_read,
+                    'output_tokens': out,
+                    'total_tokens': total,
+                    'estimated_cost_usd': round(cost, 4),
+                    'model': model,
+                    'duration_seconds': duration,
+                })
+                turn_index += 1
+            i = j + 1
+        else:
+            i += 1
+    else:
+        i += 1
+
+if not turn_entries:
+    sys.exit(0)
+
+# Load existing data
 data = []
 if os.path.exists(tokens_file):
     try:
@@ -101,30 +164,39 @@ if os.path.exists(tokens_file):
     except:
         data = []
 
-# Build entry
-entry = {
-    "date": today,
-    "project": project_name,
-    "session_id": session_id,
-    "input_tokens": inp,
-    "cache_creation_tokens": cache_create,
-    "cache_read_tokens": cache_read,
-    "output_tokens": out,
-    "total_tokens": total,
-    "estimated_cost_usd": round(cost, 4),
-    "model": model,
-    "duration_seconds": duration
-}
-
-# Update existing or append new
-found = False
-for i, e in enumerate(data):
-    if e.get('session_id') == session_id:
-        data[i] = entry
-        found = True
+# Build index of existing (session_id, turn_index) -> position
+existing_idx = {}
+for pos, e in enumerate(data):
+    key = (e.get('session_id'), e.get('turn_index'))
+    existing_idx[key] = pos
+
+# Check if anything actually changed
+changed = False
+for entry in turn_entries:
+    key = (entry['session_id'], entry['turn_index'])
+    if key not in existing_idx:
+        changed = True
         break
-if not found:
-    data.append(entry)
+    existing = data[existing_idx[key]]
+    if (existing.get('total_tokens') != entry['total_tokens'] or
+            existing.get('output_tokens') != entry['output_tokens']):
+        changed = True
+        break
+
+if not changed:
+    sys.exit(0)
+
+# Upsert: update existing entries or append new ones
+for entry in turn_entries:
+    key = (entry['session_id'], entry['turn_index'])
+    if key in existing_idx:
+        data[existing_idx[key]] = entry
+    else:
+        data.append(entry)
+        existing_idx[key] = len(data) - 1
+
+# Sort by (date, session_id, turn_index)
+data.sort(key=lambda x: (x.get('date', ''), x.get('session_id', ''), x.get('turn_index', 0)))
 
 with open(tokens_file, 'w') as f:
     json.dump(data, f, indent=2)
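
The pricing arithmetic repeated in backfill.py, patch-durations.py, and this hook charges per million tokens: $3 input, $3.75 cache write, $0.30 cache read, and $15 output on the non-Opus branch ($15 / $18.75 / $1.50 / $75 on the Opus branch). A worked example with made-up counts for one turn:

```python
# Made-up token counts; formula copied from the non-Opus branch above.
inp, cache_create, cache_read, out = 200, 10_000, 80_000, 1_500

cost = (inp * 3 / 1e6                 # $3.00 per 1M input tokens   -> 0.0006
        + cache_create * 3.75 / 1e6   # $3.75 per 1M cache writes   -> 0.0375
        + cache_read * 0.30 / 1e6     # $0.30 per 1M cache reads    -> 0.0240
        + out * 15 / 1e6)             # $15.00 per 1M output tokens -> 0.0225
print(round(cost, 4))  # 0.0846
```
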