codex-meter 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
codex_meter/parser.py ADDED
@@ -0,0 +1,498 @@
1
+ """Codex session JSONL + state DB parser."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import datetime as dt
6
+ import json
7
+ import sqlite3
8
+ import tomllib
9
+ from collections.abc import Iterable
10
+ from contextlib import closing
11
+ from dataclasses import asdict, replace
12
+ from pathlib import Path
13
+
14
+ from codex_meter.models import (
15
+ LoadResult,
16
+ RateLimitSample,
17
+ RuntimeOptions,
18
+ ThreadMeta,
19
+ TierOverride,
20
+ Usage,
21
+ UsageEvent,
22
+ )
23
+ from codex_meter.parse_cache import ParseCache
24
+ from codex_meter.pricing import normalize_model, normalize_service_tier
25
+ from codex_meter.timeutil import parse_datetime, parse_event_timestamp
26
+
27
+ PARSER_CACHE_VERSION = 3  # stored as "version" in every parse-cache signature
28
+
29
+
30
def session_files(session_root: Path) -> Iterable[Path]:
    """Return every ``*.jsonl`` file below *session_root*, sorted by path.

    A root that does not exist produces an empty list instead of an error.
    """
    if session_root.exists():
        return sorted(session_root.glob("**/*.jsonl"))
    return []
34
+
35
+
36
def session_id_from_path(path: Path) -> str:
    """Derive a session id from a session file's name.

    The id is the file stem with one leading ``rollout-`` removed when present.
    """
    stem = path.stem
    marker = "rollout-"
    if stem.startswith(marker):
        return stem[len(marker):]
    return stem
38
+
39
+
40
def _safe_int(value: object) -> int:
    """Coerce *value* to ``int``, falling back to 0 when that is impossible.

    Falsy inputs (``None``, ``""``, ``0``) map to 0. Values that ``int()``
    rejects, including non-finite floats, also map to 0 rather than raising.
    """
    try:
        return int(value or 0)
    except (TypeError, ValueError, OverflowError):
        # OverflowError covers int(float("inf")); json.loads produces such a
        # float for ``Infinity`` and for out-of-range literals like ``1e999``.
        return 0
45
+
46
+
47
def usage_key(path: Path, timestamp: str, usage: Usage) -> tuple:
    """Build the tuple used to recognise a repeated usage record.

    The key is the stringified path, the timestamp text, and the five token
    counters of *usage* in a fixed order.
    """
    counters = (
        usage.input_tokens,
        usage.cached_input_tokens,
        usage.output_tokens,
        usage.reasoning_output_tokens,
        usage.total_tokens,
    )
    return (str(path), timestamp, *counters)
57
+
58
+
59
def _usage_delta(current: Usage, previous: Usage | None) -> Usage:
    """Return the usage added between two cumulative totals.

    *current* is returned unchanged when there is no *previous* total, or when
    any counter in *current* is lower than in *previous*. Otherwise each
    counter of the result is the non-negative difference of the two totals.
    """
    if previous is None:
        return current
    counters = (
        "input_tokens",
        "cached_input_tokens",
        "output_tokens",
        "reasoning_output_tokens",
        "total_tokens",
    )
    if any(getattr(current, name) < getattr(previous, name) for name in counters):
        return current
    differences = {
        name: max(0, getattr(current, name) - getattr(previous, name)) for name in counters
    }
    return Usage(**differences)
79
+
80
+
81
def _is_total_reset(current: Usage, previous: Usage) -> bool:
    """Report whether any token counter in *current* is lower than in *previous*."""
    counters = (
        "input_tokens",
        "cached_input_tokens",
        "output_tokens",
        "reasoning_output_tokens",
        "total_tokens",
    )
    return any(getattr(current, name) < getattr(previous, name) for name in counters)
89
+
90
+
91
def token_usage_from_info(
    info: dict,
    previous_total: Usage | None,
) -> tuple[Usage, Usage | None, str, int, bool]:
    """Work out the usage carried by one ``info`` object of a token-count event.

    Args:
        info: The ``info`` mapping; ``last_token_usage``, ``total_token_usage``
            and ``model_context_window`` are the keys that are read.
        previous_total: Running total after the preceding event, if any.

    Returns:
        ``(usage, new_total, source, model_context_window, reset)`` where
        *source* is ``"last_token_usage"``, ``"total_delta"``,
        ``"total_delta_reset"`` or ``""`` (neither usage field was a mapping),
        and *reset* is only ever true on the total-delta path.
    """
    context_window = _safe_int(info.get("model_context_window"))
    raw_total = info.get("total_token_usage")
    reported_total = Usage.from_dict(raw_total) if isinstance(raw_total, dict) else None
    raw_last = info.get("last_token_usage")

    # Preferred source: the event states its own usage directly.
    if isinstance(raw_last, dict):
        last = Usage.from_dict(raw_last)
        if reported_total is not None:
            running = reported_total
        elif previous_total is None:
            running = last
        else:
            # No total reported: extend the previous running total ourselves.
            running = Usage(
                input_tokens=previous_total.input_tokens + last.input_tokens,
                cached_input_tokens=previous_total.cached_input_tokens + last.cached_input_tokens,
                output_tokens=previous_total.output_tokens + last.output_tokens,
                reasoning_output_tokens=previous_total.reasoning_output_tokens
                + last.reasoning_output_tokens,
                total_tokens=previous_total.total_tokens + last.total_tokens,
            )
        return last, running, "last_token_usage", context_window, False

    # Nothing usable in the event: zero usage, running total left untouched.
    if reported_total is None:
        return Usage(), previous_total, "", context_window, False

    # Only a cumulative total is available: derive usage from its change.
    was_reset = previous_total is not None and _is_total_reset(reported_total, previous_total)
    source = "total_delta_reset" if was_reset else "total_delta"
    delta = _usage_delta(reported_total, previous_total)
    return delta, reported_total, source, context_window, was_reset
128
+
129
+
130
def load_thread_metadata(state_db: Path) -> dict[str, ThreadMeta]:
    """Read per-thread metadata from the ``threads`` table of the state database.

    The database is opened read-only. Every column except ``rollout_path`` is
    optional: a missing text column is read as ``''`` and a missing integer
    column as ``0``.

    Args:
        state_db: Path to the SQLite state database.

    Returns:
        A mapping from each row's ``rollout_path`` and from that path's bare
        file name to its ``ThreadMeta``. Empty when the file does not exist,
        when ``threads`` has no ``rollout_path`` column, or when SQLite
        reports an error.
    """
    if not state_db.exists():
        return {}
    # Path.as_uri() percent-encodes characters that are special inside a URI
    # ("#", "?", "%", spaces). Interpolating the raw path would let a "#" cut
    # off the query string, silently dropping mode=ro and opening another file.
    db_uri = f"{state_db.absolute().as_uri()}?mode=ro"
    try:
        with closing(sqlite3.connect(db_uri, uri=True)) as conn:
            columns = {row[1] for row in conn.execute("pragma table_info(threads)").fetchall()}
            if "rollout_path" not in columns:
                return {}

            def text_col(name: str) -> str:
                return f"coalesce({name}, '')" if name in columns else "''"

            def int_col(name: str) -> str:
                return f"coalesce({name}, 0)" if name in columns else "0"

            # Only the hard-coded column names below are interpolated into the
            # statement; no external input reaches the SQL text.
            rows = conn.execute(
                f"""
                select
                    rollout_path,
                    {text_col("title")},
                    {text_col("first_user_message")},
                    {text_col("cwd")},
                    {text_col("git_branch")},
                    {text_col("git_origin_url")},
                    {text_col("model")},
                    {text_col("reasoning_effort")},
                    {int_col("created_at")},
                    {int_col("updated_at")}
                from threads
                """
            ).fetchall()
    except sqlite3.Error:
        return {}
    metas: dict[str, ThreadMeta] = {}
    for row in rows:
        rollout_path = str(row[0])
        meta = ThreadMeta(
            rollout_path=rollout_path,
            title=str(row[1]),
            first_user_message=str(row[2]),
            cwd=str(row[3]),
            git_branch=str(row[4]),
            git_origin_url=str(row[5]),
            model=str(row[6]),
            reasoning_effort=str(row[7]),
            created_at=int(row[8] or 0),
            updated_at=int(row[9] or 0),
        )
        # Register under both the stored path and its file name so a lookup
        # by either form finds the same entry.
        metas[rollout_path] = meta
        metas[Path(rollout_path).name] = meta
    return metas
180
+
181
+
182
+ def current_config_service_tier(config_path: Path) -> str:
183
+ try:
184
+ raw = tomllib.loads(config_path.read_text(errors="replace"))
185
+ except (OSError, tomllib.TOMLDecodeError):
186
+ return ""
187
+ tier = normalize_service_tier(raw.get("service_tier"))
188
+ if tier:
189
+ return tier
190
+ features = raw.get("features")
191
+ if isinstance(features, dict) and features.get("fast_mode") is True:
192
+ return "fast"
193
+ return tier
194
+
195
+
196
def load_tier_overrides(path: Path | None) -> list[TierOverride]:
    """Load service-tier overrides from a JSON file.

    The file holds either a list of override objects or an object whose
    ``overrides`` key holds that list. Each override names a tier
    (``service_tier`` or ``tier``; must normalise to ``standard`` or ``fast``)
    and may carry ``session``, ``start`` and ``end``.

    Args:
        path: Override file, or ``None`` for "no overrides".

    Returns:
        The parsed overrides, with ``start``/``end`` converted to UTC.

    Raises:
        ValueError: If the file cannot be read or decoded, is not valid JSON,
            has the wrong shape, names an unsupported tier, or has
            ``start >= end``.
    """
    if path is None:
        return []
    try:
        # JSON text is UTF-8; "utf-8-sig" additionally accepts a leading BOM,
        # which json.loads would otherwise reject. Decode failures get the
        # same friendly error as any other unreadable file.
        raw = json.loads(path.expanduser().read_text(encoding="utf-8-sig"))
    except (OSError, UnicodeDecodeError, json.JSONDecodeError) as exc:
        raise ValueError(f"Could not read tier override file {path}: {exc}") from exc
    items = raw.get("overrides", raw) if isinstance(raw, dict) else raw
    if not isinstance(items, list):
        raise ValueError("--tier-overrides must be a JSON list or an object with an overrides list")
    overrides: list[TierOverride] = []
    for item in items:
        if not isinstance(item, dict):
            raise ValueError("Each tier override must be an object")
        tier = normalize_service_tier(str(item.get("service_tier") or item.get("tier") or ""))
        if tier not in {"standard", "fast"}:
            raise ValueError("Each tier override must set service_tier to standard or fast")
        start = parse_datetime(str(item["start"])) if item.get("start") else None
        end = parse_datetime(str(item["end"])) if item.get("end") else None
        if start and end and start >= end:
            raise ValueError("Tier override start must be before end")
        overrides.append(
            TierOverride(
                service_tier=tier,
                session=str(item["session"]) if item.get("session") else None,
                start=start.astimezone(dt.UTC) if start else None,
                end=end.astimezone(dt.UTC) if end else None,
            )
        )
    return overrides
226
+
227
+
228
def tier_override_for(path: Path, event_time: dt.datetime, overrides: list[TierOverride]) -> str:
    """Return the tier of the first override that applies, or ``""`` if none does.

    An override with a ``session`` applies only when that text equals the full
    path, equals the file name, or is a suffix of the full path. ``start`` is
    inclusive and ``end`` is exclusive; either bound may be absent.
    """
    full_path = str(path)
    for candidate in overrides:
        wanted = candidate.session
        if wanted:
            same_session = wanted in (full_path, path.name) or full_path.endswith(wanted)
            if not same_session:
                continue
        outside_window = (candidate.start and event_time < candidate.start) or (
            candidate.end and event_time >= candidate.end
        )
        if outside_window:
            continue
        return candidate.service_tier
    return ""
243
+
244
+
245
def service_tier_for_event(
    path: Path,
    event_time: dt.datetime,
    logged_tier: str,
    options: RuntimeOptions,
    config_tier: str,
    overrides: list[TierOverride],
) -> tuple[str, str]:
    """Pick the service tier for one event and say where it came from.

    Sources are tried in this order, the first match winning:
    an explicit ``options.service_tier`` (anything but ``"auto"``), the
    override file, the tier logged in the session, and finally the
    ``options.unknown_service_tier`` fallback.

    Returns:
        ``(tier, source)`` with *source* one of ``"cli-override"``,
        ``"override-file"``, ``"logged"``, ``"current-config"`` or
        ``"assumed"``.
    """
    if options.service_tier != "auto":
        return options.service_tier, "cli-override"

    from_file = tier_override_for(path, event_time, overrides)
    if from_file:
        return from_file, "override-file"

    if logged_tier:
        return logged_tier, "logged"

    fallback = options.unknown_service_tier
    if fallback != "current-config":
        return fallback, "assumed"
    # "current-config" means: use the configured tier, or standard without one.
    if config_tier:
        return config_tier, "current-config"
    return "standard", "assumed"
265
+
266
+
267
def update_context_from_event(
    event: dict, current: ThreadMeta, current_tier: str
) -> tuple[ThreadMeta, str]:
    """Fold one session-log event into the running model/effort/tier context.

    Handled events:

    * ``turn_context``: may update model, reasoning effort and tier.
    * ``session_meta`` and ``event_msg``/``session_configured``: may update
      the tier.
    * ``event_msg``/``user_message`` starting with ``/fast on`` or
      ``/fast off`` (case-insensitive, leading whitespace ignored): sets the
      tier to ``fast`` or ``standard``.

    Anything else, including values that are not JSON objects where an object
    is expected, leaves the context unchanged.

    Returns:
        ``(thread_meta, tier)`` after applying the event.
    """
    # A log line can be valid JSON without being an object; ignore those
    # instead of failing on ``.get``.
    if not isinstance(event, dict):
        return current, current_tier
    payload = event.get("payload")
    if not isinstance(payload, dict):
        payload = {}
    event_type = event.get("type")
    payload_type = payload.get("type")

    if event_type == "turn_context":
        model = payload.get("model") or current.model
        effort = payload.get("effort") or current.reasoning_effort
        collaboration = payload.get("collaboration_mode")
        settings = collaboration.get("settings") if isinstance(collaboration, dict) else None
        if isinstance(settings, dict):
            # The collaboration-mode setting takes precedence over ``effort``.
            effort = settings.get("reasoning_effort") or effort
        tier = normalize_service_tier(payload.get("service_tier")) or current_tier
        return replace(current, model=str(model or ""), reasoning_effort=str(effort or "")), tier

    if event_type == "session_meta":
        tier = normalize_service_tier(payload.get("service_tier")) or current_tier
        return current, tier

    if event_type == "event_msg" and payload_type == "session_configured":
        tier = normalize_service_tier(payload.get("service_tier")) or current_tier
        return current, tier

    if event_type == "event_msg" and payload_type == "user_message":
        message = str(payload.get("message") or "").strip().lower()
        if message.startswith("/fast on"):
            return current, "fast"
        if message.startswith("/fast off"):
            return current, "standard"

    return current, current_tier
298
+
299
+
300
def rate_limit_sample(*, path: Path, event_time: dt.datetime, rate_limits: dict) -> RateLimitSample:
    """Turn one ``rate_limits`` mapping into a ``RateLimitSample``.

    ``used_percent``, ``window_minutes`` and ``resets_at`` are copied from the
    ``primary`` and ``secondary`` entries; a missing or falsy entry leaves its
    three fields as ``None``. ``plan_type`` and ``rate_limit_reached_type``
    are stringified, with a missing or falsy value becoming ``""``.
    """
    window_fields = {}
    for label in ("primary", "secondary"):
        window = rate_limits.get(label) or {}
        for attr in ("used_percent", "window_minutes", "resets_at"):
            window_fields[f"{label}_{attr}"] = window.get(attr)
    return RateLimitSample(
        timestamp=event_time,
        path=path,
        session_id=session_id_from_path(path),
        plan_type=str(rate_limits.get("plan_type") or ""),
        credits=rate_limits.get("credits"),
        rate_limit_reached_type=str(rate_limits.get("rate_limit_reached_type") or ""),
        **window_fields,
    )
317
+
318
+
319
def load_usage(options: RuntimeOptions) -> LoadResult:
    """Collect usage events and rate-limit samples for the requested window.

    Every session file under ``options.session_root`` is parsed (or taken from
    the parse cache when ``options.parse_cache`` is set). Events and samples
    are kept only when ``options.start <= timestamp < options.end``; with
    ``options.dedupe`` set, repeated events are counted and dropped.

    Returns:
        A ``LoadResult`` holding the time-sorted events and samples, the
        duplicate count, a per-tier-source event count, the plan types seen,
        and any warnings raised along the way.
    """
    window_start = options.start.astimezone(dt.UTC)
    window_end = options.end.astimezone(dt.UTC)
    config_tier = current_config_service_tier(options.config_path)
    overrides = load_tier_overrides(options.tier_overrides)
    metadata = load_thread_metadata(options.state_db)

    events: list[UsageEvent] = []
    duplicates = 0
    seen: set[tuple] = set()
    tier_sources: dict[str, int] = {}
    plan_types: set[str] = set()
    credit_samples: list[RateLimitSample] = []
    warnings: list[str] = []
    reset_warnings: set[Path] = set()
    cache = ParseCache.default() if options.parse_cache else None

    if not options.session_root.exists():
        warnings.append(f"Session root does not exist: {options.session_root}")

    for path in session_files(options.session_root):
        path_text = str(path)
        # Prefer metadata stored under the full path, then under the file
        # name, and fall back to a bare record carrying only the path.
        thread_meta = metadata.get(
            path_text, metadata.get(path.name, ThreadMeta(rollout_path=path_text))
        )
        signature = _parse_cache_signature(options, config_tier, overrides, thread_meta)
        parsed = cache.get(path, signature) if cache else None
        if parsed is None:
            parsed = list(
                _parse_session(
                    path,
                    thread_meta=thread_meta,
                    options=options,
                    config_tier=config_tier,
                    overrides=overrides,
                )
            )
            if cache:
                cache.put(path, signature, parsed)

        for usage_event, reset, sample in parsed:
            # Samples are windowed on their own timestamp, independently of
            # whether the accompanying usage event is kept.
            if sample is not None and window_start <= sample.timestamp < window_end:
                credit_samples.append(sample)
                if sample.plan_type:
                    plan_types.add(sample.plan_type)
            if usage_event is None:
                continue
            if not (window_start <= usage_event.timestamp < window_end):
                continue
            if reset and path not in reset_warnings:
                # Warn at most once per file.
                warnings.append(f"Token counter reset detected in {path}; used current totals")
                reset_warnings.add(path)
            key = usage_key(path, usage_event.timestamp.isoformat(), usage_event.usage)
            if options.dedupe and key in seen:
                duplicates += 1
                continue
            seen.add(key)
            source = usage_event.tier_source
            tier_sources[source] = tier_sources.get(source, 0) + 1
            events.append(usage_event)

    credit_samples.sort(key=lambda item: item.timestamp)
    events.sort(key=lambda item: item.timestamp)
    return LoadResult(
        events=events,
        duplicates=duplicates,
        tier_sources=tier_sources,
        plan_types=plan_types,
        credit_samples=credit_samples,
        warnings=warnings,
    )
388
+
389
+
390
def _parse_cache_signature(
    options: RuntimeOptions,
    config_tier: str,
    overrides: list[TierOverride],
    thread_meta: ThreadMeta,
) -> str:
    """Serialise the inputs that shape a parsed session into a signature string.

    The signature is key-sorted JSON covering the parser cache version, the
    tier-related options, the default model, the configured tier, every
    override, and all fields of *thread_meta*.
    """

    def iso_or_none(moment):
        return moment.isoformat() if moment else None

    override_entries = [
        {
            "service_tier": override.service_tier,
            "session": override.session,
            "start": iso_or_none(override.start),
            "end": iso_or_none(override.end),
        }
        for override in overrides
    ]
    signature = {
        "version": PARSER_CACHE_VERSION,
        "service_tier": options.service_tier,
        "unknown_service_tier": options.unknown_service_tier,
        "default_model": options.default_model,
        "config_tier": config_tier,
        "overrides": override_entries,
        "thread": asdict(thread_meta),
    }
    return json.dumps(signature, sort_keys=True)
414
+
415
+
416
def _parse_session(
    path: Path,
    *,
    thread_meta: ThreadMeta,
    options: RuntimeOptions,
    config_tier: str,
    overrides: list[TierOverride],
) -> Iterable[tuple[UsageEvent | None, bool, RateLimitSample | None]]:
    """Stream the token-count events of one session JSONL file.

    Lines are processed in order so that model, reasoning effort and logged
    tier reflect everything seen before each token-count event. Lines that are
    not valid JSON, or whose JSON value is not an object, are skipped, as are
    token-count events without a parseable timestamp. A file that cannot be
    opened yields nothing.

    Yields:
        ``(usage_event, total_reset, sample)`` per token-count event.
        ``usage_event`` is ``None`` when the event carries zero usage;
        ``sample`` is ``None`` when the event has no rate-limit data.
    """
    try:
        handle = path.open(encoding="utf-8", errors="replace")
    except OSError:
        return
    current_meta = thread_meta
    logged_tier = ""
    previous_total_usage: Usage | None = None
    with handle:
        for line in handle:
            try:
                event = json.loads(line)
            except json.JSONDecodeError:
                continue
            # Valid JSON that is not an object (``[]``, ``123``, ``null``)
            # cannot be an event; skip it rather than fail on ``.get``.
            if not isinstance(event, dict):
                continue
            current_meta, logged_tier = update_context_from_event(event, current_meta, logged_tier)
            if event.get("type") != "event_msg":
                continue
            payload = event.get("payload")
            if not isinstance(payload, dict) or payload.get("type") != "token_count":
                continue
            event_time = parse_event_timestamp(event.get("timestamp") or "")
            if event_time is None:
                continue
            info = payload.get("info") or {}
            usage, previous_total_usage, usage_source, model_context_window, total_reset = (
                token_usage_from_info(info if isinstance(info, dict) else {}, previous_total_usage)
            )

            rate_limits = payload.get("rate_limits")
            if not isinstance(rate_limits, dict):
                rate_limits = {}
            sample = (
                rate_limit_sample(path=path, event_time=event_time, rate_limits=rate_limits)
                if rate_limits
                else None
            )

            if usage.is_zero():
                # Nothing to bill, but the rate-limit sample is still reported.
                yield None, total_reset, sample
                continue

            model = (
                normalize_model(current_meta.model)
                or normalize_model(thread_meta.model)
                or normalize_model(options.default_model)
            )
            tier, tier_source = service_tier_for_event(
                path=path,
                event_time=event_time,
                logged_tier=logged_tier,
                options=options,
                config_tier=config_tier,
                overrides=overrides,
            )
            primary = rate_limits.get("primary") or {}
            secondary = rate_limits.get("secondary") or {}
            usage_event = UsageEvent(
                timestamp=event_time,
                path=path,
                session_id=session_id_from_path(path),
                usage=usage,
                model=model,
                service_tier=tier,
                tier_source=tier_source,
                thread=current_meta,
                usage_source=usage_source,
                model_context_window=model_context_window,
                plan_type=str(rate_limits.get("plan_type") or ""),
                credits=rate_limits.get("credits"),
                primary_used_percent=primary.get("used_percent"),
                primary_window_minutes=primary.get("window_minutes"),
                primary_resets_at=primary.get("resets_at"),
                secondary_used_percent=secondary.get("used_percent"),
                secondary_window_minutes=secondary.get("window_minutes"),
                secondary_resets_at=secondary.get("resets_at"),
                rate_limit_reached_type=str(rate_limits.get("rate_limit_reached_type") or ""),
            )
            yield usage_event, total_reset, sample