cctx-cli 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,690 @@
1
+ """Claude Code JSONL session parser."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ from datetime import datetime, timezone
7
+ from pathlib import Path
8
+
9
+ from cctx.models import (
10
+ Attachment,
11
+ ParserError,
12
+ ParserWarning,
13
+ RawToolResultFile,
14
+ SessionTrace,
15
+ ToolResult,
16
+ ToolUse,
17
+ Turn,
18
+ Usage,
19
+ )
20
+
21
# JSONL line types that parse_session recognises and drops without recording
# an "unknown_type" warning.
_BOOKKEEPING_TYPES = frozenset(
    (
        "last-prompt",
        "permission-mode",
        "ai-title",
        "custom-title",
        "queue-operation",
        "file-history-snapshot",
        "pr-link",
    )
)
32
+
33
+
34
def parse_session(
    session_path: Path,
    *,
    max_subagent_depth: int = 4,
    _depth: int = 0,
    _parent_session_id: str | None = None,
) -> SessionTrace:
    """Parse a Claude Code session JSONL file, and its subagents, into a SessionTrace.

    Args:
        session_path: The session ``.jsonl`` file, or a directory that is
            resolved to the sibling ``<name>.jsonl`` file.
        max_subagent_depth: Nesting depth at which subagent recursion stops.
        _depth: Internal recursion depth; 0 for the top-level session.
        _parent_session_id: Internal; id of the parent session when this call
            parses a subagent file.

    Returns:
        The populated SessionTrace. Recoverable problems (malformed lines,
        unknown line types, orphaned links, ...) are collected in its
        ``warnings`` list rather than raised.

    Raises:
        ParserError: If the resolved JSONL file does not exist.
    """
    session_path = Path(session_path)
    jsonl_path = _resolve_jsonl_path(session_path)

    if not jsonl_path.exists():
        raise ParserError(
            path=jsonl_path,
            line_number=None,
            reason=f"file not found: {jsonl_path}",
        )

    session_id = jsonl_path.stem
    project_dir = jsonl_path.parent
    project_path = _decode_project_path(project_dir.name)

    turns: list[Turn] = []
    attachments: list[Attachment] = []
    warnings: list[ParserWarning] = []
    claude_code_version: str | None = None
    observed_cwd: str | None = None
    # uuids of every parsed line, not just the ones that become turns.
    line_uuids: set[str] = set()

    for line_number, raw, truncated, had_encoding_error in _iter_lines(jsonl_path):
        if had_encoding_error:
            warnings.append(
                ParserWarning(
                    code="encoding_error",
                    detail=f"non-UTF-8 bytes replaced on line {line_number}",
                    line_number=line_number,
                    path=jsonl_path,
                )
            )
        if not isinstance(raw, dict):
            # None means the line did not parse; any other non-dict value is
            # valid JSON that is not an object and cannot be a session record.
            # A truncated final line is dropped silently.
            if not truncated:
                warnings.append(
                    ParserWarning(
                        code="malformed_json",
                        detail=(
                            "failed to parse JSON"
                            if raw is None
                            else "JSON value is not an object"
                        ),
                        line_number=line_number,
                        path=jsonl_path,
                    )
                )
            continue

        line_uuid = raw.get("uuid")
        if isinstance(line_uuid, str) and line_uuid:
            line_uuids.add(line_uuid)

        # Version and cwd are taken from the first line that carries them.
        if claude_code_version is None:
            v = raw.get("version")
            if v:
                claude_code_version = str(v)
        if observed_cwd is None:
            c = raw.get("cwd")
            if c:
                observed_cwd = str(c)

        line_type = raw.get("type")
        if line_type == "user":
            turn = _parse_user_line(raw)
            if turn is not None:
                turns.append(turn)
        elif line_type == "assistant":
            turn = _parse_assistant_line(raw)
            if turn is not None:
                turns.append(turn)
        elif line_type == "system":
            turn = _parse_system_line(raw)
            if turn is not None:
                turns.append(turn)
        elif line_type == "attachment":
            att = _parse_attachment_line(raw)
            if att is not None:
                attachments.append(att)
        elif isinstance(line_type, str) and line_type in _BOOKKEEPING_TYPES:
            # Known bookkeeping — drop silently. The isinstance guard keeps an
            # unhashable "type" value from raising in the set lookup.
            continue
        else:
            warnings.append(
                ParserWarning(
                    code="unknown_type",
                    detail=str(line_type) if line_type else "<missing>",
                    line_number=line_number,
                    path=jsonl_path,
                )
            )

    _pair_tool_results(turns)

    # Validate parent_uuid references — warn on orphaned links (spec §9).
    # A parent may be a line that never became a Turn (e.g. an attachment), so
    # the check uses the uuids of all lines seen in this file.
    seen_uuids = line_uuids | {t.uuid for t in turns if t.uuid}
    for turn in turns:
        if turn.parent_uuid is not None and turn.parent_uuid not in seen_uuids:
            warnings.append(
                ParserWarning(
                    code="orphan_parent",
                    detail=f"parent_uuid {turn.parent_uuid} not seen in this session",
                    path=jsonl_path,
                )
            )

    # Number turns 1-based and compute start/end.
    for i, turn in enumerate(turns, start=1):
        turn.turn_number = i

    start_time = turns[0].timestamp if turns else None
    end_time = turns[-1].timestamp if turns else None

    # Compute initial_context_tokens from the first assistant turn with usage.
    initial_context_tokens = 0
    for turn in turns:
        if turn.role == "assistant" and turn.usage is not None:
            initial_context_tokens = turn.usage.cache_creation_5m + turn.usage.cache_creation_1h
            break

    # Metadata pass.
    primary_model = _most_common([t.model for t in turns if t.role == "assistant" and t.model])
    if observed_cwd is None:
        observed_cwd = project_path
    tool_names_loaded = _collect_tool_names(turns, attachments)
    raw_tool_result_files = _enumerate_raw_tool_result_files(jsonl_path)

    # Load subagent meta if this is a child session. The file is optional and
    # best-effort: unreadable, undecodable, or non-object content leaves the
    # meta empty so later `.get` lookups stay safe.
    subagent_meta: dict = {}
    if _depth > 0:
        meta_path = jsonl_path.with_suffix(".meta.json")
        if meta_path.exists():
            try:
                loaded_meta = json.loads(meta_path.read_text(encoding="utf-8"))
            except (OSError, ValueError):
                # ValueError covers JSONDecodeError and UnicodeDecodeError.
                loaded_meta = None
            if isinstance(loaded_meta, dict):
                subagent_meta = loaded_meta

    subagents, subagent_parse_errors, depth_warnings = _parse_subagents(
        jsonl_path,
        max_subagent_depth=max_subagent_depth,
        depth=_depth,
        parent_session_id=session_id,
    )
    warnings.extend(depth_warnings)

    _link_subagents(turns, subagents, warnings, jsonl_path)

    return SessionTrace(
        session_id=session_id,
        parent_session_id=_parent_session_id,
        project_path=project_path,
        cwd=observed_cwd,
        primary_model=primary_model,
        claude_code_version=claude_code_version,
        turns=turns,
        subagents=subagents,
        attachments=attachments,
        raw_tool_result_files=raw_tool_result_files,
        initial_context_tokens=initial_context_tokens,
        tool_names_loaded=tool_names_loaded,
        start_time=start_time,
        end_time=end_time,
        source_path=jsonl_path,
        subagent_meta=subagent_meta,
        warnings=warnings,
        subagent_parse_errors=subagent_parse_errors,
    )
197
+
198
+
199
def _iter_lines(path: Path):
    """Yield (line_number, parsed_dict_or_None, is_last_line_truncated, had_encoding_error).

    Lines are numbered from 1; blank lines are counted but not yielded.

    The second element is the parsed JSON object, or None when the line is
    not valid JSON or is valid JSON that is not an object.

    For a final line that lacks a newline AND fails JSON parse, the third
    element is True — the caller can drop it silently. For every other
    failure the third element is False — the caller records a
    malformed_json warning.

    The fourth element is True only when the line's bytes were not valid
    UTF-8 and had to be decoded with errors='replace'.
    """
    data = path.read_bytes()
    # Split on real line terminators only. str.splitlines() would also break
    # on U+2028, U+2029 and U+0085, which may appear unescaped inside JSON
    # strings and would cut a valid record in two. CR and LF can never occur
    # raw inside a JSON string, and no UTF-8 multi-byte sequence contains
    # them, so splitting the undecoded bytes is safe.
    chunks = data.replace(b"\r\n", b"\n").replace(b"\r", b"\n").split(b"\n")
    ends_with_newline = chunks[-1] == b""
    if ends_with_newline:
        chunks.pop()  # artefact of the trailing terminator (or an empty file)

    last_index = len(chunks) - 1
    for index, chunk in enumerate(chunks):
        line_number = index + 1
        # Decode strictly first so a legitimately encoded U+FFFD is not
        # mistaken for a replacement introduced by lossy decoding.
        try:
            line = chunk.decode("utf-8")
            had_encoding_error = False
        except UnicodeDecodeError:
            line = chunk.decode("utf-8", errors="replace")
            had_encoding_error = True

        stripped = line.strip()
        if not stripped:
            continue

        try:
            parsed = json.loads(stripped)
        except json.JSONDecodeError:
            truncated_final = index == last_index and not ends_with_newline
            yield line_number, None, truncated_final, had_encoding_error
            continue

        if isinstance(parsed, dict):
            yield line_number, parsed, False, had_encoding_error
        else:
            # Valid JSON but not an object: report as malformed, never as a
            # truncated write.
            yield line_number, None, False, had_encoding_error
226
+
227
+
228
def _parse_user_line(raw: dict) -> Turn | None:
    """Convert a `type: "user"` JSONL record into a Turn.

    String content becomes a plain user turn. List content is classified by
    the set of block types it contains: any ``tool_result`` block makes the
    whole line a ``tool_result`` turn with empty text; otherwise the blocks
    are flattened into user text. Any other content shape yields an empty
    user turn. ToolResult.tool_name is left as "" here and filled in later
    by the pairing pass.
    """
    content = (raw.get("message") or {}).get("content")

    # Defaults describe the "empty user turn" used for unexpected shapes.
    role = "user"
    text = ""
    tool_results: list[ToolResult] = []

    if isinstance(content, str):
        text = content
    elif isinstance(content, list):
        kinds = set()
        for item in content:
            if isinstance(item, dict):
                kinds.add(item.get("type"))
        if "tool_result" in kinds:
            # tool_result lines carry no narrative text of their own.
            role = "tool_result"
            tool_results = _extract_tool_results(content, structured=raw.get("toolUseResult"))
        else:
            text = _flatten_user_blocks(content)

    return Turn(
        turn_number=0,
        uuid=raw.get("uuid", ""),
        parent_uuid=raw.get("parentUuid"),
        role=role,
        text=text,
        thinking="",
        tool_uses=[],
        tool_results=tool_results,
        usage=None,
        model=None,
        stop_reason=None,
        timestamp=_parse_timestamp(raw.get("timestamp")),
        duration_ms=None,
        is_sidechain=bool(raw.get("isSidechain", False)),
    )
274
+
275
+
276
def _extract_tool_results(content: list, *, structured: dict | None) -> list[ToolResult]:
    """Build one ToolResult per ``tool_result`` block found in ``content``.

    The same ``structured`` value (the line's toolUseResult field) is attached
    to every result, because a line carries a single toolUseResult even when
    it holds several tool_result blocks; no attempt is made to split it.
    Non-dict entries and blocks of other types are ignored.
    """

    def _as_text(payload) -> str:
        # A string is used as-is; a list contributes only its text blocks.
        if isinstance(payload, str):
            return payload
        if isinstance(payload, list):
            return "\n".join(
                part.get("text", "")
                for part in payload
                if isinstance(part, dict) and part.get("type") == "text"
            )
        return ""

    return [
        ToolResult(
            tool_name="",  # filled by pairing pass
            tool_use_id=entry.get("tool_use_id", ""),
            content=_as_text(entry.get("content")),
            structured=structured,
            is_error=bool(entry.get("is_error", False)),
        )
        for entry in content
        if isinstance(entry, dict) and entry.get("type") == "tool_result"
    ]
311
+
312
+
313
def _flatten_user_blocks(content: list) -> str:
    """Render a list of user content blocks as newline-joined text.

    Text blocks contribute their text; image blocks contribute a
    ``<image:MEDIA_TYPE,SIZEB>`` placeholder where SIZE is the length of the
    source's string ``data`` (0 when it is not a string). Everything else,
    including non-dict entries, is skipped.
    """
    rendered: list[str] = []
    for item in content:
        kind = item.get("type") if isinstance(item, dict) else None
        if kind == "text":
            rendered.append(item.get("text", ""))
        elif kind == "image":
            src = item.get("source") or {}
            media_type = src.get("media_type", "?")
            payload = src.get("data", "")
            size = len(payload) if isinstance(payload, str) else 0
            rendered.append(f"<image:{media_type},{size}B>")
    return "\n".join(rendered)
329
+
330
+
331
def _pair_tool_results(turns: list[Turn]) -> None:
    """Fill in ToolResult.tool_name from the ToolUse with the same tool_use_id.

    Turns are walked in order, so a result only sees tool uses from its own
    or earlier turns. Results that already have a name, or have no id, are
    left untouched; an unmatched id leaves the name as "".
    """
    name_for_id: dict[str, str] = {}
    for turn in turns:
        name_for_id.update(
            {call.tool_use_id: call.tool_name for call in turn.tool_uses if call.tool_use_id}
        )
        for outcome in turn.tool_results:
            if not outcome.tool_use_id or outcome.tool_name:
                continue
            outcome.tool_name = name_for_id.get(outcome.tool_use_id, "")
341
+
342
+
343
def _most_common(values: list[str]) -> str | None:
    """Return the value that occurs most often, or None for an empty list.

    Ties go to the value that appeared first.
    """
    if values:
        tally: dict[str, int] = {}
        for item in values:
            tally[item] = tally.get(item, 0) + 1
        # max() keeps the first maximal key, and dicts iterate in insertion order.
        return max(tally, key=tally.__getitem__)
    return None
351
+
352
+
353
def _collect_tool_names(turns: list[Turn], attachments: list[Attachment]) -> list[str]:
    """Return de-duplicated tool names in first-seen order.

    String entries of ``addedNames`` on ``mcp_servers`` attachments come
    first, followed by the non-empty tool names observed in the turns'
    tool_uses.
    """
    # A dict doubles as an insertion-ordered set.
    ordered: dict[str, None] = {}

    for attachment in attachments:
        if attachment.kind == "mcp_servers":
            for candidate in attachment.raw.get("addedNames", []) or []:
                if isinstance(candidate, str):
                    ordered.setdefault(candidate, None)

    for turn in turns:
        for call in turn.tool_uses:
            if call.tool_name:
                ordered.setdefault(call.tool_name, None)

    return list(ordered)
372
+
373
+
374
def _parse_subagents(
    parent_jsonl: Path,
    *,
    max_subagent_depth: int,
    depth: int,
    parent_session_id: str,
) -> tuple[list[SessionTrace], list[dict], list[ParserWarning]]:
    """Find ``<sid>/subagents/agent-*.jsonl`` next to the parent and parse each one.

    Returns (subagents, parse_errors, depth_warnings). Children are parsed in
    sorted path order with ``parent_session_id`` passed down. A child that
    raises ParserError is recorded in parse_errors as a
    ``{"path", "reason"}`` dict. When ``depth`` has reached
    ``max_subagent_depth`` nothing is parsed; a single max_subagent_depth
    warning is returned if child files exist.
    """
    sub_dir = parent_jsonl.parent / parent_jsonl.stem / "subagents"
    if not sub_dir.is_dir():
        return [], [], []

    if depth >= max_subagent_depth:
        # Recursion limit hit: only report that deeper children were skipped.
        if not any(sub_dir.glob("agent-*.jsonl")):
            return [], [], []
        limit_notice = ParserWarning(
            code="max_subagent_depth",
            detail=(
                f"depth {depth} reached at {sub_dir};"
                " raise max_subagent_depth to recurse deeper"
            ),
            path=parent_jsonl,
        )
        return [], [], [limit_notice]

    parsed: list[SessionTrace] = []
    failures: list[dict] = []
    for child_path in sorted(sub_dir.glob("agent-*.jsonl")):
        try:
            trace = parse_session(
                child_path,
                max_subagent_depth=max_subagent_depth,
                _depth=depth + 1,
                _parent_session_id=parent_session_id,
            )
        except ParserError as exc:
            failures.append({"path": child_path, "reason": exc.reason})
        else:
            parsed.append(trace)
    return parsed, failures, []
425
+
426
+
427
def _link_subagents(
    turns: list[Turn],
    subagents: list[SessionTrace],
    warnings: list[ParserWarning],
    path: Path,
) -> None:
    """Set ToolUse.subagent_session_id on matched Agent calls and warn about orphans.

    Linking strategy (spec §7):
      1. Exact: child.subagent_meta["tool_use_id"] equals a parent Agent
         ToolUse.tool_use_id.
      2. Fallback: not implemented in v1; orphans warn.

    Warnings are appended to ``warnings`` in place, for both directions:
      - orphan_subagent_file: a child that no parent Agent ToolUse claims.
      - orphan_agent_call: a parent Agent ToolUse with no matching child.
    """
    # Parent Agent calls, keyed by their tool_use_id.
    agent_calls: dict[str, ToolUse] = {
        call.tool_use_id: call
        for turn in turns
        for call in turn.tool_uses
        if call.tool_name == "Agent" and call.tool_use_id
    }

    linked_ids: set[str] = set()
    for child in subagents:
        claimed_id = (child.subagent_meta or {}).get("tool_use_id")
        if not claimed_id or claimed_id not in agent_calls:
            warnings.append(
                ParserWarning(
                    code="orphan_subagent_file",
                    detail=f"subagent {child.session_id} has no matching parent Agent tool_use",
                    path=path,
                )
            )
            continue
        agent_calls[claimed_id].subagent_session_id = child.session_id
        linked_ids.add(claimed_id)

    # Agent calls that no child claimed.
    for use_id in agent_calls:
        if use_id in linked_ids:
            continue
        warnings.append(
            ParserWarning(
                code="orphan_agent_call",
                detail=f"Agent tool_use {use_id} has no matching subagent file",
                path=path,
            )
        )
475
+
476
+
477
def _enumerate_raw_tool_result_files(jsonl_path: Path) -> list[RawToolResultFile]:
    """Describe every ``<sid>/tool-results/*.txt`` file beside the session JSONL.

    Entries are in sorted path order and carry the path and size in bytes;
    file contents are never read. Returns an empty list when the directory
    does not exist.
    """
    results_dir = jsonl_path.parent / jsonl_path.stem / "tool-results"
    listing: list[RawToolResultFile] = []
    if results_dir.is_dir():
        for entry in sorted(results_dir.glob("*.txt")):
            listing.append(
                RawToolResultFile(path=entry, size_bytes=entry.stat().st_size, tool_use_id=None)
            )
    return listing
487
+
488
+
489
def _parse_system_line(raw: dict) -> Turn | None:
    """Build a Turn from a `type: "system"` line (compaction notices, model swaps).

    The text comes from the top-level ``content`` field, falling back to
    ``message.content``; list content is flattened the same way as user
    content blocks, and anything else is stringified.
    """
    # `message` may be present but null (or otherwise not an object); treat
    # that like a missing message instead of failing on `.get`.
    message = raw.get("message")
    if not isinstance(message, dict):
        message = {}

    text = raw.get("content") or message.get("content") or ""
    if isinstance(text, list):
        text = _flatten_user_blocks(text)
    return Turn(
        turn_number=0,
        uuid=raw.get("uuid", ""),
        parent_uuid=raw.get("parentUuid"),
        role="system",
        text=str(text),
        thinking="",
        tool_uses=[],
        tool_results=[],
        usage=None,
        model=None,
        stop_reason=None,
        timestamp=_parse_timestamp(raw.get("timestamp")),
        duration_ms=None,
        is_sidechain=bool(raw.get("isSidechain", False)),
    )
510
+
511
+
512
def _parse_attachment_line(raw: dict) -> Attachment | None:
    """Convert a `type: "attachment"` record into an Attachment.

    Returns None when the ``attachment`` payload is not a dict. The kind is
    derived from which keys the payload has, not from hookEvent alone (only
    hook-output attachments carry it). Unrecognised shapes are kept with
    kind="other" and produce no warning. The timestamp is None when the
    record has none.
    """
    payload = raw.get("attachment")
    if isinstance(payload, dict):
        kind = _classify_attachment_shape(payload)
        body = _extract_attachment_content(kind, payload)
        stamp = raw.get("timestamp")
        return Attachment(
            kind=kind,
            raw=payload,
            content=body,
            timestamp=_parse_timestamp(stamp) if stamp else None,
            parent_uuid=raw.get("parentUuid"),
        )
    return None
534
+
535
+
536
def _classify_attachment_shape(payload: dict) -> str:
    """Name the attachment kind from the first marker key present in ``payload``.

    Markers are checked in a fixed priority order; a payload with none of
    them is classified as "other".
    """
    marker_to_kind = (
        ("hookEvent", "hook_output"),
        ("pendingMcpServers", "mcp_servers"),
        ("skillCount", "skills"),
        ("allowedTools", "allowed_tools"),
        ("itemCount", "items"),
    )
    for marker, kind in marker_to_kind:
        if marker in payload:
            return kind
    return "other"
548
+
549
+
550
def _extract_attachment_content(kind: str, payload: dict) -> str | None:
    """Best-effort extraction of human-readable content from an attachment.

    For ``hook_output`` the hook's stdout is used; when stdout is a JSON
    object carrying ``hookSpecificOutput.additionalContext`` as a non-empty
    string, that string is returned instead. For ``skills`` and ``items``
    the payload's string ``content`` is returned. Returns None when nothing
    useful is present.
    """
    if kind == "hook_output":
        stdout = payload.get("stdout")
        if not isinstance(stdout, str):
            stdout = ""
        try:
            parsed = json.loads(stdout)
        except json.JSONDecodeError:
            return stdout or None
        # stdout may be valid JSON without being an object (e.g. "42" or
        # "[1]"), and hookSpecificOutput may not be an object either; fall
        # back to the raw stdout instead of failing on `.get`.
        if isinstance(parsed, dict):
            hook_specific = parsed.get("hookSpecificOutput")
            if isinstance(hook_specific, dict):
                extra = hook_specific.get("additionalContext")
                if isinstance(extra, str) and extra:
                    return extra
        return stdout or None

    if kind in ("skills", "items"):
        c = payload.get("content")
        return c if isinstance(c, str) and c else None

    return None
569
+
570
+
571
def _parse_assistant_line(raw: dict) -> Turn | None:
    """Build a Turn from a `type: "assistant"` JSONL line.

    Text and thinking blocks are newline-joined into ``text`` and
    ``thinking``; ``tool_use`` blocks become ToolUse entries;
    ``server_tool_use`` and ``advisor_tool_result`` blocks are represented by
    an inline ``<type:id>`` marker in the text. Plain-string content is taken
    as the turn's text, matching how user lines are handled.
    """
    message = raw.get("message") or {}
    content = message.get("content") or []

    tool_uses: list[ToolUse] = []
    text_parts: list[str] = []
    thinking_parts: list[str] = []

    if isinstance(content, str):
        # Iterating a string would walk its characters and drop the text.
        text_parts.append(content)
        content_blocks: list = []
    elif isinstance(content, list):
        content_blocks = content
    else:
        # Unexpected shape: nothing to extract.
        content_blocks = []

    for block in content_blocks:
        if not isinstance(block, dict):
            continue
        block_type = block.get("type")
        if block_type == "text":
            text_parts.append(block.get("text", ""))
        elif block_type == "thinking":
            thinking_parts.append(block.get("thinking", ""))
        elif block_type == "tool_use":
            tool_input = block.get("input")
            tool_uses.append(
                ToolUse(
                    tool_name=block.get("name", ""),
                    tool_use_id=block.get("id", ""),
                    tool_input=tool_input if isinstance(tool_input, dict) else {},
                )
            )
        elif block_type in ("server_tool_use", "advisor_tool_result"):
            # Inline a marker so the text remains useful; structured handling deferred.
            text_parts.append(f"<{block_type}:{block.get('id', '')}>")

    return Turn(
        turn_number=0,
        uuid=raw.get("uuid", ""),
        parent_uuid=raw.get("parentUuid"),
        role="assistant",
        text="\n".join(text_parts),
        thinking="\n".join(thinking_parts),
        tool_uses=tool_uses,
        tool_results=[],
        usage=_parse_usage(message.get("usage")),
        model=message.get("model"),
        stop_reason=message.get("stop_reason"),
        timestamp=_parse_timestamp(raw.get("timestamp")),
        duration_ms=None,
        is_sidechain=bool(raw.get("isSidechain", False)),
        error=("api_error" if raw.get("isApiErrorMessage") else None),
    )
617
+
618
+
619
def _parse_usage(raw: dict | None) -> Usage | None:
    """Build a Usage from the message.usage dict.

    When ``iterations`` is a non-empty list containing objects, token counts
    are summed across those objects (spec §5.2); otherwise the top-level
    fields are used. Missing, null, or non-integer counts are treated as 0.
    Returns None when ``raw`` is not a dict.
    """
    if not isinstance(raw, dict):
        return None

    def _count(source, *keys: str) -> int:
        # Walk nested keys, yielding 0 for anything absent or malformed so a
        # null count or a stray non-object entry cannot break the sums.
        value = source
        for key in keys:
            if not isinstance(value, dict):
                return 0
            value = value.get(key)
        return value if isinstance(value, int) else 0

    iterations = raw.get("iterations")
    per_iteration = (
        [it for it in iterations if isinstance(it, dict)]
        if isinstance(iterations, list)
        else []
    )
    # Sum across iterations defensively; with none usable, read the top level.
    sources = per_iteration or [raw]

    input_t = sum(_count(src, "input_tokens") for src in sources)
    output_t = sum(_count(src, "output_tokens") for src in sources)
    cache_read = sum(_count(src, "cache_read_input_tokens") for src in sources)
    cache_5m = sum(
        _count(src, "cache_creation", "ephemeral_5m_input_tokens") for src in sources
    )
    cache_1h = sum(
        _count(src, "cache_creation", "ephemeral_1h_input_tokens") for src in sources
    )

    return Usage(
        input_tokens=input_t,
        output_tokens=output_t,
        cache_creation_5m=cache_5m,
        cache_creation_1h=cache_1h,
        cache_read=cache_read,
        service_tier=raw.get("service_tier"),
    )
657
+
658
+
659
def _parse_timestamp(value: str | None) -> datetime:
    """Parse an ISO 8601 timestamp. Accepts both 'Z' suffix and '+00:00'.

    Always returns a timezone-aware datetime. A missing, non-string, or
    unparseable value falls back to the Unix epoch in UTC, so one bad line
    cannot abort the whole parse. A timestamp without an offset is taken to
    be UTC, which keeps every result comparable with the others.
    """
    # Fallback for synthetic edge cases; should never be reached with real data.
    epoch = datetime.fromtimestamp(0, tz=timezone.utc)
    if not value or not isinstance(value, str):
        return epoch
    if value.endswith("Z"):
        value = value[:-1] + "+00:00"
    try:
        parsed = datetime.fromisoformat(value)
    except ValueError:
        return epoch
    if parsed.tzinfo is None:
        parsed = parsed.replace(tzinfo=timezone.utc)
    return parsed
667
+
668
+
669
def _resolve_jsonl_path(path: Path) -> Path:
    """Map a session directory to its sibling ``<name>.jsonl``; return other paths unchanged."""
    return path.parent / f"{path.name}.jsonl" if path.is_dir() else path
673
+
674
+
675
def _decode_project_path(dir_name: str) -> str:
    """Turn a Claude Code project-directory name back into a filesystem path.

    The encoding replaced every '/' of the original cwd with '-', so a name
    starting with '-' is decoded by mapping each '-' back to '/'. Names that
    do not start with '-' are returned unchanged.

    The decoding is lossy: a genuine '-' in the original path cannot be told
    apart from a separator, so paths containing hyphens (e.g. dashed project
    names) come back wrong. Treat the result as a display-friendly fallback;
    consumers needing the exact path should prefer `SessionTrace.cwd`, which
    is observed from the line data.
    """
    if dir_name.startswith("-"):
        return "/".join(dir_name.split("-"))
    return dir_name
cctx/pricing.py ADDED
@@ -0,0 +1,18 @@
1
+ """Shared token pricing — single source of truth for all cost calculations."""
2
+ from __future__ import annotations
3
+
4
# Input price in USD per million tokens, keyed by model-name prefix.
_INPUT_PRICE_PER_MTOK: dict[str, float] = {
    "claude-opus-4": 15.0,
    "claude-sonnet-4": 3.0,
    "claude-haiku-4": 0.8,
}
# Used when the model is None or matches no known prefix.
_DEFAULT_PRICE_PER_MTOK = 3.0


def price_per_tok(model: str | None) -> float:
    """Return the USD input price of a single token for ``model``.

    The first table prefix that ``model`` starts with decides the price;
    None and unrecognised models get the default price.
    """
    per_mtok = _DEFAULT_PRICE_PER_MTOK
    if model is not None:
        per_mtok = next(
            (
                rate
                for prefix, rate in _INPUT_PRICE_PER_MTOK.items()
                if model.startswith(prefix)
            ),
            _DEFAULT_PRICE_PER_MTOK,
        )
    return per_mtok / 1_000_000