cctx-cli 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cctx/__init__.py +3 -0
- cctx/cli.py +375 -0
- cctx/diagnostician/__init__.py +81 -0
- cctx/diagnostician/aggregate.py +40 -0
- cctx/diagnostician/inflection.py +19 -0
- cctx/diagnostician/patterns/__init__.py +1 -0
- cctx/diagnostician/patterns/retry_loop.py +145 -0
- cctx/diagnostician/patterns/scope_creep.py +87 -0
- cctx/diagnostician/patterns/stale_context.py +147 -0
- cctx/discovery.py +185 -0
- cctx/exporters/__init__.py +0 -0
- cctx/exporters/csv.py +64 -0
- cctx/exporters/jsonl.py +64 -0
- cctx/harvest.py +173 -0
- cctx/models.py +269 -0
- cctx/parsers/__init__.py +1 -0
- cctx/parsers/claude_code.py +690 -0
- cctx/pricing.py +18 -0
- cctx/recommender/__init__.py +0 -0
- cctx/recommender/claude_md.py +131 -0
- cctx/recommender/evidence.py +46 -0
- cctx/renderers/__init__.py +0 -0
- cctx/renderers/report.py +58 -0
- cctx/renderers/templates/autopsy.html.j2 +249 -0
- cctx/renderers/terminal.py +251 -0
- cctx/renderers/trace_tui.py +291 -0
- cctx/tokenizer.py +77 -0
- cctx_cli-0.1.0.dist-info/METADATA +159 -0
- cctx_cli-0.1.0.dist-info/RECORD +31 -0
- cctx_cli-0.1.0.dist-info/WHEEL +4 -0
- cctx_cli-0.1.0.dist-info/entry_points.txt +2 -0
|
@@ -0,0 +1,690 @@
|
|
|
1
|
+
"""Claude Code JSONL session parser."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
from datetime import datetime, timezone
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
|
|
9
|
+
from cctx.models import (
|
|
10
|
+
Attachment,
|
|
11
|
+
ParserError,
|
|
12
|
+
ParserWarning,
|
|
13
|
+
RawToolResultFile,
|
|
14
|
+
SessionTrace,
|
|
15
|
+
ToolResult,
|
|
16
|
+
ToolUse,
|
|
17
|
+
Turn,
|
|
18
|
+
Usage,
|
|
19
|
+
)
|
|
20
|
+
|
|
21
|
+
# JSONL line types that parse_session recognises as bookkeeping and drops
# without recording an "unknown_type" warning.
_BOOKKEEPING_TYPES = frozenset(
    (
        "last-prompt",
        "permission-mode",
        "ai-title",
        "custom-title",
        "queue-operation",
        "file-history-snapshot",
        "pr-link",
    )
)
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def parse_session(
    session_path: Path,
    *,
    max_subagent_depth: int = 4,
    _depth: int = 0,
    _parent_session_id: str | None = None,
) -> SessionTrace:
    """Parse a Claude Code session JSONL file (and its subagents) into a SessionTrace.

    Args:
        session_path: The session's ``.jsonl`` file, or a directory that is
            mapped to its sibling ``<name>.jsonl`` by ``_resolve_jsonl_path``.
        max_subagent_depth: Depth limit handed to ``_parse_subagents``.
        _depth: Internal recursion depth; 0 for the top-level call.
        _parent_session_id: Internal; the parent's session id when this call
            parses a subagent file.

    Returns:
        The assembled SessionTrace. Recoverable problems are collected in its
        ``warnings`` list rather than raised.

    Raises:
        ParserError: If the resolved JSONL file does not exist.
    """
    session_path = Path(session_path)
    jsonl_path = _resolve_jsonl_path(session_path)

    if not jsonl_path.exists():
        raise ParserError(
            path=jsonl_path,
            line_number=None,
            reason=f"file not found: {jsonl_path}",
        )

    session_id = jsonl_path.stem
    project_dir = jsonl_path.parent
    project_path = _decode_project_path(project_dir.name)

    turns: list[Turn] = []
    attachments: list[Attachment] = []
    warnings: list[ParserWarning] = []
    claude_code_version: str | None = None
    observed_cwd: str | None = None

    for line_number, raw, truncated, had_encoding_error in _iter_lines(jsonl_path):
        if had_encoding_error:
            warnings.append(
                ParserWarning(
                    code="encoding_error",
                    detail=f"non-UTF-8 bytes replaced on line {line_number}",
                    line_number=line_number,
                    path=jsonl_path,
                )
            )
        if raw is None:
            # A truncated final line is dropped silently; anything else is reported.
            if not truncated:
                warnings.append(
                    ParserWarning(
                        code="malformed_json",
                        detail="failed to parse JSON",
                        line_number=line_number,
                        path=jsonl_path,
                    )
                )
            continue
        if not isinstance(raw, dict):
            # Valid JSON that is not an object (e.g. a bare list or number)
            # cannot be a session record; report it instead of crashing on .get().
            warnings.append(
                ParserWarning(
                    code="malformed_json",
                    detail=f"expected a JSON object, got {type(raw).__name__}",
                    line_number=line_number,
                    path=jsonl_path,
                )
            )
            continue

        # The first non-empty version / cwd seen on any line wins.
        if claude_code_version is None:
            v = raw.get("version")
            if v:
                claude_code_version = str(v)
        if observed_cwd is None:
            c = raw.get("cwd")
            if c:
                observed_cwd = str(c)

        line_type = raw.get("type")
        if line_type == "user":
            turn = _parse_user_line(raw)
            if turn is not None:
                turns.append(turn)
        elif line_type == "assistant":
            turn = _parse_assistant_line(raw)
            if turn is not None:
                turns.append(turn)
        elif line_type == "system":
            turn = _parse_system_line(raw)
            if turn is not None:
                turns.append(turn)
        elif line_type == "attachment":
            att = _parse_attachment_line(raw)
            if att is not None:
                attachments.append(att)
        elif line_type in _BOOKKEEPING_TYPES:
            # Known bookkeeping — drop silently.
            continue
        else:
            warnings.append(
                ParserWarning(
                    code="unknown_type",
                    detail=str(line_type) if line_type else "<missing>",
                    line_number=line_number,
                    path=jsonl_path,
                )
            )

    _pair_tool_results(turns)

    # Validate parent_uuid references — warn on orphaned links (spec §9).
    seen_uuids = {t.uuid for t in turns if t.uuid}
    for turn in turns:
        if turn.parent_uuid is not None and turn.parent_uuid not in seen_uuids:
            warnings.append(
                ParserWarning(
                    code="orphan_parent",
                    detail=f"parent_uuid {turn.parent_uuid} not seen in this session",
                    path=jsonl_path,
                )
            )

    # Number turns 1-based and compute start/end.
    for i, turn in enumerate(turns, start=1):
        turn.turn_number = i

    start_time = turns[0].timestamp if turns else None
    end_time = turns[-1].timestamp if turns else None

    # initial_context_tokens comes from the first assistant turn that has usage.
    initial_context_tokens = 0
    for turn in turns:
        if turn.role == "assistant" and turn.usage is not None:
            initial_context_tokens = turn.usage.cache_creation_5m + turn.usage.cache_creation_1h
            break

    # Metadata pass.
    primary_model = _most_common([t.model for t in turns if t.role == "assistant" and t.model])
    if observed_cwd is None:
        observed_cwd = project_path
    tool_names_loaded = _collect_tool_names(turns, attachments)
    raw_tool_result_files = _enumerate_raw_tool_result_files(jsonl_path)

    # Load subagent meta if this is a child session. This stays best-effort:
    # an unreadable, undecodable or non-object meta file leaves the dict empty.
    subagent_meta: dict = {}
    if _depth > 0:
        meta_path = jsonl_path.with_suffix(".meta.json")
        if meta_path.exists():
            try:
                loaded = json.loads(meta_path.read_text(encoding="utf-8"))
            except (json.JSONDecodeError, UnicodeDecodeError):
                loaded = None
            # Linking calls .get() on this value, so only a JSON object is usable.
            if isinstance(loaded, dict):
                subagent_meta = loaded

    subagents, subagent_parse_errors, depth_warnings = _parse_subagents(
        jsonl_path,
        max_subagent_depth=max_subagent_depth,
        depth=_depth,
        parent_session_id=session_id,
    )
    warnings.extend(depth_warnings)

    _link_subagents(turns, subagents, warnings, jsonl_path)

    return SessionTrace(
        session_id=session_id,
        parent_session_id=_parent_session_id,
        project_path=project_path,
        cwd=observed_cwd,
        primary_model=primary_model,
        claude_code_version=claude_code_version,
        turns=turns,
        subagents=subagents,
        attachments=attachments,
        raw_tool_result_files=raw_tool_result_files,
        initial_context_tokens=initial_context_tokens,
        tool_names_loaded=tool_names_loaded,
        start_time=start_time,
        end_time=end_time,
        source_path=jsonl_path,
        subagent_meta=subagent_meta,
        warnings=warnings,
        subagent_parse_errors=subagent_parse_errors,
    )
|
|
197
|
+
|
|
198
|
+
|
|
199
|
+
def _iter_lines(path: Path):
    """Yield (line_number, parsed_dict_or_None, is_last_line_truncated, had_encoding_error).

    The file is split on ASCII line endings at the byte level, so characters
    such as U+2028 / U+2029 / U+0085 inside a JSON string do not break a
    record in two (``str.splitlines`` would treat them as line boundaries).
    Blank lines are skipped but still count towards the 1-based line number.

    The second element is the parsed JSON object, or None when the line does
    not parse or parses to something other than a JSON object.

    For a final line that lacks a newline AND fails JSON parse, the third
    element is True — the caller can drop it silently. In every other case
    it is False, and the caller records a malformed_json warning when the
    second element is None.

    The fourth element is True only when the line's bytes were not valid
    UTF-8 and had to be decoded with errors='replace'. A correctly encoded
    U+FFFD in the data does not set it.
    """
    raw_lines = path.read_bytes().splitlines(keepends=True)
    last_index = len(raw_lines) - 1

    for index, raw_line in enumerate(raw_lines):
        try:
            text = raw_line.decode("utf-8")
            had_encoding_error = False
        except UnicodeDecodeError:
            text = raw_line.decode("utf-8", errors="replace")
            had_encoding_error = True

        stripped = text.strip()
        if not stripped:
            continue

        line_number = index + 1
        try:
            parsed = json.loads(stripped)
        except json.JSONDecodeError:
            truncated_final = index == last_index and not raw_line.endswith(b"\n")
            yield line_number, None, truncated_final, had_encoding_error
            continue

        # Callers use dict methods on the result; a bare list/number/string
        # is reported the same way as an unparsable mid-file line.
        if not isinstance(parsed, dict):
            parsed = None
        yield line_number, parsed, False, had_encoding_error
|
|
226
|
+
|
|
227
|
+
|
|
228
|
+
def _parse_user_line(raw: dict) -> Turn | None:
    """Turn a `type: "user"` JSONL record into a Turn.

    String content becomes the text of a "user" turn. List content is
    classified by the set of block types it contains: if any block is a
    tool_result the turn gets role "tool_result", empty text and the
    extracted ToolResults; otherwise the blocks are flattened into the text
    of a "user" turn. Any other content shape yields an empty "user" turn.

    ToolResult.tool_name is left as "" here; the pairing pass fills it in.
    """
    content = (raw.get("message") or {}).get("content")

    # Defaults describe an empty user turn; the branches below override
    # only what their content shape provides.
    role = "user"
    text = ""
    tool_results: list[ToolResult] = []

    if isinstance(content, str):
        text = content
    elif isinstance(content, list):
        kinds = {blk.get("type") for blk in content if isinstance(blk, dict)}
        if "tool_result" in kinds:
            # tool_result lines carry no narrative text.
            role = "tool_result"
            tool_results = _extract_tool_results(content, structured=raw.get("toolUseResult"))
        else:
            text = _flatten_user_blocks(content)

    return Turn(
        turn_number=0,
        uuid=raw.get("uuid", ""),
        parent_uuid=raw.get("parentUuid"),
        role=role,
        text=text,
        thinking="",
        tool_uses=[],
        tool_results=tool_results,
        usage=None,
        model=None,
        stop_reason=None,
        timestamp=_parse_timestamp(raw.get("timestamp")),
        duration_ms=None,
        is_sidechain=bool(raw.get("isSidechain", False)),
    )
|
|
274
|
+
|
|
275
|
+
|
|
276
|
+
def _extract_tool_results(content: list, *, structured: dict | None) -> list[ToolResult]:
    """Build one ToolResult per `tool_result` block found in `content`.

    Non-dict entries and blocks of any other type are ignored. A block's
    content is used as-is when it is a string, joined from its text
    sub-blocks when it is a list, and "" otherwise.

    `structured` (the line-level toolUseResult) is attached unchanged to
    every ToolResult produced, since one line carries a single
    toolUseResult regardless of how many tool_result blocks it has.
    """

    def _as_text(payload) -> str:
        # Collapse a tool_result payload into a single string.
        if isinstance(payload, str):
            return payload
        if isinstance(payload, list):
            return "\n".join(
                item.get("text", "")
                for item in payload
                if isinstance(item, dict) and item.get("type") == "text"
            )
        return ""

    return [
        ToolResult(
            tool_name="",  # filled by pairing pass
            tool_use_id=blk.get("tool_use_id", ""),
            content=_as_text(blk.get("content")),
            structured=structured,
            is_error=bool(blk.get("is_error", False)),
        )
        for blk in content
        if isinstance(blk, dict) and blk.get("type") == "tool_result"
    ]
|
|
311
|
+
|
|
312
|
+
|
|
313
|
+
def _flatten_user_blocks(content: list) -> str:
    """Render a list of content blocks as newline-joined text.

    Text blocks contribute their "text" value; image blocks contribute an
    `<image:MEDIA_TYPE,SIZEB>` placeholder where SIZE is the length of the
    source "data" string (0 when it is not a string). Non-dict entries and
    other block types contribute nothing.
    """

    def _pieces():
        for blk in content:
            if not isinstance(blk, dict):
                continue
            kind = blk.get("type")
            if kind == "text":
                yield blk.get("text", "")
            elif kind == "image":
                src = blk.get("source") or {}
                media_type = src.get("media_type", "?")
                payload = src.get("data", "")
                size = len(payload) if isinstance(payload, str) else 0
                yield f"<image:{media_type},{size}B>"

    return "\n".join(_pieces())
|
|
329
|
+
|
|
330
|
+
|
|
331
|
+
def _pair_tool_results(turns: list[Turn]) -> None:
    """Fill in ToolResult.tool_name from the ToolUse with the same tool_use_id.

    Turns are walked once, in order; within a turn the tool uses are
    registered before its results are resolved. A result that already has a
    name, or has no id, is left untouched; an id that has not been seen yet
    resolves to "".
    """
    name_for_id: dict[str, str] = {}
    for turn in turns:
        name_for_id.update(
            (call.tool_use_id, call.tool_name) for call in turn.tool_uses if call.tool_use_id
        )
        for outcome in turn.tool_results:
            if not outcome.tool_use_id or outcome.tool_name:
                continue
            outcome.tool_name = name_for_id.get(outcome.tool_use_id, "")
|
|
341
|
+
|
|
342
|
+
|
|
343
|
+
def _most_common(values: list[str]) -> str | None:
    """Return the value that occurs most often, or None for an empty list.

    On a tie, the value that appeared first in `values` wins.
    """
    tally: dict[str, int] = {}
    for item in values:
        tally[item] = tally.get(item, 0) + 1

    winner = None
    top = 0
    # Strict ">" keeps the earliest-inserted value when counts are equal.
    for candidate, hits in tally.items():
        if hits > top:
            winner, top = candidate, hits
    return winner
|
|
351
|
+
|
|
352
|
+
|
|
353
|
+
def _collect_tool_names(turns: list[Turn], attachments: list[Attachment]) -> list[str]:
    """Return de-duplicated tool names in first-seen order.

    Names come first from the "addedNames" list of every attachment whose
    kind is "mcp_servers" (string entries only), then from the tool_name of
    every ToolUse in `turns` (non-empty names only).
    """

    def _candidates():
        # MCP names from attachments.
        for att in attachments:
            if att.kind == "mcp_servers":
                for entry in att.raw.get("addedNames", []) or []:
                    if isinstance(entry, str):
                        yield entry
        # Observed tool uses.
        for turn in turns:
            for call in turn.tool_uses:
                if call.tool_name:
                    yield call.tool_name

    # dict keys keep insertion order, which gives ordered de-duplication.
    return list(dict.fromkeys(_candidates()))
|
|
372
|
+
|
|
373
|
+
|
|
374
|
+
def _parse_subagents(
    parent_jsonl: Path,
    *,
    max_subagent_depth: int,
    depth: int,
    parent_session_id: str,
) -> tuple[list[SessionTrace], list[dict], list[ParserWarning]]:
    """Find and recursively parse `agent-*.jsonl` files under `<stem>/subagents`.

    Returns a `(subagents, parse_errors, depth_warnings)` triple. Children
    are parsed in sorted filename order with `parent_session_id` passed
    down; a child that raises ParserError is recorded in `parse_errors` as
    `{"path", "reason"}` instead of aborting. Once `depth` has reached
    `max_subagent_depth` nothing is parsed, and a single
    "max_subagent_depth" warning is returned if child files exist.
    """
    sub_dir = parent_jsonl.parent / parent_jsonl.stem / "subagents"

    if depth >= max_subagent_depth:
        if not (sub_dir.is_dir() and any(sub_dir.glob("agent-*.jsonl"))):
            return [], [], []
        limit_warning = ParserWarning(
            code="max_subagent_depth",
            detail=(
                f"depth {depth} reached at {sub_dir};"
                " raise max_subagent_depth to recurse deeper"
            ),
            path=parent_jsonl,
        )
        return [], [], [limit_warning]

    if not sub_dir.is_dir():
        return [], [], []

    parsed: list[SessionTrace] = []
    failures: list[dict] = []
    for child_path in sorted(sub_dir.glob("agent-*.jsonl")):
        try:
            child_trace = parse_session(
                child_path,
                max_subagent_depth=max_subagent_depth,
                _depth=depth + 1,
                _parent_session_id=parent_session_id,
            )
        except ParserError as exc:
            failures.append({"path": child_path, "reason": exc.reason})
        else:
            parsed.append(child_trace)
    return parsed, failures, []
|
|
425
|
+
|
|
426
|
+
|
|
427
|
+
def _link_subagents(
    turns: list[Turn],
    subagents: list[SessionTrace],
    warnings: list[ParserWarning],
    path: Path,
) -> None:
    """Connect parent Agent tool uses to their subagent traces, warning on orphans.

    A child is linked only when its `subagent_meta["tool_use_id"]` equals
    the `tool_use_id` of a parent ToolUse named "Agent" (spec §7, exact
    match; no fallback strategy exists). On a match the ToolUse's
    `subagent_session_id` is set to the child's session id.

    Warnings appended to `warnings`:
      - orphan_subagent_file: a child with no matching Agent tool use.
      - orphan_agent_call: an Agent tool use that no child matched.
    """
    # Index parent Agent tool_uses by tool_use_id.
    agent_calls: dict[str, ToolUse] = {
        call.tool_use_id: call
        for turn in turns
        for call in turn.tool_uses
        if call.tool_name == "Agent" and call.tool_use_id
    }

    claimed: set[str] = set()
    for child in subagents:
        link_id = (child.subagent_meta or {}).get("tool_use_id")
        if not link_id or link_id not in agent_calls:
            warnings.append(
                ParserWarning(
                    code="orphan_subagent_file",
                    detail=f"subagent {child.session_id} has no matching parent Agent tool_use",
                    path=path,
                )
            )
            continue
        agent_calls[link_id].subagent_session_id = child.session_id
        claimed.add(link_id)

    # Agent tool_uses that never got linked.
    for call_id in agent_calls:
        if call_id in claimed:
            continue
        warnings.append(
            ParserWarning(
                code="orphan_agent_call",
                detail=f"Agent tool_use {call_id} has no matching subagent file",
                path=path,
            )
        )
|
|
475
|
+
|
|
476
|
+
|
|
477
|
+
def _enumerate_raw_tool_result_files(jsonl_path: Path) -> list[RawToolResultFile]:
    """List the `*.txt` files in `<session-stem>/tool-results` with their sizes.

    Files are returned in sorted path order with `tool_use_id` unset; only
    `stat()` is called on them, their contents are not read. A missing
    directory yields an empty list.
    """
    tr_dir = jsonl_path.parent / jsonl_path.stem / "tool-results"
    found: list[RawToolResultFile] = []
    if tr_dir.is_dir():
        for txt in sorted(tr_dir.glob("*.txt")):
            found.append(
                RawToolResultFile(path=txt, size_bytes=txt.stat().st_size, tool_use_id=None)
            )
    return found
|
|
487
|
+
|
|
488
|
+
|
|
489
|
+
def _parse_system_line(raw: dict) -> Turn | None:
    """Build a Turn from a `type: "system"` line (compaction notices, model swaps).

    The text is taken from the top-level "content" field, falling back to
    "message"."content" and then to "". List content is flattened with
    `_flatten_user_blocks`; the final value is coerced with `str()`.
    """
    # "message" may be absent, null, or not an object; dict.get's default
    # only covers the absent case, so normalise the value before reading it.
    message = raw.get("message")
    if not isinstance(message, dict):
        message = {}

    text = raw.get("content") or message.get("content") or ""
    if isinstance(text, list):
        text = _flatten_user_blocks(text)
    return Turn(
        turn_number=0,
        uuid=raw.get("uuid", ""),
        parent_uuid=raw.get("parentUuid"),
        role="system",
        text=str(text),
        thinking="",
        tool_uses=[],
        tool_results=[],
        usage=None,
        model=None,
        stop_reason=None,
        timestamp=_parse_timestamp(raw.get("timestamp")),
        duration_ms=None,
        is_sidechain=bool(raw.get("isSidechain", False)),
    )
|
|
510
|
+
|
|
511
|
+
|
|
512
|
+
def _parse_attachment_line(raw: dict) -> Attachment | None:
    """Turn a `type: "attachment"` record into an Attachment.

    Returns None when the "attachment" payload is not a dict. The kind is
    derived from which keys the payload has, and payloads with no
    recognised key are kept with kind "other" rather than warned about.
    The timestamp is parsed only when the record has a non-empty one.
    """
    payload = raw.get("attachment")
    if isinstance(payload, dict):
        kind = _classify_attachment_shape(payload)
        stamp = raw.get("timestamp")
        return Attachment(
            kind=kind,
            raw=payload,
            content=_extract_attachment_content(kind, payload),
            timestamp=None if not stamp else _parse_timestamp(stamp),
            parent_uuid=raw.get("parentUuid"),
        )
    return None
|
|
534
|
+
|
|
535
|
+
|
|
536
|
+
def _classify_attachment_shape(payload: dict) -> str:
    """Name the attachment kind from the first marker key present in `payload`.

    Markers are checked in a fixed priority order; a payload with none of
    them is classified as "other".
    """
    markers = (
        ("hookEvent", "hook_output"),
        ("pendingMcpServers", "mcp_servers"),
        ("skillCount", "skills"),
        ("allowedTools", "allowed_tools"),
        ("itemCount", "items"),
    )
    return next((kind for key, kind in markers if key in payload), "other")
|
|
548
|
+
|
|
549
|
+
|
|
550
|
+
def _extract_attachment_content(kind: str, payload: dict) -> str | None:
    """Best-effort extraction of human-readable content from an attachment.

    For "hook_output": if stdout is a JSON object whose
    `hookSpecificOutput.additionalContext` is a non-empty string, that
    string is returned; otherwise the raw stdout is returned. Stdout that
    is not JSON, or is JSON of another shape (number, list, string, ...),
    is returned verbatim rather than raising.

    For "skills" and "items": the payload's "content" when it is a
    non-empty string.

    Returns None when nothing useful is present, including when stdout is
    missing, empty, or not a string.
    """
    if kind == "hook_output":
        stdout = payload.get("stdout")
        if not isinstance(stdout, str) or not stdout:
            return None
        try:
            parsed = json.loads(stdout)
        except json.JSONDecodeError:
            return stdout
        # Hook stdout is arbitrary program output: it may parse as JSON
        # without being an object, so check the shape before calling .get().
        if isinstance(parsed, dict):
            hook_specific = parsed.get("hookSpecificOutput")
            if isinstance(hook_specific, dict):
                extra = hook_specific.get("additionalContext")
                if isinstance(extra, str) and extra:
                    return extra
        return stdout

    if kind in ("skills", "items"):
        c = payload.get("content")
        return c if isinstance(c, str) and c else None

    return None
|
|
569
|
+
|
|
570
|
+
|
|
571
|
+
def _parse_assistant_line(raw: dict) -> Turn | None:
    """Turn a `type: "assistant"` JSONL record into a Turn.

    Content blocks are sorted into text, thinking and tool uses;
    server_tool_use / advisor_tool_result blocks are kept as inline
    `<type:id>` markers in the text. Usage, model and stop_reason are read
    from the message, and `error` is "api_error" when the record is flagged
    as an API error message.
    """
    message = raw.get("message") or {}

    texts: list[str] = []
    thoughts: list[str] = []
    calls: list[ToolUse] = []

    for blk in message.get("content") or []:
        if not isinstance(blk, dict):
            continue
        kind = blk.get("type")
        if kind == "text":
            texts.append(blk.get("text", ""))
        elif kind == "thinking":
            thoughts.append(blk.get("thinking", ""))
        elif kind == "tool_use":
            arguments = blk.get("input")
            calls.append(
                ToolUse(
                    tool_name=blk.get("name", ""),
                    tool_use_id=blk.get("id", ""),
                    tool_input=arguments if isinstance(arguments, dict) else {},
                )
            )
        elif kind == "server_tool_use" or kind == "advisor_tool_result":
            # Inline a marker so the text remains useful; structured handling deferred.
            texts.append(f"<{kind}:{blk.get('id', '')}>")

    return Turn(
        turn_number=0,
        uuid=raw.get("uuid", ""),
        parent_uuid=raw.get("parentUuid"),
        role="assistant",
        text="\n".join(texts),
        thinking="\n".join(thoughts),
        tool_uses=calls,
        tool_results=[],
        usage=_parse_usage(message.get("usage")),
        model=message.get("model"),
        stop_reason=message.get("stop_reason"),
        timestamp=_parse_timestamp(raw.get("timestamp")),
        duration_ms=None,
        is_sidechain=bool(raw.get("isSidechain", False)),
        error=("api_error" if raw.get("isApiErrorMessage") else None),
    )
|
|
617
|
+
|
|
618
|
+
|
|
619
|
+
def _parse_usage(raw: dict | None) -> Usage | None:
    """Convert a message.usage dict into a Usage, or None if it is not a dict.

    When a non-empty "iterations" list is present, token counts are summed
    across its entries (spec §5.2); otherwise the top-level fields are
    used. Missing counters default to 0. `service_tier` always comes from
    the top-level dict.
    """
    if not isinstance(raw, dict):
        return None

    iterations = raw.get("iterations")
    if isinstance(iterations, list) and iterations:

        def _total(field: str):
            # Sum one top-level counter across all iterations.
            return sum(entry.get(field, 0) for entry in iterations)

        def _cache_total(field: str):
            # Sum one nested cache_creation counter across all iterations.
            return sum((entry.get("cache_creation") or {}).get(field, 0) for entry in iterations)

        input_t = _total("input_tokens")
        output_t = _total("output_tokens")
        cache_read = _total("cache_read_input_tokens")
        cache_5m = _cache_total("ephemeral_5m_input_tokens")
        cache_1h = _cache_total("ephemeral_1h_input_tokens")
    else:
        input_t = raw.get("input_tokens", 0)
        output_t = raw.get("output_tokens", 0)
        cache_read = raw.get("cache_read_input_tokens", 0)
        creation = raw.get("cache_creation") or {}
        cache_5m = creation.get("ephemeral_5m_input_tokens", 0)
        cache_1h = creation.get("ephemeral_1h_input_tokens", 0)

    return Usage(
        input_tokens=input_t,
        output_tokens=output_t,
        cache_creation_5m=cache_5m,
        cache_creation_1h=cache_1h,
        cache_read=cache_read,
        service_tier=raw.get("service_tier"),
    )
|
|
657
|
+
|
|
658
|
+
|
|
659
|
+
def _parse_timestamp(value: str | None) -> datetime:
    """Parse an ISO 8601 timestamp, treating a trailing 'Z' as '+00:00'.

    A missing or empty value yields the Unix epoch in UTC; this fallback is
    meant for synthetic edge cases and should not be hit with real data.
    """
    if value:
        iso = f"{value[:-1]}+00:00" if value.endswith("Z") else value
        return datetime.fromisoformat(iso)
    return datetime.fromtimestamp(0, tz=timezone.utc)
|
|
667
|
+
|
|
668
|
+
|
|
669
|
+
def _resolve_jsonl_path(path: Path) -> Path:
    """Map an existing directory to its sibling `<name>.jsonl`; return any other path as-is."""
    if not path.is_dir():
        return path
    return path.parent / (path.name + ".jsonl")
|
|
673
|
+
|
|
674
|
+
|
|
675
|
+
def _decode_project_path(dir_name: str) -> str:
    """Turn a Claude Code project-directory name back into a filesystem path.

    The naming convention replaces every '/' of the original cwd with '-',
    so a name starting with '-' is decoded by turning each '-' back into
    '/'. Names that do not start with '-' are returned unchanged.

    The decoding is lossy: a genuine '-' in the original path cannot be
    told apart from a separator, so paths containing hyphens (e.g. dashed
    project names) come back wrong. Treat the result as a display-friendly
    fallback and prefer `SessionTrace.cwd`, observed from the line data,
    whenever an exact path is required.
    """
    if dir_name.startswith("-"):
        return "/".join(dir_name.split("-"))
    return dir_name
|
cctx/pricing.py
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
"""Shared token pricing — single source of truth for all cost calculations."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
# Fallback input price (USD per million tokens) for models with no known prefix.
_DEFAULT_PRICE_PER_MTOK = 3.0

# Input price in USD per million tokens, keyed by model-name prefix.
_INPUT_PRICE_PER_MTOK: dict[str, float] = {
    "claude-opus-4": 15.0,
    "claude-sonnet-4": 3.0,
    "claude-haiku-4": 0.8,
}


def price_per_tok(model: str | None) -> float:
    """Return the USD input price of a single token for `model`.

    The first table entry whose prefix the model name starts with is used;
    an unknown model or None falls back to the default price.
    """
    mtok = _DEFAULT_PRICE_PER_MTOK
    if model is not None:
        mtok = next(
            (
                price
                for prefix, price in _INPUT_PRICE_PER_MTOK.items()
                if model.startswith(prefix)
            ),
            _DEFAULT_PRICE_PER_MTOK,
        )
    return mtok / 1_000_000
|