scrollback 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- scrollback/__init__.py +8 -0
- scrollback/assets/icon-256.png +0 -0
- scrollback/assets/icon.icns +0 -0
- scrollback/cli.py +1139 -0
- scrollback/clipboard.py +34 -0
- scrollback/export.py +293 -0
- scrollback/fts.py +307 -0
- scrollback/highlight.py +128 -0
- scrollback/katexbundle.py +81 -0
- scrollback/launcher_install.py +209 -0
- scrollback/launchers/scrollback.bat +19 -0
- scrollback/launchers/scrollback.command +19 -0
- scrollback/launchers/scrollback.desktop +10 -0
- scrollback/launchers/scrollback.sh +12 -0
- scrollback/mathspan.py +180 -0
- scrollback/minimd.py +205 -0
- scrollback/models.py +135 -0
- scrollback/serialize.py +83 -0
- scrollback/serverconfig.py +66 -0
- scrollback/sources/__init__.py +6 -0
- scrollback/sources/aider.py +244 -0
- scrollback/sources/base.py +117 -0
- scrollback/sources/claudecode.py +631 -0
- scrollback/sources/codex.py +281 -0
- scrollback/sources/opencode.py +357 -0
- scrollback/sources/registry.py +39 -0
- scrollback/store.py +384 -0
- scrollback/termrender.py +170 -0
- scrollback/web/__init__.py +1 -0
- scrollback/web/app.py +359 -0
- scrollback/web/static/app.js +1245 -0
- scrollback/web/static/apple-touch-icon.png +0 -0
- scrollback/web/static/favicon.png +0 -0
- scrollback/web/static/favicon.svg +41 -0
- scrollback/web/static/index.html +75 -0
- scrollback/web/static/style.css +628 -0
- scrollback/web/static/vendor/highlight.min.js +1213 -0
- scrollback/web/static/vendor/hljs-dark.min.css +10 -0
- scrollback/web/static/vendor/hljs-light.min.css +10 -0
- scrollback/web/static/vendor/katex/fonts/KaTeX_AMS-Regular.woff2 +0 -0
- scrollback/web/static/vendor/katex/fonts/KaTeX_Caligraphic-Bold.woff2 +0 -0
- scrollback/web/static/vendor/katex/fonts/KaTeX_Caligraphic-Regular.woff2 +0 -0
- scrollback/web/static/vendor/katex/fonts/KaTeX_Fraktur-Bold.woff2 +0 -0
- scrollback/web/static/vendor/katex/fonts/KaTeX_Fraktur-Regular.woff2 +0 -0
- scrollback/web/static/vendor/katex/fonts/KaTeX_Main-Bold.woff2 +0 -0
- scrollback/web/static/vendor/katex/fonts/KaTeX_Main-BoldItalic.woff2 +0 -0
- scrollback/web/static/vendor/katex/fonts/KaTeX_Main-Italic.woff2 +0 -0
- scrollback/web/static/vendor/katex/fonts/KaTeX_Main-Regular.woff2 +0 -0
- scrollback/web/static/vendor/katex/fonts/KaTeX_Math-BoldItalic.woff2 +0 -0
- scrollback/web/static/vendor/katex/fonts/KaTeX_Math-Italic.woff2 +0 -0
- scrollback/web/static/vendor/katex/fonts/KaTeX_SansSerif-Bold.woff2 +0 -0
- scrollback/web/static/vendor/katex/fonts/KaTeX_SansSerif-Italic.woff2 +0 -0
- scrollback/web/static/vendor/katex/fonts/KaTeX_SansSerif-Regular.woff2 +0 -0
- scrollback/web/static/vendor/katex/fonts/KaTeX_Script-Regular.woff2 +0 -0
- scrollback/web/static/vendor/katex/fonts/KaTeX_Size1-Regular.woff2 +0 -0
- scrollback/web/static/vendor/katex/fonts/KaTeX_Size2-Regular.woff2 +0 -0
- scrollback/web/static/vendor/katex/fonts/KaTeX_Size3-Regular.woff2 +0 -0
- scrollback/web/static/vendor/katex/fonts/KaTeX_Size4-Regular.woff2 +0 -0
- scrollback/web/static/vendor/katex/fonts/KaTeX_Typewriter-Regular.woff2 +0 -0
- scrollback/web/static/vendor/katex/katex.min.css +1 -0
- scrollback/web/static/vendor/katex/katex.min.js +1 -0
- scrollback/web/static/vendor/marked.min.js +6 -0
- scrollback/web/static/vendor/purify.min.js +3 -0
- scrollback/webopen.py +96 -0
- scrollback-0.1.0.dist-info/METADATA +391 -0
- scrollback-0.1.0.dist-info/RECORD +69 -0
- scrollback-0.1.0.dist-info/WHEEL +4 -0
- scrollback-0.1.0.dist-info/entry_points.txt +4 -0
- scrollback-0.1.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,631 @@
|
|
|
1
|
+
"""Claude Code source adapter (read-only JSONL).
|
|
2
|
+
|
|
3
|
+
Claude Code stores one directory per project under ~/.claude/projects/,
|
|
4
|
+
each containing one `<session-uuid>.jsonl` file per session (plus, for
|
|
5
|
+
subagents, `<uuid>` directories / sidechain files). Each line is a JSON
|
|
6
|
+
object with a top-level `type`. The lines we care about have
|
|
7
|
+
`type in {"user", "assistant"}` and carry a `message` object whose
|
|
8
|
+
`content` is either a plain string or a list of typed blocks
|
|
9
|
+
(text / thinking / tool_use / tool_result).
|
|
10
|
+
|
|
11
|
+
All reads are read-only file reads; we never modify the JSONL files.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from __future__ import annotations
|
|
15
|
+
|
|
16
|
+
import json
|
|
17
|
+
import os
|
|
18
|
+
from collections.abc import Iterator
|
|
19
|
+
from pathlib import Path
|
|
20
|
+
from typing import Any
|
|
21
|
+
|
|
22
|
+
from ..models import Message, Part, Session, _to_dt
|
|
23
|
+
from .base import Source
|
|
24
|
+
|
|
25
|
+
_DEFAULT_ROOT = Path.home() / ".claude" / "projects"
|
|
26
|
+
|
|
27
|
+
# Separator embedding a subagent's agent id within a synthetic child session
|
|
28
|
+
# id: "<parent_uuid>::agent-<agentId>". Uses "::" so it never collides with
|
|
29
|
+
# the store's "source:id" selector parsing (which splits on a single ":").
|
|
30
|
+
_CHILD_SEP = "::"
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def _child_id(parent_id: str, agent_stem: str) -> str:
|
|
34
|
+
return f"{parent_id}{_CHILD_SEP}{agent_stem}"
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
# Cache for _scan_metadata results, keyed by (path, mtime_ns, size). The same
|
|
38
|
+
# transcript is scanned by list_sessions, then again when a session is loaded
|
|
39
|
+
# or searched; caching on the file's mtime+size avoids repeated full-file scans
|
|
40
|
+
# while staying correct (any change to the file invalidates the entry).
|
|
41
|
+
_META_CACHE: dict[str, tuple[tuple[float, int], dict[str, Any] | None]] = {}
|
|
42
|
+
_META_CACHE_MAX = 4096
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
# Cache of byte offsets for the content-bearing message lines of a transcript,
|
|
46
|
+
# keyed by (path, mtime_ns, size). Lets load_messages seek directly to the Nth
|
|
47
|
+
# message instead of re-scanning the file from the top for every page, turning
|
|
48
|
+
# K-page traversal from O(K*n) into O(n + K*page).
|
|
49
|
+
_OFFSET_CACHE: dict[str, tuple[tuple[int, int], list[int]]] = {}
|
|
50
|
+
_OFFSET_CACHE_MAX = 512
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def _content_line_offsets(path: Path) -> list[int]:
|
|
54
|
+
"""Byte offsets of each content-bearing user/assistant line in `path`.
|
|
55
|
+
|
|
56
|
+
"Content-bearing" matches load_messages' own filter (user/assistant,
|
|
57
|
+
not meta, with at least one renderable part). Built in a single pass and
|
|
58
|
+
cached on the file's mtime+size.
|
|
59
|
+
"""
|
|
60
|
+
try:
|
|
61
|
+
st = path.stat()
|
|
62
|
+
sig = (st.st_mtime_ns, st.st_size)
|
|
63
|
+
except OSError:
|
|
64
|
+
return []
|
|
65
|
+
key = str(path)
|
|
66
|
+
hit = _OFFSET_CACHE.get(key)
|
|
67
|
+
if hit is not None and hit[0] == sig:
|
|
68
|
+
return hit[1]
|
|
69
|
+
|
|
70
|
+
offsets: list[int] = []
|
|
71
|
+
try:
|
|
72
|
+
# Read in binary to track exact byte offsets; decode per line.
|
|
73
|
+
with path.open("rb") as fh:
|
|
74
|
+
pos = 0
|
|
75
|
+
for raw in fh:
|
|
76
|
+
line_start = pos
|
|
77
|
+
pos += len(raw)
|
|
78
|
+
line = raw.strip()
|
|
79
|
+
if not line:
|
|
80
|
+
continue
|
|
81
|
+
try:
|
|
82
|
+
obj = json.loads(line.decode("utf-8", "replace"))
|
|
83
|
+
except json.JSONDecodeError:
|
|
84
|
+
continue
|
|
85
|
+
if not isinstance(obj, dict):
|
|
86
|
+
continue
|
|
87
|
+
t = obj.get("type")
|
|
88
|
+
if t not in ("user", "assistant") or obj.get("isMeta"):
|
|
89
|
+
continue
|
|
90
|
+
m = obj.get("message")
|
|
91
|
+
if not isinstance(m, dict):
|
|
92
|
+
continue
|
|
93
|
+
if _content_to_parts("probe", m.get("content")):
|
|
94
|
+
offsets.append(line_start)
|
|
95
|
+
except OSError:
|
|
96
|
+
return []
|
|
97
|
+
|
|
98
|
+
if len(_OFFSET_CACHE) >= _OFFSET_CACHE_MAX:
|
|
99
|
+
_OFFSET_CACHE.clear()
|
|
100
|
+
_OFFSET_CACHE[key] = (sig, offsets)
|
|
101
|
+
return offsets
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def _read_line_at(path: Path, byte_offset: int) -> dict[str, Any] | None:
|
|
105
|
+
"""Parse the single JSONL record starting at `byte_offset`."""
|
|
106
|
+
try:
|
|
107
|
+
with path.open("rb") as fh:
|
|
108
|
+
fh.seek(byte_offset)
|
|
109
|
+
raw = fh.readline()
|
|
110
|
+
obj = json.loads(raw.decode("utf-8", "replace"))
|
|
111
|
+
return obj if isinstance(obj, dict) else None
|
|
112
|
+
except (OSError, json.JSONDecodeError):
|
|
113
|
+
return None
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def _cached_scan_metadata(path: Path) -> dict[str, Any] | None:
|
|
117
|
+
try:
|
|
118
|
+
st = path.stat()
|
|
119
|
+
sig = (st.st_mtime_ns, st.st_size)
|
|
120
|
+
except OSError:
|
|
121
|
+
return None
|
|
122
|
+
key = str(path)
|
|
123
|
+
hit = _META_CACHE.get(key)
|
|
124
|
+
if hit is not None and hit[0] == sig:
|
|
125
|
+
return hit[1]
|
|
126
|
+
result = _scan_metadata(path)
|
|
127
|
+
if len(_META_CACHE) >= _META_CACHE_MAX:
|
|
128
|
+
_META_CACHE.clear() # simple bound; correctness over LRU sophistication
|
|
129
|
+
_META_CACHE[key] = (sig, result)
|
|
130
|
+
return result
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def _read_meta_json(sub_path: Path) -> dict[str, Any]:
|
|
134
|
+
"""Read the sibling `<agent>.meta.json` (agentType, description)."""
|
|
135
|
+
meta_path = sub_path.with_suffix(".meta.json")
|
|
136
|
+
try:
|
|
137
|
+
with meta_path.open("r", encoding="utf-8") as fh:
|
|
138
|
+
data = json.load(fh)
|
|
139
|
+
return data if isinstance(data, dict) else {}
|
|
140
|
+
except (OSError, json.JSONDecodeError):
|
|
141
|
+
return {}
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
def _env_root() -> Path:
|
|
145
|
+
override = os.environ.get("SCROLLBACK_CLAUDE_DIR")
|
|
146
|
+
if override:
|
|
147
|
+
p = Path(override).expanduser()
|
|
148
|
+
# Accept either ~/.claude or ~/.claude/projects.
|
|
149
|
+
return p / "projects" if (p / "projects").is_dir() else p
|
|
150
|
+
return _DEFAULT_ROOT
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
class ClaudeCodeSource(Source):
    """Source adapter that reads Claude Code JSONL transcripts (read-only).

    Top-level `<uuid>.jsonl` files inside each project directory under the
    root are the primary sessions. `agent-*.jsonl` files found under
    `<uuid>/subagents/` are exposed as child sessions whose ids have the form
    `<parent id>::<agent file stem>`.
    """

    name = "claudecode"
    label = "Claude Code"

    def __init__(self, root: Path | None = None) -> None:
        # An explicit root wins; otherwise fall back to the environment
        # override / default resolved by _env_root().
        self._root = root or _env_root()

    def resume_command(self, session) -> str | None:
        """Return a shell command that resumes `session` in Claude Code.

        Subagent sidechains aren't separately resumable, so a child session
        resolves to its parent's id. When the session has a directory the
        command first changes into it.
        """
        # `claude --resume <id>` resumes a conversation (verified via --help).
        import shlex

        sid = session.parent_id or session.id
        if _CHILD_SEP in sid:
            sid = sid.split(_CHILD_SEP, 1)[0]
        # Quote the id like the directory: it derives from file names / file
        # content, and quoting is a no-op for ordinary UUID-style ids.
        cmd = f"claude --resume {shlex.quote(sid)}"
        if session.directory:
            return f"cd {shlex.quote(session.directory)} && {cmd}"
        return cmd

    # -- availability / location -------------------------------------------

    def is_available(self) -> bool:
        """True when the projects root exists and is a directory."""
        return self._root.is_dir()

    def location(self) -> Path | None:
        """Return the projects root, or None when it is not available."""
        return self._root if self.is_available() else None

    # -- discovery ----------------------------------------------------------

    def _session_files(self) -> Iterator[Path]:
        """Yield every top-level `*.jsonl` transcript, in sorted order."""
        # Top-level <uuid>.jsonl files are the primary sessions; nested
        # subagent sidechains are folded under their parent as children.
        for project_dir in sorted(self._root.iterdir()):
            if not project_dir.is_dir():
                continue
            yield from sorted(project_dir.glob("*.jsonl"))

    def _subagent_files(self, parent_path: Path) -> list[Path]:
        """Return the subagent transcript files for a parent session.

        Claude Code stores them in `<parent_dir>/<uuid>/subagents/agent-*.jsonl`
        (a sibling directory next to the `<uuid>.jsonl` transcript).
        """
        sub_dir = parent_path.with_suffix("") / "subagents"
        if not sub_dir.is_dir():
            return []
        return sorted(sub_dir.glob("agent-*.jsonl"))

    @staticmethod
    def _subagent_title(info: dict[str, Any], fallback: str) -> str:
        """Build a subagent title from its meta.json contents.

        Uses the `description` field (or `fallback` when it is missing or
        empty) and appends ` (@<agentType>)` when an agent type is present.
        """
        title = info.get("description") or fallback
        agent_type = info.get("agentType")
        if agent_type:
            title = f"{title} (@{agent_type})"
        return title

    # -- listing ------------------------------------------------------------

    def list_sessions(self) -> Iterator[Session]:
        """Iterate over all primary sessions (children attached), lazily."""
        if not self.is_available():
            return iter(())
        return self._list_sessions()

    def _list_sessions(self) -> Iterator[Session]:
        """Yield one metadata-only Session per readable transcript."""
        for f in self._session_files():
            meta = _cached_scan_metadata(f)
            if meta is None:
                # Unreadable file, or no JSON object lines at all.
                continue
            children = tuple(
                self._subagent_summary(f, sub) for sub in self._subagent_files(f)
            )
            yield Session(
                id=meta["session_id"],
                source=self.name,
                title=meta["title"],
                directory=meta["cwd"],
                created=_to_dt(meta["first_ts"]),
                updated=_to_dt(meta["last_ts"]),
                model=meta["model"],
                agent=None,
                parent_id=None,
                message_count=meta["msg_count"],
                children=children,
                raw={"path": str(f), "git_branch": meta["git_branch"]},
            )

    def _subagent_summary(self, parent_path: Path, sub_path: Path) -> Session:
        """Build a lightweight child Session for a subagent transcript."""
        parent_id = parent_path.stem
        agent_id = sub_path.stem  # e.g. "agent-a04011b25b0a152ee"
        info = _read_meta_json(sub_path)
        # The scan may return None (unreadable/empty transcript); fall back
        # to an empty mapping so every field below degrades to None / 0.
        sm = _cached_scan_metadata(sub_path) or {}
        return Session(
            id=_child_id(parent_id, agent_id),
            source=self.name,
            title=self._subagent_title(info, agent_id),
            directory=sm.get("cwd"),
            created=_to_dt(sm.get("first_ts")),
            updated=_to_dt(sm.get("last_ts")),
            model=sm.get("model"),
            agent=info.get("agentType"),
            parent_id=parent_id,
            message_count=sm.get("msg_count", 0),
            raw={"path": str(sub_path)},
        )

    # -- single session -----------------------------------------------------

    def load_session(self, session_id: str) -> Session | None:
        """Load a full session (with messages), or None when it is not found."""
        if not self.is_available():
            return None
        path = self._find_path(session_id)
        if path is None:
            return None
        if _CHILD_SEP in session_id:
            return _parse_session(
                path, self.name, override=self._child_override(session_id, path)
            )
        return _parse_session(path, self.name)

    def _child_override(self, child_id: str, sub_path: Path) -> dict[str, Any]:
        """Title/id/parent override so a loaded subagent keeps its child id."""
        # rsplit to match _find_path: agent segment is the tail.
        parent_id = child_id.rsplit(_CHILD_SEP, 1)[0]
        info = _read_meta_json(sub_path)
        return {
            "id": child_id,
            "title": self._subagent_title(info, sub_path.stem),
            "parent_id": parent_id,
            "agent": info.get("agentType"),
        }

    def resolve_session_id(self, selector: str) -> str | None:
        """Resolve a selector to a session id, handling child ids directly."""
        # Child (subagent) ids are self-describing; resolve directly.
        if _CHILD_SEP in selector and self._find_path(selector) is not None:
            return selector
        return super().resolve_session_id(selector)

    def _find_path(self, session_id: str) -> Path | None:
        """Locate the transcript file for a primary or child session id.

        A primary id matches a transcript whose stem equals it, or failing
        that the single transcript whose stem starts with it (an ambiguous
        prefix yields None). A child id is resolved relative to its parent's
        `subagents` directory.
        """
        # Subagent child id: "<parent>::agent-<id>" -> nested subagents file.
        # rsplit on the LAST separator: the agent segment is always the tail,
        # and this is robust even if a parent id ever contained the separator.
        if _CHILD_SEP in session_id:
            parent_id, agent_id = session_id.rsplit(_CHILD_SEP, 1)
            # Reject path-escaping agent ids before touching the filesystem.
            if not agent_id or "/" in agent_id or "\\" in agent_id or ".." in agent_id:
                return None
            parent = self._find_path(parent_id)
            if parent is None:
                return None
            sub_dir = (parent.with_suffix("") / "subagents").resolve()
            cand = (sub_dir / f"{agent_id}.jsonl").resolve()
            # Containment check: the resolved candidate must live inside the
            # parent's subagents directory (defends against traversal).
            try:
                cand.relative_to(sub_dir)
            except ValueError:
                return None
            return cand if cand.is_file() else None

        # One pass over the transcripts: an exact stem match returns
        # immediately; prefix matches are collected along the way so the
        # directory tree is not walked a second time.
        prefix_matches: list[Path] = []
        for f in self._session_files():
            if f.stem == session_id:
                return f
            if f.stem.startswith(session_id):
                prefix_matches.append(f)
        return prefix_matches[0] if len(prefix_matches) == 1 else None

    # -- windowed loading ---------------------------------------------------

    def load_session_meta(self, session_id: str) -> Session | None:
        """Load a session's metadata only (`messages` is left empty)."""
        if not self.is_available():
            return None
        path = self._find_path(session_id)
        if path is None:
            return None
        meta = _cached_scan_metadata(path)
        if meta is None:
            return None
        ovr = self._child_override(session_id, path) if _CHILD_SEP in session_id else {}
        return Session(
            id=ovr.get("id", meta["session_id"]),
            source=self.name,
            title=ovr.get("title", meta["title"]),
            directory=meta["cwd"],
            created=_to_dt(meta["first_ts"]),
            updated=_to_dt(meta["last_ts"]),
            model=meta["model"],
            agent=ovr.get("agent"),
            parent_id=ovr.get("parent_id"),
            message_count=meta["msg_count"],
            messages=(),
            raw={"path": str(path), "git_branch": meta["git_branch"]},
        )

    def load_messages(
        self, session_id: str, *, offset: int = 0, limit: int | None = None
    ) -> list[Message]:
        """Load a window of a session's content-bearing messages.

        `offset` and `limit` index into the list of content-bearing lines
        (as computed by `_content_line_offsets`); `limit=None` means "to the
        end". Returns an empty list when the source or session is missing.
        """
        if not self.is_available():
            return []
        path = self._find_path(session_id)
        if path is None:
            return []
        # Use the cached byte-offset index to seek directly to the requested
        # window instead of re-scanning from the top of the file each page.
        offsets = _content_line_offsets(path)
        window = offsets[offset:] if limit is None else offsets[offset : offset + limit]
        out: list[Message] = []
        for i, byte_off in enumerate(window):
            obj = _read_line_at(path, byte_off)
            if obj is None:
                continue
            m = obj.get("message", {})
            if not isinstance(m, dict):
                continue
            # Records without a uuid get a positional id: the message's index
            # among the content-bearing lines of the transcript.
            uuid = obj.get("uuid") or f"{path.stem}:{offset + i}"
            parts = _content_to_parts(uuid, m.get("content"))
            if not parts:
                continue
            out.append(
                Message(
                    id=uuid,
                    role=m.get("role", obj.get("type")),
                    created=_to_dt(obj.get("timestamp")),
                    parts=tuple(parts),
                    model=_clean_model(m.get("model")),
                    raw=obj,
                )
            )
        return out
|
|
378
|
+
|
|
379
|
+
|
|
380
|
+
# -- parsing helpers -------------------------------------------------------
|
|
381
|
+
|
|
382
|
+
|
|
383
|
+
def _iter_lines(path: Path) -> Iterator[dict[str, Any]]:
|
|
384
|
+
try:
|
|
385
|
+
# errors="replace": a single invalid UTF-8 byte must not abort the
|
|
386
|
+
# whole-file iteration (it would silently truncate a session).
|
|
387
|
+
with path.open("r", encoding="utf-8", errors="replace") as fh:
|
|
388
|
+
for line in fh:
|
|
389
|
+
line = line.strip()
|
|
390
|
+
if not line:
|
|
391
|
+
continue
|
|
392
|
+
try:
|
|
393
|
+
obj = json.loads(line)
|
|
394
|
+
except json.JSONDecodeError:
|
|
395
|
+
continue
|
|
396
|
+
if isinstance(obj, dict):
|
|
397
|
+
yield obj
|
|
398
|
+
except OSError:
|
|
399
|
+
return
|
|
400
|
+
|
|
401
|
+
|
|
402
|
+
def _scan_metadata(path: Path) -> dict[str, Any] | None:
    """Collect a transcript's summary fields in one pass over its records.

    Returns None when the file yields no JSON object at all. Otherwise
    returns a dict with the keys `session_id`, `cwd`, `git_branch`, `model`,
    `title`, `first_ts`, `last_ts` and `msg_count`. `msg_count` counts every
    record whose type is "user" or "assistant".
    """
    sid = path.stem
    workdir: str | None = None
    branch: str | None = None
    model_name: str | None = None
    ai_title: str | None = None
    earliest: str | None = None
    latest: str | None = None
    opening_prompt: str | None = None
    turns = 0
    saw_record = False

    for record in _iter_lines(path):
        saw_record = True
        kind = record.get("type")

        # The most recent non-empty sessionId wins; cwd and gitBranch keep
        # the first non-empty value seen.
        sid = record.get("sessionId") or sid
        if workdir is None:
            workdir = record.get("cwd") or None
        if branch is None:
            branch = record.get("gitBranch") or None

        if kind == "ai-title":
            # Claude Code writes the title under `aiTitle` (newer) and may
            # also use `title`; the last one in the file wins.
            ai_title = record.get("aiTitle") or record.get("title") or ai_title

        if kind not in ("user", "assistant"):
            continue

        turns += 1
        stamp = record.get("timestamp")
        if stamp:
            if earliest is None:
                earliest = stamp
            latest = stamp

        message = record.get("message", {})
        if not isinstance(message, dict):
            continue
        if model_name is None:
            candidate = message.get("model")
            # "<synthetic>" is a placeholder, not a real model name.
            if candidate and candidate != "<synthetic>":
                model_name = candidate
        if opening_prompt is None and kind == "user" and not record.get("isMeta"):
            opening_prompt = _first_text(message.get("content"))

    if not saw_record:
        return None
    if not ai_title:
        ai_title = _fallback_title(path, workdir, opening_prompt)
    return {
        "session_id": sid,
        "cwd": workdir,
        "git_branch": branch,
        "model": model_name,
        "title": ai_title,
        "first_ts": earliest,
        "last_ts": latest,
        "msg_count": turns,
    }
|
|
462
|
+
|
|
463
|
+
|
|
464
|
+
def _clean_model(model: Any) -> str | None:
|
|
465
|
+
"""Drop Claude Code's '<synthetic>' placeholder used on system turns."""
|
|
466
|
+
if not model or model == "<synthetic>":
|
|
467
|
+
return None
|
|
468
|
+
return model
|
|
469
|
+
|
|
470
|
+
|
|
471
|
+
def _first_text(content: Any) -> str | None:
|
|
472
|
+
"""Extract the first human-readable text from a message content field."""
|
|
473
|
+
if isinstance(content, str):
|
|
474
|
+
s = content.strip()
|
|
475
|
+
return s or None
|
|
476
|
+
if isinstance(content, list):
|
|
477
|
+
for block in content:
|
|
478
|
+
if isinstance(block, dict) and block.get("type") == "text":
|
|
479
|
+
s = (block.get("text") or "").strip()
|
|
480
|
+
if s:
|
|
481
|
+
return s
|
|
482
|
+
elif isinstance(block, str) and block.strip():
|
|
483
|
+
return block.strip()
|
|
484
|
+
return None
|
|
485
|
+
|
|
486
|
+
|
|
487
|
+
def _fallback_title(path: Path, cwd: str | None, first_user_text: str | None) -> str:
|
|
488
|
+
"""Build a readable title when the transcript has no ai-title.
|
|
489
|
+
|
|
490
|
+
Prefer the first user line (trimmed), prefixed by the project basename
|
|
491
|
+
for context; fall back to the directory basename, then the UUID prefix.
|
|
492
|
+
"""
|
|
493
|
+
project = ""
|
|
494
|
+
if cwd:
|
|
495
|
+
project = cwd.rstrip("/").split("/")[-1]
|
|
496
|
+
if first_user_text:
|
|
497
|
+
snippet = " ".join(first_user_text.split())
|
|
498
|
+
if len(snippet) > 60:
|
|
499
|
+
snippet = snippet[:57] + "..."
|
|
500
|
+
return f"{project}: {snippet}" if project else snippet
|
|
501
|
+
if project:
|
|
502
|
+
return project
|
|
503
|
+
return path.stem[:8]
|
|
504
|
+
|
|
505
|
+
|
|
506
|
+
def _parse_session(
    path: Path, source_name: str, *, override: dict[str, Any] | None = None
) -> Session:
    """Parse a whole transcript into a Session with all of its messages.

    Only "user"/"assistant" records that are not flagged `isMeta`, carry a
    dict `message`, and produce at least one Part become Messages.
    `override` may supply `id`, `title`, `agent` and `parent_id` (used for
    subagent child sessions); other fields come from the scanned metadata,
    with neutral defaults when the scan yields nothing.
    """
    override = override or {}
    meta = _cached_scan_metadata(path) or {
        "session_id": path.stem,
        "cwd": None,
        "git_branch": None,
        "model": None,
        "title": path.stem[:8],
        "first_ts": None,
        "last_ts": None,
        "msg_count": 0,
    }

    messages: list[Message] = []
    for obj in _iter_lines(path):
        t = obj.get("type")
        if t not in ("user", "assistant"):
            continue
        if obj.get("isMeta"):
            # Skip local-command caveats and similar meta turns.
            continue
        m = obj.get("message", {})
        if not isinstance(m, dict):
            continue
        # Fallback id for records without a uuid: the position this message
        # will take among the content-bearing messages. Using len(messages)
        # (rather than a counter that also advances on skipped, part-less
        # records) keeps the id identical to the one the windowed
        # load_messages path derives from its content-line index.
        uuid = obj.get("uuid") or f"{path.stem}:{len(messages)}"
        parts = _content_to_parts(uuid, m.get("content"))
        if not parts:
            continue
        messages.append(
            Message(
                id=uuid,
                role=m.get("role", t),
                created=_to_dt(obj.get("timestamp")),
                parts=tuple(parts),
                model=_clean_model(m.get("model")),
                raw=obj,
            )
        )

    return Session(
        id=override.get("id", meta["session_id"]),
        source=source_name,
        title=override.get("title", meta["title"]),
        directory=meta["cwd"],
        created=_to_dt(meta["first_ts"]),
        updated=_to_dt(meta["last_ts"]),
        model=meta["model"],
        agent=override.get("agent"),
        parent_id=override.get("parent_id"),
        message_count=len(messages),
        messages=tuple(messages),
        raw={"path": str(path), "git_branch": meta["git_branch"]},
    )
|
|
564
|
+
|
|
565
|
+
|
|
566
|
+
def _content_to_parts(msg_uuid: str, content: Any) -> list[Part]:
|
|
567
|
+
"""Normalize Claude Code message content into Parts."""
|
|
568
|
+
parts: list[Part] = []
|
|
569
|
+
if content is None:
|
|
570
|
+
return parts
|
|
571
|
+
if isinstance(content, str):
|
|
572
|
+
if content.strip():
|
|
573
|
+
parts.append(Part(id=f"{msg_uuid}:0", type="text", text=content))
|
|
574
|
+
return parts
|
|
575
|
+
if isinstance(content, list):
|
|
576
|
+
for i, block in enumerate(content):
|
|
577
|
+
part = _block_to_part(f"{msg_uuid}:{i}", block)
|
|
578
|
+
if part is not None:
|
|
579
|
+
parts.append(part)
|
|
580
|
+
return parts
|
|
581
|
+
|
|
582
|
+
|
|
583
|
+
def _block_to_part(pid: str, block: Any) -> Part | None:
|
|
584
|
+
if not isinstance(block, dict):
|
|
585
|
+
if isinstance(block, str) and block.strip():
|
|
586
|
+
return Part(id=pid, type="text", text=block)
|
|
587
|
+
return None
|
|
588
|
+
btype = block.get("type")
|
|
589
|
+
if btype == "text":
|
|
590
|
+
text = block.get("text", "")
|
|
591
|
+
return Part(id=pid, type="text", text=text, raw=block) if text else None
|
|
592
|
+
if btype == "thinking":
|
|
593
|
+
text = block.get("thinking", "")
|
|
594
|
+
return Part(id=pid, type="reasoning", text=text, raw=block) if text else None
|
|
595
|
+
if btype == "tool_use":
|
|
596
|
+
name = block.get("name")
|
|
597
|
+
inp = block.get("input")
|
|
598
|
+
text = f"$ {name} {json.dumps(inp, ensure_ascii=False)}" if inp is not None else f"$ {name}"
|
|
599
|
+
return Part(id=pid, type="tool", text=text, tool_name=name, tool_status="call", raw=block)
|
|
600
|
+
if btype == "tool_result":
|
|
601
|
+
content = block.get("content")
|
|
602
|
+
text = _stringify_tool_result(content)
|
|
603
|
+
is_err = bool(block.get("is_error"))
|
|
604
|
+
return Part(
|
|
605
|
+
id=pid,
|
|
606
|
+
type="tool",
|
|
607
|
+
text=("[error] " + text) if is_err else text,
|
|
608
|
+
tool_status="error" if is_err else "result",
|
|
609
|
+
raw=block,
|
|
610
|
+
)
|
|
611
|
+
if btype == "image":
|
|
612
|
+
return Part(id=pid, type="file", text="[image]", raw=block)
|
|
613
|
+
return Part(id=pid, type="unknown", raw=block)
|
|
614
|
+
|
|
615
|
+
|
|
616
|
+
def _stringify_tool_result(content: Any) -> str:
|
|
617
|
+
if content is None:
|
|
618
|
+
return ""
|
|
619
|
+
if isinstance(content, str):
|
|
620
|
+
return content
|
|
621
|
+
if isinstance(content, list):
|
|
622
|
+
out: list[str] = []
|
|
623
|
+
for b in content:
|
|
624
|
+
if isinstance(b, dict) and b.get("type") == "text":
|
|
625
|
+
out.append(b.get("text", ""))
|
|
626
|
+
elif isinstance(b, str):
|
|
627
|
+
out.append(b)
|
|
628
|
+
else:
|
|
629
|
+
out.append(json.dumps(b, ensure_ascii=False))
|
|
630
|
+
return "\n".join(out)
|
|
631
|
+
return json.dumps(content, ensure_ascii=False)
|