scrollback 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- scrollback/__init__.py +8 -0
- scrollback/assets/icon-256.png +0 -0
- scrollback/assets/icon.icns +0 -0
- scrollback/cli.py +1139 -0
- scrollback/clipboard.py +34 -0
- scrollback/export.py +293 -0
- scrollback/fts.py +307 -0
- scrollback/highlight.py +128 -0
- scrollback/katexbundle.py +81 -0
- scrollback/launcher_install.py +209 -0
- scrollback/launchers/scrollback.bat +19 -0
- scrollback/launchers/scrollback.command +19 -0
- scrollback/launchers/scrollback.desktop +10 -0
- scrollback/launchers/scrollback.sh +12 -0
- scrollback/mathspan.py +180 -0
- scrollback/minimd.py +205 -0
- scrollback/models.py +135 -0
- scrollback/serialize.py +83 -0
- scrollback/serverconfig.py +66 -0
- scrollback/sources/__init__.py +6 -0
- scrollback/sources/aider.py +244 -0
- scrollback/sources/base.py +117 -0
- scrollback/sources/claudecode.py +631 -0
- scrollback/sources/codex.py +281 -0
- scrollback/sources/opencode.py +357 -0
- scrollback/sources/registry.py +39 -0
- scrollback/store.py +384 -0
- scrollback/termrender.py +170 -0
- scrollback/web/__init__.py +1 -0
- scrollback/web/app.py +359 -0
- scrollback/web/static/app.js +1245 -0
- scrollback/web/static/apple-touch-icon.png +0 -0
- scrollback/web/static/favicon.png +0 -0
- scrollback/web/static/favicon.svg +41 -0
- scrollback/web/static/index.html +75 -0
- scrollback/web/static/style.css +628 -0
- scrollback/web/static/vendor/highlight.min.js +1213 -0
- scrollback/web/static/vendor/hljs-dark.min.css +10 -0
- scrollback/web/static/vendor/hljs-light.min.css +10 -0
- scrollback/web/static/vendor/katex/fonts/KaTeX_AMS-Regular.woff2 +0 -0
- scrollback/web/static/vendor/katex/fonts/KaTeX_Caligraphic-Bold.woff2 +0 -0
- scrollback/web/static/vendor/katex/fonts/KaTeX_Caligraphic-Regular.woff2 +0 -0
- scrollback/web/static/vendor/katex/fonts/KaTeX_Fraktur-Bold.woff2 +0 -0
- scrollback/web/static/vendor/katex/fonts/KaTeX_Fraktur-Regular.woff2 +0 -0
- scrollback/web/static/vendor/katex/fonts/KaTeX_Main-Bold.woff2 +0 -0
- scrollback/web/static/vendor/katex/fonts/KaTeX_Main-BoldItalic.woff2 +0 -0
- scrollback/web/static/vendor/katex/fonts/KaTeX_Main-Italic.woff2 +0 -0
- scrollback/web/static/vendor/katex/fonts/KaTeX_Main-Regular.woff2 +0 -0
- scrollback/web/static/vendor/katex/fonts/KaTeX_Math-BoldItalic.woff2 +0 -0
- scrollback/web/static/vendor/katex/fonts/KaTeX_Math-Italic.woff2 +0 -0
- scrollback/web/static/vendor/katex/fonts/KaTeX_SansSerif-Bold.woff2 +0 -0
- scrollback/web/static/vendor/katex/fonts/KaTeX_SansSerif-Italic.woff2 +0 -0
- scrollback/web/static/vendor/katex/fonts/KaTeX_SansSerif-Regular.woff2 +0 -0
- scrollback/web/static/vendor/katex/fonts/KaTeX_Script-Regular.woff2 +0 -0
- scrollback/web/static/vendor/katex/fonts/KaTeX_Size1-Regular.woff2 +0 -0
- scrollback/web/static/vendor/katex/fonts/KaTeX_Size2-Regular.woff2 +0 -0
- scrollback/web/static/vendor/katex/fonts/KaTeX_Size3-Regular.woff2 +0 -0
- scrollback/web/static/vendor/katex/fonts/KaTeX_Size4-Regular.woff2 +0 -0
- scrollback/web/static/vendor/katex/fonts/KaTeX_Typewriter-Regular.woff2 +0 -0
- scrollback/web/static/vendor/katex/katex.min.css +1 -0
- scrollback/web/static/vendor/katex/katex.min.js +1 -0
- scrollback/web/static/vendor/marked.min.js +6 -0
- scrollback/web/static/vendor/purify.min.js +3 -0
- scrollback/webopen.py +96 -0
- scrollback-0.1.0.dist-info/METADATA +391 -0
- scrollback-0.1.0.dist-info/RECORD +69 -0
- scrollback-0.1.0.dist-info/WHEEL +4 -0
- scrollback-0.1.0.dist-info/entry_points.txt +4 -0
- scrollback-0.1.0.dist-info/licenses/LICENSE +21 -0
scrollback/clipboard.py
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
"""Cross-platform clipboard copy using only stdlib + OS utilities.
|
|
2
|
+
|
|
3
|
+
Falls back gracefully: on macOS uses `pbcopy`, on Linux `wl-copy`/`xclip`/
|
|
4
|
+
`xsel`, on Windows `clip`. Returns True on success.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import shutil
|
|
10
|
+
import subprocess
|
|
11
|
+
import sys
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def _candidates() -> list[list[str]]:
    """Return the clipboard commands to try on this platform, best first.

    Each entry is an argv list that reads the text to copy from stdin.
    """
    single_tool = {
        "darwin": [["pbcopy"]],
        "win32": [["clip"]],
    }
    # Everything else is treated as Linux / BSD: prefer Wayland, then X11 tools.
    unix_tools = [
        ["wl-copy"],
        ["xclip", "-selection", "clipboard"],
        ["xsel", "--clipboard", "--input"],
    ]
    return single_tool.get(sys.platform, unix_tools)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def copy(text: str) -> bool:
    """Copy `text` to the system clipboard. Returns True if it worked.

    Each command from `_candidates()` is tried in order. A tool that is not
    on PATH, cannot be started, or exits non-zero is skipped and the next one
    is tried; False is returned when none succeeds.
    """
    # Windows `clip` decodes stdin with the console code page unless the data
    # is UTF-16 with a BOM, so UTF-8 bytes would turn non-ASCII text into
    # mojibake. Python's "utf-16" codec emits the BOM for us.
    encoding = "utf-16" if sys.platform == "win32" else "utf-8"
    payload = text.encode(encoding)
    for cmd in _candidates():
        if shutil.which(cmd[0]) is None:
            continue
        try:
            proc = subprocess.run(cmd, input=payload, check=False)
        except OSError:
            # Tool vanished or is not executable: fall through to the next one.
            continue
        if proc.returncode == 0:
            return True
    return False
|
scrollback/export.py
ADDED
|
@@ -0,0 +1,293 @@
|
|
|
1
|
+
"""Render a Session to portable formats: markdown, json, html, text.
|
|
2
|
+
|
|
3
|
+
These are pure functions from a Session to a string, so they are trivial
|
|
4
|
+
to test and to reuse from both the CLI (export/copy) and the web app.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import html as _html
|
|
10
|
+
import json
|
|
11
|
+
from dataclasses import asdict
|
|
12
|
+
from datetime import datetime
|
|
13
|
+
|
|
14
|
+
from . import minimd
|
|
15
|
+
from .models import Message, Part, Session
|
|
16
|
+
|
|
17
|
+
# Human-readable heading for each known message role. Exporters look roles up
# with `_ROLE_LABEL.get(role, role)`, so a role missing here is shown verbatim.
_ROLE_LABEL = {
    "user": "User",
    "assistant": "Assistant",
    "system": "System",
    "tool": "Tool",
}
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def _fmt_dt(dt: datetime | None) -> str:
    """Format `dt` as ``YYYY-MM-DD HH:MM:SS TZ``, or ``"?"`` when it is missing."""
    if not dt:
        return "?"
    # %Z expands to an empty string for naive datetimes; trim the space it leaves.
    stamp = dt.strftime("%Y-%m-%d %H:%M:%S %Z")
    return stamp.strip()
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
# -- markdown --------------------------------------------------------------
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def to_markdown(session: Session, *, include_reasoning: bool = True,
                include_tools: bool = True, math: str = "raw") -> str:
    """Render `session` as a Markdown document.

    The output is a title, a bullet list of session metadata, a horizontal
    rule, then one section per message that has any rendered content.
    `math` is accepted for a uniform export signature but ignored: Markdown
    export is verbatim text, so delimited LaTeX is already preserved.
    """
    del math
    out: list[str] = [
        f"# {session.title}",
        "",
        f"- **Source**: {session.source}",
        f"- **Session**: `{session.id}`",
    ]
    # Optional metadata lines are emitted only when the value is set.
    optional_fields = (
        (session.directory, "- **Directory**: `{}`"),
        (session.model, "- **Model**: {}"),
        (session.agent, "- **Agent**: {}"),
    )
    out.extend(template.format(value) for value, template in optional_fields if value)
    out += [
        f"- **Created**: {_fmt_dt(session.created)}",
        f"- **Updated**: {_fmt_dt(session.updated)}",
        f"- **Messages**: {len(session.messages)}",
        "",
        "---",
        "",
    ]

    for message in session.messages:
        section = _markdown_message(message, include_reasoning, include_tools)
        if section:
            out += [section, ""]
    # Collapse trailing blank lines to exactly one final newline.
    return "\n".join(out).rstrip() + "\n"
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def _markdown_message(msg: Message, include_reasoning: bool, include_tools: bool) -> str:
    """Render one message as a Markdown section, or "" if it has no content.

    The section is a ``## <Role>`` heading (with the timestamp on a following
    line when the message has one) and then each non-empty rendered part,
    separated by blank lines.
    """
    heading = f"## {_ROLE_LABEL.get(msg.role, msg.role)}"
    stamp = _fmt_dt(msg.created)
    if stamp != "?":
        heading += f" \n*{stamp}*"

    rendered = (_markdown_part(p, include_reasoning, include_tools) for p in msg.parts)
    body = [chunk for chunk in rendered if chunk]
    # A heading on its own carries no information, so drop the whole message.
    if not body:
        return ""
    return "\n\n".join([heading, *body])
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def _markdown_part(part: Part, include_reasoning: bool, include_tools: bool) -> str:
    """Render a single message part as Markdown.

    * ``text`` parts are returned verbatim.
    * ``reasoning`` parts become a block quote labelled **reasoning**
      (omitted when `include_reasoning` is false or the part is empty).
    * ``tool`` parts become a bold label followed by a fenced code block
      (omitted when `include_tools` is false or the part is empty).
    * Any other part type renders as "".
    """
    if part.type == "text":
        return part.text
    if part.type == "reasoning":
        if not include_reasoning or not part.text:
            return ""
        quoted = "\n".join(f"> {ln}" for ln in part.text.splitlines())
        return "> **reasoning**\n>\n" + quoted
    if part.type == "tool":
        if not include_tools or not part.text:
            return ""
        label = part.tool_name or part.tool_status or "tool"
        status = f" ({part.tool_status})" if part.tool_status and part.tool_name else ""
        # Tool output often contains Markdown fences itself. A fence is only
        # closed by a backtick run at least as long as the opener, so use one
        # backtick more than the longest run in the text (minimum three).
        longest = run = 0
        for ch in part.text:
            run = run + 1 if ch == "`" else 0
            if run > longest:
                longest = run
        fence = "`" * max(3, longest + 1)
        return f"**tool: {label}{status}**\n\n{fence}\n{part.text}\n{fence}"
    return ""
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
# -- json ------------------------------------------------------------------
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def to_json(session: Session, *, indent: int = 2) -> str:
    """Serialize `session` to a JSON string.

    The dataclass is converted with `asdict`; the ``raw`` field is removed
    from the session, each message and each part to keep the export
    readable. Datetimes are written in ISO 8601 form and any other
    non-JSON value falls back to ``str()``.
    """

    def _encode(value: object) -> object:
        return value.isoformat() if isinstance(value, datetime) else str(value)

    data = asdict(session)
    # Drop bulky raw blobs at every level of the tree.
    data.pop("raw", None)
    for message in data.get("messages", []):
        message.pop("raw", None)
        for part in message.get("parts", []):
            part.pop("raw", None)
    return json.dumps(data, indent=indent, default=_encode, ensure_ascii=False)
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
# -- text ------------------------------------------------------------------
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
def to_text(session: Session, *, include_reasoning: bool = False,
            include_tools: bool = True, math: str = "raw") -> str:
    """Render `session` as plain text.

    Output is the title underlined with ``=``, then one ``--- ROLE ---``
    section per message that has content. Reasoning and tool parts are
    prefixed with ``[reasoning]`` / ``[tool:<label>]``. `math` is a no-op
    kept for a uniform export signature; LaTeX is preserved as-is.
    """
    del math
    title = session.title
    out = [title, "=" * len(title), ""]
    for message in session.messages:
        heading = _ROLE_LABEL.get(message.role, message.role).upper()
        pieces: list[str] = []
        for part in message.parts:
            # Every part kind is skipped when it carries no text.
            if not part.text:
                continue
            if part.type == "text":
                pieces.append(part.text)
            elif part.type == "reasoning":
                if include_reasoning:
                    pieces.append(f"[reasoning] {part.text}")
            elif part.type == "tool":
                if include_tools:
                    tag = part.tool_name or part.tool_status or "tool"
                    pieces.append(f"[tool:{tag}] {part.text}")
        if not pieces:
            continue
        out += [f"--- {heading} ---", "\n".join(pieces), ""]
    return "\n".join(out).rstrip() + "\n"
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
# -- html ------------------------------------------------------------------
|
|
143
|
+
|
|
144
|
+
_HTML_TEMPLATE = """<!doctype html>
|
|
145
|
+
<html lang="en"><head><meta charset="utf-8">
|
|
146
|
+
<meta name="viewport" content="width=device-width, initial-scale=1">
|
|
147
|
+
<title>{title}</title>
|
|
148
|
+
<style>
|
|
149
|
+
:root {{ color-scheme: light dark; }}
|
|
150
|
+
body {{ font: 15px/1.6 -apple-system, system-ui, sans-serif; max-width: 820px;
|
|
151
|
+
margin: 2rem auto; padding: 0 1rem; }}
|
|
152
|
+
.meta {{ color: #888; font-size: 13px; margin-bottom: 1.5rem; }}
|
|
153
|
+
.msg {{ border-radius: 10px; padding: .75rem 1rem; margin: .75rem 0; }}
|
|
154
|
+
.user {{ background: rgba(120,140,255,.12); }}
|
|
155
|
+
.assistant {{ background: rgba(140,140,140,.10); }}
|
|
156
|
+
.role {{ font-weight: 600; font-size: 12px; text-transform: uppercase;
|
|
157
|
+
letter-spacing: .05em; opacity: .7; }}
|
|
158
|
+
.reasoning {{ opacity: .65; font-style: italic; border-left: 3px solid #aaa;
|
|
159
|
+
padding-left: .75rem; margin: .5rem 0; }}
|
|
160
|
+
.tool {{ background: rgba(0,0,0,.06); border-radius: 6px; padding: .5rem .75rem;
|
|
161
|
+
margin: .5rem 0; }}
|
|
162
|
+
pre {{ white-space: pre-wrap; word-break: break-word; margin: .25rem 0; }}
|
|
163
|
+
.tool-name {{ font-size: 12px; font-weight: 600; opacity: .7; }}
|
|
164
|
+
/* rendered markdown */
|
|
165
|
+
.md > *:first-child {{ margin-top: 0; }}
|
|
166
|
+
.md > *:last-child {{ margin-bottom: 0; }}
|
|
167
|
+
.md h1, .md h2, .md h3, .md h4 {{ line-height: 1.3; margin: 1em 0 .4em; }}
|
|
168
|
+
.md h1 {{ font-size: 1.5em; }} .md h2 {{ font-size: 1.3em; }}
|
|
169
|
+
.md h3 {{ font-size: 1.12em; }} .md h4 {{ font-size: 1em; }}
|
|
170
|
+
.md p {{ margin: .5em 0; }}
|
|
171
|
+
.md ul, .md ol {{ margin: .4em 0; padding-left: 1.5em; }}
|
|
172
|
+
.md li {{ margin: .15em 0; }}
|
|
173
|
+
.md a {{ color: #2a6fb0; }}
|
|
174
|
+
.md blockquote {{ margin: .5em 0; padding: .15em 0 .15em 1em;
|
|
175
|
+
border-left: 3px solid #ccc; color: #777; }}
|
|
176
|
+
.md hr {{ border: none; border-top: 1px solid #ccc; margin: 1em 0; }}
|
|
177
|
+
.md code {{ font-family: ui-monospace, Menlo, Consolas, monospace; font-size: .88em;
|
|
178
|
+
background: rgba(127,127,127,.18); border-radius: 4px; padding: .1em .35em; }}
|
|
179
|
+
.md pre {{ background: rgba(127,127,127,.12); border: 1px solid rgba(127,127,127,.25);
|
|
180
|
+
border-radius: 8px; padding: .7rem .9rem; overflow-x: auto; }}
|
|
181
|
+
.md pre code {{ background: none; padding: 0; }}
|
|
182
|
+
.math-src {{ font-family: ui-monospace, Menlo, Consolas, monospace; font-size: .9em;
|
|
183
|
+
background: rgba(127,127,127,.18); border-radius: 4px; padding: .1em .35em; }}
|
|
184
|
+
.math-display {{ display: block; text-align: center; margin: .6em 0; overflow-x: auto; }}
|
|
185
|
+
{hl_css}
|
|
186
|
+
@media print {{
|
|
187
|
+
body {{ max-width: none; }}
|
|
188
|
+
.msg {{ break-inside: avoid; }}
|
|
189
|
+
.tool, .md pre {{ break-inside: avoid; }}
|
|
190
|
+
}}
|
|
191
|
+
</style>
|
|
192
|
+
{math_head}
|
|
193
|
+
</head><body>
|
|
194
|
+
<h1>{title}</h1>
|
|
195
|
+
<div class="meta">{meta}</div>
|
|
196
|
+
{body}
|
|
197
|
+
{math_body}
|
|
198
|
+
</body></html>
|
|
199
|
+
"""
|
|
200
|
+
|
|
201
|
+
|
|
202
|
+
def to_html(session: Session, *, include_reasoning: bool = True,
            include_tools: bool = True, math: str = "raw") -> str:
    """Render `session` as a standalone HTML page.

    The page is built from `_HTML_TEMPLATE`: an escaped title, a one-line
    metadata summary, and one block per message that has content. When
    `math` is ``"rendered"`` the KaTeX assets from `katexbundle` are
    embedded; otherwise no math assets are added.
    """
    facts = [f"source: {session.source}", f"id: {session.id}"]
    if session.directory:
        facts.append(f"dir: {session.directory}")
    if session.model:
        facts.append(f"model: {session.model}")
    facts += [
        f"created: {_fmt_dt(session.created)}",
        f"messages: {len(session.messages)}",
    ]
    meta_html = " · ".join(map(_html.escape, facts))

    blocks = [
        html_block
        for html_block in (
            _html_message(m, include_reasoning, include_tools, math)
            for m in session.messages
        )
        if html_block
    ]

    # Only `rendered` mode needs KaTeX; import it lazily so the other modes
    # never load the bundle.
    if math == "rendered":
        from . import katexbundle

        math_head = katexbundle.head_assets()
        math_body = katexbundle.autorender_script()
    else:
        math_head = math_body = ""

    return _HTML_TEMPLATE.format(
        title=_html.escape(session.title),
        meta=meta_html,
        body="\n".join(blocks),
        hl_css=minimd_highlight_css(),
        math_head=math_head,
        math_body=math_body,
    )
|
|
239
|
+
|
|
240
|
+
|
|
241
|
+
def minimd_highlight_css() -> str:
    """Return the syntax-highlighting stylesheet, `highlight.HL_CSS`.

    The `highlight` module is imported on first use rather than at module
    import time.
    """
    from .highlight import HL_CSS

    return HL_CSS
|
|
245
|
+
|
|
246
|
+
|
|
247
|
+
def _html_message(msg: Message, include_reasoning: bool, include_tools: bool,
                  math: str = "raw") -> str:
    """Render one message as an HTML ``<div class="msg ...">``, or "" if empty.

    Text parts go through `minimd.render`; reasoning and tool parts are
    HTML-escaped and wrapped in ``<pre>``. Roles other than user/assistant
    are styled with the ``assistant`` CSS class.
    """
    fragments: list[str] = []
    for part in msg.parts:
        # Parts without text never produce output, whatever their type.
        if not part.text:
            continue
        if part.type == "text":
            # Render markdown (stdlib-only) so the static export reads nicely.
            fragments.append(f'<div class="md">{minimd.render(part.text, math=math)}</div>')
        elif part.type == "reasoning":
            if include_reasoning:
                fragments.append(
                    f'<div class="reasoning"><pre>{_html.escape(part.text)}</pre></div>'
                )
        elif part.type == "tool":
            if include_tools:
                tool_label = _html.escape(part.tool_name or part.tool_status or "tool")
                fragments.append(
                    f'<div class="tool"><div class="tool-name">{tool_label}</div>'
                    f"<pre>{_html.escape(part.text)}</pre></div>"
                )
    if not fragments:
        return ""

    label = _ROLE_LABEL.get(msg.role, msg.role)
    css_class = msg.role if msg.role in ("user", "assistant") else "assistant"
    opening = f'<div class="msg {css_class}"><div class="role">{_html.escape(label)}</div>'
    return opening + "\n".join(fragments) + "</div>"
|
|
271
|
+
|
|
272
|
+
|
|
273
|
+
# Export format name -> renderer function. "md" and "txt" are aliases that map
# to the same callables as "markdown" and "text".
FORMATS = {
    "markdown": to_markdown,
    "md": to_markdown,
    "json": to_json,
    "html": to_html,
    "text": to_text,
    "txt": to_text,
}

# Math render modes for delimited-LaTeX spans (see minimd.render / katexbundle):
#   raw      -- verbatim source, shielded from the Markdown pass
#   latex    -- verbatim source, wrapped so it is never typeset (paste-ready)
#   rendered -- typeset with KaTeX (HTML export embeds KaTeX to do so offline)
MATH_MODES = ("raw", "latex", "rendered")
|
|
287
|
+
|
|
288
|
+
|
|
289
|
+
def render(session: Session, fmt: str, **kwargs: object) -> str:
    """Render `session` with the exporter registered under `fmt`.

    Extra keyword arguments are forwarded to the exporter unchanged.

    Raises:
        ValueError: if `fmt` is not a key of `FORMATS`.
    """
    if fmt not in FORMATS:
        raise ValueError(f"unknown format: {fmt!r}; choose from {sorted(set(FORMATS))}")
    exporter = FORMATS[fmt]
    return exporter(session, **kwargs)  # type: ignore[arg-type]
|
scrollback/fts.py
ADDED
|
@@ -0,0 +1,307 @@
|
|
|
1
|
+
"""Optional full-text search index (SQLite FTS5).
|
|
2
|
+
|
|
3
|
+
The default search path is a lexical scan over the live data (zero setup,
|
|
4
|
+
always correct, but O(corpus) per query). For large histories this builds
|
|
5
|
+
an opt-in inverted index in a *separate* cache database so queries are
|
|
6
|
+
near-instant. The source data stores are never touched for writing -- the
|
|
7
|
+
index is derived, disposable, and rebuilt from the read-only adapters.
|
|
8
|
+
|
|
9
|
+
Design
|
|
10
|
+
------
|
|
11
|
+
* Index DB lives at ``~/.cache/scrollback/index.db`` (override with
|
|
12
|
+
``SCROLLBACK_INDEX``). Deleting it just disables the fast path.
|
|
13
|
+
* ``parts`` is an FTS5 table holding each searchable part's text plus the
|
|
14
|
+
metadata needed to reconstruct a hit (source, session id, message id,
|
|
15
|
+
role, part type, tool name).
|
|
16
|
+
* ``synced`` records a per-session signature ``(updated_iso, message_count)``
|
|
17
|
+
so :func:`sync` only re-indexes new/changed sessions and prunes deleted
|
|
18
|
+
ones -- an incremental update, not a full rebuild.
|
|
19
|
+
|
|
20
|
+
Availability degrades gracefully: if FTS5 is missing, :func:`available`
|
|
21
|
+
returns False and callers fall back to the lexical scan.
|
|
22
|
+
"""
|
|
23
|
+
|
|
24
|
+
from __future__ import annotations
|
|
25
|
+
|
|
26
|
+
import os
|
|
27
|
+
import sqlite3
|
|
28
|
+
from collections.abc import Iterator
|
|
29
|
+
from dataclasses import dataclass
|
|
30
|
+
from pathlib import Path
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def default_index_path() -> Path:
    """Return where the search index database lives.

    A non-empty ``SCROLLBACK_INDEX`` environment variable wins (with ``~``
    expanded); otherwise the path is ``~/.cache/scrollback/index.db``.
    """
    custom = os.environ.get("SCROLLBACK_INDEX")
    if not custom:
        return Path.home().joinpath(".cache", "scrollback", "index.db")
    return Path(custom).expanduser()
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def fts5_available() -> bool:
    """True if this Python's SQLite was built with FTS5.

    Probes by creating a throwaway FTS5 virtual table in an in-memory
    database; an `sqlite3.OperationalError` means the module is missing.
    """
    try:
        probe = sqlite3.connect(":memory:")
    except sqlite3.OperationalError:
        return False
    try:
        probe.execute("CREATE VIRTUAL TABLE _t USING fts5(x)")
    except sqlite3.OperationalError:
        supported = False
    else:
        supported = True
    finally:
        probe.close()
    return supported
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
@dataclass(frozen=True, slots=True)
class IndexHit:
    """A raw FTS match -- enough to rebuild a SearchHit without re-scanning."""

    # Columns copied from the matching row of the `parts` FTS table.
    source: str
    session_id: str
    message_id: str
    role: str
    part_type: str
    tool_name: str | None
    # FtsIndex fills this with the FTS5 `snippet()` excerpt rather than the
    # full part text; matches are bracketed by '\x02' ... '\x03' markers.
    text: str
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
_SCHEMA = """
|
|
67
|
+
CREATE TABLE IF NOT EXISTS meta (
|
|
68
|
+
key TEXT PRIMARY KEY,
|
|
69
|
+
value TEXT
|
|
70
|
+
);
|
|
71
|
+
CREATE TABLE IF NOT EXISTS synced (
|
|
72
|
+
source TEXT NOT NULL,
|
|
73
|
+
session_id TEXT NOT NULL,
|
|
74
|
+
updated TEXT,
|
|
75
|
+
message_count INTEGER,
|
|
76
|
+
PRIMARY KEY (source, session_id)
|
|
77
|
+
);
|
|
78
|
+
CREATE VIRTUAL TABLE IF NOT EXISTS parts USING fts5(
|
|
79
|
+
source UNINDEXED,
|
|
80
|
+
session_id UNINDEXED,
|
|
81
|
+
message_id UNINDEXED,
|
|
82
|
+
role UNINDEXED,
|
|
83
|
+
part_type UNINDEXED,
|
|
84
|
+
tool_name UNINDEXED,
|
|
85
|
+
text,
|
|
86
|
+
tokenize = 'unicode61'
|
|
87
|
+
);
|
|
88
|
+
"""
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
class FtsIndex:
    """Read/write wrapper around the cache index database.

    The database at `path` holds three tables created from `_SCHEMA`:
    ``meta`` (key/value markers), ``synced`` (per-session signatures) and
    ``parts`` (the FTS5 table). Every method opens its own connection and
    closes it before returning, so no handle outlives a call.
    """

    def __init__(self, path: Path | None = None) -> None:
        """Use `path` for the index file, or `default_index_path()` if omitted."""
        self.path = path or default_index_path()

    # -- lifecycle ----------------------------------------------------------

    def exists(self) -> bool:
        """True if the index file is present on disk."""
        return self.path.is_file()

    def _connect(self, *, write: bool) -> sqlite3.Connection:
        """Open the index database; the caller is responsible for closing it.

        With ``write=True`` the parent directory and schema are created as
        needed. With ``write=False`` the file is opened read-only and the
        call raises `sqlite3.Error` if it does not exist.
        """
        if write:
            self.path.parent.mkdir(parents=True, exist_ok=True)
            conn = sqlite3.connect(self.path)
            try:
                conn.executescript(_SCHEMA)
            except sqlite3.Error:
                conn.close()
                raise
        else:
            # Build the URI with as_uri() so characters that are special in a
            # URI ('#', '?', '%', spaces) are percent-encoded; interpolating
            # the raw path would make SQLite open a different file.
            uri = self.path.resolve().as_uri() + "?mode=ro"
            conn = sqlite3.connect(uri, uri=True)
        conn.row_factory = sqlite3.Row
        return conn

    # -- sync ---------------------------------------------------------------

    def sync(self, store, *, progress=None) -> dict[str, int]:
        """Incrementally bring the index in line with `store`.

        Sessions whose ``(updated, message_count)`` signature is unchanged
        are skipped; new or changed sessions are re-indexed; sessions no
        longer listed by the store are pruned.

        Returns counts: {"added", "updated", "removed", "unchanged"}.
        `progress(done, total)` is called per session if provided.
        """
        stats = {"added": 0, "updated": 0, "removed": 0, "unchanged": 0}
        conn = self._connect(write=True)
        try:
            # `with conn` only scopes the transaction (commit on success,
            # rollback on error); it does not close the connection.
            with conn:
                # Current signatures already in the index.
                have = {
                    (r["source"], r["session_id"]): (r["updated"], r["message_count"])
                    for r in conn.execute(
                        "SELECT source, session_id, updated, message_count FROM synced"
                    )
                }
                # Live sessions (metadata only; cheap). Don't fold -- we want
                # every session, including subagents, individually indexed.
                live = store.list_sessions(fold_subagents=False)
                live_keys = {(s.source, s.id) for s in live}
                total = len(live)

                for i, meta in enumerate(live):
                    key = (meta.source, meta.id)
                    sig = (
                        meta.updated.isoformat() if meta.updated else None,
                        meta.message_count,
                    )
                    prev = have.get(key)
                    if prev == sig:
                        stats["unchanged"] += 1
                    else:
                        self._reindex_session(conn, store, meta, sig)
                        stats["added" if prev is None else "updated"] += 1
                    if progress:
                        progress(i + 1, total)

                # Prune sessions that no longer exist on disk.
                for key in set(have) - live_keys:
                    self._drop_session(conn, *key)
                    stats["removed"] += 1
                # Record a sync marker: the newest source mtime we've indexed,
                # so staleness can be checked cheaply later without re-listing.
                conn.execute(
                    "INSERT OR REPLACE INTO meta (key, value) VALUES ('source_mtime', ?)",
                    (str(_max_source_mtime(store)),),
                )
        finally:
            conn.close()
        return stats

    def is_stale(self, store) -> bool:
        """Cheap staleness check: True if any source file is newer than the
        newest mtime recorded at the last sync.

        Compares filesystem mtimes only (no full session enumeration), so it
        is fast enough to call before a search. Returns False when there is
        no index or it cannot be read, and True when the index has no usable
        marker.
        """
        if not self.exists():
            return False
        try:
            conn = self._connect(write=False)
            try:
                row = conn.execute(
                    "SELECT value FROM meta WHERE key = 'source_mtime'"
                ).fetchone()
            finally:
                conn.close()
        except sqlite3.Error:
            return False
        if row is None or row[0] is None:
            return True  # old index without a marker -> treat as stale
        try:
            recorded = float(row[0])
        except (TypeError, ValueError):
            return True
        return _max_source_mtime(store) > recorded + 1.0  # 1s slack

    def _drop_session(self, conn: sqlite3.Connection, source: str, sid: str) -> None:
        """Delete a session's indexed parts and its signature row."""
        conn.execute("DELETE FROM parts WHERE source = ? AND session_id = ?", (source, sid))
        conn.execute("DELETE FROM synced WHERE source = ? AND session_id = ?", (source, sid))

    def _reindex_session(self, conn, store, meta, sig) -> None:
        """Replace everything indexed for `meta`'s session and record `sig`.

        The signature is stored even when the session cannot be loaded, so
        the session is not retried until its signature changes.
        """
        self._drop_session(conn, meta.source, meta.id)
        # Pass source explicitly (not "source:id"): the selector form only
        # recognizes source prefixes registered in the global registry.
        sess = store.load_session(meta.id, source=meta.source)
        if sess is not None:
            rows = [
                (meta.source, meta.id, m.id, m.role, p.type, p.tool_name, p.text)
                for m in sess.messages
                for p in m.parts
                if p.text
            ]
            if rows:
                conn.executemany(
                    "INSERT INTO parts (source, session_id, message_id, role, "
                    "part_type, tool_name, text) VALUES (?, ?, ?, ?, ?, ?, ?)",
                    rows,
                )
        conn.execute(
            "INSERT OR REPLACE INTO synced (source, session_id, updated, message_count) "
            "VALUES (?, ?, ?, ?)",
            (meta.source, meta.id, sig[0], sig[1]),
        )

    # -- query --------------------------------------------------------------

    def search(self, query: str, *, limit: int | None = None,
               sources: list[str] | None = None) -> Iterator[IndexHit]:
        """Yield IndexHits for an FTS query, most-recently-indexed first.

        Ordering is by descending rowid: a session re-indexed after a change
        is appended, so recently-updated sessions surface first -- a close
        proxy for the lexical path's newest-session-first order (FTS5 does
        not store the session's update time to sort on exactly).

        Yields nothing when the index file is missing or `query` is blank.
        """
        if not self.exists() or not query.strip():
            return iter(())
        return self._search(query, limit, sources)

    def _search(self, query, limit, sources) -> Iterator[IndexHit]:
        """Generator behind `search`; the database is opened on first `next()`."""
        match = _to_match_query(query)
        sql = (
            "SELECT source, session_id, message_id, role, part_type, tool_name, "
            " snippet(parts, 6, '\x02', '\x03', '…', 16) AS snip "
            "FROM parts WHERE parts MATCH ?"
        )
        params: list[object] = [match]
        if sources:
            placeholders = ",".join("?" * len(sources))
            sql += f" AND source IN ({placeholders})"
            params.extend(sources)
        sql += " ORDER BY rowid DESC"
        if limit is not None:
            sql += " LIMIT ?"
            params.append(limit)
        conn = self._connect(write=False)
        try:
            for r in conn.execute(sql, params):
                yield IndexHit(
                    source=r["source"],
                    session_id=r["session_id"],
                    message_id=r["message_id"],
                    role=r["role"],
                    part_type=r["part_type"],
                    tool_name=r["tool_name"],
                    text=r["snip"],
                )
        finally:
            # Runs on exhaustion and also when the consumer abandons the
            # generator early, so the handle is always released.
            conn.close()

    def stats(self) -> dict[str, int]:
        """Return ``{"sessions": n, "parts": m}`` row counts (zeros if no index)."""
        if not self.exists():
            return {"sessions": 0, "parts": 0}
        conn = self._connect(write=False)
        try:
            sessions = conn.execute("SELECT COUNT(*) FROM synced").fetchone()[0]
            parts = conn.execute("SELECT COUNT(*) FROM parts").fetchone()[0]
        finally:
            conn.close()
        return {"sessions": sessions, "parts": parts}
|
|
269
|
+
|
|
270
|
+
|
|
271
|
+
def _max_source_mtime(store) -> float:
    """Return the newest mtime across all of `store`'s source locations.

    Each source's `location()` is inspected: a file contributes its own
    mtime, a directory contributes the newest mtime among the ``*.jsonl``
    files beneath it. Missing locations and unreadable entries are skipped.
    Returns 0.0 when nothing could be stat'ed.
    """
    latest = 0.0
    for source in getattr(store, "sources", []):
        where = source.location()
        if where is None:
            continue
        try:
            if where.is_file():
                latest = max(latest, where.stat().st_mtime)
                continue
            if not where.is_dir():
                continue
            for entry in where.rglob("*.jsonl"):
                try:
                    stamp = entry.stat().st_mtime
                except OSError:
                    # File disappeared or is unreadable mid-walk; ignore it.
                    continue
                latest = max(latest, stamp)
        except OSError:
            continue
    return latest
|
|
294
|
+
|
|
295
|
+
|
|
296
|
+
def _to_match_query(query: str) -> str:
    """Turn a user query into a safe FTS5 MATCH expression.

    The query is split on whitespace and every term is wrapped in double
    quotes (embedded quotes doubled), then the terms are joined with spaces
    so FTS5 ANDs them. Quoting keeps stray operator characters such as `*`
    or `:` from being parsed as FTS5 syntax. A blank query yields ``""``.
    """
    words = query.split()
    if not words:
        return '""'
    phrases = []
    for word in words:
        escaped = word.replace('"', '""')
        phrases.append(f'"{escaped}"')
    return " ".join(phrases)
|