ragradar 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ragradar/__init__.py ADDED
@@ -0,0 +1,74 @@
1
+ """ragradar — the single public import surface.
2
+
3
+ Users only ever write ``import ragradar``: capture entry points
4
+ (capture/start and the staged proxies), evaluation entry points
5
+ (check/evaluate/available_metrics), and the schema dataclasses are all
6
+ re-exported here. The underlying distributions (ragradar-core,
7
+ ragradar-capture, ragradar-evaluate) stay separately installable so a
8
+ production pipeline can depend on ragradar-capture alone without
9
+ pulling the evaluation stack (scipy/ragas) — but importing their
10
+ modules directly is an internal concern, not the public API.
11
+ """
12
+
13
+ from ragradar_capture import (
14
+ Capture,
15
+ cache,
16
+ capture,
17
+ chunks,
18
+ commit,
19
+ context,
20
+ history,
21
+ response,
22
+ set_strict,
23
+ start,
24
+ tool_call,
25
+ )
26
+ from ragradar_core.schema import (
27
+ CacheEvent,
28
+ ChunkRecord,
29
+ RunRecord,
30
+ TokenBudget,
31
+ TokenUsage,
32
+ ToolCallRecord,
33
+ Turn,
34
+ )
35
+ from ragradar_evaluate import (
36
+ CheckResult,
37
+ EvalResult,
38
+ InputQualityPolicy,
39
+ MetricInfo,
40
+ available_metrics,
41
+ check,
42
+ evaluate,
43
+ )
44
+
45
# Names exported by ``from ragradar import *``. Each entry is re-exported
# from one of the imports above; the grouping comments mirror those imports.
__all__ = [
    # Capture
    "Capture",
    "start",
    "capture",
    "set_strict",
    "chunks",
    "context",
    "history",
    "response",
    "cache",
    "tool_call",
    "commit",
    # Evaluation
    "check",
    "evaluate",
    "available_metrics",
    "CheckResult",
    "EvalResult",
    "MetricInfo",
    "InputQualityPolicy",
    # Schema dataclasses (advanced path; primitives coerce everywhere)
    "ChunkRecord",
    "TokenBudget",
    "TokenUsage",
    "Turn",
    "CacheEvent",
    "ToolCallRecord",
    "RunRecord",
]
ragradar/cli.py ADDED
@@ -0,0 +1,231 @@
1
+ import re
2
+ from datetime import date
3
+
4
+ import click
5
+ from rich.console import Console
6
+ from rich.table import Table
7
+
8
+ from ragradar import store
9
+ from ragradar.explain import loader
10
+ from ragradar.explain.renderer import html as html_renderer
11
+ from ragradar.explain.renderer import terminal as terminal_renderer
12
+
13
# Shared Rich console; every command in this module prints through it.
console = Console()

# Session shorthand accepted on the command line: the letter "s" (any case)
# followed by digits, e.g. "s12". Group 1 captures the numeric part.
_SESSION_RE = re.compile(r"^s(\d+)$", re.IGNORECASE)
16
+
17
+
18
def _parse_session_id(value: str) -> int:
    """Convert a session reference typed on the command line to an int.

    Accepts the ``s<digits>`` shorthand (case-insensitive, e.g. ``"s12"``)
    as well as a bare integer string (e.g. ``"12"``). Surrounding
    whitespace is ignored.

    Args:
        value: The raw argument or option text.

    Returns:
        The numeric session id.

    Raises:
        click.BadParameter: If ``value`` is in neither form. Click turns
            this into a usage error instead of an unhandled ``ValueError``
            traceback.
    """
    text = value.strip()
    m = _SESSION_RE.match(text)
    if m:
        return int(m.group(1))
    try:
        return int(text)
    except ValueError:
        raise click.BadParameter(
            f"{value!r} is not a valid session id (expected e.g. 's12' or '12')"
        ) from None
23
+
24
+
25
def _disambiguate(results: list[dict]) -> dict | None:
    """Ask the user to pick one run out of several matches.

    Prints a numbered list of the candidate rows, prompts for a number and
    loads the chosen run through ``store.get_run``.

    Args:
        results: Candidate rows; each is read for ``session_id``,
            ``run_seq``, ``created_at``, ``query`` and (optionally)
            ``session_title``.

    Returns:
        The row returned by ``store.get_run`` for the chosen candidate, or
        ``None`` if the user cancels or enters something that is not a
        valid list number.
    """
    # Local import so this block does not depend on the file's import list.
    from rich.markup import escape

    console.print("\n [bold]Multiple matches:[/bold]\n")
    for i, r in enumerate(results, 1):
        # Titles and queries are free text; escape them so square brackets
        # are printed literally instead of being parsed as Rich markup.
        title = escape(r.get("session_title") or "")
        query_preview = escape(r["query"][:60])
        console.print(
            f" {i} s{r['session_id']} r{r['run_seq']} "
            f"{r['created_at'][:10]} {title} "
            f'— "{query_preview}"'
        )
    console.print()

    try:
        choice = click.prompt(
            " Pick (number) or press Enter to cancel",
            default="",
            show_default=False,
        )
    except (click.Abort, KeyboardInterrupt, EOFError):
        # click.prompt converts Ctrl-C / EOF into click.Abort; treat all of
        # them as "cancel" rather than aborting the whole command.
        console.print()
        return None

    if not choice:
        return None

    try:
        idx = int(choice) - 1
    except ValueError:
        idx = -1  # not a number: falls through to the range check below
    if not 0 <= idx < len(results):
        console.print(f"Invalid selection: {escape(str(choice))}")
        return None

    picked = results[idx]
    return store.get_run(picked["session_id"], picked["run_seq"])
51
+
52
+
53
def _resolve_and_load(target: str | None = None):
    """Resolve ``target`` to a single run row and load its record.

    ``store.resolve_target`` may return ``None`` (nothing found), a list
    (several candidates) or a single row. A list is handed to
    ``_disambiguate`` so the user can choose.

    Returns:
        ``(run_row, record)`` on success, or ``(None, None)`` when nothing
        was found or the user did not pick a candidate.
    """
    resolved = store.resolve_target(target)
    if resolved is None:
        console.print("No runs found.")
        return None, None

    run_row = _disambiguate(resolved) if isinstance(resolved, list) else resolved
    if run_row is None:
        return None, None

    return run_row, loader.load_run_record(run_row)
66
+
67
+
68
@click.group()
def main():
    """ragradar — analyst CLI for the ragradar observability system."""
    # Root command group: it has no behavior of its own. The commands in
    # this module attach themselves with @main.command() / @main.group().
71
+
72
+
73
@main.command("list")
@click.argument("session_id", required=False)
def list_cmd(session_id):
    """List sessions, or runs within a session."""
    # Local import so this block does not depend on the file's import list.
    from rich.markup import escape

    # Free-text cells (query, pipeline, title) are escaped before they go
    # into the table: Rich parses plain strings as console markup, so
    # square brackets in user text would otherwise be eaten or raise.
    if session_id is not None:
        sid = _parse_session_id(session_id)
        runs = store.list_runs(sid)
        if not runs:
            console.print(f"No runs found in session {sid}.")
            return
        tbl = Table(title=f"Session {sid} — Runs")
        tbl.add_column("Run", style="cyan")
        tbl.add_column("Date")
        tbl.add_column("Query")
        for r in runs:
            tbl.add_row(
                f"s{r['session_id']} r{r['run_seq']}",
                r["created_at"][:10],  # first 10 characters of the timestamp
                escape(r["query"][:80]),
            )
        console.print(tbl)
        return

    sessions = store.list_sessions()
    if not sessions:
        console.print("No sessions found.")
        return
    tbl = Table(title="Sessions")
    tbl.add_column("ID", style="cyan")
    tbl.add_column("Runs", justify="right")
    tbl.add_column("Pipeline")
    tbl.add_column("Created")
    tbl.add_column("Title")
    for s in sessions:
        tbl.add_row(
            f"s{s['session_id']}",
            str(s["run_count"]),
            escape(s["pipeline"] or ""),
            s["created_at"][:10],
            escape(s["title"] or ""),
        )
    console.print(tbl)
114
+
115
+
116
@main.command()
@click.argument("hint", required=False)
@click.option("--exact", is_flag=True)
@click.option("--from", "from_dt", default=None)
@click.option("--to", "to_dt", default=None)
@click.option("--today", is_flag=True)
@click.option("--session", "session_filter", default=None)
@click.option("--pipeline", default=None)
@click.option("--recent", default=None, type=int)
def find(hint, exact, from_dt, to_dt, today, session_filter, pipeline, recent):
    """Search runs by query text."""
    # Local import so this block does not depend on the file's import list.
    from rich.markup import escape

    if today:
        # --today only fills in bounds the user did not give explicitly.
        # NOTE(review): the date is the machine's local date while the upper
        # bound carries a "Z" (UTC) suffix — confirm which timezone the
        # stored created_at values use.
        today_str = date.today().isoformat()
        if from_dt is None:
            from_dt = today_str
        if to_dt is None:
            to_dt = today_str + "T23:59:59.999999Z"

    sid = None
    if session_filter is not None:
        sid = _parse_session_id(session_filter)

    results = store.search_runs(
        hint=hint,
        exact=exact,
        session_id=sid,
        pipeline=pipeline,
        from_dt=from_dt,
        to_dt=to_dt,
        recent_n=recent,
    )

    if not results:
        console.print("No matching runs found.")
        return

    tbl = Table(title=f"Search results ({len(results)})")
    tbl.add_column("Run", style="cyan")
    tbl.add_column("Date")
    tbl.add_column("Session")
    tbl.add_column("Query")
    for r in results:
        # Session titles and queries are free text; escape them so Rich
        # prints square brackets literally instead of parsing them as markup.
        tbl.add_row(
            f"s{r['session_id']} r{r['run_seq']}",
            r["created_at"][:10],
            escape(r.get("session_title") or ""),
            escape(r["query"][:80]),
        )
    console.print(tbl)
165
+
166
+
167
@main.command()
@click.argument("target", required=False)
@click.option("--full", is_flag=True)
@click.option("--html", "to_html", is_flag=True)
def explain(target, full, to_html):
    """Explain a run — all analysis factors."""
    run_row, record = _resolve_and_load(target)
    if record is None:
        # Nothing resolved (or the user cancelled); a message, if any, was
        # already printed by the helper.
        return

    if not to_html:
        terminal_renderer.render(record, full=full, run_row=run_row)
        return

    # HTML mode: write a report file and tell the user where it went.
    run_id = f"s{run_row['session_id']}r{run_row['run_seq']}"
    report_path = html_renderer.render(record, run_id)
    console.print(f"Report written to {report_path}")
183
+
184
+
185
@main.command()
@click.argument("target_a")
@click.argument("target_b")
def diff(target_a, target_b):
    """Compare two runs side by side."""
    # Resolve both targets up front, in argument order.
    rows = [store.resolve_target(t) for t in (target_a, target_b)]

    if any(row is None for row in rows):
        console.print("Could not resolve both targets.")
        return
    # A list result means the target matched several runs; unlike
    # `explain`, this command does not prompt and asks for an exact id.
    if any(isinstance(row, list) for row in rows):
        console.print("Ambiguous target — use exact run ID (e.g. s2r3).")
        return

    row_a, row_b = rows
    rec_a = loader.load_run_record(row_a)
    rec_b = loader.load_run_record(row_b)

    id_a = f"s{row_a['session_id']}r{row_a['run_seq']}"
    id_b = f"s{row_b['session_id']}r{row_b['run_seq']}"

    terminal_renderer.render_diff(rec_a, rec_b, id_a, id_b)
207
+
208
+
209
@main.command()
@click.argument("target")
def budget(target):
    """Token waterfall only."""
    # Only the loaded record is needed here; the row itself is unused.
    _row, record = _resolve_and_load(target)
    if record is not None:
        terminal_renderer.render_budget(record)
217
+
218
+
219
@main.group()
def session():
    """Session management commands."""
    # Sub-group of `main`; commands register on it with @session.command().
222
+
223
+
224
@session.command()
@click.argument("session_id")
@click.argument("title")
def rename(session_id, title):
    """Rename a session."""
    # Local import so this block does not depend on the file's import list.
    from rich.markup import escape

    sid = _parse_session_id(session_id)
    store.rename_session(sid, title)
    # The title is user-supplied free text; escape it so square brackets are
    # echoed literally instead of being interpreted as Rich markup.
    console.print(f'Session {sid} renamed to "{escape(title)}".')
File without changes
File without changes
@@ -0,0 +1,19 @@
1
+ from ragradar_core.schema import RunRecord
2
+
3
+
4
def analyze(record: RunRecord) -> dict | None:
    """Summarise the cache events of a run.

    Returns:
        ``None`` when the record has no cache events; otherwise a dict with
        the event count, hit and miss counts, the hit ratio, and the chunk
        ids of the hit and miss events (in event order).
    """
    events = record.cache_events
    if not events:
        return None

    # Split the events' chunk ids into hits and misses in one pass.
    hit_ids: list = []
    miss_ids: list = []
    for event in events:
        bucket = hit_ids if event.hit else miss_ids
        bucket.append(event.chunk_id)

    total = len(events)
    return {
        "total_events": total,
        "hits": len(hit_ids),
        "misses": len(miss_ids),
        "hit_ratio": len(hit_ids) / total if total else 0.0,
        "hit_chunks": hit_ids,
        "miss_chunks": miss_ids,
    }
@@ -0,0 +1,49 @@
1
+ from ragradar_core.schema import RunRecord
2
+
3
+
4
+ def analyze(record: RunRecord) -> dict | None:
5
+ if not record.chunks:
6
+ return None
7
+
8
+ # PATH DUP: same chunk_id appears multiple times with different retrieval_path
9
+ chunk_paths: dict[str, list[str]] = {}
10
+ for c in record.chunks:
11
+ chunk_paths.setdefault(c.chunk_id, [])
12
+ if c.retrieval_path:
13
+ chunk_paths[c.chunk_id].append(c.retrieval_path)
14
+
15
+ path_dups = [
16
+ {"chunk_id": cid, "paths": paths} for cid, paths in chunk_paths.items() if len(paths) > 1
17
+ ]
18
+
19
+ # WINDOW DUP: same source_doc_id, overlapping content (substring check)
20
+ by_source: dict[str, list] = {}
21
+ for c in record.chunks:
22
+ by_source.setdefault(c.source_doc_id, []).append(c)
23
+
24
+ window_dups = []
25
+ for source_id, chunks in by_source.items():
26
+ if len(chunks) < 2:
27
+ continue
28
+ dup_ids: set[str] = set()
29
+ for i, a in enumerate(chunks):
30
+ for b in chunks[i + 1 :]:
31
+ if a.content in b.content or b.content in a.content:
32
+ dup_ids.add(a.chunk_id)
33
+ dup_ids.add(b.chunk_id)
34
+ if dup_ids:
35
+ window_dups.append({"chunk_ids": sorted(dup_ids), "source_doc_id": source_id})
36
+
37
+ dup_chunk_ids: set[str] = set()
38
+ for d in path_dups:
39
+ dup_chunk_ids.add(d["chunk_id"])
40
+ for d in window_dups:
41
+ dup_chunk_ids.update(d["chunk_ids"])
42
+
43
+ total = len(record.chunks)
44
+ return {
45
+ "path_dups": path_dups,
46
+ "window_dups": window_dups,
47
+ "semantic_dups": [],
48
+ "duplicate_ratio": len(dup_chunk_ids) / total if total else 0.0,
49
+ }
@@ -0,0 +1,25 @@
1
+ from ragradar_core.schema import RunRecord
2
+
3
+
4
+ def analyze(record: RunRecord) -> dict | None:
5
+ if not record.history_pre and not record.history_post:
6
+ return None
7
+
8
+ pre = record.history_pre or []
9
+ post = record.history_post or []
10
+
11
+ pre_vals = [t.tokens for t in pre if t.tokens is not None]
12
+ post_vals = [t.tokens for t in post if t.tokens is not None]
13
+
14
+ post_contents = {(t.role, t.content) for t in post}
15
+ dropped = [t for t in pre if (t.role, t.content) not in post_contents]
16
+
17
+ return {
18
+ "pre_turn_count": len(pre),
19
+ "post_turn_count": len(post),
20
+ "dropped_turn_count": len(dropped),
21
+ "eviction_reason": record.eviction_reason,
22
+ "dropped_turns": dropped,
23
+ "pre_tokens": sum(pre_vals) if pre_vals else None,
24
+ "post_tokens": sum(post_vals) if post_vals else None,
25
+ }
@@ -0,0 +1,38 @@
1
+ from ragradar_core.schema import RunRecord
2
+
3
+
4
+ def analyze(record: RunRecord) -> dict | None:
5
+ if not record.chunks:
6
+ return None
7
+
8
+ retrieval_scores = [c.retrieval_score for c in record.chunks if c.retrieval_score is not None]
9
+ rerank_scores = [c.rerank_score for c in record.chunks if c.rerank_score is not None]
10
+
11
+ if not retrieval_scores and not rerank_scores:
12
+ return None
13
+
14
+ top_retrieval = max(retrieval_scores) if retrieval_scores else None
15
+ bottom_retrieval = min(retrieval_scores) if retrieval_scores else None
16
+ top_rerank = max(rerank_scores) if rerank_scores else None
17
+ bottom_rerank = min(rerank_scores) if rerank_scores else None
18
+
19
+ rerank_delta = None
20
+ if retrieval_scores and rerank_scores:
21
+ mean_rerank = sum(rerank_scores) / len(rerank_scores)
22
+ mean_retrieval = sum(retrieval_scores) / len(retrieval_scores)
23
+ rerank_delta = round(mean_rerank - mean_retrieval, 4)
24
+
25
+ total = len(record.chunks)
26
+ low_rerank = [s for s in rerank_scores if s < 0.5]
27
+ low_score_ratio = round(len(low_rerank) / total, 4) if total else 0.0
28
+
29
+ return {
30
+ "retrieval_scores": retrieval_scores,
31
+ "rerank_scores": rerank_scores,
32
+ "top_retrieval": top_retrieval,
33
+ "top_rerank": top_rerank,
34
+ "bottom_retrieval": bottom_retrieval,
35
+ "bottom_rerank": bottom_rerank,
36
+ "rerank_delta": rerank_delta,
37
+ "low_score_ratio": low_score_ratio,
38
+ }
@@ -0,0 +1,36 @@
1
+ from ragradar_core.schema import RunRecord
2
+
3
+
4
+ def analyze(record: RunRecord) -> dict | None:
5
+ if not record.chunks and not record.final_prompt:
6
+ return None
7
+
8
+ chunks_tokens = sum(c.token_count for c in (record.chunks or []))
9
+
10
+ history_tokens = sum(t.tokens or 0 for t in (record.history_post or []))
11
+
12
+ system_tokens = 0
13
+ headroom = 0
14
+ model_limit = None
15
+ if record.token_budget:
16
+ system_tokens = record.token_budget.system_allocated
17
+ headroom = record.token_budget.headroom
18
+ model_limit = record.token_budget.total_limit
19
+
20
+ total = chunks_tokens + history_tokens + system_tokens
21
+ utilisation = (total / model_limit * 100) if model_limit else 0.0
22
+
23
+ per_chunk = [
24
+ {"chunk_id": c.chunk_id, "token_count": c.token_count} for c in (record.chunks or [])
25
+ ]
26
+
27
+ return {
28
+ "total_tokens": total,
29
+ "chunks_tokens": chunks_tokens,
30
+ "history_tokens": history_tokens,
31
+ "system_tokens": system_tokens,
32
+ "headroom": headroom,
33
+ "model_limit": model_limit,
34
+ "utilisation_pct": round(utilisation, 1),
35
+ "per_chunk": per_chunk,
36
+ }
@@ -0,0 +1,32 @@
1
+ from ragradar_core.schema import RunRecord
2
+
3
+
4
+ def analyze(record: RunRecord) -> dict | None:
5
+ if not record.chunks:
6
+ return None
7
+
8
+ truncated = [c for c in record.chunks if c.truncated]
9
+ high_score = [
10
+ c for c in truncated if (c.retrieval_score or 0) > 0.7 or (c.rerank_score or 0) > 0.7
11
+ ]
12
+
13
+ if not truncated:
14
+ severity = "none"
15
+ elif high_score:
16
+ severity = "high"
17
+ else:
18
+ severity = "low"
19
+
20
+ return {
21
+ "truncated_count": len(truncated),
22
+ "truncated_chunks": [
23
+ {
24
+ "chunk_id": c.chunk_id,
25
+ "score": c.retrieval_score,
26
+ "rerank_score": c.rerank_score,
27
+ }
28
+ for c in truncated
29
+ ],
30
+ "high_score_truncations": len(high_score),
31
+ "severity": severity,
32
+ }
@@ -0,0 +1,8 @@
1
+ import json
2
+
3
+ from ragradar_core.schema import RunRecord
4
+
5
+
6
def load_run_record(run_row: dict) -> RunRecord:
    """Build a ``RunRecord`` from a stored run row.

    The row's ``"run_data"`` value is parsed as JSON text and the result
    is handed to ``RunRecord.from_json``.
    """
    return RunRecord.from_json(json.loads(run_row["run_data"]))
File without changes
@@ -0,0 +1,152 @@
1
+ from pathlib import Path
2
+
3
+ from ragradar_core import store as _core_store
4
+ from ragradar_core.schema import RunRecord
5
+
6
+ from ragradar.explain.analyzers import (
7
+ cache as cache_mod,
8
+ )
9
+ from ragradar.explain.analyzers import (
10
+ duplicates as duplicates_mod,
11
+ )
12
+ from ragradar.explain.analyzers import (
13
+ history as history_mod,
14
+ )
15
+ from ragradar.explain.analyzers import (
16
+ scores as scores_mod,
17
+ )
18
+ from ragradar.explain.analyzers import (
19
+ tokens as tokens_mod,
20
+ )
21
+ from ragradar.explain.analyzers import (
22
+ truncation as truncation_mod,
23
+ )
24
+
25
+
26
+ def _esc(text: str) -> str:
27
+ return (
28
+ text.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;").replace('"', "&quot;")
29
+ )
30
+
31
+
32
def _section(title: str, content: str) -> str:
    """Return one report section: an open ``<details>`` element whose
    summary is ``title`` and whose body is ``content`` inside ``<pre>``.

    Both values are HTML-escaped with ``_esc`` before insertion.
    """
    heading = _esc(title)
    body = _esc(content)
    summary_line = f"<details open>\n <summary>{heading}</summary>\n"
    pre_line = f" <pre>{body}</pre>\n</details>\n"
    return summary_line + pre_line
37
+
38
+
39
def _tokens_body(tok: dict) -> str:
    """Format the token analyzer result as the "Token Usage" section text."""
    parts = [f"Total: {tok['total_tokens']}"]
    if tok["model_limit"]:
        parts.append(f" / {tok['model_limit']} ({tok['utilisation_pct']}%)")
    parts.append(f"\nChunks: {tok['chunks_tokens']}")
    parts.append(f"\nHistory: {tok['history_tokens']}")
    parts.append(f"\nSystem: {tok['system_tokens']}")
    parts.append(f"\nHeadroom: {tok['headroom']}")
    if tok["per_chunk"]:
        parts.append("\n\nPer chunk:")
        parts.extend(f"\n {pc['chunk_id']}: {pc['token_count']}" for pc in tok["per_chunk"])
    return "".join(parts)


def _scores_body(sc: dict) -> str:
    """Format the score analyzer result as the "Chunk Scores" section text."""
    parts = []
    if sc["retrieval_scores"]:
        parts.append(f"Retrieval: {sc['bottom_retrieval']:.2f}–{sc['top_retrieval']:.2f}\n")
    if sc["rerank_scores"]:
        parts.append(f"Rerank: {sc['bottom_rerank']:.2f}–{sc['top_rerank']:.2f}\n")
    if sc["rerank_delta"] is not None:
        parts.append(f"Rerank delta: {sc['rerank_delta']:+.4f}\n")
    parts.append(f"Low-score ratio: {sc['low_score_ratio']:.0%}")
    return "".join(parts)


def _duplicates_body(dup: dict) -> str:
    """Format the duplicate analyzer result as the "Duplicate Chunks" text."""
    parts = [
        f"Path dups: {len(dup['path_dups'])}\n",
        f"Window dups: {len(dup['window_dups'])}\n",
        f"Duplicate ratio: {dup['duplicate_ratio']:.0%}",
    ]
    for d in dup["path_dups"]:
        parts.append(f"\n [PATH DUP] {d['chunk_id']} via {', '.join(d['paths'])}")
    for d in dup["window_dups"]:
        ids = ", ".join(d["chunk_ids"])
        parts.append(f"\n [WINDOW DUP] {ids} (source: {d['source_doc_id']})")
    return "".join(parts)


def _truncation_body(tr: dict) -> str:
    """Format the truncation analyzer result as the "Truncation" text."""
    parts = [
        f"Truncated: {tr['truncated_count']} chunks\n",
        f"High-score truncations: {tr['high_score_truncations']}\n",
        f"Severity: {tr['severity']}",
    ]
    for tc in tr["truncated_chunks"]:
        parts.append(f"\n {tc['chunk_id']}: retrieval={tc['score']}, rerank={tc['rerank_score']}")
    return "".join(parts)


def _history_body(hist: dict) -> str:
    """Format the history analyzer result as the "Dropped History" text."""
    parts = [
        f"Turns: {hist['pre_turn_count']} -> {hist['post_turn_count']} "
        f"({hist['dropped_turn_count']} dropped)\n"
    ]
    if hist["eviction_reason"]:
        parts.append(f"Reason: {hist['eviction_reason']}\n")
    if hist["pre_tokens"] is not None:
        parts.append(f"Pre tokens: {hist['pre_tokens']}\n")
    if hist["post_tokens"] is not None:
        parts.append(f"Post tokens: {hist['post_tokens']}\n")
    for t in hist["dropped_turns"]:
        parts.append(f"\n [{t.role}] {t.content}")
    return "".join(parts)


def _cache_body(ca: dict) -> str:
    """Format the cache analyzer result as the "Cache Hits" section text."""
    parts = [f"Hits: {ca['hits']}/{ca['total_events']} ({ca['hit_ratio']:.0%})\n"]
    if ca["hit_chunks"]:
        parts.append(f"Hit chunks: {', '.join(ca['hit_chunks'])}\n")
    if ca["miss_chunks"]:
        parts.append(f"Miss chunks: {', '.join(ca['miss_chunks'])}")
    return "".join(parts)


def render(record: RunRecord, run_id: str) -> Path:
    """Write an HTML report for ``record`` and return the file's path.

    The report is written to ``<ragradar dir>/reports/<run_id>.html``; the
    directory is created if needed. Each analyzer that returns a result
    contributes one collapsible section, followed by the final prompt when
    the record has one. All dynamic text is HTML-escaped.

    Args:
        record: The run to report on.
        run_id: Identifier used for the file name and the page heading.

    Returns:
        Path of the written HTML file.
    """
    reports_dir = _core_store._ragradar_dir() / "reports"
    reports_dir.mkdir(parents=True, exist_ok=True)
    out = reports_dir / f"{run_id}.html"

    # (section title, analyzer module, formatter) in report order.
    analyzed_sections = (
        ("Token Usage", tokens_mod, _tokens_body),
        ("Chunk Scores", scores_mod, _scores_body),
        ("Duplicate Chunks", duplicates_mod, _duplicates_body),
        ("Truncation", truncation_mod, _truncation_body),
        ("Dropped History", history_mod, _history_body),
        ("Cache Hits", cache_mod, _cache_body),
    )

    section_parts = []
    for title, analyzer, formatter in analyzed_sections:
        result = analyzer.analyze(record)
        if result:  # analyzers return None when they have nothing to report
            section_parts.append(_section(title, formatter(result)))

    if record.final_prompt:
        section_parts.append(_section("Final Prompt", record.final_prompt))
    sections = "".join(section_parts)

    model_line = ""
    if record.model:
        model_line = f"<p><b>Model:</b> {_esc(record.model)}</p>"

    # Tolerate a run without a query or response instead of raising;
    # only the first 500 characters of the response are shown.
    query_text = record.query or ""
    response_preview = (record.response or "")[:500]

    document = f"""<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>ragradar report — {_esc(run_id)}</title>
<style>
body {{ font-family: system-ui, monospace; max-width: 900px; margin: 2em auto; padding: 0 1em; }}
details {{ margin: 1em 0; border: 1px solid #ccc; border-radius: 4px; padding: 0.5em 1em; }}
summary {{ cursor: pointer; font-weight: bold; padding: 0.3em 0; }}
pre {{ background: #f5f5f5; padding: 1em; overflow-x: auto; white-space: pre-wrap; }}
h1 {{ color: #333; }}
.meta {{ color: #666; margin-bottom: 2em; }}
</style>
</head>
<body>
<h1>ragradar report — {_esc(run_id)}</h1>
<div class="meta">
<p><b>Query:</b> {_esc(query_text)}</p>
<p><b>Response:</b> {_esc(response_preview)}</p>
{model_line}
</div>
{sections}
</body>
</html>"""

    out.write_text(document, encoding="utf-8")
    return out