ragradar 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ragradar/__init__.py +74 -0
- ragradar/cli.py +231 -0
- ragradar/explain/__init__.py +0 -0
- ragradar/explain/analyzers/__init__.py +0 -0
- ragradar/explain/analyzers/cache.py +19 -0
- ragradar/explain/analyzers/duplicates.py +49 -0
- ragradar/explain/analyzers/history.py +25 -0
- ragradar/explain/analyzers/scores.py +38 -0
- ragradar/explain/analyzers/tokens.py +36 -0
- ragradar/explain/analyzers/truncation.py +32 -0
- ragradar/explain/loader.py +8 -0
- ragradar/explain/renderer/__init__.py +0 -0
- ragradar/explain/renderer/html.py +152 -0
- ragradar/explain/renderer/terminal.py +351 -0
- ragradar/find/__init__.py +0 -0
- ragradar/find/bm25.py +7 -0
- ragradar/find/query_builder.py +65 -0
- ragradar/store.py +128 -0
- ragradar-0.1.0.dist-info/METADATA +142 -0
- ragradar-0.1.0.dist-info/RECORD +22 -0
- ragradar-0.1.0.dist-info/WHEEL +4 -0
- ragradar-0.1.0.dist-info/entry_points.txt +2 -0
ragradar/__init__.py
ADDED
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
"""ragradar — the single public import surface.
|
|
2
|
+
|
|
3
|
+
Users only ever write ``import ragradar``: capture entry points
|
|
4
|
+
(capture/start and the staged proxies), evaluation entry points
|
|
5
|
+
(check/evaluate/available_metrics), and the schema dataclasses are all
|
|
6
|
+
re-exported here. The underlying distributions (ragradar-core,
|
|
7
|
+
ragradar-capture, ragradar-evaluate) stay separately installable so a
|
|
8
|
+
production pipeline can depend on ragradar-capture alone without
|
|
9
|
+
pulling the evaluation stack (scipy/ragas) — but importing their
|
|
10
|
+
modules directly is an internal concern, not the public API.
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from ragradar_capture import (
|
|
14
|
+
Capture,
|
|
15
|
+
cache,
|
|
16
|
+
capture,
|
|
17
|
+
chunks,
|
|
18
|
+
commit,
|
|
19
|
+
context,
|
|
20
|
+
history,
|
|
21
|
+
response,
|
|
22
|
+
set_strict,
|
|
23
|
+
start,
|
|
24
|
+
tool_call,
|
|
25
|
+
)
|
|
26
|
+
from ragradar_core.schema import (
|
|
27
|
+
CacheEvent,
|
|
28
|
+
ChunkRecord,
|
|
29
|
+
RunRecord,
|
|
30
|
+
TokenBudget,
|
|
31
|
+
TokenUsage,
|
|
32
|
+
ToolCallRecord,
|
|
33
|
+
Turn,
|
|
34
|
+
)
|
|
35
|
+
from ragradar_evaluate import (
|
|
36
|
+
CheckResult,
|
|
37
|
+
EvalResult,
|
|
38
|
+
InputQualityPolicy,
|
|
39
|
+
MetricInfo,
|
|
40
|
+
available_metrics,
|
|
41
|
+
check,
|
|
42
|
+
evaluate,
|
|
43
|
+
)
|
|
44
|
+
|
|
45
|
+
__all__ = [
|
|
46
|
+
# Capture
|
|
47
|
+
"Capture",
|
|
48
|
+
"start",
|
|
49
|
+
"capture",
|
|
50
|
+
"set_strict",
|
|
51
|
+
"chunks",
|
|
52
|
+
"context",
|
|
53
|
+
"history",
|
|
54
|
+
"response",
|
|
55
|
+
"cache",
|
|
56
|
+
"tool_call",
|
|
57
|
+
"commit",
|
|
58
|
+
# Evaluation
|
|
59
|
+
"check",
|
|
60
|
+
"evaluate",
|
|
61
|
+
"available_metrics",
|
|
62
|
+
"CheckResult",
|
|
63
|
+
"EvalResult",
|
|
64
|
+
"MetricInfo",
|
|
65
|
+
"InputQualityPolicy",
|
|
66
|
+
# Schema dataclasses (advanced path; primitives coerce everywhere)
|
|
67
|
+
"ChunkRecord",
|
|
68
|
+
"TokenBudget",
|
|
69
|
+
"TokenUsage",
|
|
70
|
+
"Turn",
|
|
71
|
+
"CacheEvent",
|
|
72
|
+
"ToolCallRecord",
|
|
73
|
+
"RunRecord",
|
|
74
|
+
]
|
ragradar/cli.py
ADDED
|
@@ -0,0 +1,231 @@
|
|
|
1
|
+
import re
|
|
2
|
+
from datetime import date
|
|
3
|
+
|
|
4
|
+
import click
|
|
5
|
+
from rich.console import Console
|
|
6
|
+
from rich.table import Table
|
|
7
|
+
|
|
8
|
+
from ragradar import store
|
|
9
|
+
from ragradar.explain import loader
|
|
10
|
+
from ragradar.explain.renderer import html as html_renderer
|
|
11
|
+
from ragradar.explain.renderer import terminal as terminal_renderer
|
|
12
|
+
|
|
13
|
+
console = Console()
|
|
14
|
+
|
|
15
|
+
_SESSION_RE = re.compile(r"^s(\d+)$", re.IGNORECASE)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def _parse_session_id(value: str) -> int:
    """Convert a user-supplied session reference into its integer ID.

    Accepts the prefixed form used in the CLI tables (``s12`` / ``S12``) or a
    bare integer (``12``). Surrounding whitespace is ignored.

    Args:
        value: Raw command-line text identifying a session.

    Returns:
        The session ID as an ``int``.

    Raises:
        click.BadParameter: If *value* is in neither form, so the CLI reports
            a usage error instead of an ``int()`` traceback.
    """
    text = value.strip()
    m = _SESSION_RE.match(text)
    if m:
        return int(m.group(1))
    try:
        return int(text)
    except ValueError:
        raise click.BadParameter(
            f"invalid session id {value!r}; expected a number such as 3 or s3"
        ) from None
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def _disambiguate(results: list[dict]) -> dict | None:
    """Ask the user to pick one run when a target matched several.

    Prints a numbered list of the candidate rows, then prompts for a number.

    Args:
        results: Candidate rows. Each is read for ``session_id``, ``run_seq``,
            ``created_at``, ``query`` and, optionally, ``session_title``.

    Returns:
        Whatever ``store.get_run`` returns for the chosen row, or ``None`` when
        the prompt is left empty, aborted, or answered with anything that is
        not one of the listed numbers.
    """
    # Titles and queries are free text. Rich treats "[...]" as console markup,
    # so they are escaped to be shown literally.
    from rich.markup import escape

    console.print("\n [bold]Multiple matches:[/bold]\n")
    for i, r in enumerate(results, 1):
        title = escape(r.get("session_title") or "")
        query_preview = escape(r["query"][:60])
        console.print(
            f" {i} s{r['session_id']} r{r['run_seq']} "
            f"{r['created_at'][:10]} {title} "
            f'— "{query_preview}"'
        )
    console.print()
    try:
        choice = click.prompt(
            " Pick (number) or press Enter to cancel",
            default="",
            show_default=False,
        )
        if not choice:
            return None
        idx = int(choice) - 1
        if 0 <= idx < len(results):
            r = results[idx]
            return store.get_run(r["session_id"], r["run_seq"])
    except (ValueError, click.Abort, KeyboardInterrupt, EOFError):
        # click.prompt turns Ctrl-C / EOF into click.Abort; treat every form
        # of "no usable answer" as a cancellation.
        pass
    return None
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def _resolve_and_load(target: str | None = None):
    """Resolve *target* to one run row and load its record.

    Returns:
        ``(run_row, record)`` on success, or ``(None, None)`` when nothing
        matched or an ambiguous match was not narrowed down by the user.
    """
    resolved = store.resolve_target(target)
    if resolved is None:
        console.print("No runs found.")
        return None, None

    # A list result means several runs matched; let the user choose one.
    row = _disambiguate(resolved) if isinstance(resolved, list) else resolved
    if row is None:
        return None, None

    return row, loader.load_run_record(row)
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
@click.group()
def main():
    """ragradar — analyst CLI for the ragradar observability system."""
    # Root command group: it does no work itself, the subcommands do.
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
@main.command("list")
@click.argument("session_id", required=False)
def list_cmd(session_id):
    """List sessions, or runs within a session."""
    # Queries, titles and pipeline names are free text. Rich parses "[...]" in
    # table cells as console markup, so they are escaped to be shown literally.
    from rich.markup import escape

    if session_id is not None:
        # A session was named: list the runs inside it.
        sid = _parse_session_id(session_id)
        runs = store.list_runs(sid)
        if not runs:
            console.print(f"No runs found in session {sid}.")
            return
        tbl = Table(title=f"Session {sid} — Runs")
        tbl.add_column("Run", style="cyan")
        tbl.add_column("Date")
        tbl.add_column("Query")
        for r in runs:
            tbl.add_row(
                f"s{r['session_id']} r{r['run_seq']}",
                r["created_at"][:10],  # first 10 characters of the timestamp
                escape(r["query"][:80]),
            )
        console.print(tbl)
        return

    # No argument: list every session.
    sessions = store.list_sessions()
    if not sessions:
        console.print("No sessions found.")
        return
    tbl = Table(title="Sessions")
    tbl.add_column("ID", style="cyan")
    tbl.add_column("Runs", justify="right")
    tbl.add_column("Pipeline")
    tbl.add_column("Created")
    tbl.add_column("Title")
    for s in sessions:
        tbl.add_row(
            f"s{s['session_id']}",
            str(s["run_count"]),
            escape(s["pipeline"] or ""),
            s["created_at"][:10],
            escape(s["title"] or ""),
        )
    console.print(tbl)
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
@main.command()
@click.argument("hint", required=False)
@click.option("--exact", is_flag=True)
@click.option("--from", "from_dt", default=None)
@click.option("--to", "to_dt", default=None)
@click.option("--today", is_flag=True)
@click.option("--session", "session_filter", default=None)
@click.option("--pipeline", default=None)
@click.option("--recent", default=None, type=int)
def find(hint, exact, from_dt, to_dt, today, session_filter, pipeline, recent):
    """Search runs by query text."""
    # Session titles and queries are free text. Rich parses "[...]" in table
    # cells as console markup, so they are escaped to be shown literally.
    from rich.markup import escape

    if today:
        # --today only fills in the bounds the user did not give explicitly.
        # NOTE(review): date.today() is the local date, while the upper bound
        # carries a "Z" suffix — confirm the store compares these consistently.
        today_str = date.today().isoformat()
        if from_dt is None:
            from_dt = today_str
        if to_dt is None:
            to_dt = today_str + "T23:59:59.999999Z"

    sid = None
    if session_filter is not None:
        sid = _parse_session_id(session_filter)

    results = store.search_runs(
        hint=hint,
        exact=exact,
        session_id=sid,
        pipeline=pipeline,
        from_dt=from_dt,
        to_dt=to_dt,
        recent_n=recent,
    )

    if not results:
        console.print("No matching runs found.")
        return

    tbl = Table(title=f"Search results ({len(results)})")
    tbl.add_column("Run", style="cyan")
    tbl.add_column("Date")
    tbl.add_column("Session")
    tbl.add_column("Query")
    for r in results:
        tbl.add_row(
            f"s{r['session_id']} r{r['run_seq']}",
            r["created_at"][:10],
            escape(r.get("session_title") or ""),
            escape(r["query"][:80]),
        )
    console.print(tbl)
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
@main.command()
@click.argument("target", required=False)
@click.option("--full", is_flag=True)
@click.option("--html", "to_html", is_flag=True)
def explain(target, full, to_html):
    """Explain a run — all analysis factors."""
    run_row, record = _resolve_and_load(target)
    if record is None:
        return

    # Default output is the terminal; --html writes a report file instead.
    if not to_html:
        terminal_renderer.render(record, full=full, run_row=run_row)
        return

    report_path = html_renderer.render(
        record, f"s{run_row['session_id']}r{run_row['run_seq']}"
    )
    console.print(f"Report written to {report_path}")
|
|
183
|
+
|
|
184
|
+
|
|
185
|
+
@main.command()
@click.argument("target_a")
@click.argument("target_b")
def diff(target_a, target_b):
    """Compare two runs side by side."""
    rows = [store.resolve_target(t) for t in (target_a, target_b)]

    if any(row is None for row in rows):
        console.print("Could not resolve both targets.")
        return
    # Unlike `explain`, an ambiguous target is rejected here, not prompted for.
    if any(isinstance(row, list) for row in rows):
        console.print("Ambiguous target — use exact run ID (e.g. s2r3).")
        return

    records = [loader.load_run_record(row) for row in rows]
    run_ids = [f"s{row['session_id']}r{row['run_seq']}" for row in rows]

    terminal_renderer.render_diff(records[0], records[1], run_ids[0], run_ids[1])
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
@main.command()
@click.argument("target")
def budget(target):
    """Token waterfall only."""
    # Only the loaded record is needed here; the run row is discarded.
    _, record = _resolve_and_load(target)
    if record is not None:
        terminal_renderer.render_budget(record)
|
|
217
|
+
|
|
218
|
+
|
|
219
|
+
@main.group()
def session():
    """Session management commands."""
    # Container group only: it does no work itself.
|
|
222
|
+
|
|
223
|
+
|
|
224
|
+
@session.command()
@click.argument("session_id")
@click.argument("title")
def rename(session_id, title):
    """Rename a session."""
    from rich.markup import escape

    sid = _parse_session_id(session_id)
    store.rename_session(sid, title)
    # The title is echoed back; escape it so "[...]" in a title is printed
    # literally instead of being parsed as Rich console markup.
    console.print(f'Session {sid} renamed to "{escape(title)}".')
|
|
File without changes
|
|
File without changes
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
from ragradar_core.schema import RunRecord
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def analyze(record: RunRecord) -> dict | None:
    """Summarise the cache hit/miss events recorded on *record*.

    Returns:
        ``None`` when the record has no cache events. Otherwise a dict with
        the event count, hit and miss counts, the hit ratio, and the chunk IDs
        of the hit and miss events in their original order.
    """
    events = record.cache_events
    if not events:
        return None

    # Split the chunk IDs into hits and misses in a single pass.
    hit_ids: list = []
    miss_ids: list = []
    for event in events:
        (hit_ids if event.hit else miss_ids).append(event.chunk_id)

    n_events = len(events)
    return {
        "total_events": n_events,
        "hits": len(hit_ids),
        "misses": len(miss_ids),
        "hit_ratio": len(hit_ids) / n_events if n_events else 0.0,
        "hit_chunks": hit_ids,
        "miss_chunks": miss_ids,
    }
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
from ragradar_core.schema import RunRecord
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def analyze(record: RunRecord) -> dict | None:
    """Detect duplicated chunks among the chunks of *record*.

    Two kinds of duplicate are reported:

    * path dup: one ``chunk_id`` with more than one recorded
      ``retrieval_path``;
    * window dup: chunks sharing a ``source_doc_id`` where the content of one
      is contained in the content of another.

    Returns:
        ``None`` when the record has no chunks. Otherwise a dict with
        ``path_dups``, ``window_dups``, ``semantic_dups`` (always an empty
        list here) and ``duplicate_ratio``, which is the number of distinct
        duplicated chunk IDs divided by the number of chunk records.
    """
    if not record.chunks:
        return None

    # PATH DUP: collect every non-empty retrieval_path seen per chunk_id.
    chunk_paths: dict[str, list[str]] = {}
    for c in record.chunks:
        paths = chunk_paths.setdefault(c.chunk_id, [])
        if c.retrieval_path:
            paths.append(c.retrieval_path)

    path_dups = [
        {"chunk_id": cid, "paths": paths}
        for cid, paths in chunk_paths.items()
        if len(paths) > 1
    ]

    # WINDOW DUP: group by source document, then substring-compare each pair.
    by_source: dict[str, list] = {}
    for c in record.chunks:
        # Chunks with empty or missing content are skipped: "" is a substring
        # of every string, so it would mark every sibling as a duplicate, and
        # None would raise TypeError in the `in` test.
        if c.content:
            by_source.setdefault(c.source_doc_id, []).append(c)

    window_dups = []
    for source_id, chunks in by_source.items():
        if len(chunks) < 2:
            continue
        dup_ids: set[str] = set()
        for i, a in enumerate(chunks):
            for b in chunks[i + 1 :]:
                if a.content in b.content or b.content in a.content:
                    dup_ids.add(a.chunk_id)
                    dup_ids.add(b.chunk_id)
        if dup_ids:
            window_dups.append(
                {"chunk_ids": sorted(dup_ids), "source_doc_id": source_id}
            )

    # Union of the chunk IDs flagged by either check.
    dup_chunk_ids: set[str] = {d["chunk_id"] for d in path_dups}
    for d in window_dups:
        dup_chunk_ids.update(d["chunk_ids"])

    total = len(record.chunks)
    return {
        "path_dups": path_dups,
        "window_dups": window_dups,
        "semantic_dups": [],
        "duplicate_ratio": len(dup_chunk_ids) / total if total else 0.0,
    }
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
from ragradar_core.schema import RunRecord
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def analyze(record: RunRecord) -> dict | None:
    """Compare the conversation history before and after eviction.

    A turn counts as dropped when it is in ``history_pre`` but has no
    ``(role, content)`` counterpart left in ``history_post``. Matching
    respects multiplicity: if the same turn occurs twice before and once
    after, one occurrence is reported as dropped.

    Returns:
        ``None`` when both histories are empty or missing. Otherwise a dict
        with the turn counts, the dropped turns in their original order,
        ``eviction_reason`` copied from the record, and the summed token
        counts (``None`` when no turn on that side has a token count).
    """
    from collections import Counter

    if not record.history_pre and not record.history_post:
        return None

    pre = record.history_pre or []
    post = record.history_post or []

    pre_vals = [t.tokens for t in pre if t.tokens is not None]
    post_vals = [t.tokens for t in post if t.tokens is not None]

    # Multiset of the surviving turns. Each pre-turn consumes one match.
    remaining = Counter((t.role, t.content) for t in post)
    dropped_newest_first = []
    # Walk newest to oldest so that, among identical turns, the older ones are
    # the ones reported as dropped.
    # NOTE(review): assumes eviction keeps the most recent turns — confirm.
    for t in reversed(pre):
        key = (t.role, t.content)
        if remaining[key] > 0:
            remaining[key] -= 1
        else:
            dropped_newest_first.append(t)
    dropped = dropped_newest_first[::-1]

    return {
        "pre_turn_count": len(pre),
        "post_turn_count": len(post),
        "dropped_turn_count": len(dropped),
        "eviction_reason": record.eviction_reason,
        "dropped_turns": dropped,
        "pre_tokens": sum(pre_vals) if pre_vals else None,
        "post_tokens": sum(post_vals) if post_vals else None,
    }
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
from ragradar_core.schema import RunRecord
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def analyze(record: RunRecord) -> dict | None:
    """Summarise the retrieval and rerank scores of the chunks on *record*.

    Returns:
        ``None`` when there are no chunks or no chunk carries either score.
        Otherwise a dict with both score lists, their max and min (``None``
        for an empty list), ``rerank_delta`` (mean rerank minus mean
        retrieval, 4 decimals, only when both lists are non-empty) and
        ``low_score_ratio`` (rerank scores below 0.5 divided by the total
        chunk count, 4 decimals).
    """
    chunks = record.chunks
    if not chunks:
        return None

    retrieval = [c.retrieval_score for c in chunks if c.retrieval_score is not None]
    rerank = [c.rerank_score for c in chunks if c.rerank_score is not None]
    if not (retrieval or rerank):
        return None

    def _extremes(values):
        # (max, min) of a score list, or (None, None) when it is empty.
        if values:
            return max(values), min(values)
        return None, None

    best_retrieval, worst_retrieval = _extremes(retrieval)
    best_rerank, worst_rerank = _extremes(rerank)

    if retrieval and rerank:
        avg_rerank = sum(rerank) / len(rerank)
        avg_retrieval = sum(retrieval) / len(retrieval)
        delta = round(avg_rerank - avg_retrieval, 4)
    else:
        delta = None

    n_chunks = len(chunks)
    n_low = sum(1 for score in rerank if score < 0.5)
    # The denominator is every chunk, not only those with a rerank score.
    low_ratio = round(n_low / n_chunks, 4) if n_chunks else 0.0

    return {
        "retrieval_scores": retrieval,
        "rerank_scores": rerank,
        "top_retrieval": best_retrieval,
        "top_rerank": best_rerank,
        "bottom_retrieval": worst_retrieval,
        "bottom_rerank": worst_rerank,
        "rerank_delta": delta,
        "low_score_ratio": low_ratio,
    }
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
from ragradar_core.schema import RunRecord
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def analyze(record: RunRecord) -> dict | None:
    """Break down the token usage of *record*.

    The total is the sum of the chunk token counts, the token counts of the
    post-eviction history turns, and the system allocation from the token
    budget (0 when the record has no budget). A missing (``None``) count
    contributes 0 instead of raising.

    Returns:
        ``None`` when the record has neither chunks nor a final prompt.
        Otherwise a dict with the total, the three components, ``headroom``
        and ``model_limit`` from the budget (0 and ``None`` without one),
        ``utilisation_pct`` (total as a percentage of the limit, 1 decimal,
        0.0 without a limit) and the raw per-chunk token counts.
    """
    if not record.chunks and not record.final_prompt:
        return None

    chunks = record.chunks or []
    # `or 0` matches the guard already applied to history turns below.
    chunks_tokens = sum(c.token_count or 0 for c in chunks)

    history_tokens = sum(t.tokens or 0 for t in (record.history_post or []))

    system_tokens = 0
    headroom = 0
    model_limit = None
    budget = record.token_budget
    if budget:
        # This value feeds the total, so a missing allocation counts as 0.
        system_tokens = budget.system_allocated or 0
        headroom = budget.headroom
        model_limit = budget.total_limit

    total = chunks_tokens + history_tokens + system_tokens
    utilisation = (total / model_limit * 100) if model_limit else 0.0

    # Per-chunk values are reported as recorded (None stays None).
    per_chunk = [{"chunk_id": c.chunk_id, "token_count": c.token_count} for c in chunks]

    return {
        "total_tokens": total,
        "chunks_tokens": chunks_tokens,
        "history_tokens": history_tokens,
        "system_tokens": system_tokens,
        "headroom": headroom,
        "model_limit": model_limit,
        "utilisation_pct": round(utilisation, 1),
        "per_chunk": per_chunk,
    }
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
from ragradar_core.schema import RunRecord
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def analyze(record: RunRecord) -> dict | None:
    """Report which chunks of *record* were truncated and how much it matters.

    Severity is ``"none"`` when nothing was truncated, ``"high"`` when at
    least one truncated chunk has a retrieval or rerank score above 0.7, and
    ``"low"`` otherwise.

    Returns:
        ``None`` when the record has no chunks. Otherwise a dict with the
        truncated count, one entry per truncated chunk (ID and both scores),
        the number of high-score truncations and the severity label.
    """
    if not record.chunks:
        return None

    def _scores_high(chunk) -> bool:
        # A missing score is treated as 0 for the threshold test.
        return (chunk.retrieval_score or 0) > 0.7 or (chunk.rerank_score or 0) > 0.7

    cut = [c for c in record.chunks if c.truncated]
    n_high = sum(1 for c in cut if _scores_high(c))

    if cut:
        severity = "high" if n_high else "low"
    else:
        severity = "none"

    details = []
    for c in cut:
        details.append(
            {
                "chunk_id": c.chunk_id,
                "score": c.retrieval_score,
                "rerank_score": c.rerank_score,
            }
        )

    return {
        "truncated_count": len(cut),
        "truncated_chunks": details,
        "high_score_truncations": n_high,
        "severity": severity,
    }
|
|
File without changes
|
|
@@ -0,0 +1,152 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
|
|
3
|
+
from ragradar_core import store as _core_store
|
|
4
|
+
from ragradar_core.schema import RunRecord
|
|
5
|
+
|
|
6
|
+
from ragradar.explain.analyzers import (
|
|
7
|
+
cache as cache_mod,
|
|
8
|
+
)
|
|
9
|
+
from ragradar.explain.analyzers import (
|
|
10
|
+
duplicates as duplicates_mod,
|
|
11
|
+
)
|
|
12
|
+
from ragradar.explain.analyzers import (
|
|
13
|
+
history as history_mod,
|
|
14
|
+
)
|
|
15
|
+
from ragradar.explain.analyzers import (
|
|
16
|
+
scores as scores_mod,
|
|
17
|
+
)
|
|
18
|
+
from ragradar.explain.analyzers import (
|
|
19
|
+
tokens as tokens_mod,
|
|
20
|
+
)
|
|
21
|
+
from ragradar.explain.analyzers import (
|
|
22
|
+
truncation as truncation_mod,
|
|
23
|
+
)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def _esc(text: str) -> str:
|
|
27
|
+
return (
|
|
28
|
+
text.replace("&", "&").replace("<", "<").replace(">", ">").replace('"', """)
|
|
29
|
+
)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def _section(title: str, content: str) -> str:
    """Build one report section as an initially open ``<details>`` element.

    Both arguments are passed through ``_esc``. The title becomes the
    ``<summary>`` and the content goes inside a ``<pre>`` block.
    """
    heading = _esc(title)
    body = _esc(content)
    pieces = (
        "<details open>\n <summary>",
        heading,
        "</summary>\n",
        " <pre>",
        body,
        "</pre>\n</details>\n",
    )
    return "".join(pieces)
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def render(record: RunRecord, run_id: str) -> Path:
    """Write a standalone HTML report for *record* and return its path.

    The file is ``reports/<run_id>.html`` under the directory returned by
    ``_core_store._ragradar_dir()``. The ``reports`` directory is created if
    needed and the file is written as UTF-8. Each analyzer contributes a
    section only when it returns a truthy result, and the final prompt gets
    its own section when present.

    Args:
        record: The run to report on.
        run_id: Identifier used for the file name and the page title.

    Returns:
        Path of the written report.
    """
    reports_dir = _core_store._ragradar_dir() / "reports"
    reports_dir.mkdir(parents=True, exist_ok=True)
    out = reports_dir / f"{run_id}.html"

    def _join(values) -> str:
        # str() each item so that a non-string or missing ID cannot break join().
        return ", ".join(str(v) for v in values)

    sections = ""

    tok = tokens_mod.analyze(record)
    if tok:
        body = f"Total: {tok['total_tokens']}"
        if tok["model_limit"]:
            body += f" / {tok['model_limit']} ({tok['utilisation_pct']}%)"
        body += f"\nChunks: {tok['chunks_tokens']}"
        body += f"\nHistory: {tok['history_tokens']}"
        body += f"\nSystem: {tok['system_tokens']}"
        body += f"\nHeadroom: {tok['headroom']}"
        if tok["per_chunk"]:
            body += "\n\nPer chunk:"
            for pc in tok["per_chunk"]:
                body += f"\n {pc['chunk_id']}: {pc['token_count']}"
        sections += _section("Token Usage", body)

    sc = scores_mod.analyze(record)
    if sc:
        body = ""
        # top/bottom are None when the matching score list is empty, so the
        # list guards the float formatting.
        if sc["retrieval_scores"]:
            body += f"Retrieval: {sc['bottom_retrieval']:.2f}–{sc['top_retrieval']:.2f}\n"
        if sc["rerank_scores"]:
            body += f"Rerank: {sc['bottom_rerank']:.2f}–{sc['top_rerank']:.2f}\n"
        if sc["rerank_delta"] is not None:
            body += f"Rerank delta: {sc['rerank_delta']:+.4f}\n"
        body += f"Low-score ratio: {sc['low_score_ratio']:.0%}"
        sections += _section("Chunk Scores", body)

    dup = duplicates_mod.analyze(record)
    if dup:
        body = f"Path dups: {len(dup['path_dups'])}\n"
        body += f"Window dups: {len(dup['window_dups'])}\n"
        body += f"Duplicate ratio: {dup['duplicate_ratio']:.0%}"
        for d in dup["path_dups"]:
            body += f"\n [PATH DUP] {d['chunk_id']} via {_join(d['paths'])}"
        for d in dup["window_dups"]:
            ids = _join(d["chunk_ids"])
            body += f"\n [WINDOW DUP] {ids} (source: {d['source_doc_id']})"
        sections += _section("Duplicate Chunks", body)

    tr = truncation_mod.analyze(record)
    if tr:
        body = f"Truncated: {tr['truncated_count']} chunks\n"
        body += f"High-score truncations: {tr['high_score_truncations']}\n"
        body += f"Severity: {tr['severity']}"
        for tc in tr["truncated_chunks"]:
            body += f"\n {tc['chunk_id']}: retrieval={tc['score']}, rerank={tc['rerank_score']}"
        sections += _section("Truncation", body)

    hist = history_mod.analyze(record)
    if hist:
        body = (
            f"Turns: {hist['pre_turn_count']} -> {hist['post_turn_count']} "
            f"({hist['dropped_turn_count']} dropped)\n"
        )
        if hist["eviction_reason"]:
            body += f"Reason: {hist['eviction_reason']}\n"
        if hist["pre_tokens"] is not None:
            body += f"Pre tokens: {hist['pre_tokens']}\n"
        if hist["post_tokens"] is not None:
            body += f"Post tokens: {hist['post_tokens']}\n"
        for t in hist["dropped_turns"]:
            body += f"\n [{t.role}] {t.content}"
        sections += _section("Dropped History", body)

    ca = cache_mod.analyze(record)
    if ca:
        body = f"Hits: {ca['hits']}/{ca['total_events']} ({ca['hit_ratio']:.0%})\n"
        if ca["hit_chunks"]:
            body += f"Hit chunks: {_join(ca['hit_chunks'])}\n"
        if ca["miss_chunks"]:
            body += f"Miss chunks: {_join(ca['miss_chunks'])}"
        sections += _section("Cache Hits", body)

    if record.final_prompt:
        sections += _section("Final Prompt", record.final_prompt)

    model_line = ""
    if record.model:
        model_line = f"<p><b>Model:</b> {_esc(record.model)}</p>"

    # A missing query or response is rendered as empty text instead of raising.
    query_html = _esc(record.query or "")
    # Only the first 500 characters of the response are shown in the header.
    response_html = _esc((record.response or "")[:500])
    title_html = _esc(run_id)

    html = f"""<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>ragradar report — {title_html}</title>
<style>
body {{ font-family: system-ui, monospace; max-width: 900px; margin: 2em auto; padding: 0 1em; }}
details {{ margin: 1em 0; border: 1px solid #ccc; border-radius: 4px; padding: 0.5em 1em; }}
summary {{ cursor: pointer; font-weight: bold; padding: 0.3em 0; }}
pre {{ background: #f5f5f5; padding: 1em; overflow-x: auto; white-space: pre-wrap; }}
h1 {{ color: #333; }}
.meta {{ color: #666; margin-bottom: 2em; }}
</style>
</head>
<body>
<h1>ragradar report — {title_html}</h1>
<div class="meta">
<p><b>Query:</b> {query_html}</p>
<p><b>Response:</b> {response_html}</p>
{model_line}
</div>
{sections}
</body>
</html>"""

    out.write_text(html, encoding="utf-8")
    return out
|