ragradar-core 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,38 @@
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *.egg-info/
5
+ .venv/
6
+ dist/
7
+ build/
8
+ *.so
9
+
10
+ # uv
11
+ .uv/
12
+ uv.lock
13
+
14
+ # ragradar runtime — never commit user run data
15
+ .ragradar/
16
+
17
+ # environment
18
+ .env
19
+ *.env
20
+ .env.*
21
+
22
+ # IDE
23
+ .vscode/
24
+ .idea/
25
+ *.swp
26
+
27
+ # OS
28
+ .DS_Store
29
+ Thumbs.db
30
+
31
+ # test output
32
+ .pytest_cache/
33
+ htmlcov/
34
+ .coverage
35
+
36
+ # example output
37
+ examples/rag_pipeline/output/
38
+ .claude/
@@ -0,0 +1,89 @@
1
+ Metadata-Version: 2.4
2
+ Name: ragradar-core
3
+ Version: 0.1.0
4
+ Summary: Shared schema, store, and target parsing for the ragradar observability system
5
+ Project-URL: Homepage, https://github.com/pleokarthik/RAGRadar
6
+ Project-URL: Repository, https://github.com/pleokarthik/RAGRadar
7
+ Project-URL: Issues, https://github.com/pleokarthik/RAGRadar/issues
8
+ Author-email: Leo Karthik Paramasivan <pleokarthik@gmail.com>
9
+ License-Expression: MIT
10
+ Classifier: Development Status :: 3 - Alpha
11
+ Classifier: Intended Audience :: Developers
12
+ Classifier: License :: OSI Approved :: MIT License
13
+ Classifier: Programming Language :: Python :: 3.11
14
+ Classifier: Programming Language :: Python :: 3.12
15
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
16
+ Requires-Python: >=3.11
17
+ Description-Content-Type: text/markdown
18
+
19
+ # ragradar-core
20
+
21
+ Shared kernel for the ragradar observability system: the run-record schema, the
22
+ single SQLite store, and the sNrN target parser. `ragradar-capture`, `ragradar`, and
23
+ `ragradar-evaluate` all depend on it — it depends on nothing.
24
+
25
+ **You normally do not import this directly.** Instrument pipelines with
26
+ `ragradar_capture`, evaluate with `ragradar_evaluate` — both re-export the schema
27
+ dataclasses. `ragradar_core` exists so those packages share one store contract
28
+ instead of three copies of it.
29
+
30
+ ## Zero-dependency guarantee
31
+
32
+ `ragradar_core` imports only the Python standard library (e.g. `sqlite3`,
33
+ `dataclasses`, `json`, `re`, `pathlib`, `datetime`). This is enforced by a
34
+ test (`tests/test_zero_deps.py`) that imports the package in a subprocess
35
+ and asserts nothing outside the stdlib was loaded.
36
+
37
+ ## What lives here
38
+
39
+ | Module | Contents |
40
+ |---|---|
41
+ | `ragradar_core.schema` | `RunRecord` and its child dataclasses (`ChunkRecord`, `TokenBudget`, `TokenUsage`, `Turn`, `CacheEvent`, `ToolCallRecord`), all tolerant of unknown kwargs |
42
+ | `ragradar_core.store` | store location, schema + migrations, and every persistence primitive (runs, eval scores, benchmark, policies) |
43
+ | `ragradar_core.targets` | `parse_target_id("s4r3") -> (4, 3)` — the one sNrN parser |
44
+
45
+ ## Environment setup contract
46
+
47
+ `ragradar_core.store.connect()` guarantees the environment before returning a
48
+ connection:
49
+
50
+ 1. `~/.ragradar/` exists (created if missing),
51
+ 2. `~/.ragradar/runs.db` exists (created if missing),
52
+ 3. the schema is at the latest version — fresh databases are created
53
+ directly at the latest version; databases written by older package
54
+ versions are migrated in place.
55
+
56
+ Any entry point — a library call, a CLI command, an example script — works
57
+ on a fresh machine with no prior CLI invocation.
58
+
59
+ ## Schema version + migration story
60
+
61
+ One constant, `ragradar_core.store.SCHEMA_VERSION` (currently `"3"`), recorded
62
+ in the `meta` table. The migration chain walks old databases forward on
63
+ first connect:
64
+
65
+ - **v1 → v2**: adds `eval_scores` / `risk_score` / `evaluated_at` columns
66
+ to `runs`; creates the `benchmark` and `policies` tables.
67
+ - **v2 → v3**: creates the `runs_fts` FTS5 index over run queries (with
68
+ insert/update/delete sync triggers, backfilled from existing rows) and
69
+ drops the now-redundant `idx_runs_query` index.
70
+
71
+ A database reporting a version this package doesn't know raises
72
+ `RuntimeError` rather than guessing.
73
+
74
+ ## DB location and layout
75
+
76
+ The store lives at `~/.ragradar/runs.db` (SQLite, WAL mode).
77
+
78
+ | Table | Columns |
79
+ |---|---|
80
+ | `meta` | `key`, `value` — holds `schema_version` |
81
+ | `sessions` | `session_id`, `title`, `pipeline`, `created_at` |
82
+ | `runs` | `session_id`, `run_seq`, `query`, `pipeline`, `created_at`, `run_data` (JSON `RunRecord`), `eval_scores` (JSON), `risk_score`, `evaluated_at` |
83
+ | `benchmark` | `pipeline`, `factor`, `threshold`, `correlation`, `sample_count`, `updated_at` |
84
+ | `policies` | `pipeline`, `policy_data` (JSON), `updated_at` |
85
+ | `runs_fts` | FTS5 index over `runs.query` |
86
+
87
+ Runs are addressed as `s{session_id}r{run_seq}` (e.g. `s2r3`) everywhere —
88
+ "run" is the data noun; capturing is the verb, and belongs to
89
+ `ragradar-capture`.
@@ -0,0 +1,71 @@
1
+ # ragradar-core
2
+
3
+ Shared kernel for the ragradar observability system: the run-record schema, the
4
+ single SQLite store, and the sNrN target parser. `ragradar-capture`, `ragradar`, and
5
+ `ragradar-evaluate` all depend on it — it depends on nothing.
6
+
7
+ **You normally do not import this directly.** Instrument pipelines with
8
+ `ragradar_capture`, evaluate with `ragradar_evaluate` — both re-export the schema
9
+ dataclasses. `ragradar_core` exists so those packages share one store contract
10
+ instead of three copies of it.
11
+
12
+ ## Zero-dependency guarantee
13
+
14
+ `ragradar_core` imports only the Python standard library (e.g. `sqlite3`,
15
+ `dataclasses`, `json`, `re`, `pathlib`, `datetime`). This is enforced by a
16
+ test (`tests/test_zero_deps.py`) that imports the package in a subprocess
17
+ and asserts nothing outside the stdlib was loaded.
18
+
19
+ ## What lives here
20
+
21
+ | Module | Contents |
22
+ |---|---|
23
+ | `ragradar_core.schema` | `RunRecord` and its child dataclasses (`ChunkRecord`, `TokenBudget`, `TokenUsage`, `Turn`, `CacheEvent`, `ToolCallRecord`), all tolerant of unknown kwargs |
24
+ | `ragradar_core.store` | store location, schema + migrations, and every persistence primitive (runs, eval scores, benchmark, policies) |
25
+ | `ragradar_core.targets` | `parse_target_id("s4r3") -> (4, 3)` — the one sNrN parser |
26
+
27
+ ## Environment setup contract
28
+
29
+ `ragradar_core.store.connect()` guarantees the environment before returning a
30
+ connection:
31
+
32
+ 1. `~/.ragradar/` exists (created if missing),
33
+ 2. `~/.ragradar/runs.db` exists (created if missing),
34
+ 3. the schema is at the latest version — fresh databases are created
35
+ directly at the latest version; databases written by older package
36
+ versions are migrated in place.
37
+
38
+ Any entry point — a library call, a CLI command, an example script — works
39
+ on a fresh machine with no prior CLI invocation.
40
+
41
+ ## Schema version + migration story
42
+
43
+ One constant, `ragradar_core.store.SCHEMA_VERSION` (currently `"3"`), recorded
44
+ in the `meta` table. The migration chain walks old databases forward on
45
+ first connect:
46
+
47
+ - **v1 → v2**: adds `eval_scores` / `risk_score` / `evaluated_at` columns
48
+ to `runs`; creates the `benchmark` and `policies` tables.
49
+ - **v2 → v3**: creates the `runs_fts` FTS5 index over run queries (with
50
+ insert/update/delete sync triggers, backfilled from existing rows) and
51
+ drops the now-redundant `idx_runs_query` index.
52
+
53
+ A database reporting a version this package doesn't know raises
54
+ `RuntimeError` rather than guessing.
55
+
56
+ ## DB location and layout
57
+
58
+ The store lives at `~/.ragradar/runs.db` (SQLite, WAL mode).
59
+
60
+ | Table | Columns |
61
+ |---|---|
62
+ | `meta` | `key`, `value` — holds `schema_version` |
63
+ | `sessions` | `session_id`, `title`, `pipeline`, `created_at` |
64
+ | `runs` | `session_id`, `run_seq`, `query`, `pipeline`, `created_at`, `run_data` (JSON `RunRecord`), `eval_scores` (JSON), `risk_score`, `evaluated_at` |
65
+ | `benchmark` | `pipeline`, `factor`, `threshold`, `correlation`, `sample_count`, `updated_at` |
66
+ | `policies` | `pipeline`, `policy_data` (JSON), `updated_at` |
67
+ | `runs_fts` | FTS5 index over `runs.query` |
68
+
69
+ Runs are addressed as `s{session_id}r{run_seq}` (e.g. `s2r3`) everywhere —
70
+ "run" is the data noun; capturing is the verb, and belongs to
71
+ `ragradar-capture`.
@@ -0,0 +1,31 @@
1
+ [project]
2
+ name = "ragradar-core"
3
+ version = "0.1.0"
4
+ description = "Shared schema, store, and target parsing for the ragradar observability system"
5
+ readme = "README.md"
6
+ requires-python = ">=3.11"
7
+ license = "MIT"
8
+ authors = [
9
+ { name = "Leo Karthik Paramasivan", email = "pleokarthik@gmail.com" },
10
+ ]
11
+ classifiers = [
12
+ "Development Status :: 3 - Alpha",
13
+ "License :: OSI Approved :: MIT License",
14
+ "Programming Language :: Python :: 3.11",
15
+ "Programming Language :: Python :: 3.12",
16
+ "Intended Audience :: Developers",
17
+ "Topic :: Scientific/Engineering :: Artificial Intelligence",
18
+ ]
19
+ dependencies = []
20
+
21
+ [project.urls]
22
+ Homepage = "https://github.com/pleokarthik/RAGRadar"
23
+ Repository = "https://github.com/pleokarthik/RAGRadar"
24
+ Issues = "https://github.com/pleokarthik/RAGRadar/issues"
25
+
26
+ [build-system]
27
+ requires = ["hatchling"]
28
+ build-backend = "hatchling.build"
29
+
30
+ [tool.hatch.build.targets.wheel]
31
+ packages = ["src/ragradar_core"]
@@ -0,0 +1,25 @@
1
+ # ragradar-core is internal plumbing shared by ragradar-capture, ragradar, and
2
+ # ragradar-evaluate: the run-record dataclasses, the single SQLite store, and
3
+ # the sNrN target parser. End users normally import from ragradar_capture or
4
+ # ragradar_evaluate, both of which re-export the dataclasses.
5
+ from ragradar_core.schema import (
6
+ CacheEvent,
7
+ ChunkRecord,
8
+ RunRecord,
9
+ TokenBudget,
10
+ TokenUsage,
11
+ ToolCallRecord,
12
+ Turn,
13
+ )
14
+ from ragradar_core.targets import parse_target_id
15
+
16
+ __all__ = [
17
+ "ChunkRecord",
18
+ "TokenBudget",
19
+ "TokenUsage",
20
+ "Turn",
21
+ "CacheEvent",
22
+ "ToolCallRecord",
23
+ "RunRecord",
24
+ "parse_target_id",
25
+ ]
@@ -0,0 +1,229 @@
1
+ """Coercion of plain-Python inputs into the ragradar_core schema dataclasses.
2
+
3
+ This is the shared user-input boundary: ragradar_capture's entry points
4
+ (Capture methods, capture(), the thread-local proxies) and ragradar_evaluate's
5
+ target resolution (evaluate()/check() on a hand-built RunRecord) route
6
+ user input through these functions, so naive callers can pass primitives
7
+ — shorthand dicts, tuples, a bare int budget — without knowing the
8
+ dataclasses exist. The dataclasses (Turn, ChunkRecord, TokenBudget,
9
+ CacheEvent, TokenUsage, ToolCallRecord) remain the advanced path and
10
+ always pass through untouched; explicitly provided fields always win
11
+ over computed defaults.
12
+
13
+ Token counts are estimated with a deterministic ~4-characters-per-token
14
+ heuristic (no tokenizer dependency — ragradar-core stays stdlib-only). Pass
15
+ explicit ``tokens`` / ``token_count`` values to override.
16
+
17
+ All functions are pure and raise TypeError/KeyError on unusable input;
18
+ callers decide the failure policy (ragradar_capture swallows/logs by default
19
+ and raises in strict mode; ragradar_evaluate raises ValueError).
20
+ """
21
+
22
+ from collections.abc import Mapping
23
+
24
+ from ragradar_core.schema import (
25
+ CacheEvent,
26
+ ChunkRecord,
27
+ RunRecord,
28
+ TokenBudget,
29
+ TokenUsage,
30
+ ToolCallRecord,
31
+ Turn,
32
+ )
33
+
34
+
35
def estimate_tokens(text) -> int:
    """Estimate the token count of ``text`` at roughly 4 characters per token. Pure.

    Falsy input (``None`` or an empty string) yields 0; any non-empty text
    yields at least 1. This is the fallback wherever a token count can be
    derived from content but was not supplied explicitly.
    """
    if text:
        approx = round(len(text) / 4)
        # Very short text rounds down to 0; floor it at 1 so non-empty
        # content never reports zero tokens.
        return approx if approx > 1 else 1
    return 0
44
+
45
+
46
def coerce_turn(turn) -> Turn:
    """Turn one piece of history input into a ``Turn``. Pure.

    Supported shapes:

    * a ``Turn`` instance, returned as-is;
    * a two-item tuple or list ``(role, content)``;
    * a mapping containing a ``"role"`` key (``"content"`` defaults to
      ``""``, ``"tokens"`` is optional);
    * a shorthand mapping with exactly one ``{role: content}`` entry,
      e.g. ``{"user": "..."}``, optionally accompanied by ``"tokens"``.

    When no token count is supplied (or it is ``None``) it is estimated
    from the content. Raises ``TypeError`` for any other shape.
    """
    if isinstance(turn, Turn):
        return turn

    if isinstance(turn, (tuple, list)):
        if len(turn) != 2:
            raise TypeError(f"Turn tuples must be (role, content), got {len(turn)} items: {turn!r}")
        role, content = turn
        return Turn(role=role, content=content, tokens=estimate_tokens(content))

    if not isinstance(turn, Mapping):
        raise TypeError(f"Cannot coerce {type(turn).__name__} into a history turn: {turn!r}")

    # Work on a copy so the caller's mapping is never modified.
    entries = dict(turn)
    if "role" in entries:
        # Full form: explicit role/content keys.
        role = entries["role"]
        content = entries.get("content", "")
        tokens = entries.get("tokens")
    else:
        # Shorthand form: the single remaining key is the role.
        tokens = entries.pop("tokens", None)
        if len(entries) != 1:
            raise TypeError(
                "Shorthand turn dicts must have exactly one role entry, e.g. "
                f'{{"user": "..."}} (plus an optional "tokens"), got: {turn!r}'
            )
        role, content = next(iter(entries.items()))

    if tokens is None:
        tokens = estimate_tokens(content)
    return Turn(role=role, content=content, tokens=tokens)
79
+
80
+
81
def coerce_turns(turns) -> list[Turn]:
    """Apply ``coerce_turn`` to every item of ``turns``, preserving order. Pure."""
    return list(map(coerce_turn, turns))
84
+
85
+
86
def coerce_chunk(chunk, index: int) -> ChunkRecord:
    """Turn one retrieval chunk input into a ``ChunkRecord``. Pure.

    A ``ChunkRecord`` is returned as-is. A mapping is copied and completed:
    an absent ``chunk_id`` becomes ``"chunk_{index}"``, an absent
    ``source_doc_id`` becomes ``"unknown"``, and a missing or ``None``
    ``token_count`` is estimated from ``content``. Every other field falls
    back to the dataclass default. Raises ``TypeError`` for anything that is
    neither a ``ChunkRecord`` nor a mapping.
    """
    if isinstance(chunk, ChunkRecord):
        return chunk
    if not isinstance(chunk, Mapping):
        raise TypeError(f"Cannot coerce {type(chunk).__name__} into a chunk: {chunk!r}")

    # Defaults first, then the caller's entries, so provided keys always win.
    attrs = {"chunk_id": f"chunk_{index}", "source_doc_id": "unknown", **chunk}
    if attrs.get("token_count") is None:
        attrs["token_count"] = estimate_tokens(attrs.get("content"))
    return ChunkRecord(**attrs)
105
+
106
+
107
def coerce_chunks(chunks) -> list[ChunkRecord]:
    """Apply ``coerce_chunk`` to every item of ``chunks``. Pure.

    Each item's position in the sequence is passed along as the index used
    for its default ``chunk_id``.
    """
    coerced = []
    for position, raw in enumerate(chunks):
        coerced.append(coerce_chunk(raw, position))
    return coerced
110
+
111
+
112
def coerce_token_budget(budget, final_prompt=None) -> TokenBudget:
    """Turn a token-budget input into a ``TokenBudget``. Pure.

    A ``TokenBudget`` is returned as-is. A bare ``int`` is taken as the
    total limit; a mapping must carry at least ``"total_limit"``. Missing
    (or ``None``) allocation fields become 0.

    When ``headroom`` is missing it is derived, first match wins:

    1. any allocation was supplied -> ``total_limit`` minus the allocations;
    2. ``final_prompt`` is non-empty -> ``total_limit`` minus its estimated
       token count;
    3. otherwise -> ``total_limit``.

    A derived headroom can be negative, which signals an over-budget run.
    Raises ``TypeError`` for unsupported input types (including ``bool``).
    """
    if isinstance(budget, TokenBudget):
        return budget

    # bool is an int subclass, so it has to be excluded explicitly.
    supported = isinstance(budget, (int, Mapping)) and not isinstance(budget, bool)
    if not supported:
        raise TypeError(f"Cannot coerce {type(budget).__name__} into a token budget: {budget!r}")

    if isinstance(budget, int):
        spec = {"total_limit": budget}
    else:
        spec = dict(budget)

    allocations = ("chunks_allocated", "history_allocated", "system_allocated")
    # Record which allocations the caller supplied before zero-filling the rest.
    supplied = [name for name in allocations if spec.get(name) is not None]
    for name in allocations:
        if name not in supplied:
            spec[name] = 0

    if spec.get("headroom") is None:
        limit = spec["total_limit"]
        if supplied:
            headroom = limit - sum(spec[name] for name in allocations)
        elif final_prompt:
            headroom = limit - estimate_tokens(final_prompt)
        else:
            headroom = limit
        spec["headroom"] = headroom

    return TokenBudget(**spec)
144
+
145
+
146
def coerce_cache_events(events) -> list[CacheEvent]:
    """Turn cache-event input into a list of ``CacheEvent``. Pure.

    ``events`` is either a ``{chunk_id: hit}`` mapping covering the whole
    call, or an iterable whose items are each a ``CacheEvent`` (kept
    as-is), a mapping of ``CacheEvent`` fields, or a two-item
    ``(chunk_id, hit)`` tuple/list. Raises ``TypeError`` for any other item.
    """
    if isinstance(events, Mapping):
        return [
            CacheEvent(chunk_id=chunk_id, hit=bool(hit))
            for chunk_id, hit in events.items()
        ]

    def _coerce_item(event):
        # Convert a single sequence item; shapes are tried in a fixed order.
        if isinstance(event, CacheEvent):
            return event
        if isinstance(event, Mapping):
            return CacheEvent(**event)
        if isinstance(event, (tuple, list)) and len(event) == 2:
            chunk_id, hit = event
            return CacheEvent(chunk_id=chunk_id, hit=bool(hit))
        raise TypeError(f"Cannot coerce {type(event).__name__} into a cache event: {event!r}")

    return [_coerce_item(event) for event in events]
166
+
167
+
168
def coerce_token_usage(usage) -> TokenUsage:
    """Turn token-usage input into a ``TokenUsage``. Pure.

    A ``TokenUsage`` is returned as-is. A mapping is copied, and when its
    ``total_tokens`` is missing or ``None`` the total is filled in as
    ``input_tokens + output_tokens``. Raises ``TypeError`` for anything
    that is neither a ``TokenUsage`` nor a mapping.
    """
    if isinstance(usage, TokenUsage):
        return usage
    if not isinstance(usage, Mapping):
        raise TypeError(f"Cannot coerce {type(usage).__name__} into token usage: {usage!r}")

    counts = dict(usage)
    if counts.get("total_tokens") is None:
        tokens_in = counts.get("input_tokens", 0)
        tokens_out = counts.get("output_tokens", 0)
        counts["total_tokens"] = tokens_in + tokens_out
    return TokenUsage(**counts)
182
+
183
+
184
def coerce_tool_call(call) -> ToolCallRecord:
    """Turn one tool-call input into a ``ToolCallRecord``. Pure.

    A ``ToolCallRecord`` is returned as-is; a mapping is expanded into the
    dataclass constructor. Anything else raises ``TypeError``.
    """
    if isinstance(call, ToolCallRecord):
        return call
    if not isinstance(call, Mapping):
        raise TypeError(f"Cannot coerce {type(call).__name__} into a tool call: {call!r}")
    return ToolCallRecord(**call)
191
+
192
+
193
def coerce_run_record(record: RunRecord) -> RunRecord:
    """Return a new ``RunRecord`` whose nested fields are all coerced. Pure.

    ``RunRecord`` stores whatever nested values it is given, so a record
    built by hand can hold plain dicts, tuples or an int budget where the
    dataclasses are expected. Each nested field is passed through its
    coercer (existing dataclass instances come back unchanged), ``None``
    fields stay ``None``, and scalar fields are copied over. ``record``
    itself is not modified.
    """

    def _optional(coercer, value, *extra):
        # None means the field was not captured; keep it None.
        return None if value is None else coercer(value, *extra)

    def _tool_calls(calls):
        return [coerce_tool_call(call) for call in calls]

    return RunRecord(
        query=record.query,
        response=record.response,
        chunks=_optional(coerce_chunks, record.chunks),
        final_prompt=record.final_prompt,
        # The prompt lets a missing headroom be derived from its estimated size.
        token_budget=_optional(coerce_token_budget, record.token_budget, record.final_prompt),
        history_pre=_optional(coerce_turns, record.history_pre),
        history_post=_optional(coerce_turns, record.history_post),
        eviction_reason=record.eviction_reason,
        cache_events=_optional(coerce_cache_events, record.cache_events),
        tool_calls=_optional(_tool_calls, record.tool_calls),
        model=record.model,
        token_usage=_optional(coerce_token_usage, record.token_usage),
    )
@@ -0,0 +1,183 @@
1
+ """Run record dataclasses shared by every ragradar package.
2
+
3
+ Pure data definitions — nothing in this module touches the store. All
4
+ dataclasses are decorated with ``_flexible`` so unknown keyword arguments
5
+ are silently dropped: instrumentation with extra fields never raises
6
+ ``TypeError`` in a caller's pipeline, and future fields never break old
7
+ readers.
8
+ """
9
+
10
+ import functools
11
+ from dataclasses import asdict, dataclass, fields
12
+ from typing import Optional
13
+
14
+
15
+ def _flexible(cls):
16
+ """Make dataclass __init__ accept and ignore unknown keyword arguments."""
17
+ original_init = cls.__init__
18
+
19
+ @functools.wraps(original_init)
20
+ def init(self, *args, **kwargs):
21
+ valid = {f.name for f in fields(cls)}
22
+ original_init(self, *args, **{k: v for k, v in kwargs.items() if k in valid})
23
+
24
+ cls.__init__ = init
25
+ return cls
26
+
27
+
28
@_flexible
@dataclass
class ChunkRecord:
    """A single retrieved chunk within a run's context window.

    This is the typed form. ``ragradar.capture()`` and ``cap.chunks()``
    also take plain dicts where only ``content`` is mandatory; those are
    coerced into ``ChunkRecord`` internally, with defaults filled in for
    ``chunk_id``, ``source_doc_id`` and the rest. Build one directly when
    you want static typing or already hold data in this shape.

    Unknown keyword arguments are ignored by the constructor.
    """

    # Required fields.
    chunk_id: str
    source_doc_id: str
    content: str
    token_count: int
    # Optional fields; each keeps the default below when not supplied.
    retrieval_score: Optional[float] = None
    rerank_score: Optional[float] = None
    retrieval_path: Optional[str] = None
    truncated: bool = False
    cache_hit: Optional[bool] = None
50
+
51
+
52
@_flexible
@dataclass
class TokenBudget:
    """Allocation of a run's token limit across chunks, history and system.

    This is the typed form. ``cap.context(prompt, token_budget=...)`` also
    takes a bare int (the total limit) or a partial dict, in which case
    absent allocations become 0 and ``headroom`` is derived.

    Unknown keyword arguments are ignored by the constructor.
    """

    # All five fields are required when constructing the dataclass directly.
    total_limit: int
    chunks_allocated: int
    history_allocated: int
    system_allocated: int
    headroom: int
67
+
68
+
69
@_flexible
@dataclass
class TokenUsage:
    """Token counts reported by an LLM call, as distinct from the budget.

    This is the typed form. ``cap.response(text, token_usage=...)`` also
    takes a dict, in which case an absent ``total_tokens`` is computed as
    ``input_tokens + output_tokens``.

    Unknown keyword arguments are ignored by the constructor.
    """

    # All three fields are required when constructing the dataclass directly.
    input_tokens: int
    output_tokens: int
    total_tokens: int
82
+
83
+
84
@_flexible
@dataclass
class Turn:
    """A single conversation-history turn, before or after eviction.

    This is the typed form. ``cap.history(pre=..., post=...)`` also takes
    shorthand dicts such as ``{"user": "..."}`` / ``{"assistant": "..."}``
    or ``(role, content)`` tuples, estimating ``tokens`` from the content
    when it is not given.

    Unknown keyword arguments are ignored by the constructor.
    """

    role: str
    content: str
    # Optional; stays None when the dataclass is built directly without it.
    tokens: Optional[int] = None
98
+
99
+
100
@_flexible
@dataclass
class CacheEvent:
    """Records whether a given chunk was served from cache during a run.

    This is the typed form. ``cap.cache(...)`` also takes a whole-call
    ``{chunk_id: hit}`` mapping or ``(chunk_id, hit)`` pairs.

    Unknown keyword arguments are ignored by the constructor.
    """

    chunk_id: str
    hit: bool
    # Optional; defaults to None.
    cache_source: Optional[str] = None
112
+
113
+
114
@_flexible
@dataclass
class ToolCallRecord:
    """A single tool/function call made while producing a run's response.

    This is the typed form. ``cap.tool_call(...)`` also takes a plain dict
    using the same field names.

    Unknown keyword arguments are ignored by the constructor.
    """

    # Required fields.
    tool_name: str
    arguments: dict
    # Optional fields; each defaults to None.
    result: Optional[str] = None
    error: Optional[str] = None
    latency_ms: Optional[float] = None
128
+
129
+
130
@_flexible
@dataclass
class RunRecord:
    """Everything captured about one pipeline run.

    ``ragradar.capture()`` / ``Capture`` assemble and persist this record,
    and ``ragradar.evaluate()`` / ``check()`` score it. Only ``query`` and
    ``response`` are required; every other field is optional, so a
    pipeline can be instrumented partially. Callers rarely build one by
    hand, since it is put together from the primitives given to
    ``capture()`` or the staged ``Capture`` methods.

    Unknown keyword arguments are ignored by the constructor.
    """

    # Required fields.
    query: str
    response: str
    # Optional fields; None means that part of the run was not captured.
    chunks: Optional[list[ChunkRecord]] = None
    final_prompt: Optional[str] = None
    token_budget: Optional[TokenBudget] = None
    history_pre: Optional[list[Turn]] = None
    history_post: Optional[list[Turn]] = None
    eviction_reason: Optional[str] = None
    cache_events: Optional[list[CacheEvent]] = None
    tool_calls: Optional[list[ToolCallRecord]] = None
    model: Optional[str] = None
    token_usage: Optional[TokenUsage] = None

    def to_json(self) -> dict:
        """Return this record as a plain dict via ``dataclasses.asdict``. Pure."""
        return asdict(self)

    @classmethod
    def from_json(cls, data: dict) -> "RunRecord":
        """Reconstruct a ``RunRecord`` from the dict ``to_json()`` produces. Pure.

        Nested dicts are turned back into their dataclasses (for example
        each entry of ``chunks`` becomes a ``ChunkRecord``), so the result
        is fully typed. ``data`` is copied first and is not modified.
        """
        # (field name, dataclass, whether the field holds a list of them),
        # listed in the order the fields are reinflated.
        nested = (
            ("chunks", ChunkRecord, True),
            ("token_budget", TokenBudget, False),
            ("history_pre", Turn, True),
            ("history_post", Turn, True),
            ("cache_events", CacheEvent, True),
            ("tool_calls", ToolCallRecord, True),
            ("token_usage", TokenUsage, False),
        )
        payload = dict(data)
        for key, factory, is_list in nested:
            raw = payload.get(key)
            if raw is None:
                continue
            if is_list:
                payload[key] = [factory(**item) for item in raw]
            else:
                payload[key] = factory(**raw)
        return cls(**payload)