ragradar-core 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ragradar_core-0.1.0/.gitignore +38 -0
- ragradar_core-0.1.0/PKG-INFO +89 -0
- ragradar_core-0.1.0/README.md +71 -0
- ragradar_core-0.1.0/pyproject.toml +31 -0
- ragradar_core-0.1.0/src/ragradar_core/__init__.py +25 -0
- ragradar_core-0.1.0/src/ragradar_core/coerce.py +229 -0
- ragradar_core-0.1.0/src/ragradar_core/schema.py +183 -0
- ragradar_core-0.1.0/src/ragradar_core/store.py +773 -0
- ragradar_core-0.1.0/src/ragradar_core/targets.py +26 -0
- ragradar_core-0.1.0/tests/conftest.py +86 -0
- ragradar_core-0.1.0/tests/test_fts5_triggers.py +194 -0
- ragradar_core-0.1.0/tests/test_migration.py +254 -0
- ragradar_core-0.1.0/tests/test_schema.py +310 -0
- ragradar_core-0.1.0/tests/test_store.py +368 -0
- ragradar_core-0.1.0/tests/test_targets.py +29 -0
- ragradar_core-0.1.0/tests/test_zero_deps.py +25 -0
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
# Python
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*.egg-info/
|
|
5
|
+
.venv/
|
|
6
|
+
dist/
|
|
7
|
+
build/
|
|
8
|
+
*.so
|
|
9
|
+
|
|
10
|
+
# uv
|
|
11
|
+
.uv/
|
|
12
|
+
uv.lock
|
|
13
|
+
|
|
14
|
+
# ragradar runtime — never commit user run data
|
|
15
|
+
.ragradar/
|
|
16
|
+
|
|
17
|
+
# environment
|
|
18
|
+
.env
|
|
19
|
+
*.env
|
|
20
|
+
.env.*
|
|
21
|
+
|
|
22
|
+
# IDE
|
|
23
|
+
.vscode/
|
|
24
|
+
.idea/
|
|
25
|
+
*.swp
|
|
26
|
+
|
|
27
|
+
# OS
|
|
28
|
+
.DS_Store
|
|
29
|
+
Thumbs.db
|
|
30
|
+
|
|
31
|
+
# test output
|
|
32
|
+
.pytest_cache/
|
|
33
|
+
htmlcov/
|
|
34
|
+
.coverage
|
|
35
|
+
|
|
36
|
+
# example output
|
|
37
|
+
examples/rag_pipeline/output/
|
|
38
|
+
.claude/
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: ragradar-core
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Shared schema, store, and target parsing for the ragradar observability system
|
|
5
|
+
Project-URL: Homepage, https://github.com/pleokarthik/RAGRadar
|
|
6
|
+
Project-URL: Repository, https://github.com/pleokarthik/RAGRadar
|
|
7
|
+
Project-URL: Issues, https://github.com/pleokarthik/RAGRadar/issues
|
|
8
|
+
Author-email: Leo Karthik Paramasivan <pleokarthik@gmail.com>
|
|
9
|
+
License-Expression: MIT
|
|
10
|
+
Classifier: Development Status :: 3 - Alpha
|
|
11
|
+
Classifier: Intended Audience :: Developers
|
|
12
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
15
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
16
|
+
Requires-Python: >=3.11
|
|
17
|
+
Description-Content-Type: text/markdown
|
|
18
|
+
|
|
19
|
+
# ragradar-core
|
|
20
|
+
|
|
21
|
+
Shared kernel for the ragradar observability system: the run-record schema, the
|
|
22
|
+
single SQLite store, and the sNrN target parser. `ragradar-capture`, `ragradar`, and
|
|
23
|
+
`ragradar-evaluate` all depend on it — it depends on nothing.
|
|
24
|
+
|
|
25
|
+
**You normally do not import this directly.** Instrument pipelines with
|
|
26
|
+
`ragradar_capture`, evaluate with `ragradar_evaluate` — both re-export the schema
|
|
27
|
+
dataclasses. `ragradar_core` exists so those packages share one store contract
|
|
28
|
+
instead of three copies of it.
|
|
29
|
+
|
|
30
|
+
## Zero-dependency guarantee
|
|
31
|
+
|
|
32
|
+
`ragradar_core` imports only the Python standard library (e.g. `sqlite3`,
|
|
33
|
+
`dataclasses`, `json`, `re`, `pathlib`, `datetime`). This is enforced by a
|
|
34
|
+
test (`tests/test_zero_deps.py`) that imports the package in a subprocess
|
|
35
|
+
and asserts nothing outside the stdlib was loaded.
|
|
36
|
+
|
|
37
|
+
## What lives here
|
|
38
|
+
|
|
39
|
+
| Module | Contents |
|
|
40
|
+
|---|---|
|
|
41
|
+
| `ragradar_core.schema` | `RunRecord` and its child dataclasses (`ChunkRecord`, `TokenBudget`, `TokenUsage`, `Turn`, `CacheEvent`, `ToolCallRecord`), all tolerant of unknown kwargs |
|
|
42
|
+
| `ragradar_core.store` | store location, schema + migrations, and every persistence primitive (runs, eval scores, benchmark, policies) |
|
|
43
|
+
| `ragradar_core.targets` | `parse_target_id("s4r3") -> (4, 3)` — the one sNrN parser |
|
|
44
|
+
|
|
45
|
+
## Environment setup contract
|
|
46
|
+
|
|
47
|
+
`ragradar_core.store.connect()` guarantees the environment before returning a
|
|
48
|
+
connection:
|
|
49
|
+
|
|
50
|
+
1. `~/.ragradar/` exists (created if missing),
|
|
51
|
+
2. `~/.ragradar/runs.db` exists (created if missing),
|
|
52
|
+
3. the schema is at the latest version — fresh databases are created
|
|
53
|
+
directly at the latest version; databases written by older package
|
|
54
|
+
versions are migrated in place.
|
|
55
|
+
|
|
56
|
+
Any entry point — a library call, a CLI command, an example script — works
|
|
57
|
+
on a fresh machine with no prior CLI invocation.
|
|
58
|
+
|
|
59
|
+
## Schema version + migration story
|
|
60
|
+
|
|
61
|
+
One constant, `ragradar_core.store.SCHEMA_VERSION` (currently `"3"`), recorded
|
|
62
|
+
in the `meta` table. The migration chain walks old databases forward on
|
|
63
|
+
first connect:
|
|
64
|
+
|
|
65
|
+
- **v1 → v2**: adds `eval_scores` / `risk_score` / `evaluated_at` columns
|
|
66
|
+
to `runs`; creates the `benchmark` and `policies` tables.
|
|
67
|
+
- **v2 → v3**: creates the `runs_fts` FTS5 index over run queries (with
|
|
68
|
+
insert/update/delete sync triggers, backfilled from existing rows) and
|
|
69
|
+
drops the now-redundant `idx_runs_query` index.
|
|
70
|
+
|
|
71
|
+
A database reporting a version this package doesn't know raises
|
|
72
|
+
`RuntimeError` rather than guessing.
|
|
73
|
+
|
|
74
|
+
## DB location and layout
|
|
75
|
+
|
|
76
|
+
The store lives at `~/.ragradar/runs.db` (SQLite, WAL mode).
|
|
77
|
+
|
|
78
|
+
| Table | Columns |
|
|
79
|
+
|---|---|
|
|
80
|
+
| `meta` | `key`, `value` — holds `schema_version` |
|
|
81
|
+
| `sessions` | `session_id`, `title`, `pipeline`, `created_at` |
|
|
82
|
+
| `runs` | `session_id`, `run_seq`, `query`, `pipeline`, `created_at`, `run_data` (JSON `RunRecord`), `eval_scores` (JSON), `risk_score`, `evaluated_at` |
|
|
83
|
+
| `benchmark` | `pipeline`, `factor`, `threshold`, `correlation`, `sample_count`, `updated_at` |
|
|
84
|
+
| `policies` | `pipeline`, `policy_data` (JSON), `updated_at` |
|
|
85
|
+
| `runs_fts` | FTS5 index over `runs.query` |
|
|
86
|
+
|
|
87
|
+
Runs are addressed as `s{session_id}r{run_seq}` (e.g. `s2r3`) everywhere —
|
|
88
|
+
"run" is the data noun; capturing is the verb, and belongs to
|
|
89
|
+
`ragradar-capture`.
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
# ragradar-core
|
|
2
|
+
|
|
3
|
+
Shared kernel for the ragradar observability system: the run-record schema, the
|
|
4
|
+
single SQLite store, and the sNrN target parser. `ragradar-capture`, `ragradar`, and
|
|
5
|
+
`ragradar-evaluate` all depend on it — it depends on nothing.
|
|
6
|
+
|
|
7
|
+
**You normally do not import this directly.** Instrument pipelines with
|
|
8
|
+
`ragradar_capture`, evaluate with `ragradar_evaluate` — both re-export the schema
|
|
9
|
+
dataclasses. `ragradar_core` exists so those packages share one store contract
|
|
10
|
+
instead of three copies of it.
|
|
11
|
+
|
|
12
|
+
## Zero-dependency guarantee
|
|
13
|
+
|
|
14
|
+
`ragradar_core` imports only the Python standard library (e.g. `sqlite3`,
|
|
15
|
+
`dataclasses`, `json`, `re`, `pathlib`, `datetime`). This is enforced by a
|
|
16
|
+
test (`tests/test_zero_deps.py`) that imports the package in a subprocess
|
|
17
|
+
and asserts nothing outside the stdlib was loaded.
|
|
18
|
+
|
|
19
|
+
## What lives here
|
|
20
|
+
|
|
21
|
+
| Module | Contents |
|
|
22
|
+
|---|---|
|
|
23
|
+
| `ragradar_core.schema` | `RunRecord` and its child dataclasses (`ChunkRecord`, `TokenBudget`, `TokenUsage`, `Turn`, `CacheEvent`, `ToolCallRecord`), all tolerant of unknown kwargs |
|
|
24
|
+
| `ragradar_core.store` | store location, schema + migrations, and every persistence primitive (runs, eval scores, benchmark, policies) |
|
|
25
|
+
| `ragradar_core.targets` | `parse_target_id("s4r3") -> (4, 3)` — the one sNrN parser |
|
|
26
|
+
|
|
27
|
+
## Environment setup contract
|
|
28
|
+
|
|
29
|
+
`ragradar_core.store.connect()` guarantees the environment before returning a
|
|
30
|
+
connection:
|
|
31
|
+
|
|
32
|
+
1. `~/.ragradar/` exists (created if missing),
|
|
33
|
+
2. `~/.ragradar/runs.db` exists (created if missing),
|
|
34
|
+
3. the schema is at the latest version — fresh databases are created
|
|
35
|
+
directly at the latest version; databases written by older package
|
|
36
|
+
versions are migrated in place.
|
|
37
|
+
|
|
38
|
+
Any entry point — a library call, a CLI command, an example script — works
|
|
39
|
+
on a fresh machine with no prior CLI invocation.
|
|
40
|
+
|
|
41
|
+
## Schema version + migration story
|
|
42
|
+
|
|
43
|
+
One constant, `ragradar_core.store.SCHEMA_VERSION` (currently `"3"`), recorded
|
|
44
|
+
in the `meta` table. The migration chain walks old databases forward on
|
|
45
|
+
first connect:
|
|
46
|
+
|
|
47
|
+
- **v1 → v2**: adds `eval_scores` / `risk_score` / `evaluated_at` columns
|
|
48
|
+
to `runs`; creates the `benchmark` and `policies` tables.
|
|
49
|
+
- **v2 → v3**: creates the `runs_fts` FTS5 index over run queries (with
|
|
50
|
+
insert/update/delete sync triggers, backfilled from existing rows) and
|
|
51
|
+
drops the now-redundant `idx_runs_query` index.
|
|
52
|
+
|
|
53
|
+
A database reporting a version this package doesn't know raises
|
|
54
|
+
`RuntimeError` rather than guessing.
|
|
55
|
+
|
|
56
|
+
## DB location and layout
|
|
57
|
+
|
|
58
|
+
The store lives at `~/.ragradar/runs.db` (SQLite, WAL mode).
|
|
59
|
+
|
|
60
|
+
| Table | Columns |
|
|
61
|
+
|---|---|
|
|
62
|
+
| `meta` | `key`, `value` — holds `schema_version` |
|
|
63
|
+
| `sessions` | `session_id`, `title`, `pipeline`, `created_at` |
|
|
64
|
+
| `runs` | `session_id`, `run_seq`, `query`, `pipeline`, `created_at`, `run_data` (JSON `RunRecord`), `eval_scores` (JSON), `risk_score`, `evaluated_at` |
|
|
65
|
+
| `benchmark` | `pipeline`, `factor`, `threshold`, `correlation`, `sample_count`, `updated_at` |
|
|
66
|
+
| `policies` | `pipeline`, `policy_data` (JSON), `updated_at` |
|
|
67
|
+
| `runs_fts` | FTS5 index over `runs.query` |
|
|
68
|
+
|
|
69
|
+
Runs are addressed as `s{session_id}r{run_seq}` (e.g. `s2r3`) everywhere —
|
|
70
|
+
"run" is the data noun; capturing is the verb, and belongs to
|
|
71
|
+
`ragradar-capture`.
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "ragradar-core"
|
|
3
|
+
version = "0.1.0"
|
|
4
|
+
description = "Shared schema, store, and target parsing for the ragradar observability system"
|
|
5
|
+
readme = "README.md"
|
|
6
|
+
requires-python = ">=3.11"
|
|
7
|
+
license = "MIT"
|
|
8
|
+
authors = [
|
|
9
|
+
{ name = "Leo Karthik Paramasivan", email = "pleokarthik@gmail.com" },
|
|
10
|
+
]
|
|
11
|
+
classifiers = [
|
|
12
|
+
"Development Status :: 3 - Alpha",
|
|
13
|
+
"License :: OSI Approved :: MIT License",
|
|
14
|
+
"Programming Language :: Python :: 3.11",
|
|
15
|
+
"Programming Language :: Python :: 3.12",
|
|
16
|
+
"Intended Audience :: Developers",
|
|
17
|
+
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
|
18
|
+
]
|
|
19
|
+
dependencies = []
|
|
20
|
+
|
|
21
|
+
[project.urls]
|
|
22
|
+
Homepage = "https://github.com/pleokarthik/RAGRadar"
|
|
23
|
+
Repository = "https://github.com/pleokarthik/RAGRadar"
|
|
24
|
+
Issues = "https://github.com/pleokarthik/RAGRadar/issues"
|
|
25
|
+
|
|
26
|
+
[build-system]
|
|
27
|
+
requires = ["hatchling"]
|
|
28
|
+
build-backend = "hatchling.build"
|
|
29
|
+
|
|
30
|
+
[tool.hatch.build.targets.wheel]
|
|
31
|
+
packages = ["src/ragradar_core"]
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
# ragradar-core is internal plumbing shared by ragradar-capture, ragradar, and
|
|
2
|
+
# ragradar-evaluate: the run-record dataclasses, the single SQLite store, and
|
|
3
|
+
# the sNrN target parser. End users normally import from ragradar_capture or
|
|
4
|
+
# ragradar_evaluate, both of which re-export the dataclasses.
|
|
5
|
+
from ragradar_core.schema import (
|
|
6
|
+
CacheEvent,
|
|
7
|
+
ChunkRecord,
|
|
8
|
+
RunRecord,
|
|
9
|
+
TokenBudget,
|
|
10
|
+
TokenUsage,
|
|
11
|
+
ToolCallRecord,
|
|
12
|
+
Turn,
|
|
13
|
+
)
|
|
14
|
+
from ragradar_core.targets import parse_target_id
|
|
15
|
+
|
|
16
|
+
__all__ = [
|
|
17
|
+
"ChunkRecord",
|
|
18
|
+
"TokenBudget",
|
|
19
|
+
"TokenUsage",
|
|
20
|
+
"Turn",
|
|
21
|
+
"CacheEvent",
|
|
22
|
+
"ToolCallRecord",
|
|
23
|
+
"RunRecord",
|
|
24
|
+
"parse_target_id",
|
|
25
|
+
]
|
|
@@ -0,0 +1,229 @@
|
|
|
1
|
+
"""Coercion of plain-Python inputs into the ragradar_core schema dataclasses.
|
|
2
|
+
|
|
3
|
+
This is the shared user-input boundary: ragradar_capture's entry points
|
|
4
|
+
(Capture methods, capture(), the thread-local proxies) and ragradar_evaluate's
|
|
5
|
+
target resolution (evaluate()/check() on a hand-built RunRecord) route
|
|
6
|
+
user input through these functions, so naive callers can pass primitives
|
|
7
|
+
— shorthand dicts, tuples, a bare int budget — without knowing the
|
|
8
|
+
dataclasses exist. The dataclasses (Turn, ChunkRecord, TokenBudget,
|
|
9
|
+
CacheEvent, TokenUsage, ToolCallRecord) remain the advanced path and
|
|
10
|
+
always pass through untouched; explicitly provided fields always win
|
|
11
|
+
over computed defaults.
|
|
12
|
+
|
|
13
|
+
Token counts are estimated with a deterministic ~4-characters-per-token
|
|
14
|
+
heuristic (no tokenizer dependency — ragradar-core stays stdlib-only). Pass
|
|
15
|
+
explicit ``tokens`` / ``token_count`` values to override.
|
|
16
|
+
|
|
17
|
+
All functions are pure and raise TypeError/KeyError on unusable input;
|
|
18
|
+
callers decide the failure policy (ragradar_capture swallows/logs by default
|
|
19
|
+
and raises in strict mode; ragradar_evaluate raises ValueError).
|
|
20
|
+
"""
|
|
21
|
+
|
|
22
|
+
from collections.abc import Mapping
|
|
23
|
+
|
|
24
|
+
from ragradar_core.schema import (
|
|
25
|
+
CacheEvent,
|
|
26
|
+
ChunkRecord,
|
|
27
|
+
RunRecord,
|
|
28
|
+
TokenBudget,
|
|
29
|
+
TokenUsage,
|
|
30
|
+
ToolCallRecord,
|
|
31
|
+
Turn,
|
|
32
|
+
)
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def estimate_tokens(text) -> int:
    """Estimate the token count of ``text`` at roughly 4 characters per token. Pure.

    Falsy input (``None`` or an empty string) yields 0. Any non-empty text
    yields at least 1, so a very short snippet is never counted as free.
    This is the fallback used when a token count can be derived from
    content but was not supplied explicitly.
    """
    if text:
        approx = round(len(text) / 4)
        # round() maps lengths 1 and 2 to 0, so clamp to a one-token floor.
        return approx if approx >= 1 else 1
    return 0
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def coerce_turn(turn) -> Turn:
    """Turn one history entry, in any supported shape, into a ``Turn``. Pure.

    Supported shapes:

    * a ``Turn`` instance, returned as-is;
    * a two-item tuple or list ``(role, content)``;
    * a mapping with a ``"role"`` key (``"content"`` defaults to ``""``);
    * a shorthand mapping with exactly one ``{role: content}`` entry, such
      as ``{"user": "..."}``, optionally accompanied by ``"tokens"``.

    An explicit, non-None ``"tokens"`` value is kept; otherwise the count
    is estimated from the content.

    Raises:
        TypeError: for a tuple/list that is not exactly two items, a
            shorthand mapping without exactly one role entry, or any other
            input type.
    """
    if isinstance(turn, Turn):
        return turn

    if isinstance(turn, (tuple, list)):
        if len(turn) != 2:
            raise TypeError(
                f"Turn tuples must be (role, content), got {len(turn)} items: {turn!r}"
            )
        role, content = turn
        return Turn(role=role, content=content, tokens=estimate_tokens(content))

    if not isinstance(turn, Mapping):
        raise TypeError(f"Cannot coerce {type(turn).__name__} into a history turn: {turn!r}")

    entries = dict(turn)
    # "tokens" is metadata in both dict forms, so set it aside before
    # deciding which form this is.
    tokens = entries.pop("tokens", None)
    if "role" in entries:
        role = entries["role"]
        content = entries.get("content", "")
    elif len(entries) == 1:
        role, content = next(iter(entries.items()))
    else:
        raise TypeError(
            "Shorthand turn dicts must have exactly one role entry, e.g. "
            f'{{"user": "..."}} (plus an optional "tokens"), got: {turn!r}'
        )

    if tokens is None:
        tokens = estimate_tokens(content)
    return Turn(role=role, content=content, tokens=tokens)
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def coerce_turns(turns) -> list[Turn]:
    """Apply ``coerce_turn`` to every item of ``turns``, preserving order. Pure."""
    return list(map(coerce_turn, turns))
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def coerce_chunk(chunk, index: int) -> ChunkRecord:
    """Turn one retrieval chunk into a ``ChunkRecord``. Pure.

    A ``ChunkRecord`` is returned as-is. A mapping is copied and completed
    before being handed to ``ChunkRecord``:

    * ``chunk_id`` falls back to ``"chunk_{index}"`` when the key is absent;
    * ``source_doc_id`` falls back to ``"unknown"`` when the key is absent;
    * ``token_count`` is estimated from ``content`` when missing or None.

    Every other key is forwarded unchanged.

    Raises:
        TypeError: if ``chunk`` is neither a ``ChunkRecord`` nor a mapping.
    """
    if isinstance(chunk, ChunkRecord):
        return chunk
    if not isinstance(chunk, Mapping):
        raise TypeError(f"Cannot coerce {type(chunk).__name__} into a chunk: {chunk!r}")

    # Defaults first, caller's keys second: a key the caller supplied
    # (even with a None value) always overrides the default.
    filled = {
        "chunk_id": f"chunk_{index}",
        "source_doc_id": "unknown",
        **chunk,
    }
    if filled.get("token_count") is None:
        filled["token_count"] = estimate_tokens(filled.get("content"))
    return ChunkRecord(**filled)
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def coerce_chunks(chunks) -> list[ChunkRecord]:
    """Apply ``coerce_chunk`` to every item of ``chunks``. Pure.

    Each item's position in the sequence is passed as its ``index``, which
    ``coerce_chunk`` uses for the default ``chunk_id``.
    """
    records = []
    for position, item in enumerate(chunks):
        records.append(coerce_chunk(item, position))
    return records
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def coerce_token_budget(budget, final_prompt=None) -> TokenBudget:
    """Turn a budget given as an int or mapping into a ``TokenBudget``. Pure.

    A ``TokenBudget`` is returned as-is. A bare int is taken as the total
    limit. A mapping is copied and completed: each missing (or None)
    allocation becomes 0.

    When ``headroom`` is missing or None it is derived, first match wins:

    1. at least one allocation was supplied: ``total_limit`` minus the
       sum of the three allocations;
    2. a non-empty ``final_prompt`` is available: ``total_limit`` minus
       the estimated prompt tokens;
    3. otherwise: ``total_limit`` itself.

    A derived headroom can be negative, which signals an over-budget run.

    Raises:
        TypeError: if ``budget`` is a bool, or is not an int, mapping or
            ``TokenBudget``.
        KeyError: if a mapping lacks ``"total_limit"`` while headroom has
            to be derived.
    """
    if isinstance(budget, TokenBudget):
        return budget

    # bool is a subclass of int, so it has to be excluded by name.
    acceptable = isinstance(budget, (int, Mapping)) and not isinstance(budget, bool)
    if not acceptable:
        raise TypeError(f"Cannot coerce {type(budget).__name__} into a token budget: {budget!r}")

    if isinstance(budget, int):
        spec = {"total_limit": budget}
    else:
        spec = dict(budget)

    allocations = ("chunks_allocated", "history_allocated", "system_allocated")
    any_allocation_given = False
    for key in allocations:
        if spec.get(key) is None:
            spec[key] = 0
        else:
            any_allocation_given = True

    if spec.get("headroom") is None:
        limit = spec["total_limit"]
        if any_allocation_given:
            spec["headroom"] = limit - sum(spec[key] for key in allocations)
        elif final_prompt:
            spec["headroom"] = limit - estimate_tokens(final_prompt)
        else:
            spec["headroom"] = limit
    return TokenBudget(**spec)
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
def coerce_cache_events(events) -> list[CacheEvent]:
    """Turn cache events, in any supported shape, into ``CacheEvent``s. Pure.

    ``events`` is either a single ``{chunk_id: hit}`` mapping describing
    the whole call, or an iterable whose items are each one of:

    * a ``CacheEvent``, kept as-is;
    * a mapping, expanded as keyword arguments to ``CacheEvent``;
    * a two-item ``(chunk_id, hit)`` tuple or list.

    ``hit`` is converted with ``bool()`` in the mapping-of-hits and pair
    forms.

    Raises:
        TypeError: if an item of the iterable has none of those shapes.
    """

    def _single(event):
        # Coerce one item of the sequence form.
        if isinstance(event, CacheEvent):
            return event
        if isinstance(event, Mapping):
            return CacheEvent(**event)
        if isinstance(event, (tuple, list)) and len(event) == 2:
            chunk_id, hit = event
            return CacheEvent(chunk_id=chunk_id, hit=bool(hit))
        raise TypeError(f"Cannot coerce {type(event).__name__} into a cache event: {event!r}")

    if isinstance(events, Mapping):
        coerced = []
        for chunk_id, hit in events.items():
            coerced.append(CacheEvent(chunk_id=chunk_id, hit=bool(hit)))
        return coerced
    return [_single(event) for event in events]
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
def coerce_token_usage(usage) -> TokenUsage:
    """Turn token usage given as a mapping into a ``TokenUsage``. Pure.

    A ``TokenUsage`` is returned as-is. A mapping is copied; when
    ``total_tokens`` is missing or None it is filled in as
    ``input_tokens + output_tokens``, each counted as 0 if its key is absent.

    Raises:
        TypeError: if ``usage`` is neither a ``TokenUsage`` nor a mapping.
    """
    if isinstance(usage, TokenUsage):
        return usage
    if not isinstance(usage, Mapping):
        raise TypeError(f"Cannot coerce {type(usage).__name__} into token usage: {usage!r}")

    counts = dict(usage)
    if counts.get("total_tokens") is None:
        tokens_in = counts.get("input_tokens", 0)
        tokens_out = counts.get("output_tokens", 0)
        counts["total_tokens"] = tokens_in + tokens_out
    return TokenUsage(**counts)
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
def coerce_tool_call(call) -> ToolCallRecord:
    """Turn one tool call into a ``ToolCallRecord``. Pure.

    A ``ToolCallRecord`` is returned as-is; a mapping is expanded as
    keyword arguments to ``ToolCallRecord``.

    Raises:
        TypeError: if ``call`` is neither a ``ToolCallRecord`` nor a mapping.
    """
    if isinstance(call, ToolCallRecord):
        return call
    if not isinstance(call, Mapping):
        raise TypeError(f"Cannot coerce {type(call).__name__} into a tool call: {call!r}")
    return ToolCallRecord(**call)
|
|
191
|
+
|
|
192
|
+
|
|
193
|
+
def coerce_run_record(record: RunRecord) -> RunRecord:
    """Build a new ``RunRecord`` from ``record`` with nested fields coerced. Pure.

    A hand-built record may hold plain dicts, tuples or a bare int where
    downstream code expects the schema dataclasses. Each nested field that
    is not None is passed through its coercer; None fields stay None and
    scalar fields are copied over. The token budget coercer also receives
    ``record.final_prompt`` so a missing headroom can be derived from it.

    A new ``RunRecord`` is constructed; ``record`` itself is not reassigned
    or modified here.
    """
    # Coerce in field order so the first invalid field is the one that raises.
    chunks = None if record.chunks is None else coerce_chunks(record.chunks)

    if record.token_budget is None:
        budget = None
    else:
        budget = coerce_token_budget(record.token_budget, record.final_prompt)

    turns_before = None if record.history_pre is None else coerce_turns(record.history_pre)
    turns_after = None if record.history_post is None else coerce_turns(record.history_post)

    if record.cache_events is None:
        cache = None
    else:
        cache = coerce_cache_events(record.cache_events)

    if record.tool_calls is None:
        calls = None
    else:
        calls = [coerce_tool_call(call) for call in record.tool_calls]

    usage = None if record.token_usage is None else coerce_token_usage(record.token_usage)

    return RunRecord(
        query=record.query,
        response=record.response,
        chunks=chunks,
        final_prompt=record.final_prompt,
        token_budget=budget,
        history_pre=turns_before,
        history_post=turns_after,
        eviction_reason=record.eviction_reason,
        cache_events=cache,
        tool_calls=calls,
        model=record.model,
        token_usage=usage,
    )
|
|
@@ -0,0 +1,183 @@
|
|
|
1
|
+
"""Run record dataclasses shared by every ragradar package.
|
|
2
|
+
|
|
3
|
+
Pure data definitions — nothing in this module touches the store. All
|
|
4
|
+
dataclasses are decorated with ``_flexible`` so unknown keyword arguments
|
|
5
|
+
are silently dropped: instrumentation with extra fields never raises
|
|
6
|
+
``TypeError`` in a caller's pipeline, and future fields never break old
|
|
7
|
+
readers.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import functools
|
|
11
|
+
from dataclasses import asdict, dataclass, fields
|
|
12
|
+
from typing import Optional
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def _flexible(cls):
    """Make a dataclass's ``__init__`` accept and ignore unknown keyword arguments.

    Must be applied *above* ``@dataclass`` so that ``cls`` already has its
    fields and its generated ``__init__``. The wrapper forwards positional
    arguments untouched and drops every keyword argument whose name is not
    a field of ``cls``.

    Args:
        cls: an already-decorated dataclass type.

    Returns:
        The same class object, with ``__init__`` replaced in place.

    Raises:
        TypeError: if ``cls`` is not a dataclass (raised by
            ``dataclasses.fields`` at decoration time).
    """
    original_init = cls.__init__
    # The field set is fixed once @dataclass has run, so resolve the accepted
    # names once here instead of rebuilding the set on every instantiation.
    valid = frozenset(f.name for f in fields(cls))

    @functools.wraps(original_init)
    def init(self, *args, **kwargs):
        original_init(self, *args, **{k: v for k, v in kwargs.items() if k in valid})

    cls.__init__ = init
    return cls
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
@_flexible
@dataclass
class ChunkRecord:
    """A single retrieved chunk from a run's context window.

    This is the typed form; most callers never build it by hand.
    ``ragradar.capture()`` and ``cap.chunks()`` take plain dicts instead,
    where only ``content`` is required and everything else (``chunk_id``
    and ``source_doc_id`` included) is defaulted, and convert them to this
    shape internally. Build a ``ChunkRecord`` directly when you want static
    typing or already hold data in this form.
    """

    # Required: identity, text and size of the chunk.
    chunk_id: str
    source_doc_id: str
    content: str
    token_count: int
    # Optional score/path/flag fields; each keeps its default when not given.
    retrieval_score: Optional[float] = None
    rerank_score: Optional[float] = None
    retrieval_path: Optional[str] = None
    truncated: bool = False
    cache_hit: Optional[bool] = None
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
@_flexible
@dataclass
class TokenBudget:
    """The split of a run's token limit across chunks, history and system.

    This is the typed form. ``cap.context(prompt, token_budget=...)`` also
    takes a bare int (the total limit) or a partial dict; in that path
    missing allocations default to 0 and an omitted ``headroom`` is derived.
    Constructing this class directly requires all five fields.
    """

    # Overall limit for the run.
    total_limit: int
    # Portions of the limit assigned to each part of the prompt.
    chunks_allocated: int
    history_allocated: int
    system_allocated: int
    # Remaining budget.
    headroom: int
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
@_flexible
@dataclass
class TokenUsage:
    """Token counts reported by an LLM call, as distinct from the budget.

    This is the typed form. ``cap.response(text, token_usage=...)`` also
    takes a dict, in which case a missing ``total_tokens`` is filled in as
    ``input_tokens + output_tokens``. Constructing this class directly
    requires all three fields.
    """

    input_tokens: int
    output_tokens: int
    total_tokens: int
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
@_flexible
@dataclass
class Turn:
    """A single conversation-history turn, before or after eviction.

    This is the typed form. ``cap.history(pre=..., post=...)`` also takes
    shorthand ``{"user": "..."}`` / ``{"assistant": "..."}`` dicts or
    ``(role, content)`` tuples, and estimates a missing ``tokens`` count
    from the content.
    """

    role: str
    content: str
    # Defaults to None when the class is constructed directly without a count.
    tokens: Optional[int] = None
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
@_flexible
@dataclass
class CacheEvent:
    """Records whether a given chunk was served from cache during a run.

    This is the typed form. ``cap.cache(...)`` also takes one
    ``{chunk_id: hit}`` mapping for the whole call, or ``(chunk_id, hit)``
    pairs.
    """

    chunk_id: str
    hit: bool
    # Optional; defaults to None when not given.
    cache_source: Optional[str] = None
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
@_flexible
@dataclass
class ToolCallRecord:
    """A single tool/function call made while producing a run's response.

    This is the typed form. ``cap.tool_call(...)`` also takes a plain dict
    that uses these same field names.
    """

    # Required: which tool was called and with what arguments.
    tool_name: str
    arguments: dict
    # Optional details; each defaults to None when not given.
    result: Optional[str] = None
    error: Optional[str] = None
    latency_ms: Optional[float] = None
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
@_flexible
@dataclass
class RunRecord:
    """Everything captured about one pipeline run.

    ``ragradar.capture()`` / ``Capture`` assemble and persist this record,
    and ``ragradar.evaluate()`` / ``check()`` score it. Only ``query`` and
    ``response`` are required; every other field is optional, so a pipeline
    can be instrumented as fully or as lightly as is practical. Most
    callers never construct one by hand: it is built from the primitives
    passed to ``capture()`` or to the staged ``Capture`` methods.
    """

    # Required.
    query: str
    response: str
    # Optional; each defaults to None when that part was not captured.
    chunks: Optional[list[ChunkRecord]] = None
    final_prompt: Optional[str] = None
    token_budget: Optional[TokenBudget] = None
    history_pre: Optional[list[Turn]] = None
    history_post: Optional[list[Turn]] = None
    eviction_reason: Optional[str] = None
    cache_events: Optional[list[CacheEvent]] = None
    tool_calls: Optional[list[ToolCallRecord]] = None
    model: Optional[str] = None
    token_usage: Optional[TokenUsage] = None

    def to_json(self) -> dict:
        """Return this record as a plain dict via ``dataclasses.asdict``. Pure.

        Nested dataclasses are converted to dicts recursively.
        """
        return asdict(self)

    @classmethod
    def from_json(cls, data: dict) -> "RunRecord":
        """Rebuild a ``RunRecord`` from the dict produced by ``to_json()``. Pure.

        Nested dicts are turned back into their dataclasses (for example
        each entry of ``chunks`` becomes a ``ChunkRecord``), so the result
        is fully typed. Fields that are absent or None are left alone.
        ``data`` is copied first and is not modified.
        """
        payload = dict(data)
        # (field name, dataclass to rebuild, whether the field holds a list)
        nested = (
            ("chunks", ChunkRecord, True),
            ("token_budget", TokenBudget, False),
            ("history_pre", Turn, True),
            ("history_post", Turn, True),
            ("cache_events", CacheEvent, True),
            ("tool_calls", ToolCallRecord, True),
            ("token_usage", TokenUsage, False),
        )
        for key, factory, holds_list in nested:
            raw = payload.get(key)
            if raw is None:
                continue
            if holds_list:
                payload[key] = [factory(**item) for item in raw]
            else:
                payload[key] = factory(**raw)
        return cls(**payload)
|