pytest-grounding 0.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- grounding/__init__.py +64 -0
- grounding/_capture.py +68 -0
- grounding/_normalize.py +38 -0
- grounding/_text.py +52 -0
- grounding/claim.py +101 -0
- grounding/cli.py +39 -0
- grounding/guard.py +105 -0
- grounding/loaders.py +186 -0
- grounding/plugin.py +236 -0
- grounding/report_io.py +40 -0
- grounding/trace.py +65 -0
- pytest_grounding-0.0.1.dist-info/METADATA +214 -0
- pytest_grounding-0.0.1.dist-info/RECORD +17 -0
- pytest_grounding-0.0.1.dist-info/WHEEL +5 -0
- pytest_grounding-0.0.1.dist-info/entry_points.txt +5 -0
- pytest_grounding-0.0.1.dist-info/licenses/LICENSE +21 -0
- pytest_grounding-0.0.1.dist-info/top_level.txt +1 -0
grounding/__init__.py
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
"""grounding — turn assertions about data into re-runnable, provenance-tracked claims.
|
|
2
|
+
|
|
3
|
+
A claim is a pytest test. Inside it you ground a statement in sha-pinned evidence:
|
|
4
|
+
|
|
5
|
+
from grounding import data, doc, statement, evidence, strength, caveats
|
|
6
|
+
|
|
7
|
+
@strength("strong")
|
|
8
|
+
@caveats("single run; n=3 per dose")
|
|
9
|
+
def test_knockdown_at_high_dose():
|
|
10
|
+
df = data("measurements.csv") # sha-pinned read, recorded as provenance
|
|
11
|
+
hi = df[df.dose == 300].knockdown.mean()
|
|
12
|
+
statement(f"Knockdown reached {hi:.0f}% at the 300 nM dose") # the proposition
|
|
13
|
+
evidence(knockdown_pct=round(hi, 1))
|
|
14
|
+
assert hi > 50 # the grounding/drift check
|
|
15
|
+
|
|
16
|
+
The pytest plugin (auto-loaded via the ``pytest11`` entry point) wraps each test in a
|
|
17
|
+
capture, records every ``data``/``doc`` read, and emits ``grounding_report.json``. The
|
|
18
|
+
non-binary judgment (``@strength``/``@caveats``/``@kind``/``@reviewed``) is metadata a
|
|
19
|
+
reviewer judges — never a pass/fail input.
|
|
20
|
+
|
|
21
|
+
Everything here is a pure function of file bytes: no network, no key, no model.
|
|
22
|
+
|
|
23
|
+
Public API:
|
|
24
|
+
|
|
25
|
+
data / load sha-pinned CSV loader -> DataFrame(.attrs) [needs the [data] extra]
|
|
26
|
+
doc -> DocRef record a document; DocRef.contains() verifies a quote [needs [docs]]
|
|
27
|
+
statement(text) the claim's proposition (ideally computed from data)
|
|
28
|
+
evidence(**kv) headline numbers for the report
|
|
29
|
+
uses(claim_id) compose on a prior claim (transitive provenance + evidence)
|
|
30
|
+
strength/caveats/kind/reviewed the judgment markers
|
|
31
|
+
Capture / current_capture / record / registry / TRACKED_SUFFIXES the capture core
|
|
32
|
+
install_guard install the untracked-read bypass guard (the plugin does this)
|
|
33
|
+
"""
|
|
34
|
+
from __future__ import annotations
|
|
35
|
+
|
|
36
|
+
from ._capture import (
|
|
37
|
+
Capture,
|
|
38
|
+
TRACKED_SUFFIXES,
|
|
39
|
+
current_capture,
|
|
40
|
+
record,
|
|
41
|
+
registry,
|
|
42
|
+
)
|
|
43
|
+
from ._text import match_phrase, sha256
|
|
44
|
+
from .loaders import (
|
|
45
|
+
DocRef,
|
|
46
|
+
EmptyExtraction,
|
|
47
|
+
UnsupportedDocFormat,
|
|
48
|
+
data,
|
|
49
|
+
doc,
|
|
50
|
+
load,
|
|
51
|
+
)
|
|
52
|
+
from .claim import caveats, evidence, kind, reviewed, statement, strength, uses
|
|
53
|
+
from .guard import install_guard
|
|
54
|
+
|
|
55
|
+
__all__ = [
|
|
56
|
+
"load", "data", "doc", "DocRef", "UnsupportedDocFormat", "EmptyExtraction",
|
|
57
|
+
"statement", "evidence", "uses",
|
|
58
|
+
"strength", "caveats", "kind", "reviewed",
|
|
59
|
+
"Capture", "current_capture", "record", "registry", "TRACKED_SUFFIXES",
|
|
60
|
+
"match_phrase", "sha256",
|
|
61
|
+
"install_guard",
|
|
62
|
+
]
|
|
63
|
+
|
|
64
|
+
__version__ = "0.0.1"
|
grounding/_capture.py
ADDED
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
"""The capture context — the heart of automatic provenance.
|
|
2
|
+
|
|
3
|
+
While a claim runs, a :class:`Capture` is active in a context variable. Every tracked read
|
|
4
|
+
(``load``/``data``/``doc``, or an untracked read the bypass guard catches) records its
|
|
5
|
+
``{kind, path, sha256}`` into it, and the claim's :func:`grounding.statement` /
|
|
6
|
+
:func:`grounding.evidence` write the proposition + headline numbers. A claim's id + its
|
|
7
|
+
captured inputs + its statement + its evidence form a *computed* record — never
|
|
8
|
+
hand-maintained.
|
|
9
|
+
"""
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
import contextvars
|
|
13
|
+
from dataclasses import dataclass, field
|
|
14
|
+
from typing import Any
|
|
15
|
+
|
|
16
|
+
# Source-file kinds we consider "tracked": reading one while a capture is active is
|
|
17
|
+
# provenance the claim depends on. The bypass guard watches the same set.
|
|
18
|
+
TRACKED_SUFFIXES = {
    # tabular data
    ".csv", ".tsv", ".xlsx", ".xls",
    # documents and slide decks
    ".pdf", ".docx", ".pptx", ".ppt",
    # structured text
    ".json", ".yaml", ".yml",
}
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
@dataclass
class Capture:
    """Accumulates, for a single claim, the source reads it made plus its statement and
    headline evidence.

    ``inputs`` holds one dict per distinct ``(kind, path)`` pair; a repeated pair is
    ignored, so the first recorded sha for that pair is the one kept."""

    claim_id: str | None = None
    statement: str | None = None  # proposition text, written by statement()
    inputs: list[dict] = field(default_factory=list)  # entries: {kind, path, sha256, via}
    evidence: dict[str, Any] = field(default_factory=dict)
    bypassed: list[str] = field(default_factory=list)  # reads flagged by the bypass guard
    _seen: set = field(default_factory=set)  # (kind, path) pairs already present in inputs

    def record(self, kind: str, path, sha: str, via: str = "tracked") -> None:
        """Append ``{kind, path, sha256, via}`` to ``inputs`` unless this ``(kind, path)``
        pair was recorded before. ``path`` is stored as ``str(path)``."""
        path_text = str(path)
        dedupe_key = (kind, path_text)
        if dedupe_key not in self._seen:
            self._seen.add(dedupe_key)
            entry = {"kind": kind, "path": path_text, "sha256": sha, "via": via}
            self.inputs.append(entry)

    def merge(self, other: "Capture") -> None:
        """Re-record every input of ``other`` into this capture, tagged ``via="uses"``."""
        for entry in other.inputs:
            self.record(entry["kind"], entry["path"], entry["sha256"], via="uses")
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
# Context variable holding the capture for the current context; ``None`` when unset.
_CURRENT: contextvars.ContextVar[Capture | None] = contextvars.ContextVar(
    "grounding_capture",
    default=None,
)


def current_capture() -> Capture | None:
    """Return the capture stored in the context variable, or ``None`` if none is set."""
    active = _CURRENT.get()
    return active
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def record(kind: str, path, sha: str, via: str = "tracked") -> None:
    """Forward ``(kind, path, sha, via)`` to the active capture's ``record`` method.

    Does nothing when no capture is set in the current context."""
    active = _CURRENT.get()
    if active is None:
        return
    active.record(kind, path, sha, via)
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
# A session-wide registry of completed claim records, keyed by node id. Populated by the
|
|
67
|
+
# plugin so :func:`grounding.uses` can pull a prior claim's evidence + inputs.
|
|
68
|
+
registry: dict[str, dict] = {}  # starts empty; uses() looks completed claims up here by id
|
grounding/_normalize.py
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
"""The one verbatim-quote text normalizer (pure stdlib).
|
|
2
|
+
|
|
3
|
+
A single place for the text normalization that quote matching depends on. Keeping it
|
|
4
|
+
in one function means a verbatim quote folds to exactly one canonical form everywhere
|
|
5
|
+
it is compared — so a correct quote is never defeated by a glyph variant, and any future
|
|
6
|
+
identity/caching layer built on top stays consistent with the matcher by construction.
|
|
7
|
+
"""
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import unicodedata
|
|
11
|
+
|
|
12
|
+
# Unicode dash/hyphen variants that publishers and PDF extractors use interchangeably
|
|
13
|
+
# with ASCII "-": en/em dashes, the Unicode hyphen, non-breaking hyphen, minus sign, etc.
|
|
14
|
+
# Folding them (plus NFKC, which normalizes ligatures/full-width/compatibility forms) lets
|
|
15
|
+
# a verbatim quote match stored text without the author reproducing the exact glyph — the
|
|
16
|
+
# single most common reason a real, correct quote fails a naive substring check.
|
|
17
|
+
_DASHES = "‐‑‒–—―⁃−﹘﹣-"
# translate() table: every code point listed in _DASHES maps to ASCII "-".
_DASH_MAP = dict.fromkeys(map(ord, _DASHES), "-")
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def collapse_ws(s: str) -> str:
    """Return ``s`` with each whitespace run replaced by one space and no leading or
    trailing whitespace.

    Used on both sides of a quote comparison so text that an extractor split across
    lines or cells still matches."""
    tokens = s.split()
    return " ".join(tokens)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def fold_match(s: str) -> str:
    """Return the canonical form of ``s`` used for verbatim-quote comparison.

    Steps, in order: NFKC normalization, mapping the dash variants in ``_DASH_MAP`` to
    ASCII ``-``, deleting every ``*`` and ``_`` character, then collapsing whitespace.
    Letter case is left untouched."""
    text = unicodedata.normalize("NFKC", s)
    text = text.translate(_DASH_MAP)
    for marker in ("*", "_"):
        text = text.replace(marker, "")
    return collapse_ws(text)
|
grounding/_text.py
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
"""Shared text / identifier helpers (leaf module).
|
|
2
|
+
|
|
3
|
+
Small, dependency-light helpers: the sha256 hasher, the single phrase matcher
|
|
4
|
+
``DocRef.contains`` delegates to, and the identifier-column preservation used by
|
|
5
|
+
:func:`grounding.load`. Imports only :mod:`grounding._normalize` (pure stdlib) and,
|
|
6
|
+
lazily, pandas inside :func:`preserve_identifier`; nothing here imports back up into the
|
|
7
|
+
package, so it is safe to import from anywhere.
|
|
8
|
+
"""
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import hashlib
|
|
12
|
+
import re as _re
|
|
13
|
+
|
|
14
|
+
from ._normalize import fold_match
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def sha256(b: bytes) -> str:
    """Return the hexadecimal SHA-256 digest of ``b``."""
    hasher = hashlib.sha256()
    hasher.update(b)
    return hasher.hexdigest()
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def match_phrase(phrase: str, text: str, *, normalize_ws: bool = True) -> bool:
    """Report whether ``phrase`` occurs in ``text`` as a substring.

    When ``normalize_ws`` is true (the default) both strings are first passed through
    ``fold_match``; otherwise they are compared exactly as given. Case is never folded."""
    needle, haystack = phrase, text
    if normalize_ws:
        needle, haystack = fold_match(phrase), fold_match(text)
    return needle in haystack
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
_INT_LIKE = _re.compile(r"^-?\d+$")


def preserve_identifier(col, str_col):
    """Choose between the inferred column and its raw-string twin.

    ``col`` is the column as pandas parsed it; ``str_col`` is the same column read as
    strings, with blank cells as ``""``. Returns ``str_col`` only when ``col`` came out
    integer/float, every non-blank string is a plain (optionally negative) integer, and
    either some value has a leading zero or some cell is blank. In every other case
    ``col`` is returned unchanged."""
    import pandas as pd

    dtype_checks = pd.api.types
    inferred_numeric = (
        dtype_checks.is_integer_dtype(col.dtype) or dtype_checks.is_float_dtype(col.dtype)
    )
    if not inferred_numeric:
        return col  # pandas already kept it as object/string

    nonblank = str_col[str_col != ""]
    if len(nonblank) == 0:
        return col
    looks_integer = nonblank.map(lambda v: _INT_LIKE.match(v) is not None)
    if not looks_integer.all():
        return col  # decimals or other text: treat as a measurement column

    def zero_padded(v):
        # More than one character and, after any sign, starts with "0" (e.g. "01", "-05").
        return len(v) > 1 and v.lstrip("-").startswith("0")

    inference_altered = nonblank.map(zero_padded).any() or (str_col == "").any()
    return str_col if inference_altered else col
|
grounding/claim.py
ADDED
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
"""The claim surface: the proposition, the evidence, composition, and the judgment markers.
|
|
2
|
+
|
|
3
|
+
A claim is a pytest test. Inside it:
|
|
4
|
+
|
|
5
|
+
statement(...) the proposition — what is asserted (ideally computed from data)
|
|
6
|
+
evidence(**kv) headline numbers, kept OUT of the assert
|
|
7
|
+
uses(claim_id) compose on a prior claim (transitive provenance + its evidence)
|
|
8
|
+
|
|
9
|
+
and on it, the non-binary judgment (metadata, never a pass/fail input):
|
|
10
|
+
|
|
11
|
+
@strength(...) @caveats(...) @kind(...) @reviewed(...)
|
|
12
|
+
"""
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
from ._capture import current_capture, registry
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def statement(text: str) -> None:
    """Store ``str(text)`` as the active capture's proposition.

    A later call overwrites an earlier one. Does nothing when no capture is active."""
    active = current_capture()
    if active is None:
        return
    active.statement = str(text)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def evidence(**kv) -> None:
    """Merge the given keyword values into the active capture's ``evidence`` dict.

    Existing keys are overwritten. Does nothing when no capture is active."""
    active = current_capture()
    if active is None:
        return
    active.evidence.update(kv)
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def uses(claim_id: str) -> dict:
    """Build on a previously completed claim.

    Looks ``claim_id`` up in ``registry``: first as an exact key, then as a trailing
    ``::``-separated suffix of a key (so a bare function name works). If several keys
    match and the calling claim's id is known, candidates whose first ``::`` segment
    equals the caller's are preferred. The matched record's inputs are re-recorded into
    the active capture with ``via="uses"``.

    Returns a copy of the matched record's ``evidence`` dict (empty if it has none).
    Raises ``LookupError`` when the reference is ambiguous or matches nothing."""
    cap = current_capture()
    rec = registry.get(claim_id)

    if rec is None:
        # A key whose last "::" segment equals claim_id necessarily ends with
        # "::" + claim_id (or equals it outright), so these two tests cover every match.
        suffix = "::" + claim_id
        matches = [key for key in registry if key == claim_id or key.endswith(suffix)]
        if len(matches) > 1 and cap is not None and cap.claim_id:
            caller_file = cap.claim_id.split("::")[0]
            local = [key for key in matches if key.split("::")[0] == caller_file]
            matches = local or matches
        if len(matches) > 1:
            raise LookupError(
                f"uses({claim_id!r}) is ambiguous — qualify it as "
                f"'<file>::{claim_id.split('::')[-1]}'. Candidates: {sorted(matches)}")
        if matches:
            rec = registry.get(matches[0])

    if rec is None:
        raise LookupError(
            f"uses({claim_id!r}): no completed claim with that id has run yet "
            f"(known: {sorted(registry)})")

    if cap is not None:
        for entry in rec["inputs"]:
            cap.record(entry["kind"], entry["path"], entry["sha256"], via="uses")
    return dict(rec.get("evidence", {}))
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
# --------------------------------------------------------------------------- #
|
|
71
|
+
# Markers — the non-binary judgment, kept out of the assert.
|
|
72
|
+
# --------------------------------------------------------------------------- #
|
|
73
|
+
def _marker(name):
    """Return the ``pytest.mark`` attribute called ``name``.

    pytest is imported inside the function, so it is only needed once a marker helper
    is actually called."""
    import pytest

    mark_namespace = pytest.mark
    return getattr(mark_namespace, name)
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def strength(level: str):
    """``@strength("strong|moderate|weak|...")`` — attach a ``strength`` mark carrying
    ``level``: how strongly the evidence supports the statement. Judgment metadata, not
    a pass/fail input."""
    mark = _marker("strength")
    return mark(level)
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def caveats(text: str):
    """``@caveats("...")`` — attach a ``caveats`` mark carrying ``text``: the scope or
    limits a reader should keep in mind."""
    mark = _marker("caveats")
    return mark(text)
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def kind(category: str):
    """``@kind("result|design|external|interpretive|...")`` — attach a ``kind`` mark
    naming what sort of assertion the claim is."""
    mark = _marker("kind")
    return mark(category)
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def reviewed(**verdict):
    """``@reviewed(by=..., support=True, date=..., note=...)`` — attach a ``reviewed``
    mark holding a reviewer's one-time judgment of whether the evidence supports the
    statement as worded. All keyword arguments are passed to the mark unchanged;
    ``support=False`` marks a claim the reviewer judged unsupported."""
    mark = _marker("reviewed")
    return mark(**verdict)
|
grounding/cli.py
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
"""``grounding`` CLI — currently one verb, ``trace``.
|
|
2
|
+
|
|
3
|
+
grounding trace <grounding_report.json | dir> re-verify claims' inputs vs shas
|
|
4
|
+
grounding trace <dir> --json machine-readable
|
|
5
|
+
|
|
6
|
+
Exit 0 if every claim is still grounded, 1 if any input changed/went missing.
|
|
7
|
+
"""
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import argparse
|
|
11
|
+
import json
|
|
12
|
+
import sys
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def main(argv=None) -> int:
    """Entry point for the ``grounding`` command line.

    ``argv`` is the argument list to parse (argparse falls back to the process
    arguments when it is ``None``). For ``trace`` the result is printed as JSON when
    ``--json`` is given, otherwise via ``trace.render``.

    Returns 0 when the trace status is ``"GROUNDED"``, 1 otherwise, and 2 for any
    command other than ``trace``."""
    parser = argparse.ArgumentParser(prog="grounding", description="grounded-claims tooling")
    commands = parser.add_subparsers(dest="cmd", required=True)

    trace_cmd = commands.add_parser(
        "trace", help="re-verify each claim's inputs still match recorded shas")
    trace_cmd.add_argument("path", help="grounding_report.json, or a directory containing it")
    trace_cmd.add_argument("--json", action="store_true", help="machine-readable output")

    args = parser.parse_args(argv)
    if args.cmd != "trace":
        return 2

    # Imported lazily so only the trace verb loads the trace module.
    from . import trace as T

    result = T.trace(args.path)
    if args.json:
        rendered = json.dumps(result, indent=2, ensure_ascii=False)
    else:
        rendered = T.render(result)
    print(rendered)
    return 1 if result["status"] != "GROUNDED" else 0
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
if __name__ == "__main__":
    # Exit the process with main()'s return value as the status code.
    raise SystemExit(main())
|
grounding/guard.py
ADDED
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
"""Bypass guard — make untracked source reads visible.
|
|
2
|
+
|
|
3
|
+
While a capture is active we wrap ``pandas.read_csv`` and ``builtins.open`` so a *direct*
|
|
4
|
+
read of a tracked source file (one not routed through :func:`grounding.load` / :func:`doc`)
|
|
5
|
+
is still captured and flagged. This guarantees the captured input set is complete: a claim
|
|
6
|
+
can't quietly read a CSV behind the harness's back. We capture-and-flag rather than
|
|
7
|
+
hard-fail, so the grounding report still renders and the flag surfaces in the claim record.
|
|
8
|
+
|
|
9
|
+
Only reads of tracked-suffix files *under the grounding root* are considered — the root is
|
|
10
|
+
``GROUNDING_ROOT`` (env) or whatever the plugin sets to the pytest rootdir — so the guard
|
|
11
|
+
never interferes with the test runner reading its own internals or temp files.
|
|
12
|
+
"""
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
import builtins
|
|
16
|
+
import os
|
|
17
|
+
from pathlib import Path
|
|
18
|
+
|
|
19
|
+
from ._capture import TRACKED_SUFFIXES, current_capture
|
|
20
|
+
from ._text import sha256
|
|
21
|
+
|
|
22
|
+
# Module state for the bypass guard.
_ROOT: Path | None = None     # root directory assigned by set_root(); None until then
_orig_read_csv = None         # pandas.read_csv as found by install_guard(), if pandas exists
_orig_open = builtins.open    # the open() that was in place when this module was imported
_guard_installed = False      # flipped to True by install_guard() after patching
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def set_root(path) -> None:
    """Set the module-level root directory used to decide which reads get flagged.

    A truthy ``path`` is stored as a resolved ``Path``; a falsy one (``None``, ``""``)
    clears the stored root."""
    global _ROOT
    if path:
        _ROOT = Path(path).resolve()
    else:
        _ROOT = None
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def _data_root() -> Path | None:
    """Return the effective root directory: the one stored by ``set_root`` if any,
    otherwise the resolved ``GROUNDING_ROOT`` environment variable, otherwise ``None``."""
    if _ROOT is not None:
        return _ROOT
    env_root = os.environ.get("GROUNDING_ROOT")
    if not env_root:
        return None
    return Path(env_root).resolve()
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def _under_root(p: Path) -> bool:
    """Report whether the resolved ``p`` lies inside the effective root directory.

    Returns ``False`` when no root is configured, when ``p`` is outside it, or when
    resolving ``p`` raises ``OSError``."""
    root = _data_root()
    if root is None:
        return False
    try:
        # relative_to() raises ValueError when p is not inside root.
        p.resolve().relative_to(root)
    except (ValueError, OSError):
        return False
    return True
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def _maybe_flag(path, via: str) -> None:
    """Record a direct read of a tracked file as a bypass on the active capture.

    Nothing happens unless a capture is active, ``path`` has a tracked suffix, lies
    under the grounding root, and is an existing regular file. A file the capture
    already lists is not flagged again; that comparison is made on resolved paths, so
    ``data/x.csv``, ``./data/x.csv`` and the absolute spelling all count as one file.

    ``via`` names the intercepted call (for example ``"open"``). It is stored as
    ``bypass:<via>`` on the recorded input and as ``"<via>: <path>"`` in ``bypassed``.

    The guard never raises for I/O reasons: if the file cannot be read for hashing,
    it returns and leaves the real read to report the error."""
    cap = current_capture()
    if cap is None:
        return
    try:
        p = Path(path)
    except TypeError:
        return
    if p.suffix.lower() not in TRACKED_SUFFIXES or not _under_root(p):
        return
    if not p.is_file():
        return

    def _canonical(candidate) -> Path:
        # Fall back to the unresolved path if resolution itself fails.
        try:
            return Path(candidate).resolve()
        except (OSError, RuntimeError):
            return Path(candidate)

    target = _canonical(p)
    # Already recorded (by a tracked loader or an earlier flag) under any spelling.
    if any(inp["path"] == str(p) or _canonical(inp["path"]) == target for inp in cap.inputs):
        return
    try:
        sha = sha256(p.read_bytes())
    except OSError:
        # Unreadable here means the caller's own read will fail and surface the error.
        return
    cap.record("bypass", p, sha, via=f"bypass:{via}")
    cap.bypassed.append(f"{via}: {p}")
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def install_guard() -> None:
    """Replace ``builtins.open`` and, when pandas can be imported, ``pandas.read_csv``
    with wrappers that call ``_maybe_flag`` before delegating to the original.

    Only the first call patches anything; later calls return immediately. The wrappers
    inspect path-like targets only (``str`` / ``os.PathLike``), and ``open`` is checked
    only when the mode contains ``"r"``."""
    global _guard_installed, _orig_read_csv
    if _guard_installed:
        return

    def guarded_open(file, mode="r", *a, **k):
        is_path_read = "r" in mode and isinstance(file, (str, os.PathLike))
        if is_path_read:
            _maybe_flag(file, "open")
        return _orig_open(file, mode, *a, **k)

    builtins.open = guarded_open

    try:
        import pandas as pd
    except ImportError:
        pass  # pandas is optional; only open() is guarded without it
    else:
        unpatched_read_csv = pd.read_csv
        _orig_read_csv = unpatched_read_csv

        def guarded_read_csv(*a, **k):
            # The source is the first positional argument or the filepath_or_buffer
            # keyword; buffers such as BytesIO are not path-like and are skipped.
            target = a[0] if a else k.get("filepath_or_buffer")
            if isinstance(target, (str, os.PathLike)):
                _maybe_flag(target, "pandas.read_csv")
            return unpatched_read_csv(*a, **k)

        pd.read_csv = guarded_read_csv

    _guard_installed = True
|
grounding/loaders.py
ADDED
|
@@ -0,0 +1,186 @@
|
|
|
1
|
+
"""Tracked loaders: sha-pinned data + document handles.
|
|
2
|
+
|
|
3
|
+
``load``/``data`` read a CSV into a DataFrame and record it as provenance; ``doc`` records
|
|
4
|
+
a non-table source (PDF/Word/PowerPoint) and returns a :class:`DocRef` whose
|
|
5
|
+
:meth:`~DocRef.contains` verifies a verbatim quote against the document's text. All reads
|
|
6
|
+
are sha-pinned: the recorded hash is of exactly the bytes that were parsed.
|
|
7
|
+
|
|
8
|
+
The per-format text readers are offline, deterministic, pure functions of the bytes — no
|
|
9
|
+
network, no key, no model. There is **no OCR**: a scanned/image-only document has no text
|
|
10
|
+
layer, so rather than let a quote silently "not match", :meth:`DocRef.text` raises
|
|
11
|
+
:class:`EmptyExtraction`.
|
|
12
|
+
"""
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
import io
|
|
16
|
+
from dataclasses import dataclass, field
|
|
17
|
+
from pathlib import Path
|
|
18
|
+
|
|
19
|
+
from ._capture import record
|
|
20
|
+
from ._text import match_phrase, preserve_identifier, sha256
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class UnsupportedDocFormat(ValueError):
    """The document's suffix has no built-in text reader (raised by :meth:`DocRef.text`)."""
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class EmptyExtraction(RuntimeError):
    """The document produced no text at all, e.g. a scanned PDF without a text layer.

    Raised rather than returning an empty string, so a claim cannot silently pass or
    fail on an unreadable source."""
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
# --------------------------------------------------------------------------- #
|
|
33
|
+
# CSV — the tracked data loader
|
|
34
|
+
# --------------------------------------------------------------------------- #
|
|
35
|
+
def load(path, kind: str = "data"):
    """Read the CSV at ``path`` into a DataFrame and record it as provenance.

    The file is read once as bytes; the recorded sha256 is of exactly those bytes, and
    pandas parses them from an in-memory buffer. A second all-strings parse is handed
    to ``preserve_identifier`` column by column, so integer-looking identifiers
    (``"01"``) keep their original text. ``kind`` is the label stored with the recorded
    input.

    Returns the DataFrame with ``.attrs["source"]`` (the path as a string) and
    ``.attrs["sha256"]`` set. Needs pandas (the ``[data]`` extra)."""
    import pandas as pd

    source = Path(path)
    payload = source.read_bytes()
    digest = sha256(payload)
    record(kind, source, digest)

    frame = pd.read_csv(io.BytesIO(payload))
    as_text = pd.read_csv(io.BytesIO(payload), dtype=str, keep_default_na=False)
    for name in frame.columns:
        frame[name] = preserve_identifier(frame[name], as_text[name])

    frame.attrs.update({"source": str(source), "sha256": digest})
    return frame
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
data = load  # alias: ``data(...)`` and ``load(...)`` are the same function
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
# --------------------------------------------------------------------------- #
|
|
62
|
+
# Document text readers (the [docs] extra) — offline, deterministic, verbatim
|
|
63
|
+
# --------------------------------------------------------------------------- #
|
|
64
|
+
def read_pdf_text(path) -> str:
    """Return the text of every page of the PDF at ``path``, joined with newlines.

    Uses pdfplumber; a page with no extractable text contributes an empty string."""
    import pdfplumber

    with pdfplumber.open(str(path)) as pdf:
        page_texts = [page.extract_text() or "" for page in pdf.pages]
    return "\n".join(page_texts)
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def read_docx_text(path) -> str:
    """Return the text of the .docx at ``path``: all paragraphs first, then every table
    cell (table by table, row by row), joined with newlines. Uses python-docx."""
    import docx

    document = docx.Document(str(path))
    chunks = [para.text for para in document.paragraphs]
    chunks += [
        cell.text
        for table in document.tables
        for row in table.rows
        for cell in row.cells
    ]
    return "\n".join(chunks)
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def read_pptx_text(path) -> str:
    """Return the prose of the .pptx at ``path``, joined with newlines (python-pptx).

    For each slide, in order: the text frame and table cells of every shape, with
    grouped shapes expanded recursively, followed by the slide's speaker notes if it
    has any."""
    from pptx import Presentation
    from pptx.enum.shapes import MSO_SHAPE_TYPE

    def leaf_shapes(container):
        # Depth-first: a group is replaced, in place, by the shapes it contains.
        for item in container:
            if item.shape_type != MSO_SHAPE_TYPE.GROUP:
                yield item
                continue
            yield from leaf_shapes(item.shapes)

    deck = Presentation(str(path))
    chunks: list[str] = []
    for slide in deck.slides:
        for shape in leaf_shapes(slide.shapes):
            if shape.has_text_frame:
                chunks.append(shape.text_frame.text)
            if shape.has_table:
                chunks.extend(cell.text for row in shape.table.rows for cell in row.cells)
        notes = slide.notes_slide.notes_text_frame if slide.has_notes_slide else None
        if notes is not None:
            chunks.append(notes.text)
    return "\n".join(chunks)
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
# suffix -> reader. Pure-Python formats only; legacy .doc/.ppt (which would need
|
|
116
|
+
# LibreOffice) are intentionally absent and raise UnsupportedDocFormat.
|
|
117
|
+
_TEXT_READERS = dict([
    (".pdf", read_pdf_text),
    (".docx", read_docx_text),
    (".pptx", read_pptx_text),
])
|
|
122
|
+
|
|
123
|
+
_PRESENTATION_SUFFIXES = {".pptx", ".ppt", ".odp"}  # suffixes DocRef.is_presentation treats as slide decks
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
@dataclass
class DocRef:
    """Handle to a recorded non-table source (PDF/Word report or slide deck).

    Holds the document's path and the sha256 it was recorded with. :meth:`text`
    extracts the prose and :meth:`contains` checks a quote against it."""

    path: Path
    sha256: str
    # Extracted text, filled in by the first successful text() call.
    _text: str | None = field(default=None, init=False, repr=False, compare=False)

    def __str__(self) -> str:
        """Render as ``<file name>@<first 12 characters of the sha256>``."""
        return f"{self.path.name}@{self.sha256[:12]}"

    @property
    def is_presentation(self) -> bool:
        """True for slide decks (.pptx/.ppt/.odp) — weaker evidence than a signed report
        (summary text, rounded numbers, scattered across shapes)."""
        return self.path.suffix.lower() in _PRESENTATION_SUFFIXES

    def text(self) -> str:
        """Return the document's plain text, extracted on first use and then cached.

        The reader is chosen by lower-cased suffix (``.pdf``/``.docx``/``.pptx``).

        Raises:
            UnsupportedDocFormat: no reader is registered for the suffix.
            ImportError: the reader's library is not installed; the message names the
                missing module and the extra to install, and ``.name`` is carried over
                from the original error.
            EmptyExtraction: the reader returned only whitespace (scanned/image-only
                document; OCR is not supported).
        """
        if self._text is None:
            reader = _TEXT_READERS.get(self.path.suffix.lower())
            if reader is None:
                raise UnsupportedDocFormat(
                    f"doc().text() can't extract {self.path.suffix!r} ({self.path.name}): "
                    f"supported formats are {', '.join(sorted(_TEXT_READERS))} "
                    f"(install the [docs] extra). Legacy .doc/.ppt are not supported.")
            try:
                txt = reader(self.path)
            except ImportError as exc:
                missing = getattr(exc, "name", None)
                name = missing or "a reader"
                # The distribution is published as "pytest-grounding"; the extra must be
                # requested on that name, not on the import package name "grounding".
                raise ImportError(
                    f"{name} is required to read {self.path.suffix} — install the [docs] "
                    f"extra: pip install 'pytest-grounding[docs]'",
                    name=missing,
                ) from exc
            if not txt.strip():
                raise EmptyExtraction(
                    f"no extractable text in {self.path.name} — a scanned/image-only "
                    f"document? OCR is not supported. A quote can't be verified against "
                    f"an unreadable source.")
            self._text = txt
        return self._text

    def contains(self, phrase: str, *, normalize_ws: bool = True) -> bool:
        """Report whether ``phrase`` occurs in the extracted :meth:`text`.

        Delegates to ``match_phrase``; with ``normalize_ws`` (default) both sides are
        folded first, so a verbatim quote that an extractor split across lines or cells
        still matches. Propagates whatever :meth:`text` raises."""
        return match_phrase(phrase, self.text(), normalize_ws=normalize_ws)
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
def doc(path, kind: str = "doc"):
    """Record the document at ``path`` as a provenance input and return a :class:`DocRef`.

    The recorded sha256 is of the file's bytes at the time of the call; ``kind`` is the
    label stored with the input. Use :meth:`DocRef.contains` to verify a quote."""
    source = Path(path)
    digest = sha256(source.read_bytes())
    record(kind, source, digest)
    return DocRef(path=source, sha256=digest)
|
grounding/plugin.py
ADDED
|
@@ -0,0 +1,236 @@
|
|
|
1
|
+
"""pytest plugin — capture provenance, collect claims, emit the grounding report.
|
|
2
|
+
|
|
3
|
+
A *claim* is a pytest test: its :func:`grounding.statement` call is the proposition, its
|
|
4
|
+
node id is the stable id, its body is the justification (reads sha-pinned via ``load``/
|
|
5
|
+
``doc``), and its assert is the grounding/drift check. Markers carry the non-binary
|
|
6
|
+
judgment (``strength``/``caveats``/``kind``/``reviewed``); lifecycle rides pytest states
|
|
7
|
+
(``xfail`` = contradicted/retracted, ``skip`` = unverifiable).
|
|
8
|
+
|
|
9
|
+
This plugin:
|
|
10
|
+
* registers the markers (no "unknown mark" warnings),
|
|
11
|
+
* wraps each test in a :class:`grounding.Capture` (autouse fixture) so every ``load``/
|
|
12
|
+
``doc`` read is recorded, and installs the bypass guard,
|
|
13
|
+
* collects ``{id, statement, outcome, evidence, inputs+shas, strength, caveats, kind,
|
|
14
|
+
reviewed, notes}`` per claim and writes ``grounding_report.{json,md}``.
|
|
15
|
+
|
|
16
|
+
Auto-loaded via the ``pytest11`` entry point, so a bare ``pytest`` collects grounded claims
|
|
17
|
+
once the package is installed.
|
|
18
|
+
"""
|
|
19
|
+
from __future__ import annotations
|
|
20
|
+
|
|
21
|
+
import json
|
|
22
|
+
import os
|
|
23
|
+
from pathlib import Path
|
|
24
|
+
|
|
25
|
+
import pytest
|
|
26
|
+
|
|
27
|
+
from . import guard
|
|
28
|
+
from ._capture import Capture, _CURRENT, registry
|
|
29
|
+
|
|
30
|
+
_MARKERS = {
|
|
31
|
+
"strength": "strength(level): how strongly the evidence supports the statement",
|
|
32
|
+
"caveats": "caveats(text): scope/limits to keep in mind",
|
|
33
|
+
"kind": "kind(category): result|design|external|interpretive|...",
|
|
34
|
+
"reviewed": "reviewed(**verdict): a reviewer's support judgment of the claim",
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def pytest_addoption(parser):
    """Register the plugin's command-line flags under the ``grounding`` option group.

    ``--grounding-out`` selects the report directory; ``--grounding-fresh`` (alias
    ``--no-merge``) is stored as ``grounding_fresh`` and disables merging with an
    existing report.
    """
    fresh_help = (
        "ignore any existing grounding_report.json and write ONLY this run's "
        "records (clean slate). Default MERGES this run's claims into the "
        "existing report at test-file granularity, so a partial run updates "
        "just its own files and leaves other modules' claims intact."
    )
    group = parser.getgroup("grounding")
    group.addoption(
        "--grounding-out",
        action="store",
        default=None,
        help="directory for grounding_report.{json,md} (default: rootdir)",
    )
    group.addoption(
        "--grounding-fresh",
        "--no-merge",
        action="store_true",
        default=False,
        dest="grounding_fresh",
        help=fresh_help,
    )
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def pytest_configure(config):
    """Prepare the session: markers, bypass guard, and the record accumulator.

    Each help line in ``_MARKERS`` is added to the ``markers`` ini value, the guard
    is rooted and installed, and ``config._grounding_records`` starts out empty.
    """
    for description in _MARKERS.values():
        config.addinivalue_line("markers", description)
    # Flag untracked reads under the grounding root: GROUNDING_ROOT, else the rootdir.
    grounding_root = os.environ.get("GROUNDING_ROOT") or str(config.rootpath)
    guard.set_root(grounding_root)
    guard.install_guard()
    config._grounding_records = []
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
# --------------------------------------------------------------------------- #
|
|
60
|
+
# Per-claim capture
|
|
61
|
+
# --------------------------------------------------------------------------- #
|
|
62
|
+
@pytest.fixture(autouse=True)
def _grounding_capture(request):
    """Give every test its own capture and expose it on the test item.

    The capture is made current through ``_CURRENT`` for the duration of the test
    and stored on the node as ``_grounding_cap`` so the report hook can read it.
    The previous ``_CURRENT`` value is restored on teardown even if the test fails.
    """
    node = request.node
    capture = Capture(claim_id=node.nodeid)
    reset_token = _CURRENT.set(capture)
    node._grounding_cap = capture
    try:
        yield capture
    finally:
        _CURRENT.reset(reset_token)
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def _marker_val(item, name, default=None):
    """Return the first positional argument of the closest ``name`` marker.

    ``default`` is returned when the item carries no such marker or the marker was
    applied without positional arguments.
    """
    marker = item.get_closest_marker(name)
    if marker is not None and marker.args:
        return marker.args[0]
    return default
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def _marker_kwargs(item, name):
    """Return a copy of the closest ``name`` marker's keyword arguments.

    Yields ``None`` (not an empty dict) when the item has no such marker, so
    callers can tell "not marked" apart from "marked without keywords".
    """
    marker = item.get_closest_marker(name)
    if marker is None:
        return None
    return dict(marker.kwargs)
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
@pytest.hookimpl(hookwrapper=True)
def pytest_runtest_makereport(item, call):
    """Turn a finished test phase into one grounding record.

    A record is produced for the ``call`` phase, and also for a *skipped* ``setup``
    phase: pytest reports marker-driven skips (``@pytest.mark.skip``/``skipif``) and
    ``xfail(run=False)`` during setup and never runs the call phase for them, so
    without this those claims would silently vanish from the report.

    The record is appended to ``config._grounding_records`` and also stored in
    ``registry`` under the node id.
    """
    out = yield
    rep = out.get_result()
    skipped_in_setup = rep.when == "setup" and rep.skipped
    if rep.when != "call" and not skipped_in_setup:
        return
    # For a setup-phase skip the autouse capture fixture may not have run; every
    # use of ``cap`` below therefore tolerates ``None``.
    cap = getattr(item, "_grounding_cap", None)

    outcome = rep.outcome
    if hasattr(rep, "wasxfail"):
        outcome = "xpass" if rep.passed else "xfail"
    elif rep.skipped and call.excinfo and call.excinfo.errisinstance(pytest.xfail.Exception):
        outcome = "xfail"

    notes = (item.function.__doc__ or "").strip() if hasattr(item, "function") else ""
    statement = cap.statement if cap and cap.statement else None
    evidence = dict(cap.evidence) if cap else {}
    inputs = list(cap.inputs) if cap else []

    advisories: list[str] = []
    if statement is None and outcome not in ("skipped",):
        advisories.append("no statement() — the proposition isn't recorded")
    if cap and cap.bypassed:
        advisories.append(f"{len(cap.bypassed)} untracked read(s) caught by the bypass guard")

    rec = {
        "id": item.nodeid,
        "statement": statement,
        "notes": notes or None,
        "outcome": outcome,
        "kind": _marker_val(item, "kind", "unspecified"),
        "strength": _marker_val(item, "strength", "unspecified"),
        "caveats": _marker_val(item, "caveats"),
        "reviewed": _marker_kwargs(item, "reviewed"),
        "evidence": evidence,
        "inputs": inputs,
        "bypassed": list(cap.bypassed) if cap else [],
        "advisories": advisories,
        # Only genuine failures carry a traceback; expected failures do not.
        "longrepr": str(rep.longrepr) if rep.failed and not getattr(rep, "wasxfail", None) else None,
    }
    item.config._grounding_records.append(rec)
    registry[item.nodeid] = rec  # enables uses(claim_id) for later claims
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
# --------------------------------------------------------------------------- #
|
|
132
|
+
# Report emission
|
|
133
|
+
# --------------------------------------------------------------------------- #
|
|
134
|
+
def _json_default(o):
    """Fallback serializer for ``json.dumps`` when a value isn't natively encodable.

    Tries, in order: ``o.item()`` (numpy scalar -> python scalar), ``o.tolist()``
    (numpy array / pandas Index/Series -> list), and finally ``str(o)``. A
    ``ValueError``/``TypeError`` from ``item()`` falls through to the next option.
    """
    if hasattr(o, "item"):
        try:
            scalar = o.item()
        except (ValueError, TypeError):
            pass
        else:
            return scalar
    return o.tolist() if hasattr(o, "tolist") else str(o)
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
_OUTCOME_LABEL = {
|
|
146
|
+
"passed": "✅ grounded", "failed": "❌ DRIFT", "xfail": "⊘ contradicted",
|
|
147
|
+
"xpass": "⚠️ unexpectedly grounded", "skipped": "… unverifiable",
|
|
148
|
+
}
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
def _test_file_of(record: dict) -> str:
    """Return the test file's base name for a claim record.

    The part of the record's ``id`` before the first ``::`` is taken as a path and
    reduced to its final component. If that yields nothing, the whole id (possibly
    the empty string) is returned instead.
    """
    claim_id = record.get("id") or ""
    module_part, _, _ = claim_id.partition("::")
    basename = Path(module_part).name
    return basename if basename else claim_id
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
def _merge_records(prior: list[dict], current: list[dict]) -> list[dict]:
    """Combine a previous report's records with this run's, per test file.

    Prior records whose test file also appears in ``current`` are dropped; all
    other prior records are kept and this run's records are added. The result is
    sorted by ``id``. A whole-suite run therefore replaces everything, while a
    one-file run refreshes only that file.
    """
    touched_files = set()
    for rec in current:
        touched_files.add(_test_file_of(rec))

    combined = [rec for rec in prior if _test_file_of(rec) not in touched_files]
    combined.extend(current)
    combined.sort(key=lambda rec: rec.get("id") or "")
    return combined
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
def _load_prior_records(path: Path) -> list[dict]:
    """Read the claim records of an existing report, tolerating a bad file.

    Returns the dict entries under ``"claims"``. A missing, unreadable,
    undecodable, or wrongly shaped file yields an empty list rather than an
    error, so a corrupt prior report never blocks writing a new one.
    """
    try:
        payload = json.loads(path.read_text(encoding="utf-8"))
        claims = payload.get("claims", [])
        return [entry for entry in claims if isinstance(entry, dict)]
    except (OSError, ValueError, AttributeError, TypeError):
        return []
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
def pytest_sessionfinish(session):
    """Write ``grounding_report.json`` and ``grounding_report.md`` for this session.

    Nothing is written when the run produced no records. The output directory is
    ``--grounding-out`` or, failing that, the rootdir; it is created if needed.
    Unless ``grounding_fresh`` is set, this run's records are merged into any
    existing JSON report. The Markdown path is remembered on the config for the
    terminal summary.
    """
    config = session.config
    records = getattr(config, "_grounding_records", [])
    if not records:
        return

    target_dir = Path(config.getoption("--grounding-out") or config.rootpath)
    target_dir.mkdir(parents=True, exist_ok=True)
    json_path = target_dir / "grounding_report.json"
    md_path = target_dir / "grounding_report.md"

    if config.getoption("grounding_fresh", default=False):
        merged = sorted(records, key=lambda r: r.get("id") or "")
    else:
        merged = _merge_records(_load_prior_records(json_path), records)

    payload = json.dumps(
        {"claims": merged}, indent=2, ensure_ascii=False, default=_json_default)
    json_path.write_text(payload, encoding="utf-8")
    md_path.write_text(_render_md(merged), encoding="utf-8")
    config._grounding_report_path = md_path
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
def pytest_terminal_summary(terminalreporter, exitstatus, config):
    """Print the path of the written Markdown report at the end of the run.

    Stays silent when no report path was stored on the config (nothing written).
    """
    report_path = getattr(config, "_grounding_report_path", None)
    if report_path is None:
        return
    terminalreporter.write_sep("-", "grounding report")
    terminalreporter.write_line(f" {report_path}")
|
|
200
|
+
|
|
201
|
+
|
|
202
|
+
def _render_md(records: list[dict]) -> str:
    """Render claim records as the Markdown companion of the JSON report.

    Output is an outcome tally table followed by one section per ``kind`` with one
    entry per claim (statement, strength/caveats, evidence, inputs, advisories and
    a truncated failure traceback).

    ``records`` may include entries merged in from a previously written report,
    which are only known to be dicts. Every field is therefore read defensively:
    a missing key falls back to a placeholder and a wrongly shaped field is skipped
    instead of aborting the whole report. Well-formed records render exactly as
    they did before.
    """
    from collections import Counter

    def outcome_of(rec: dict) -> str:
        return str(rec.get("outcome", "unknown"))

    tally = Counter(outcome_of(r) for r in records)
    lines = ["# Grounding report", "", "| outcome | n |", "|---|---|"]
    for k, v in tally.items():
        lines.append(f"| {_OUTCOME_LABEL.get(k, k)} | {v} |")
    lines.append("")
    by_kind: dict[str, list[dict]] = {}
    for r in records:
        # str() keeps the section keys sortable even if a record stored a null kind.
        by_kind.setdefault(str(r.get("kind", "unspecified")), []).append(r)
    for kind in sorted(by_kind):
        lines += [f"## kind: {kind}", ""]
        for r in by_kind[kind]:
            outcome = outcome_of(r)
            short_id = str(r.get("id") or "").split("::")[-1]
            lines.append(f"### {_OUTCOME_LABEL.get(outcome, outcome)} — `{short_id}`")
            if r.get("statement"):
                lines.append(f"> {r['statement']}")
            meta = f"**strength:** {r.get('strength', 'unspecified')}"
            if r.get("caveats"):
                meta += f" · **caveats:** {r['caveats']}"
            lines += ["", meta]
            evidence = r.get("evidence")
            if evidence and isinstance(evidence, dict):
                ev = ", ".join(f"`{k}={v}`" for k, v in evidence.items())
                lines.append(f"\n**evidence:** {ev}")
            raw_inputs = r.get("inputs")
            inputs = [i for i in raw_inputs if isinstance(i, dict)] if isinstance(raw_inputs, list) else []
            if inputs:
                lines.append("\n**inputs:**")
                for i in inputs:
                    via_value = i.get("via", "tracked")
                    via = "" if via_value == "tracked" else f" _({via_value})_"
                    file_name = Path(str(i.get("path", ""))).name
                    short_sha = str(i.get("sha256", ""))[:12]
                    lines.append(f"- `{i.get('kind', 'input')}` {file_name} — `{short_sha}`{via}")
            advisories = r.get("advisories")
            if advisories and isinstance(advisories, list):
                lines.append("\n**advisories:** " + "; ".join(str(a) for a in advisories))
            if r.get("longrepr"):
                # Cap the traceback so one failure can't swamp the report.
                lines.append(f"\n```\n{str(r['longrepr'])[:800]}\n```")
            lines.append("")
    return "\n".join(lines) + "\n"
|
grounding/report_io.py
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
"""Locate + load the grounding report — the wire format between producer and consumers.
|
|
2
|
+
|
|
3
|
+
``grounding_report.json`` is the contract the pytest plugin emits and every consumer reads.
|
|
4
|
+
It is a *regenerable projection* of running the claims — never the source of truth (that is
|
|
5
|
+
the tests + the source bytes). Delete it and rebuild by re-running pytest.
|
|
6
|
+
"""
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import json
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
|
|
12
|
+
GROUNDING_REPORT_NAME = "grounding_report.json"
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def load_report(path) -> dict:
    """Read a grounding_report.json (UTF-8) and return the decoded JSON.

    For a report written by the plugin this is ``{"claims": [...]}``.
    """
    source = Path(path)
    with source.open(encoding="utf-8") as handle:
        return json.load(handle)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def claims_of(data: dict) -> list[dict]:
    """Return the claim records from a loaded report.

    Defensive against a malformed file: if ``data`` is not a dict, or its
    ``"claims"`` entry is missing or not a list, the result is an empty list, and
    any non-dict entries inside the list are dropped. This never raises.
    """
    if not isinstance(data, dict):
        return []
    claims = data.get("claims")
    if not isinstance(claims, (list, tuple)):
        return []
    return [c for c in claims if isinstance(c, dict)]
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def find_report(start) -> Path | None:
    """Locate a grounding_report.json starting from ``start``.

    An existing file is returned unchanged. For a directory, the report is looked
    for inside it and then in up to four parent directories of its resolved path,
    nearest first. Returns ``None`` when ``start`` is neither a file nor a
    directory, or when no report is found.
    """
    origin = Path(start)
    if origin.is_file():
        return origin
    if not origin.is_dir():
        return None

    direct = origin / GROUNDING_REPORT_NAME
    if direct.is_file():
        return direct
    for ancestor in list(origin.resolve().parents)[:4]:
        inherited = ancestor / GROUNDING_REPORT_NAME
        if inherited.is_file():
            return inherited
    return None
|
grounding/trace.py
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
"""Trace — re-verify a report's claims still rest on the bytes they recorded.
|
|
2
|
+
|
|
3
|
+
For each claim in a grounding_report.json, re-hash every recorded input and compare to the
|
|
4
|
+
stored sha256. A claim is GROUNDED if every input still exists and matches; a changed or
|
|
5
|
+
missing input is a break. This is the static, git-free check that answers "is this
|
|
6
|
+
conclusion still grounded?" without re-running the suite. (Re-running the suite is the
|
|
7
|
+
*executable* complement — it recomputes the claim against the current bytes.)
|
|
8
|
+
"""
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
|
|
13
|
+
from ._text import sha256
|
|
14
|
+
from .report_io import claims_of, find_report, load_report
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def trace(report_path) -> dict:
    """Walk every claim's recorded inputs and re-verify their shas.

    Returns ``{status, report, claims:[{id, statement, outcome, breaks:[...]}]}``.
    ``status`` is ``NO_REPORT`` when no report can be located, ``GROUNDED`` when no
    claim has a break, and ``BROKEN`` otherwise. Relative input paths are resolved
    against the directory containing the report.

    A malformed input entry (not a dict, or without a string ``path``) is reported
    as a break on its claim instead of aborting the whole trace, and a non-list
    ``inputs`` value is treated the same as having no inputs.
    """
    rp = find_report(report_path)
    if rp is None:
        return {"status": "NO_REPORT", "report": str(report_path), "claims": []}

    claims = claims_of(load_report(rp))
    base = rp.parent
    out = []
    status = "GROUNDED"
    for c in claims:
        breaks: list[str] = []
        inputs = c.get("inputs")
        if not isinstance(inputs, list):
            inputs = []
        for inp in inputs:
            if not isinstance(inp, dict) or not isinstance(inp.get("path"), str):
                breaks.append(f"malformed input record: {inp!r}")
                continue
            p = Path(inp["path"])
            if not p.is_absolute():
                p = base / p
            if not p.is_file():
                breaks.append(f"missing: {inp['path']}")
                continue
            if sha256(p.read_bytes()) != inp.get("sha256"):
                breaks.append(f"changed: {inp['path']}")
        # A claim that recorded no inputs at all can't be grounded in anything.
        if not inputs:
            breaks.append("no recorded inputs")
        if breaks:
            status = "BROKEN"
        out.append({
            "id": c.get("id"),
            "statement": c.get("statement"),
            "outcome": c.get("outcome"),
            "breaks": breaks,
        })
    return {"status": status, "report": str(rp), "claims": out}
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def render(result: dict) -> str:
    """Format a :func:`trace` result as plain text for a terminal.

    The first line gives the overall status and report path. Each claim then gets
    a line with a pass/fail mark and its short name (the last ``::`` segment of
    its id), followed by its statement if present and one line per break.
    """
    out = [f"trace: {result['status']} ({result['report']})"]
    for claim in result["claims"]:
        problems = claim["breaks"]
        mark = "❌" if problems else "✅"
        short_name = (claim["id"] or "").split("::")[-1]
        out.append(f" {mark} {short_name}")
        statement = claim.get("statement")
        if statement:
            out.append(f" {statement}")
        out.extend(f" ! {problem}" for problem in problems)
    return "\n".join(out)
|
|
@@ -0,0 +1,214 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: pytest-grounding
|
|
3
|
+
Version: 0.0.1
|
|
4
|
+
Summary: Turn assertions about data into re-runnable, provenance-tracked claims — written and reviewed by agents.
|
|
5
|
+
Author-email: Sam Quigley <quigley@emerose.com>
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/emerose/pytest-grounding
|
|
8
|
+
Project-URL: Repository, https://github.com/emerose/pytest-grounding
|
|
9
|
+
Project-URL: Issues, https://github.com/emerose/pytest-grounding/issues
|
|
10
|
+
Keywords: pytest,provenance,grounding,claims,reproducibility,data,audit
|
|
11
|
+
Classifier: Framework :: Pytest
|
|
12
|
+
Classifier: Development Status :: 3 - Alpha
|
|
13
|
+
Classifier: Intended Audience :: Science/Research
|
|
14
|
+
Classifier: Intended Audience :: Developers
|
|
15
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
16
|
+
Classifier: Programming Language :: Python :: 3
|
|
17
|
+
Classifier: Topic :: Software Development :: Testing
|
|
18
|
+
Classifier: Topic :: Scientific/Engineering
|
|
19
|
+
Requires-Python: >=3.9
|
|
20
|
+
Description-Content-Type: text/markdown
|
|
21
|
+
License-File: LICENSE
|
|
22
|
+
Requires-Dist: pytest>=7.0
|
|
23
|
+
Provides-Extra: data
|
|
24
|
+
Requires-Dist: pandas>=2.0; extra == "data"
|
|
25
|
+
Provides-Extra: docs
|
|
26
|
+
Requires-Dist: pdfplumber>=0.11; extra == "docs"
|
|
27
|
+
Requires-Dist: python-docx>=1.1; extra == "docs"
|
|
28
|
+
Requires-Dist: python-pptx>=1.0; extra == "docs"
|
|
29
|
+
Dynamic: license-file
|
|
30
|
+
|
|
31
|
+
# grounding
|
|
32
|
+
|
|
33
|
+
**Turn assertions about data into re-runnable, provenance-tracked claims — written and reviewed by agents.**
|
|
34
|
+
|
|
35
|
+
`grounding` is a small runtime on top of pytest. A test stops being a pass/fail check on your *code* and becomes a **grounded claim**: a statement about data, automatically pinned to the exact bytes it depends on, re-checked whenever those bytes change, carrying a non-binary judgment (how strong, with what caveats) that lives in version control.
|
|
36
|
+
|
|
37
|
+
It's built for a workflow where **an agent writes the claims and a second, fresh-context agent reviews them.**
|
|
38
|
+
|
|
39
|
+
```bash
|
|
40
|
+
pip install pytest-grounding # core (statement-only / quote-only)
|
|
41
|
+
pip install 'pytest-grounding[data]' # + CSV grounding via data()/load()
|
|
42
|
+
pip install 'pytest-grounding[docs]' # + document quote verification via doc()
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
No network, no API keys, no model inside. Everything is a pure function of file bytes.
|
|
46
|
+
|
|
47
|
+
## Why agents, specifically
|
|
48
|
+
|
|
49
|
+
When an agent asserts *"knockdown reached 53% at the high dose,"* you have two questions: **is it mechanically true** against the data, and **does the evidence actually support the claim** as worded? `grounding` splits those, and each half lands with the right reviewer:
|
|
50
|
+
|
|
51
|
+
- **The mechanical half is the test.** Re-run it; it passes or fails against sha-pinned bytes. No reviewer judgment needed — CI does it.
|
|
52
|
+
- **The judgment half is metadata** (`statement`, `@strength`, `@caveats`, the cited quote). A fresh-context reviewer agent reads *exactly* the bytes the author grounded — same shas, no drift — and decides whether the framing is honest.
|
|
53
|
+
|
|
54
|
+
`grounding_report.json` is the machine-readable handoff: the author agent emits it, the reviewer agent consumes it.
|
|
55
|
+
|
|
56
|
+
## A claim is a pytest test
|
|
57
|
+
|
|
58
|
+
```python
|
|
59
|
+
from grounding import data, evidence, statement, strength, caveats, kind
|
|
60
|
+
from scipy import stats
|
|
61
|
+
|
|
62
|
+
@kind("result")
|
|
63
|
+
@strength("moderate")
|
|
64
|
+
@caveats("n=8 per arm, single cohort; not corrected for multiple endpoints")
|
|
65
|
+
def test_treatment_lowers_biomarker_vs_vehicle():
|
|
66
|
+
"""Serum biomarker at day 28: 10 mg/kg arm vs vehicle, cohort B.
|
|
67
|
+
|
|
68
|
+
Reviewer notes: groups are the prespecified arms; Welch's t-test because the
|
|
69
|
+
vehicle arm's spread is larger; two treated animals were excluded upstream for
|
|
70
|
+
dosing errors (already applied in the tidy table).
|
|
71
|
+
"""
|
|
72
|
+
df = data("biomarker_day28.csv")
|
|
73
|
+
treated = df[df.arm == "10mpk"].biomarker
|
|
74
|
+
vehicle = df[df.arm == "vehicle"].biomarker
|
|
75
|
+
|
|
76
|
+
drop = 1 - treated.mean() / vehicle.mean()
|
|
77
|
+
t, p = stats.ttest_ind(treated, vehicle, equal_var=False)
|
|
78
|
+
|
|
79
|
+
statement(f"At day 28, the 10 mg/kg arm showed a {drop:.0%} lower serum biomarker "
|
|
80
|
+
f"than vehicle (Welch t = {t:.1f}, p = {p:.3f}).")
|
|
81
|
+
evidence(pct_drop=round(drop * 100, 1), p_value=round(p, 4))
|
|
82
|
+
|
|
83
|
+
assert p < 0.05 and drop > 0 # the qualitative claim: a real, downward effect
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
The three layers don't repeat each other:
|
|
87
|
+
|
|
88
|
+
- **`statement()`** is the proposition, with numbers interpolated from the data — it *can't* claim a drop the table doesn't produce.
|
|
89
|
+
- the **docstring** is the *why and how* — context that lets a later reviewer judge the claim without re-deriving it.
|
|
90
|
+
- the **`assert`** guards only the qualitative shape (significant, downward); the quantity lives in the computed statement.
|
|
91
|
+
|
|
92
|
+
Run it:
|
|
93
|
+
|
|
94
|
+
```bash
|
|
95
|
+
pytest --grounding-out ./out
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
→ `out/grounding_report.json`:
|
|
99
|
+
|
|
100
|
+
```json
|
|
101
|
+
{
|
|
102
|
+
"claims": [{
|
|
103
|
+
"id": "test_efficacy.py::test_treatment_lowers_biomarker_vs_vehicle",
|
|
104
|
+
"statement": "At day 28, the 10 mg/kg arm showed a 41% lower serum biomarker than vehicle (Welch t = 3.2, p = 0.006).",
|
|
105
|
+
"kind": "result",
|
|
106
|
+
"strength": "moderate",
|
|
107
|
+
"caveats": "n=8 per arm, single cohort; not corrected for multiple endpoints",
|
|
108
|
+
"inputs": [{"kind": "data", "path": "biomarker_day28.csv", "sha256": "a17b…", "via": "tracked"}],
|
|
109
|
+
"evidence": {"pct_drop": 41.2, "p_value": 0.0061}
|
|
110
|
+
}]
|
|
111
|
+
}
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
Nobody hand-wrote that provenance. `data()` recorded the read; the capture context attached it to the claim.
|
|
115
|
+
|
|
116
|
+
## Grounding a quote in a document
|
|
117
|
+
|
|
118
|
+
```python
|
|
119
|
+
from grounding import doc, statement
|
|
120
|
+
|
|
121
|
+
def test_summary_states_endpoint_met():
|
|
122
|
+
"""Quote is from the signed CSR §10.1, not the synopsis."""
|
|
123
|
+
csr = doc("clinical_summary.pdf") # sha-pinned like any input
|
|
124
|
+
statement("The clinical study report states the primary endpoint was met.")
|
|
125
|
+
assert csr.contains("the primary endpoint was met")
|
|
126
|
+
```
|
|
127
|
+
|
|
128
|
+
`DocRef.contains()` extracts with pinned pure-Python readers (pdf/docx/pptx) and matches whitespace/dash/Markdown-robustly, so a quote split across lines or cells still matches. The match is a pure function of the bytes. There is **no OCR**: a scanned/image-only document raises `EmptyExtraction` rather than silently reporting "not found".
|
|
129
|
+
|
|
130
|
+
## Composing claims
|
|
131
|
+
|
|
132
|
+
`uses()` lets one claim build on earlier ones: it merges their sha-pinned inputs into this
|
|
133
|
+
claim's provenance (transitively) and hands back their `evidence`. The composed claim can read
|
|
134
|
+
no source of its own, yet `grounding trace` still walks it all the way down — change an upstream
|
|
135
|
+
dataset and the roll-up breaks too. Provenance is a computed DAG, never hand-maintained.
|
|
136
|
+
|
|
137
|
+
**Roll up independent results.** A program-level conclusion that rests on several per-dataset
|
|
138
|
+
claims — defined in different test files, over different data:
|
|
139
|
+
|
|
140
|
+
```python
|
|
141
|
+
from grounding import uses, statement, strength
|
|
142
|
+
|
|
143
|
+
@strength("moderate")
|
|
144
|
+
def test_effect_replicates_across_cohorts():
|
|
145
|
+
"""The biomarker drop holds in two independently-run cohorts."""
|
|
146
|
+
b = uses("test_treatment_lowers_biomarker_vs_vehicle") # cohort B
|
|
147
|
+
c = uses("test_treatment_lowers_biomarker_cohort_c") # cohort C, a different test file
|
|
148
|
+
statement(f"the effect replicates: {b['pct_drop']:.0f}% (cohort B) "
|
|
149
|
+
f"and {c['pct_drop']:.0f}% (cohort C)")
|
|
150
|
+
assert b["pct_drop"] > 0 and c["pct_drop"] > 0
|
|
151
|
+
```
|
|
152
|
+
|
|
153
|
+
This claim touches no CSV directly, but its recorded inputs now include *both* cohorts' files,
|
|
154
|
+
each sha-pinned. Change either cohort's data and this roll-up — not just the two underlying
|
|
155
|
+
claims — shows up as drifted.
|
|
156
|
+
|
|
157
|
+
**Cross-check data against a document.** Compose a numeric claim with a quote check to assert an
|
|
158
|
+
external report and your own data agree — the classic transcription-drift catcher:
|
|
159
|
+
|
|
160
|
+
```python
|
|
161
|
+
from grounding import doc, uses, statement, strength, kind
|
|
162
|
+
|
|
163
|
+
@kind("external")
|
|
164
|
+
@strength("strong")
|
|
165
|
+
def test_report_headline_matches_our_data():
|
|
166
|
+
"""The CSR's stated drop matches what our tidy data produces — no transcription drift."""
|
|
167
|
+
ours = uses("test_treatment_lowers_biomarker_vs_vehicle")["pct_drop"]
|
|
168
|
+
csr = doc("clinical_summary.pdf")
|
|
169
|
+
statement(f"the CSR's reported reduction matches our computed {ours:.0f}% drop")
|
|
170
|
+
assert csr.contains(f"{ours:.0f}% reduction")
|
|
171
|
+
```
|
|
172
|
+
|
|
173
|
+
This grounds the *agreement* itself: the PDF is pinned by `doc()`, the number is pinned
|
|
174
|
+
transitively through `uses()`, and the single assert fails if the report and the data ever
|
|
175
|
+
diverge. Each claim stays small and independently reviewable; higher-level claims inherit — never
|
|
176
|
+
re-derive — their evidence and provenance.
|
|
177
|
+
|
|
178
|
+
## Tracing
|
|
179
|
+
|
|
180
|
+
```bash
|
|
181
|
+
grounding trace ./out # re-verify every claim's inputs still match recorded shas
|
|
182
|
+
```
|
|
183
|
+
|
|
184
|
+
One command answers *"is this conclusion still grounded?"* — the question a reviewer otherwise spends an afternoon on. Exit 0 if grounded, 1 if any input changed or went missing.
|
|
185
|
+
|
|
186
|
+
## What's in the box
|
|
187
|
+
|
|
188
|
+
| Piece | What it does |
|
|
189
|
+
|---|---|
|
|
190
|
+
| **Capture context** | records every tracked read (kind, path, sha256) while a claim runs |
|
|
191
|
+
| **Tracked loaders** | `data()`/`load()` (CSV→DataFrame, sha-pinned), `doc()` (any document) |
|
|
192
|
+
| **`statement()`** | the claim's proposition — ideally computed from the data so it can't drift |
|
|
193
|
+
| **Quote verification** | `DocRef.contains()` — offline, deterministic; raises on unreadable sources |
|
|
194
|
+
| **pytest plugin** | wraps every test in a capture, emits `grounding_report.json` |
|
|
195
|
+
| **Judgment markers** | `@strength`, `@caveats`, `@kind`, `@reviewed` — the reviewer's surface |
|
|
196
|
+
| **`uses()`** | transitive claim composition |
|
|
197
|
+
| **Bypass guard** | flags a claim that reads data through an untracked path |
|
|
198
|
+
| **`grounding trace`** | walks the provenance DAG; tells you if a conclusion is still grounded |
|
|
199
|
+
|
|
200
|
+
## Design principles
|
|
201
|
+
|
|
202
|
+
- **Deterministic & offline.** Pure function of bytes. No network, keys, or model — runs in CI and in massively parallel agent fan-out with nothing to configure.
|
|
203
|
+
- **Sha-pinned.** The recorded hash is of exactly the bytes parsed.
|
|
204
|
+
- **The test is the spec.** A claim is an ordinary pytest test; your runner, fixtures, and CI just work. Git history of `statement`/`@strength`/`@caveats` is a belief-change ledger.
|
|
205
|
+
- **Computed, not curated.** Provenance, composition, and (ideally) the statement itself derive from what ran, so they can't drift from reality.
|
|
206
|
+
- **Author/critic separation by construction.** Mechanical truth → the assert; honest framing → metadata a fresh-context reviewer judges against the same pinned evidence.
|
|
207
|
+
|
|
208
|
+
## What it is *not*
|
|
209
|
+
|
|
210
|
+
- **Not data versioning** (DVC/lakeFS) — it pins shas of files you already have, wherever they live.
|
|
211
|
+
- **Not a workflow engine** — it observes reads during a test; it doesn't orchestrate them.
|
|
212
|
+
- **Not rendering** — turning grounded claims into a cited report (PDF/HTML) is a separate layer built *on top* of `grounding_report.json`.
|
|
213
|
+
- **Not storage/indexing** — the report is the wire format; building a searchable index over it is a consumer's concern.
|
|
214
|
+
- **Not an LLM judge** — it runs no model; judgments are recorded by the agents that use it.
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
grounding/__init__.py,sha256=trgaeeLarPVl6sZp-7xYsITSXr8bChD_NWiSK_mHulc,2486
|
|
2
|
+
grounding/_capture.py,sha256=IUSy-T5Lwa6q36Epl8Ni4zjbnYq1guC01T1O1jS35pE,2783
|
|
3
|
+
grounding/_normalize.py,sha256=Ec_5yCPnFq8ywoKcNj9CYLyIi62hLDHzezdOXBC52Zs,1752
|
|
4
|
+
grounding/_text.py,sha256=BX8YrI4WCJzbw2RFxn2UEIaJuUptIj8eqxczVEKbdMo,2373
|
|
5
|
+
grounding/claim.py,sha256=m8Uc_58ARKzrUjlBTxuaZFViDQczRbty1cZzLGWLkzU,4244
|
|
6
|
+
grounding/cli.py,sha256=cASvd8lRXejxkLRttF7F3vazQFAwjQolKZqyeYLvE_g,1210
|
|
7
|
+
grounding/guard.py,sha256=WvPfC6mPe91NuYQ7VpBXSUFUBqCxpuyNyfHjgKHupbU,3467
|
|
8
|
+
grounding/loaders.py,sha256=cx8AuB9WBh3hcB8Fyb_8gCeKCy0PWI4DR4IfdR4ihks,7829
|
|
9
|
+
grounding/plugin.py,sha256=FTNn8NGc17hLNVCPQWKZzcGuxqisqw25ehh1d6ZYElI,9476
|
|
10
|
+
grounding/report_io.py,sha256=B-CNv_Gyaxpv6O4yTDPUjWRqRmmEfbng5MqIPa26aNY,1464
|
|
11
|
+
grounding/trace.py,sha256=NxrWhZJdT-XvdvjUsOlYL0eENKafJP53iUZn9knsD0w,2513
|
|
12
|
+
pytest_grounding-0.0.1.dist-info/licenses/LICENSE,sha256=7zWYAdBL9zruGyPR4pIEkWOMm-LjlvOoaJBamd5Uays,1068
|
|
13
|
+
pytest_grounding-0.0.1.dist-info/METADATA,sha256=PZlqh0uXzn4fnrovmhpUbyzVSVLPq9tZ3KnjJ5OuAU4,10822
|
|
14
|
+
pytest_grounding-0.0.1.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
|
|
15
|
+
pytest_grounding-0.0.1.dist-info/entry_points.txt,sha256=e3hjM8FuEEuTRgqgW6L-haXT62tBiADjOtMb-N6Hm_Q,90
|
|
16
|
+
pytest_grounding-0.0.1.dist-info/top_level.txt,sha256=G9oeBZ6MVFGVkzOWhMqAu6DaZ62iKeVTviROIDeD7t0,10
|
|
17
|
+
pytest_grounding-0.0.1.dist-info/RECORD,,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Sam Quigley
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
grounding
|