pdfhell 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pdfhell/__init__.py ADDED
@@ -0,0 +1,34 @@
1
+ """PDF Hell — adversarial PDFs that break AI document readers.
2
+
3
+ Procedural ground truth, not LLM-as-judge. Each trap family generates PDFs
4
+ *from code*, so the answer key is exact and reproducible — no circular
5
+ assurance.
6
+
7
+ Quickstart::
8
+
9
+ uvx pdfhell make --trap hidden_ocr_mismatch --seed 42
10
+ uvx pdfhell run --model anthropic:claude-sonnet-4-6 --suite mini --out runs/claude.json
11
+ uvx pdfhell report runs/claude.json
12
+
13
+ Build on top of ``multivon-eval`` (the QAG engine, provider adapters, audit
14
+ packaging, cost tracking). pdfhell is *only* the adversarial generation
15
+ layer; the runtime, scoring, and reporting come from multivon-eval.
16
+ """
17
+ from __future__ import annotations
18
+
19
+ __version__ = "0.1.0"
20
+
21
+ from .case import HellCase
22
+ from .generators import (
23
+ GENERATORS,
24
+ TRAP_FAMILIES,
25
+ generate_case,
26
+ )
27
+
28
+ __all__ = [
29
+ "__version__",
30
+ "HellCase",
31
+ "GENERATORS",
32
+ "TRAP_FAMILIES",
33
+ "generate_case",
34
+ ]
pdfhell/auditpack.py ADDED
@@ -0,0 +1,182 @@
1
+ """Build a downloadable, hash-chained audit pack from a pdfhell run.
2
+
3
+ The pack is a ZIP containing:
4
+
5
+ - ``manifest.json`` — pdfhell version, run timestamp, model spec, suite,
6
+ per-trap pass rates, total cost (when known), SHA-256 of every file
7
+ inside the pack.
8
+ - ``run.json`` — the full :class:`SuiteReport` JSON.
9
+ - ``run.xml`` — JUnit XML (same data as ``run.json``, machine-readable
10
+ for CI dashboards).
11
+ - ``cases/<case_id>.pdf`` — every adversarial PDF the model was tested
12
+ against.
13
+ - ``cases/<case_id>.json`` — each case's answer key + metadata.
14
+ - ``README.txt`` — human-readable "what's in this ZIP" + reproduction
15
+ command. Procurement teams open this first.
16
+
17
+ The audit pack is the artifact a buyer's procurement team attaches to
18
+ a diligence appendix. It must be self-describing (no out-of-band
19
+ context required), reproducible (the manifest tells you the exact
20
+ command to regenerate the run), and tamper-evident (the manifest
21
+ includes a SHA-256 for every file in the pack; auditors can verify the
22
+ ZIP wasn't edited after delivery).
23
+ """
24
+ from __future__ import annotations
25
+
26
+ import hashlib
27
+ import json
28
+ import zipfile
29
+ from datetime import datetime, timezone
30
+ from pathlib import Path
31
+ from typing import Iterable
32
+
33
+ from . import __version__
34
+ from .case import HellCase
35
+ from .junit import report_to_junit
36
+ from .scorer import SuiteReport
37
+
38
+
39
+ _README_TEMPLATE = """\
40
+ # pdfhell audit pack
41
+
42
+ This ZIP is a complete, self-describing record of one PDF Hell run. It
43
+ contains every PDF the model was asked to read, every answer key, the
44
+ raw model output, and a tamper-evident manifest.
45
+
46
+ ## What's in this pack
47
+
48
+ - manifest.json — Run metadata + SHA-256 of every file in this ZIP.
49
+ - run.json — Full run report (per-case scores, model outputs).
50
+ - run.xml — JUnit XML (renders in CI dashboards).
51
+ - cases/*.pdf — The adversarial PDFs the model was tested against.
52
+ - cases/*.json — The answer keys + per-case metadata.
53
+ - README.txt — This file.
54
+
55
+ ## How to verify
56
+
57
+ The manifest contains a SHA-256 for every file in this ZIP. To verify
58
+ nothing was edited after delivery:
59
+
60
+ unzip -p audit-pack.zip manifest.json | jq .files
61
+ sha256sum cases/*.pdf cases/*.json run.json run.xml README.txt
62
+
63
+ Each hash in the manifest must match the file's actual SHA-256.
64
+
65
+ ## How to reproduce
66
+
67
+ The manifest records the exact pdfhell command. To regenerate
68
+ byte-identical PDFs and re-run the same model:
69
+
70
+ {repro_command}
71
+
72
+ pdfhell uses Canvas(invariant=True) on every generator so PDFs are
73
+ byte-identical across runs with the same seed.
74
+
75
+ ## Scope
76
+
77
+ pdfhell {pdfhell_version}, suite {suite}, model {model}. Generated
78
+ {timestamp}. {n} cases, {passed}/{n} passed ({pass_rate:.0%}). See
79
+ manifest.json for per-trap breakdown.
80
+ """
81
+
82
+
83
+ def _sha256(data: bytes) -> str:
84
+ return hashlib.sha256(data).hexdigest()
85
+
86
+
87
+ def _gather_files(report: SuiteReport, cases_dir: Path) -> Iterable[tuple[str, bytes]]:
88
+ """Yield (arcname, bytes) pairs for every file going into the ZIP.
89
+
90
+ Order: README first (humans see it first), then manifest, then JSON
91
+ + XML, then case PDFs + answer keys. Stable ordering keeps the
92
+ SHA-256 of the ZIP itself stable across runs.
93
+ """
94
+ for case_summary in report.cases:
95
+ case_id = case_summary.case_id
96
+ pdf_path = cases_dir / f"{case_id}.pdf"
97
+ json_path = cases_dir / f"{case_id}.json"
98
+ if pdf_path.exists():
99
+ yield f"cases/{case_id}.pdf", pdf_path.read_bytes()
100
+ if json_path.exists():
101
+ yield f"cases/{case_id}.json", json_path.read_bytes()
102
+
103
+
104
def build_audit_pack(
    report: SuiteReport,
    cases_dir: Path,
    out_path: Path,
) -> Path:
    """Write a complete audit ZIP for ``report`` to ``out_path``.

    The archive holds ``manifest.json`` first, then ``README.txt``,
    ``run.json``, ``run.xml`` and whatever per-case files
    :func:`_gather_files` finds under ``cases_dir``. The manifest lists
    the SHA-256 and size of every other entry (it cannot hash itself).

    Parameters
    ----------
    report: the suite report to package.
    cases_dir: directory holding ``<case_id>.pdf`` / ``<case_id>.json``.
    out_path: destination ZIP path; parent directories are created.

    Returns the resolved output path.
    """
    out_path = out_path.resolve()
    out_path.parent.mkdir(parents=True, exist_ok=True)

    # Materialise the per-case files into bytes first so we can hash them.
    case_files: list[tuple[str, bytes]] = list(_gather_files(report, cases_dir))

    run_json_bytes = json.dumps(report.to_dict(), indent=2).encode("utf-8")
    run_xml_bytes = report_to_junit(report).encode("utf-8")
    timestamp = datetime.now(timezone.utc).isoformat()
    passed = sum(1 for c in report.cases if c.correct)

    repro_command = (
        f"uvx pdfhell run --model {report.model} --suite {report.suite}"
    )
    readme_bytes = _README_TEMPLATE.format(
        pdfhell_version=__version__,
        suite=report.suite,
        model=report.model,
        timestamp=timestamp,
        n=report.n,
        passed=passed,
        pass_rate=report.pass_rate,
        repro_command=repro_command,
    ).encode("utf-8")

    # Everything except the manifest itself; the manifest is built last
    # so it can carry the hashes of all of these.
    files_in_pack: list[tuple[str, bytes]] = [
        ("README.txt", readme_bytes),
        ("run.json", run_json_bytes),
        ("run.xml", run_xml_bytes),
        *case_files,
    ]
    manifest = {
        "pdfhell_version": __version__,
        "generated_at": timestamp,
        "model": report.model,
        "suite": report.suite,
        "n": report.n,
        "passed": passed,
        "pass_rate": report.pass_rate,
        "per_trap_pass": report.per_trap_pass,
        "per_trap_fell_for_trap": report.per_trap_fell_for_trap,
        "reproduction": {
            "command": repro_command,
            "note": (
                "PDFs are regenerated byte-identically via Canvas(invariant=True). "
                "Same seed → same PDF → same answer key."
            ),
        },
        "files": [
            {"path": name, "sha256": _sha256(data), "size": len(data)}
            for name, data in files_in_pack
        ],
    }
    manifest_bytes = json.dumps(manifest, indent=2).encode("utf-8")

    with zipfile.ZipFile(out_path, "w", compression=zipfile.ZIP_DEFLATED) as zf:
        for name, data in [("manifest.json", manifest_bytes), *files_in_pack]:
            info = zipfile.ZipInfo(name)
            # Fixed entry timestamp (not the run time) so per-entry ZIP
            # metadata does not vary between packaging runs.
            info.date_time = (2026, 1, 1, 0, 0, 0)
            # writestr() takes the compression method from the ZipInfo,
            # which defaults to ZIP_STORED — the ZipFile-level setting is
            # ignored for ZipInfo entries, so request deflate explicitly.
            info.compress_type = zipfile.ZIP_DEFLATED
            # A bare ZipInfo has external_attr == 0, which extracts as a
            # file with no permission bits on Unix; mark it rw-r--r--.
            info.external_attr = 0o644 << 16
            zf.writestr(info, data)

    return out_path
180
+
181
+
182
+ __all__ = ["build_audit_pack"]
pdfhell/case.py ADDED
@@ -0,0 +1,87 @@
1
+ """Canonical case representation for a generated pdfhell trap.
2
+
3
+ A ``HellCase`` is the serialisable artifact that every trap-family generator
4
+ returns. The runner consumes these; the scoring layer treats
5
+ ``expected_answer`` as code-based ground truth and grades the model's
6
+ free-text output against it. QAG (multivon-eval's
7
+ :class:`~multivon_eval.DocumentGrounding`) is layered on as the
8
+ *explanation* of why a particular answer scored a particular way — not as
9
+ the score itself. This is the architectural fix the Round-7 churned design
10
+ partner demanded: no LLM-judges-LLM circular assurance for the primary
11
+ correctness signal.
12
+ """
13
+ from __future__ import annotations
14
+
15
+ import json
16
+ from dataclasses import asdict, dataclass, field
17
+ from pathlib import Path
18
+ from typing import Any
19
+
20
+
21
+ @dataclass(slots=True)
22
+ class HellCase:
23
+ """One adversarial PDF + its ground truth.
24
+
25
+ Attributes
26
+ ----------
27
+ id: stable, deterministic, e.g. ``"hidden_ocr_mismatch-0042"``.
28
+ trap_family: one of :data:`pdfhell.generators.TRAP_FAMILIES`.
29
+ seed: integer seed used to generate the case. Regenerating with the
30
+ same seed produces a byte-identical PDF and identical answer key.
31
+ question: the user-facing question the model must answer.
32
+ expected_answer: a human-readable form of the correct answer used in
33
+ reports + JUnit output. For single-value traps this is also the
34
+ substring the scorer looks for; for prose-style traps (e.g.
35
+ :mod:`pdfhell.generators.footnote_override`) the scorer instead
36
+ uses :attr:`expected_tokens` (see below).
37
+ expected_tokens: optional list of substrings that ALL must appear in
38
+ the model's output for the case to count as correct. Used by
39
+ traps where the right answer can be expressed multiple
40
+ equally-valid ways (e.g. a list of clause carve-outs in any
41
+ order). When empty, the scorer falls back to a contains-match
42
+ against ``expected_answer``.
43
+ forbidden_answers: optional list of plausible-but-wrong answers the
44
+ trap aims to elicit (e.g. the hidden-OCR amount). Used by the
45
+ scorer to detect the specific failure mode the trap was designed
46
+ for.
47
+ pdf_path: location of the generated PDF (relative to the suite root).
48
+ metadata: extra fields — number of pages, font size of footnotes,
49
+ the literal hidden-OCR string, etc. Useful for diagnostics and
50
+ for trap-family-specific scorers.
51
+ """
52
+
53
+ id: str
54
+ trap_family: str
55
+ seed: int
56
+ question: str
57
+ expected_answer: str
58
+ forbidden_answers: list[str] = field(default_factory=list)
59
+ expected_tokens: list[str] = field(default_factory=list)
60
+ pdf_path: str = ""
61
+ metadata: dict[str, Any] = field(default_factory=dict)
62
+
63
+ # ─── serialisation ─────────────────────────────────────────────────────
64
+
65
+ def to_dict(self) -> dict[str, Any]:
66
+ return asdict(self)
67
+
68
+ @classmethod
69
+ def from_dict(cls, raw: dict[str, Any]) -> "HellCase":
70
+ return cls(
71
+ id=raw["id"],
72
+ trap_family=raw["trap_family"],
73
+ seed=int(raw["seed"]),
74
+ question=raw["question"],
75
+ expected_answer=raw["expected_answer"],
76
+ forbidden_answers=list(raw.get("forbidden_answers", [])),
77
+ expected_tokens=list(raw.get("expected_tokens", [])),
78
+ pdf_path=raw.get("pdf_path", ""),
79
+ metadata=dict(raw.get("metadata", {})),
80
+ )
81
+
82
+ def dump_json(self, path: str | Path) -> None:
83
+ Path(path).write_text(json.dumps(self.to_dict(), indent=2), encoding="utf-8")
84
+
85
+ @classmethod
86
+ def load_json(cls, path: str | Path) -> "HellCase":
87
+ return cls.from_dict(json.loads(Path(path).read_text(encoding="utf-8")))
pdfhell/cli.py ADDED
@@ -0,0 +1,216 @@
1
+ """pdfhell CLI.
2
+
3
+ Five subcommands keep the surface minimal:
4
+
5
+ pdfhell list-traps list available trap families
6
+ pdfhell make --trap X --seed N [--out P] generate one case (pdf + json)
7
+ pdfhell build --suite mini --out cases/ materialise a named suite
8
+ pdfhell run --suite mini --model ... evaluate a model against the suite
9
+ pdfhell report runs/<name>.json print summary from a run JSON
10
+
11
+ Everything else (scoring, provider dispatch, audit packaging) is pulled
12
+ from :mod:`multivon_eval`. The CLI is glue.
13
+ """
14
+ from __future__ import annotations
15
+
16
+ import argparse
17
+ import json
18
+ import sys
19
+ from pathlib import Path
20
+
21
+ from . import __version__
22
+ from .auditpack import build_audit_pack
23
+ from .case import HellCase
24
+ from .generators import TRAP_FAMILIES, generate_case
25
+ from .junit import report_to_junit
26
+ from .runner import parse_model_spec, run_suite
27
+ from .scorer import SuiteReport
28
+ from .suite import SUITES, build_suite
29
+
30
+
31
def _cmd_list_traps(args: argparse.Namespace) -> int:
    """Print each registered trap family on its own line and return 0."""
    if TRAP_FAMILIES:
        print(*TRAP_FAMILIES, sep="\n")
    return 0
35
+
36
+
37
def _cmd_make(args: argparse.Namespace) -> int:
    """Generate one case and write ``<id>.pdf`` + ``<id>.json`` to ``args.out``.

    Reads ``args.trap``, ``args.seed`` and ``args.out``. Returns 0 on
    success, or 2 when ``generate_case`` raises ``KeyError`` (unknown
    trap family).
    """
    try:
        pdf_bytes, case = generate_case(args.trap, args.seed)
    except KeyError as exc:
        # str(KeyError(msg)) is repr(msg), i.e. the message wrapped in
        # quotes; print the raw message instead.
        message = exc.args[0] if exc.args else exc
        print(message, file=sys.stderr)
        print(f"available trap families: {', '.join(TRAP_FAMILIES)}", file=sys.stderr)
        return 2
    out_dir = Path(args.out).resolve()
    out_dir.mkdir(parents=True, exist_ok=True)
    pdf_path = out_dir / f"{case.id}.pdf"
    json_path = out_dir / f"{case.id}.json"
    pdf_path.write_bytes(pdf_bytes)
    # Store only the file name so the answer key stays valid when the
    # output directory is moved.
    case.pdf_path = pdf_path.name
    case.dump_json(json_path)
    print(f"wrote {pdf_path} ({len(pdf_bytes):,} bytes)")
    print(f"wrote {json_path}")
    print(f"expected answer: {case.expected_answer}")
    return 0
55
+
56
+
57
def _cmd_build(args: argparse.Namespace) -> int:
    """Materialise the suite named ``args.suite`` into ``args.out``.

    Returns 0 after building, or 2 when the suite name is not in
    ``SUITES``.
    """
    try:
        spec = SUITES[args.suite]
    except KeyError:
        print(f"unknown suite {args.suite!r}; available: {', '.join(SUITES)}", file=sys.stderr)
        return 2

    target_dir = Path(args.out).resolve()
    print(f"building suite {spec.name!r} ({spec.total_cases} cases) → {target_dir}")
    written = build_suite(spec, target_dir)
    print(f"wrote {len(written)} cases")
    return 0
67
+
68
+
69
def _cmd_run(args: argparse.Namespace) -> int:
    """Evaluate ``args.model`` against a suite and write the run report.

    Builds the suite on demand when the cases directory is missing,
    writes the run JSON (plus optional JUnit XML and audit ZIP), and
    prints a summary. Returns 2 when the cases directory is missing and
    the suite is unknown, 1 when ``--fail-threshold`` is set and the
    pass rate is below it, otherwise 0.
    """
    suite = args.suite

    # Default cases dir tracks the suite name so `--suite smoke` doesn't
    # silently run the mini suite.
    if args.cases_dir:
        cases_dir = Path(args.cases_dir).resolve()
    else:
        cases_dir = Path(f"./cases/{suite}").resolve()

    if not cases_dir.is_dir():
        if suite not in SUITES:
            print(f"cases dir {cases_dir} not found and suite {suite!r} is unknown",
                  file=sys.stderr)
            return 2
        print(f"cases dir {cases_dir} not found; building {suite} suite first ...")
        cases_dir.mkdir(parents=True, exist_ok=True)
        build_suite(SUITES[suite], cases_dir)

    print(f"running {args.model} against {suite} suite at {cases_dir}")
    report = run_suite(
        cases_dir=cases_dir,
        model_spec=args.model,
        workers=args.workers,
        progress=not args.quiet,
        suite_name=suite,
    )

    if args.out:
        out_path = Path(args.out).resolve()
    else:
        out_path = _default_run_path(args.model, suite)
    out_path.parent.mkdir(parents=True, exist_ok=True)
    serialised = json.dumps(report.to_dict(), indent=2)
    out_path.write_text(serialised, encoding="utf-8")

    print()
    _print_report(report)
    print()
    print(f"wrote {out_path}")

    # Optional extra artifacts.
    if args.junit:
        junit_path = Path(args.junit).resolve()
        junit_path.parent.mkdir(parents=True, exist_ok=True)
        junit_path.write_text(report_to_junit(report), encoding="utf-8")
        print(f"wrote {junit_path}")
    if args.audit_pack:
        zip_path = Path(args.audit_pack).resolve()
        build_audit_pack(report, cases_dir, zip_path)
        print(f"wrote {zip_path}")

    # CI gate: a pass rate under the threshold turns into exit code 1.
    threshold = args.fail_threshold
    if threshold is not None and report.pass_rate < threshold:
        print(
            f"\nFAIL: pass_rate {report.pass_rate:.1%} below --fail-threshold "
            f"{threshold:.1%}"
        )
        return 1
    return 0
114
+
115
+
116
def _cmd_report(args: argparse.Namespace) -> int:
    """Load the run JSON at ``args.run`` and print its summary; return 0.

    Only the aggregate fields are restored; per-case results are left
    empty because the summary printer does not need them.
    """
    payload = json.loads(Path(args.run).read_text(encoding="utf-8"))
    summary_keys = (
        "model",
        "suite",
        "n",
        "pass_rate",
        "per_trap_pass",
        "per_trap_fell_for_trap",
        "refused_rate",
    )
    summary = {key: payload[key] for key in summary_keys}
    report = SuiteReport(**summary, cases=[])
    _print_report(report)
    return 0
130
+
131
+
132
+ def _print_report(report: SuiteReport) -> None:
133
+ print(f"PDF Hell {report.suite} suite — n={report.n}")
134
+ print()
135
+ print(f"model: {report.model}")
136
+ print(f"pass: {sum(1 for _ in report.cases if _.correct) if report.cases else int(report.pass_rate * report.n)}/{report.n} ({report.pass_rate:.1%})")
137
+ print(f"refused: {report.refused_rate:.1%}")
138
+ print()
139
+ print("per-trap pass rate:")
140
+ for trap, rate in sorted(report.per_trap_pass.items()):
141
+ fell = report.per_trap_fell_for_trap.get(trap, 0.0)
142
+ print(f" {trap:30s} pass={rate:.0%} fell-for-trap={fell:.0%}")
143
+
144
+
145
+ def _default_run_path(model_spec: str, suite: str) -> Path:
146
+ safe = model_spec.replace("/", "-").replace(":", "-")
147
+ return Path(f"runs/{suite}-{safe}.json").resolve()
148
+
149
+
150
def _cmd_build_with_default_out(args: argparse.Namespace) -> int:
    """Default ``--out`` to ``./cases/<suite>``, then run ``_cmd_build``."""
    if args.out is None:
        args.out = f"./cases/{args.suite}"
    return _cmd_build(args)


def build_parser() -> argparse.ArgumentParser:
    """Construct the ``pdfhell`` argument parser.

    Each subcommand stores its handler in ``func`` via ``set_defaults``;
    a subcommand is required.
    """
    p = argparse.ArgumentParser(
        prog="pdfhell",
        description="PDF Hell — adversarial PDFs that break AI document readers.",
    )
    p.add_argument("--version", action="version", version=f"pdfhell {__version__}")
    sub = p.add_subparsers(dest="cmd", required=True, metavar="<command>")

    p_list = sub.add_parser("list-traps", help="list available trap families")
    p_list.set_defaults(func=_cmd_list_traps)

    p_make = sub.add_parser("make", help="generate one case (pdf + json)")
    p_make.add_argument("--trap", required=True, choices=TRAP_FAMILIES)
    p_make.add_argument("--seed", required=True, type=int)
    p_make.add_argument("--out", default="./cases", help="output directory (default: ./cases)")
    p_make.set_defaults(func=_cmd_make)

    p_build = sub.add_parser("build", help="materialise a named suite to disk")
    p_build.add_argument("--suite", default="mini", choices=tuple(SUITES.keys()))
    # The default output directory follows --suite (still ./cases/mini
    # for the default suite) so `build --suite X` writes where
    # `run --suite X` looks, instead of always landing in ./cases/mini.
    p_build.add_argument(
        "--out",
        default=None,
        help="output directory (default: ./cases/<suite>)",
    )
    p_build.set_defaults(func=_cmd_build_with_default_out)

    p_run = sub.add_parser("run", help="evaluate a model against a suite")
    p_run.add_argument("--model", required=True,
                       help="provider:model, e.g. anthropic:claude-sonnet-4-6")
    p_run.add_argument("--suite", default="mini", choices=tuple(SUITES.keys()))
    p_run.add_argument(
        "--cases-dir",
        default=None,
        help="dir with materialised cases (default: ./cases/<suite>; built on demand if missing)",
    )
    p_run.add_argument("--workers", type=int, default=4)
    p_run.add_argument("--quiet", action="store_true")
    p_run.add_argument("--out", help="output JSON path (default: runs/<suite>-<model>.json)")
    p_run.add_argument(
        "--junit",
        help="also write a JUnit XML report to this path (renders in GitHub Actions / GitLab CI)",
    )
    p_run.add_argument(
        "--audit-pack",
        help=(
            "also write a complete, hash-chained audit ZIP to this path "
            "(PDFs + answer keys + run JSON + JUnit XML + SHA-256 manifest)"
        ),
    )
    p_run.add_argument(
        "--fail-threshold",
        type=float,
        help="exit nonzero if pass_rate is below this fraction (0.0–1.0); for CI gates",
    )
    p_run.set_defaults(func=_cmd_run)

    p_report = sub.add_parser("report", help="print summary from a run JSON")
    p_report.add_argument("run", help="path to runs/<suite>-<model>.json")
    p_report.set_defaults(func=_cmd_report)

    return p
207
+
208
+
209
def main(argv: list[str] | None = None) -> int:
    """Parse ``argv`` and dispatch to the chosen subcommand's handler.

    Returns whatever exit code the handler returns.
    """
    namespace = build_parser().parse_args(argv)
    handler = namespace.func
    return handler(namespace)
213
+
214
+
215
+ if __name__ == "__main__":
216
+ raise SystemExit(main())
@@ -0,0 +1,49 @@
1
+ """Trap-family generators.
2
+
3
+ Each generator is a deterministic ``(seed) -> (pdf_bytes, HellCase)``
4
+ function. The mapping :data:`GENERATORS` powers ``pdfhell make --trap X``
5
+ and the suite builder. Adding a new trap family means registering it
6
+ here.
7
+
8
+ Why a registry and not subclasses? Each trap is fundamentally a *small*
9
+ parameterised generator. The registry keeps generators portable, makes
10
+ the CLI's ``--trap`` flag introspectable, and avoids the class-hierarchy
11
+ sprawl that a 50-trap eventual suite would otherwise turn into.
12
+ """
13
+ from __future__ import annotations
14
+
15
+ from typing import Callable
16
+
17
+ from ..case import HellCase
18
+ from .hidden_ocr_mismatch import generate as _hidden_ocr_mismatch
19
+ from .footnote_override import generate as _footnote_override
20
+ from .split_table_across_pages import generate as _split_table_across_pages
21
+
22
+
23
# Signature: (seed: int) -> (pdf_bytes: bytes, case: HellCase).
# The generator only returns the bytes; whoever writes them to disk is
# responsible for filling in the case's pdf_path afterwards.
GeneratorFn = Callable[[int], tuple[bytes, HellCase]]

# Registry of trap-family name -> generator function. Register a new
# trap family by adding an entry here.
GENERATORS: dict[str, GeneratorFn] = {
    "hidden_ocr_mismatch": _hidden_ocr_mismatch,
    "footnote_override": _footnote_override,
    "split_table_across_pages": _split_table_across_pages,
}

# Family names in registration order, frozen as a tuple.
TRAP_FAMILIES: tuple[str, ...] = tuple(GENERATORS.keys())
34
+
35
+
36
def generate_case(trap_family: str, seed: int) -> tuple[bytes, HellCase]:
    """Run the generator registered for ``trap_family`` with ``seed``.

    Returns the ``(pdf_bytes, case)`` pair the generator produces.

    Raises :class:`KeyError` when ``trap_family`` is not registered; the
    message names the unknown family and lists the available ones.
    """
    generator = GENERATORS.get(trap_family)
    if generator is None:
        known = ", ".join(TRAP_FAMILIES)
        raise KeyError(
            f"unknown trap family {trap_family!r}; available: {known}"
        )
    return generator(seed)
47
+
48
+
49
+ __all__ = ["GENERATORS", "TRAP_FAMILIES", "generate_case", "GeneratorFn"]