inclusio 0.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
inclusio/__init__.py ADDED
@@ -0,0 +1 @@
1
+ """Inclusio package."""
@@ -0,0 +1 @@
1
+ """CLI entrypoints for Inclusio."""
inclusio/cli/audit.py ADDED
@@ -0,0 +1,397 @@
1
+ #!/usr/bin/env python3
2
+ # Copyright (c) 2026 Sebastien Rousseau
3
+ # Licensed under the MIT License
4
+ # See LICENSE file for details
5
+ """
6
+ audit.py — EAA / WCAG 2.2 AA conformance audit for built PDFs.
7
+
8
+ Runs veraPDF against PDF/UA-2, WTPDF 1.0 Accessibility, and PDF/A-4f
9
+ flavours on a single PDF or every PDF under build/, and emits both a
10
+ machine-readable JSON report and a human-readable Markdown summary.
11
+
12
+ CI uses this in `--strict` mode to BLOCK release artefacts that fail
13
+ any of the three gates. Local users get the same report by running
14
+ `make audit`.
15
+
16
+ Usage:
17
+ # Audit one PDF
18
+ python -m inclusio.cli.audit build/papers/foo.pdf
19
+
20
+ # Audit every PDF under build/ (default)
21
+ python -m inclusio.cli.audit
22
+
23
+ # Strict mode: non-zero exit on any failure
24
+ python -m inclusio.cli.audit --strict
25
+
26
+ # Specific report path
27
+ python -m inclusio.cli.audit --json out.json --markdown out.md
28
+
29
+ # Subset of flavours
30
+ python -m inclusio.cli.audit --flavours ua2,4f
31
+
32
+ Requires: `verapdf` on PATH. Install via `brew install verapdf` on
33
+ macOS or the official installer (see .github/workflows/verapdf.yml).
34
+ """
35
+
36
+ import argparse
37
+ import json
38
+ import os
39
+ import shutil
40
+ import subprocess
41
+ import sys
42
+ from datetime import UTC, datetime
43
+ from pathlib import Path
44
+
45
+ try:
46
+ import yaml # type: ignore
47
+ except ImportError: # pragma: no cover - PyYAML is a hard dep at install time
48
+ yaml = None
49
+
50
+
51
# Filesystem anchors derived from the location of this module on disk.
SCRIPT_DIR = Path(__file__).resolve().parent
PROJECT_ROOT = SCRIPT_DIR.parent.parent

# CONTENT_ROOT is the directory that holds build/ and data/. It is taken from
# the INCLUSIO_CONTENT_DIR environment variable when that is set to a
# non-empty value; otherwise it falls back to PROJECT_ROOT.
_env_content = os.environ.get("INCLUSIO_CONTENT_DIR")
if _env_content:
    CONTENT_ROOT = Path(_env_content).resolve()
else:
    CONTENT_ROOT = PROJECT_ROOT
DEFAULT_BUILD_DIR = CONTENT_ROOT / "build"
DEFAULT_AUDIT_DIR = DEFAULT_BUILD_DIR / ".audit"
DEFAULT_META = CONTENT_ROOT / "data" / "meta.yaml"


# Flavour catalogue: one (veraPDF flavour id, human-readable label, blocking)
# tuple per conformance gate. A blocking flavour makes --strict exit non-zero
# when one of its checks ends in FAIL or ERROR.
DEFAULT_FLAVOURS = [
    ("ua2", "PDF/UA-2 (ISO 14289-2:2024)", True),
    ("wt1a", "WTPDF 1.0 Accessibility", True),
    ("4f", "PDF/A-4f (ISO 19005-4:2020 + embedded files)", True),
]
69
+
70
+
71
def _have_verapdf() -> bool:
    """Report whether a `verapdf` executable can be located via PATH."""
    located = shutil.which("verapdf")
    return located is not None
74
+
75
+
76
def _verapdf(pdf: Path, flavour: str, timeout: int = 90) -> dict:
    """Run veraPDF on a single PDF for a single flavour.

    Args:
        pdf: Path of the PDF to validate.
        flavour: veraPDF flavour id passed to `--flavour` (e.g. "ua2").
        timeout: Maximum number of seconds to wait for the veraPDF process.

    Returns:
        A dict with keys ``pdf``, ``flavour``, ``status``, ``line`` and
        ``error``. ``status`` is one of PASS, FAIL, SKIP or ERROR:

        * SKIP  - `verapdf` is not resolvable on PATH.
        * PASS / FAIL - the first stdout line starts with that word.
        * ERROR - the process timed out, could not be started, or produced
          no recognisable verdict; ``error`` then explains why.
    """

    def _outcome(status: str, line: str = "", error: str = "") -> dict:
        # Single place that fixes the shape (and key order) of a check record.
        return {
            "pdf": str(pdf),
            "flavour": flavour,
            "status": status,
            "line": line,
            "error": error,
        }

    if not _have_verapdf():
        return _outcome("SKIP", error="verapdf not installed on PATH")
    try:
        result = subprocess.run(
            ["verapdf", "--format", "text", "--flavour", flavour, str(pdf)],
            capture_output=True,
            text=True,
            # Undecodable bytes in tool output (e.g. unusual file names) must
            # not abort the audit with UnicodeDecodeError.
            errors="replace",
            timeout=timeout,
        )
    except subprocess.TimeoutExpired:
        return _outcome("ERROR", error=f"verapdf timed out after {timeout}s")
    except OSError as exc:
        # The binary can vanish or be non-executable between the PATH probe
        # and the launch; record it as a failed check instead of crashing.
        return _outcome("ERROR", error=f"verapdf could not be executed: {exc}")

    stdout_lines = (result.stdout or "").splitlines()
    first = stdout_lines[0] if stdout_lines else ""
    if first.startswith("PASS"):
        return _outcome("PASS", line=first)
    if first.startswith("FAIL"):
        return _outcome("FAIL", line=first)

    # No verdict: prefer the tool's own stderr, but never leave the reason
    # empty, otherwise the report shows a blank failure entry.
    reason = (result.stderr or "").strip() or (
        f"verapdf exited with code {result.returncode} without a PASS/FAIL verdict"
    )
    return _outcome("ERROR", line=first, error=reason)
119
+
120
+
121
def _registry_stems(meta_path: Path) -> set:
    """Return the file stems that identify registered documents.

    Reads the ``documents`` mapping of meta.yaml. For every entry the
    document ID (the mapping key) is added, because template-rendered
    documents land under build/{type}/{doc_id}.pdf. When the entry is a
    mapping with a non-empty string ``src``, the stem of that source path
    is added as well.

    Args:
        meta_path: Path to the meta.yaml registry file.

    Returns:
        A set of string stems. The set is empty when PyYAML is unavailable,
        when the file is missing, unreadable or not valid YAML, or when it
        has no ``documents`` mapping. Callers should treat an empty set as
        "no registry filtering".
    """
    if yaml is None or not meta_path.exists():
        return set()
    try:
        meta = yaml.safe_load(meta_path.read_text(encoding="utf-8"))
    except (OSError, ValueError, yaml.YAMLError):
        # Unreadable file, undecodable bytes (UnicodeDecodeError is a
        # ValueError) or malformed YAML: fall back to "no registry".
        return set()

    # A registry whose top level or `documents` value is not a mapping has
    # nothing to offer; bail out instead of failing on .get() / .items().
    documents = meta.get("documents") if isinstance(meta, dict) else None
    if not isinstance(documents, dict):
        return set()

    stems = set()
    for key, doc in documents.items():
        # `doc` may be None when the YAML entry has no body (e.g. `foo:`
        # with no children); such an entry contributes only its ID.
        if isinstance(doc, dict):
            src = doc.get("src")
            if isinstance(src, str) and src:
                stems.add(Path(src).stem)
        # YAML may parse a bare ID such as `2024:` as a non-string; stems
        # are compared against `Path.stem`, so normalise the ID to str.
        if key is not None:
            stems.add(str(key))
    return stems
148
+
149
+
150
+ def collect_pdfs(
151
+ target: Path,
152
+ build_dir: Path,
153
+ registry_stems: set = None,
154
+ ) -> list:
155
+ """Resolve `target` to a flat list of PDFs.
156
+
157
+ If `target` is a PDF, return [target] unconditionally (caller asked
158
+ for a specific file). If `target` is a directory, recursively
159
+ collect *.pdf under it, skipping `build/.cache/` intermediates and
160
+ any `.audit/` folder. When `registry_stems` is non-empty, only PDFs
161
+ whose stem appears in the registry are returned — this filters out
162
+ input briefs (recruiter CVs, job descriptions) co-located in
163
+ `build/jobs/`.
164
+ """
165
+ if target.is_file() and target.suffix.lower() == ".pdf":
166
+ return [target.resolve()]
167
+ if not target.exists():
168
+ return []
169
+ pdfs = []
170
+ for pdf in sorted(target.rglob("*.pdf")):
171
+ parts = pdf.parts
172
+ if any(p in (".cache", ".audit") for p in parts):
173
+ continue
174
+ if registry_stems and pdf.stem not in registry_stems:
175
+ continue
176
+ pdfs.append(pdf.resolve())
177
+ return pdfs
178
+
179
+
180
def audit(
    pdfs: list,
    flavours: list = DEFAULT_FLAVOURS,
    timeout: int = 90,
) -> dict:
    """Validate every PDF against every flavour and build the report.

    Args:
        pdfs: PDF paths to check.
        flavours: (flavour id, label, blocking) tuples to validate against.
        timeout: Per-check timeout in seconds, forwarded to `_verapdf`.

    Returns:
        A report dict with the tool name, veraPDF availability, start and
        finish timestamps (UTC, ISO 8601), the flavour catalogue, overall
        and per-flavour status counts, the checks grouped per PDF, and the
        flat list of checks.
    """
    started = datetime.now(UTC).isoformat()
    checks = [
        _verapdf(pdf, flavour_id, timeout=timeout)
        for pdf in pdfs
        for flavour_id, _label, _blocking in flavours
    ]
    finished = datetime.now(UTC).isoformat()

    # Per-flavour tallies start at zero for every known status so flavours
    # with no checks still show up in the report.
    by_flavour = {}
    for entry in flavours:
        by_flavour[entry[0]] = {"pass": 0, "fail": 0, "skip": 0, "error": 0}

    # One pass groups checks by PDF and bumps the matching flavour tally.
    by_pdf = {}
    for check in checks:
        by_pdf.setdefault(check["pdf"], []).append(check)
        tally = by_flavour[check["flavour"]]
        status_key = check["status"].lower()
        tally[status_key] = tally.get(status_key, 0) + 1

    statuses = [check["status"] for check in checks]
    summary = {
        "pdfs": len(pdfs),
        "checks": len(checks),
        "pass": statuses.count("PASS"),
        "fail": statuses.count("FAIL"),
        "skip": statuses.count("SKIP"),
        "error": statuses.count("ERROR"),
    }

    catalogue = [
        {"id": entry[0], "label": entry[1], "blocking": entry[2]}
        for entry in flavours
    ]
    return {
        "tool": "euxis-audit",
        "verapdf_present": _have_verapdf(),
        "started_at": started,
        "finished_at": finished,
        "flavours": catalogue,
        "summary": summary,
        "by_flavour": by_flavour,
        "by_pdf": by_pdf,
        "checks": checks,
    }
224
+
225
+
226
def render_markdown(report: dict) -> str:
    """Render an audit report dict as a Markdown document.

    The document has a header, an overall summary, a per-flavour count
    table, a per-PDF status matrix and, when the summary counts any FAIL
    or ERROR, a list of the failing checks. The text ends with a newline.
    """
    totals = report["summary"]

    out = [
        "# Euxis EAA / Accessibility Audit Report",
        "",
        f"- Tool: `{report['tool']}`",
        f"- veraPDF available: **{report['verapdf_present']}**",
        f"- Started: {report['started_at']}",
        f"- Finished: {report['finished_at']}",
        "",
        "## Summary",
        "",
        f"- PDFs audited: **{totals['pdfs']}**",
        f"- Total checks: **{totals['checks']}**",
        (
            f"- PASS: **{totals['pass']}**, FAIL: **{totals['fail']}**, "
            f"SKIP: {totals['skip']}, ERROR: {totals['error']}"
        ),
        "",
        "## Per-flavour",
        "",
        "| Flavour | PASS | FAIL | SKIP | ERROR |",
        "|---|---:|---:|---:|---:|",
    ]
    out.extend(
        f"| `{flavour_id}` | {counts.get('pass', 0)} | {counts.get('fail', 0)} | "
        f"{counts.get('skip', 0)} | {counts.get('error', 0)} |"
        for flavour_id, counts in report["by_flavour"].items()
    )

    # Status matrix: one row per PDF, one column per flavour.
    flavours = report["flavours"]
    heading_cells = " | ".join(f"`{flavour['id']}`" for flavour in flavours)
    out += [
        "",
        "## Per-PDF",
        "",
        f"| PDF | {heading_cells} |",
        "|---" * (len(flavours) + 1) + "|",
    ]
    for pdf, pdf_checks in report["by_pdf"].items():
        cells = [f"`{Path(pdf).name}`"]
        for flavour in flavours:
            # First check recorded for this flavour, if any.
            hit = next(
                (check for check in pdf_checks if check["flavour"] == flavour["id"]),
                None,
            )
            cells.append(hit["status"] if hit else "—")
        out.append("| " + " | ".join(cells) + " |")
    out.append("")

    if totals["fail"] or totals["error"]:
        out += ["## Failures", ""]
        for check in report["checks"]:
            if check["status"] not in ("FAIL", "ERROR"):
                continue
            out.append(
                f"- `{Path(check['pdf']).name}` `{check['flavour']}`: "
                f"**{check['status']}** — {check['line'] or check['error']}"
            )
        out.append("")

    return "\n".join(out) + "\n"
277
+
278
+
279
def _is_blocking(report: dict, status_set=("FAIL", "ERROR")) -> bool:
    """Tell whether a check of a blocking flavour has a status in `status_set`.

    Only flavours flagged ``blocking`` in ``report["flavours"]`` are
    considered; checks of any other flavour never block.
    """
    gate_ids = {flavour["id"] for flavour in report["flavours"] if flavour["blocking"]}
    return any(
        check["flavour"] in gate_ids and check["status"] in status_set
        for check in report["checks"]
    )
286
+
287
+
288
def main(argv=None):
    """Entry point for `python -m inclusio.cli.audit`.

    Parses the audit CLI flags, resolves the PDF set, runs veraPDF over
    every (pdf, flavour) pair, writes JSON + Markdown reports, and
    returns the process exit code.

    Args:
        argv: Argument list to parse; ``None`` lets argparse read
            ``sys.argv``.

    Returns:
        0 on success. 1 in `--strict` mode when the flavour selection is
        empty or contains unknown ids, when no PDFs are found, when
        veraPDF is not installed, or when any blocking-flavour check ended
        in FAIL or ERROR.
    """
    parser = argparse.ArgumentParser(
        description="Euxis EAA / accessibility audit for built PDFs.",
    )
    parser.add_argument(
        "target",
        nargs="?",
        default=str(DEFAULT_BUILD_DIR),
        help="PDF file or directory to audit (default: $INCLUSIO_CONTENT_DIR/build)",
    )
    parser.add_argument(
        "--flavours",
        default=",".join(f[0] for f in DEFAULT_FLAVOURS),
        help="Comma-separated veraPDF flavours (default: ua2,wt1a,4f)",
    )
    parser.add_argument(
        "--json",
        default=None,
        help="Path to write JSON report (default: build/.audit/eaa-<ts>.json)",
    )
    parser.add_argument(
        "--markdown",
        default=None,
        help="Path to write Markdown summary (default: build/.audit/eaa-<ts>.md)",
    )
    parser.add_argument(
        "--strict",
        action="store_true",
        help="Exit non-zero on any blocking-flavour FAIL/ERROR (for CI gating)",
    )
    parser.add_argument(
        "--timeout",
        type=int,
        default=90,
        help="Per-check timeout in seconds (default: 90)",
    )
    parser.add_argument(
        "--all",
        action="store_true",
        help="Audit every PDF under target (default: only registered docs)",
    )
    parser.add_argument(
        "--meta",
        default=str(DEFAULT_META),
        help=f"meta.yaml path for registry filter (default: {DEFAULT_META})",
    )
    args = parser.parse_args(argv)

    requested = [f.strip() for f in args.flavours.split(",") if f.strip()]
    flavours = [f for f in DEFAULT_FLAVOURS if f[0] in requested]
    unknown = set(requested) - {f[0] for f in flavours}
    if unknown:
        print(
            f"WARNING: unknown flavours skipped: {', '.join(sorted(unknown))}",
            file=sys.stderr,
        )
    # Fail closed in strict mode: a mistyped or empty flavour selection would
    # otherwise drop a gate (or run zero checks) and still exit 0.
    if args.strict and (unknown or not flavours):
        print(
            "STRICT MODE: the flavour selection is empty or contains unknown "
            "flavours; refusing to run with a reduced set of gates.",
            file=sys.stderr,
        )
        return 1

    target = Path(args.target).resolve()
    registry = set() if args.all else _registry_stems(Path(args.meta))
    pdfs = collect_pdfs(target, DEFAULT_BUILD_DIR, registry_stems=registry)
    if not pdfs:
        print(f"No PDFs found under {target}", file=sys.stderr)
        # Empty target is still success in non-strict mode (nothing to audit).
        return 0 if not args.strict else 1

    report = audit(pdfs, flavours=flavours, timeout=args.timeout)

    # Output paths. The timestamp keeps the compact "YYYYMMDDTHHMMSS" prefix
    # of the finish time.
    ts = report["finished_at"].replace(":", "").replace("-", "")[:15]
    json_path = Path(args.json) if args.json else DEFAULT_AUDIT_DIR / f"eaa-{ts}.json"
    md_path = Path(args.markdown) if args.markdown else DEFAULT_AUDIT_DIR / f"eaa-{ts}.md"

    # Create only the directories actually written to; the default audit
    # directory is left alone when both report paths are given explicitly.
    json_path.parent.mkdir(parents=True, exist_ok=True)
    md_path.parent.mkdir(parents=True, exist_ok=True)
    markdown = render_markdown(report)
    json_path.write_text(json.dumps(report, indent=2) + "\n", encoding="utf-8")
    md_path.write_text(markdown, encoding="utf-8")

    print(markdown)
    print(f"JSON: {json_path}")
    print(f"Markdown: {md_path}")

    # Order matters: a missing veraPDF is the root cause for every SKIP
    # status in the report, so surface it first with a clearer error.
    # Otherwise `_is_blocking` (which counts only FAIL/ERROR, not SKIP)
    # returns False on a runner with no verapdf installed, and the gate
    # silently passes — exactly the opposite of what --strict promises.
    if args.strict and not report["verapdf_present"]:
        print(
            "STRICT MODE: verapdf is required but not installed on PATH. "
            "Install via `brew install verapdf` on macOS or see "
            ".github/workflows/verapdf.yml for the Linux installer.",
            file=sys.stderr,
        )
        return 1
    if args.strict and _is_blocking(report):
        print("STRICT MODE: blocking-flavour failures detected.", file=sys.stderr)
        return 1
    return 0
394
+
395
+
396
# Script entry: hand main()'s return value to the interpreter as exit status.
if __name__ == "__main__":
    raise SystemExit(main())