invarlock-0.3.6-py3-none-any.whl → invarlock-0.3.8-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- invarlock/__init__.py +4 -4
- invarlock/adapters/__init__.py +10 -14
- invarlock/adapters/auto.py +37 -50
- invarlock/adapters/capabilities.py +2 -2
- invarlock/adapters/hf_causal.py +418 -0
- invarlock/adapters/{hf_onnx.py → hf_causal_onnx.py} +3 -3
- invarlock/adapters/hf_loading.py +7 -7
- invarlock/adapters/hf_mixin.py +53 -9
- invarlock/adapters/{hf_bert.py → hf_mlm.py} +4 -11
- invarlock/adapters/{hf_t5.py → hf_seq2seq.py} +9 -9
- invarlock/assurance/__init__.py +15 -23
- invarlock/cli/adapter_auto.py +32 -26
- invarlock/cli/app.py +128 -27
- invarlock/cli/commands/__init__.py +2 -2
- invarlock/cli/commands/calibrate.py +48 -4
- invarlock/cli/commands/doctor.py +8 -10
- invarlock/cli/commands/evaluate.py +986 -0
- invarlock/cli/commands/explain_gates.py +25 -17
- invarlock/cli/commands/export_html.py +11 -9
- invarlock/cli/commands/plugins.py +13 -9
- invarlock/cli/commands/report.py +326 -92
- invarlock/cli/commands/run.py +1160 -228
- invarlock/cli/commands/verify.py +157 -97
- invarlock/cli/config.py +1 -1
- invarlock/cli/determinism.py +1 -1
- invarlock/cli/doctor_helpers.py +4 -5
- invarlock/cli/output.py +193 -0
- invarlock/cli/provenance.py +4 -4
- invarlock/core/bootstrap.py +1 -1
- invarlock/core/registry.py +9 -11
- invarlock/core/retry.py +14 -14
- invarlock/core/runner.py +112 -26
- invarlock/edits/noop.py +2 -2
- invarlock/edits/quant_rtn.py +67 -39
- invarlock/eval/__init__.py +1 -1
- invarlock/eval/bench.py +14 -10
- invarlock/eval/data.py +68 -23
- invarlock/eval/metrics.py +59 -1
- invarlock/eval/primary_metric.py +1 -1
- invarlock/eval/tasks/__init__.py +12 -0
- invarlock/eval/tasks/classification.py +48 -0
- invarlock/eval/tasks/qa.py +36 -0
- invarlock/eval/tasks/text_generation.py +102 -0
- invarlock/guards/invariants.py +19 -10
- invarlock/guards/rmt.py +2 -2
- invarlock/guards/spectral.py +1 -1
- invarlock/guards/variance.py +2 -2
- invarlock/model_profile.py +64 -62
- invarlock/observability/health.py +6 -6
- invarlock/observability/metrics.py +108 -0
- invarlock/plugins/hf_bnb_adapter.py +32 -21
- invarlock/reporting/__init__.py +18 -4
- invarlock/reporting/guards_analysis.py +154 -4
- invarlock/reporting/html.py +61 -11
- invarlock/reporting/normalizer.py +9 -2
- invarlock/reporting/policy_utils.py +1 -1
- invarlock/reporting/primary_metric_utils.py +11 -11
- invarlock/reporting/render.py +876 -510
- invarlock/reporting/report.py +72 -30
- invarlock/reporting/{certificate.py → report_builder.py} +252 -99
- invarlock/reporting/{certificate_schema.py → report_schema.py} +22 -22
- invarlock/reporting/report_types.py +6 -1
- invarlock/reporting/telemetry.py +86 -0
- invarlock-0.3.8.dist-info/METADATA +283 -0
- {invarlock-0.3.6.dist-info → invarlock-0.3.8.dist-info}/RECORD +69 -64
- {invarlock-0.3.6.dist-info → invarlock-0.3.8.dist-info}/WHEEL +1 -1
- {invarlock-0.3.6.dist-info → invarlock-0.3.8.dist-info}/entry_points.txt +5 -3
- invarlock/adapters/hf_gpt2.py +0 -404
- invarlock/adapters/hf_llama.py +0 -487
- invarlock/cli/commands/certify.py +0 -422
- invarlock-0.3.6.dist-info/METADATA +0 -588
- {invarlock-0.3.6.dist-info → invarlock-0.3.8.dist-info}/licenses/LICENSE +0 -0
- {invarlock-0.3.6.dist-info → invarlock-0.3.8.dist-info}/top_level.txt +0 -0
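The most visible change in this release is the rename of the "certificate" reporting path to an "evaluation report": `certificate.py` becomes `report_builder.py`, `certificate_schema.py` becomes `report_schema.py`, and the `certify` CLI command appears to be replaced by `evaluate`. A hedged migration sketch follows; the new names are taken from the added lines in the `render.py` diff below, while the 0.3.6 import path is an assumption inferred from the removed lines rather than verified against the old wheel.

```python
# Hedged migration sketch, based only on the renames listed above and the
# render.py diff below; the 0.3.6 import path is an inferred assumption.

# invarlock 0.3.6 (old names, per the removed "certificate" code paths)
# from invarlock.reporting.render import render_certificate_markdown
# report_md = render_certificate_markdown(certificate)

# invarlock 0.3.8 (new names, per the added lines in render.py)
from invarlock.reporting.render import render_report_markdown
from invarlock.reporting.report_schema import validate_report


def render_if_valid(evaluation_report: dict) -> str | None:
    """Render the Markdown report only when the report passes schema validation."""
    if not validate_report(evaluation_report):
        return None
    return render_report_markdown(evaluation_report)
```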
invarlock/reporting/render.py
CHANGED
@@ -9,8 +9,7 @@ from typing import Any
 
 import yaml
 
-
-from . import certificate as C
+from .report_schema import validate_report
 
 # Console Validation Block helpers (allow-list driven)
 _CONSOLE_LABELS_DEFAULT = [
@@ -37,8 +36,10 @@ def _load_console_labels() -> list[str]:
     return list(_CONSOLE_LABELS_DEFAULT)
 
 
-def compute_console_validation_block(
-
+def compute_console_validation_block(
+    evaluation_report: dict[str, Any],
+) -> dict[str, Any]:
+    """Produce a normalized console validation block from an evaluation report.
 
     Returns a dict with keys:
     - labels: the canonical label list
@@ -47,8 +48,8 @@ def compute_console_validation_block(certificate: dict[str, Any]) -> dict[str, A
       counted only when evaluated.
     """
     labels = _load_console_labels()
-    validation =
-    guard_ctx =
+    validation = evaluation_report.get("validation", {}) or {}
+    guard_ctx = evaluation_report.get("guard_overhead", {}) or {}
     guard_evaluated = (
         bool(guard_ctx.get("evaluated")) if isinstance(guard_ctx, dict) else False
     )
@@ -113,6 +114,462 @@ def _short_digest(v: str) -> str:
     return v if len(v) <= 16 else (v[:8] + "…" + v[-8:])
 
 
+def _render_executive_dashboard(cert: dict[str, Any]) -> str:
+    """Render executive summary dashboard table."""
+    lines: list[str] = []
+    _append_safety_dashboard_section(lines, cert)
+    return "\n".join(lines).rstrip()
+
+
+def _append_safety_dashboard_section(
+    lines: list[str], evaluation_report: dict[str, Any]
+) -> None:
+    """Append a concise, first-screen dashboard for the evaluation report."""
+    block = compute_console_validation_block(evaluation_report)
+    overall_pass = bool(block.get("overall_pass"))
+    overall_status = (
+        f"{'✅' if overall_pass else '❌'} {'PASS' if overall_pass else 'FAIL'}"
+    )
+
+    validation = evaluation_report.get("validation", {}) or {}
+    pm = evaluation_report.get("primary_metric", {}) or {}
+    auto = evaluation_report.get("auto", {}) or {}
+    tier = str(auto.get("tier") or "balanced").lower()
+
+    # Primary metric summary
+    pm_kind = str(pm.get("kind", "")).lower()
+    pm_basis = pm.get("gating_basis") or pm.get("basis") or "point"
+    pm_ok: bool | None
+    if isinstance(validation, dict) and "primary_metric_acceptable" in validation:
+        pm_ok = bool(validation.get("primary_metric_acceptable"))
+    else:
+        pm_ok = None
+    pm_value = pm.get("ratio_vs_baseline")
+
+    if pm_kind in {"accuracy", "vqa_accuracy"}:
+        measured = f"{pm_value:+.2f} pp" if isinstance(pm_value, int | float) else "N/A"
+        th_map = {
+            "conservative": -0.5,
+            "balanced": -1.0,
+            "aggressive": -2.0,
+            "none": -1.0,
+        }
+        th = th_map.get(tier, -1.0)
+        threshold = f"≥ {th:+.2f} pp ({pm_basis})"
+    else:
+        measured = f"{pm_value:.3f}×" if isinstance(pm_value, int | float) else "N/A"
+        tier_thresholds = {
+            "conservative": 1.05,
+            "balanced": 1.10,
+            "aggressive": 1.20,
+            "none": 1.10,
+        }
+        ratio_limit = tier_thresholds.get(tier, 1.10)
+        target_ratio = auto.get("target_pm_ratio")
+        if isinstance(target_ratio, int | float) and target_ratio > 0:
+            ratio_limit = min(ratio_limit, float(target_ratio))
+        threshold = f"≤ {ratio_limit:.2f}× ({pm_basis})"
+
+    pm_status = (
+        f"{'✅' if pm_ok else '❌'} {measured}"
+        if isinstance(pm_ok, bool)
+        else f"ℹ️ {measured}"
+    )
+
+    # Drift summary (final/preview ratio) when preview/final are numeric
+    drift_ok: bool | None
+    if isinstance(validation, dict) and "preview_final_drift_acceptable" in validation:
+        drift_ok = bool(validation.get("preview_final_drift_acceptable"))
+    else:
+        drift_ok = None
+    drift_val = "N/A"
+    try:
+        pv = (
+            float(pm.get("preview"))
+            if isinstance(pm.get("preview"), int | float)
+            else float("nan")
+        )
+        fv = (
+            float(pm.get("final"))
+            if isinstance(pm.get("final"), int | float)
+            else float("nan")
+        )
+        drift = (
+            fv / pv
+            if (math.isfinite(pv) and pv > 0 and math.isfinite(fv))
+            else float("nan")
+        )
+        if math.isfinite(drift):
+            drift_val = f"{drift:.3f}×"
+    except Exception:
+        drift_val = "N/A"
+    drift_status = (
+        f"{'✅' if drift_ok else '❌'} {drift_val}"
+        if isinstance(drift_ok, bool)
+        else f"ℹ️ {drift_val}"
+    )
+
+    def _gate_cell(key: str, ok_default: bool | None = None) -> str:
+        ok: bool | None
+        if not isinstance(validation, dict):
+            ok = ok_default
+        elif key not in validation:
+            ok = ok_default
+        else:
+            ok = bool(validation.get(key))
+        if ok is None:
+            return "ℹ️ N/A"
+        return "✅ PASS" if ok else "❌ FAIL"
+
+    overhead_ctx = evaluation_report.get("guard_overhead", {}) or {}
+    overhead_evaluated = (
+        bool(overhead_ctx.get("evaluated")) if isinstance(overhead_ctx, dict) else False
+    )
+    overhead_row: tuple[str, str, str] | None = None
+    if overhead_evaluated:
+        overhead_pct = overhead_ctx.get("overhead_percent")
+        overhead_ratio = overhead_ctx.get("overhead_ratio")
+        if isinstance(overhead_pct, int | float) and math.isfinite(float(overhead_pct)):
+            overhead_measured = f"{float(overhead_pct):+.2f}%"
+        elif isinstance(overhead_ratio, int | float) and math.isfinite(
+            float(overhead_ratio)
+        ):
+            overhead_measured = f"{float(overhead_ratio):.3f}×"
+        else:
+            overhead_measured = "N/A"
+        threshold_pct = overhead_ctx.get("threshold_percent")
+        if isinstance(threshold_pct, int | float) and math.isfinite(
+            float(threshold_pct)
+        ):
+            threshold_str = f"≤ +{float(threshold_pct):.1f}%"
+        else:
+            threshold_str = "≤ +1.0%"
+        overhead_row = (
+            "Overhead",
+            f"{'✅' if bool(validation.get('guard_overhead_acceptable', True)) else '❌'} {overhead_measured}"
+            if isinstance(validation, dict)
+            else f"ℹ️ {overhead_measured}",
+            threshold_str,
+        )
+
+    lines.append("## Evaluation Dashboard")
+    lines.append("")
+    lines.append("| Check | Status | Quick Summary |")
+    lines.append("|-------|--------|---------------|")
+    lines.append(f"| Overall | {overall_status} | Canonical gate outcomes |")
+    lines.append(f"| Primary Metric | {pm_status} | {threshold} |")
+    lines.append(f"| Drift | {drift_status} | 0.95–1.05× band |")
+    lines.append(
+        f"| Invariants | {_gate_cell('invariants_pass')} | Model integrity checks |"
+    )
+    lines.append(
+        f"| Spectral | {_gate_cell('spectral_stable')} | Weight matrix spectral norms |"
+    )
+    lines.append(f"| RMT | {_gate_cell('rmt_stable')} | Random Matrix Theory guard |")
+    if overhead_row:
+        lines.append(f"| {overhead_row[0]} | {overhead_row[1]} | {overhead_row[2]} |")
+    lines.append("")
+
+
+def _append_primary_metric_section(
+    lines: list[str], evaluation_report: dict[str, Any]
+) -> None:
+    """Append the Primary Metric section early for quick triage."""
+    pm = evaluation_report.get("primary_metric")
+    if not isinstance(pm, dict) or not pm:
+        return
+
+    kind = pm.get("kind", "unknown")
+    lines.append("## Primary Metric")
+    lines.append("")
+    unit = pm.get("unit", "-")
+    paired = pm.get("paired", False)
+
+    estimated_flag = False
+    try:
+        if bool(pm.get("estimated")):
+            estimated_flag = True
+        elif str(pm.get("counts_source", "")).lower() == "pseudo_config":
+            estimated_flag = True
+    except Exception:
+        estimated_flag = False
+    est_suffix = " (estimated)" if estimated_flag else ""
+
+    lines.append(f"- Kind: {kind} (unit: {unit}){est_suffix}")
+    gating_basis = pm.get("gating_basis") or pm.get("basis")
+    if gating_basis:
+        lines.append(f"- Basis: {gating_basis}")
+    if isinstance(paired, bool):
+        lines.append(f"- Paired: {paired}")
+    reps = pm.get("reps")
+    if isinstance(reps, int | float):
+        lines.append(f"- Bootstrap Reps: {int(reps)}")
+    ci = pm.get("ci") or pm.get("display_ci")
+    if (
+        isinstance(ci, list | tuple)
+        and len(ci) == 2
+        and all(isinstance(x, int | float) for x in ci)
+    ):
+        lines.append(f"- CI: {ci[0]:.3f}–{ci[1]:.3f}")
+
+    prev = pm.get("preview")
+    fin = pm.get("final")
+    ratio = pm.get("ratio_vs_baseline")
+
+    lines.append("")
+    if estimated_flag and str(kind).lower() in {"accuracy", "vqa_accuracy"}:
+        lines.append(
+            "- Note: Accuracy derived from pseudo counts (quick dev preset); use a labeled preset for measured accuracy."
+        )
+    lines.append("| Field | Value |")
+    lines.append("|-------|-------|")
+    lines.append(f"| Preview | {_fmt_by_kind(prev, str(kind))} |")
+    lines.append(f"| Final | {_fmt_by_kind(fin, str(kind))} |")
+
+    if kind in {"accuracy", "vqa_accuracy"}:
+        lines.append(f"| Δ vs Baseline | {_fmt_by_kind(ratio, str(kind))} |")
+        try:
+            base_pt = pm.get("baseline_point")
+            if isinstance(base_pt, int | float) and base_pt < 0.05:
+                lines.append("- Note: baseline < 5%; ratio suppressed; showing Δpp")
+        except Exception:
+            pass
+    else:
+        try:
+            lines.append(f"| Ratio vs Baseline | {float(ratio):.3f} |")
+        except Exception:
+            lines.append("| Ratio vs Baseline | N/A |")
+    lines.append("")
+
+    # Secondary metrics (informational)
+    try:
+        secs = evaluation_report.get("secondary_metrics")
+        if isinstance(secs, list) and secs:
+            lines.append("## Secondary Metrics (informational)")
+            lines.append("")
+            lines.append("| Kind | Preview | Final | vs Baseline | CI |")
+            lines.append("|------|---------|-------|-------------|----|")
+            for m in secs:
+                if not isinstance(m, dict):
+                    continue
+                k = m.get("kind", "?")
+                pv = _fmt_by_kind(m.get("preview"), str(k))
+                fv = _fmt_by_kind(m.get("final"), str(k))
+                rb = m.get("ratio_vs_baseline")
+                try:
+                    rb_str = (
+                        f"{float(rb):.3f}"
+                        if (str(k).startswith("ppl"))
+                        else _fmt_by_kind(rb, str(k))
+                    )
+                except Exception:
+                    rb_str = "N/A"
+                ci = m.get("display_ci") or m.get("ci")
+                if isinstance(ci, tuple | list) and len(ci) == 2:
+                    ci_str = f"{float(ci[0]):.3f}-{float(ci[1]):.3f}"
+                else:
+                    ci_str = "–"
+                lines.append(f"| {k} | {pv} | {fv} | {rb_str} | {ci_str} |")
+            lines.append("")
+    except Exception:
+        pass
+
+
+def _append_policy_configuration_section(
+    lines: list[str], evaluation_report: dict[str, Any]
+) -> None:
+    resolved_policy = evaluation_report.get("resolved_policy")
+    policy_provenance = evaluation_report.get("policy_provenance", {}) or {}
+    has_prov = isinstance(policy_provenance, dict) and bool(policy_provenance)
+    has_resolved = isinstance(resolved_policy, dict) and bool(resolved_policy)
+    if not (has_prov or has_resolved):
+        return
+
+    lines.append("## Policy Configuration")
+    lines.append("")
+
+    tier = None
+    if has_prov:
+        tier = policy_provenance.get("tier")
+    if not tier:
+        tier = (evaluation_report.get("auto", {}) or {}).get("tier")
+    digest_value = None
+    if has_prov:
+        digest_value = policy_provenance.get("policy_digest")
+    if not digest_value:
+        digest_value = (evaluation_report.get("policy_digest", {}) or {}).get(
+            "thresholds_hash"
+        )
+
+    summary_parts: list[str] = []
+    if tier:
+        summary_parts.append(f"**Tier:** {tier}")
+    if digest_value:
+        summary_parts.append(f"**Digest:** `{_short_digest(str(digest_value))}`")
+    if summary_parts:
+        lines.append(" | ".join(summary_parts))
+
+    if has_prov:
+        overrides_list = policy_provenance.get("overrides") or []
+        if overrides_list:
+            lines.append(f"- **Overrides:** {', '.join(overrides_list)}")
+        else:
+            lines.append("- **Overrides:** (none)")
+        if policy_provenance.get("resolved_at"):
+            lines.append(f"- **Resolved At:** {policy_provenance.get('resolved_at')}")
+
+    if has_resolved:
+        lines.append("")
+        lines.append("<details>")
+        lines.append("<summary>Resolved Policy YAML</summary>")
+        lines.append("")
+        lines.append("```yaml")
+        resolved_yaml = yaml.safe_dump(
+            resolved_policy, sort_keys=True, width=80, default_flow_style=False
+        ).strip()
+        for line in resolved_yaml.splitlines():
+            lines.append(line)
+        lines.append("```")
+        lines.append("")
+        lines.append("</details>")
+
+    lines.append("")
+
+
+def _append_dataset_and_provenance_section(
+    lines: list[str], evaluation_report: dict[str, Any]
+) -> None:
+    dataset = evaluation_report.get("dataset", {}) or {}
+    provenance_info = evaluation_report.get("provenance", {}) or {}
+
+    has_dataset = isinstance(dataset, dict) and bool(dataset)
+    has_provenance = isinstance(provenance_info, dict) and bool(provenance_info)
+    if not (has_dataset or has_provenance):
+        return
+
+    lines.append("## Dataset and Provenance")
+    lines.append("")
+
+    if has_dataset:
+        prov = dataset.get("provider") or "unknown"
+        lines.append(f"- **Provider:** {prov}")
+        try:
+            seq_len_val = (
+                int(dataset.get("seq_len"))
+                if isinstance(dataset.get("seq_len"), int | float)
+                else dataset.get("seq_len")
+            )
+        except Exception:  # pragma: no cover - defensive
+            seq_len_val = dataset.get("seq_len")
+        if seq_len_val is not None:
+            lines.append(f"- **Sequence Length:** {seq_len_val}")
+        windows_blk = (
+            dataset.get("windows", {})
+            if isinstance(dataset.get("windows"), dict)
+            else {}
+        )
+        win_prev = windows_blk.get("preview")
+        win_final = windows_blk.get("final")
+        if win_prev is not None and win_final is not None:
+            lines.append(f"- **Windows:** {win_prev} preview + {win_final} final")
+        if windows_blk.get("seed") is not None:
+            lines.append(f"- **Seed:** {windows_blk.get('seed')}")
+        hash_blk = (
+            dataset.get("hash", {}) if isinstance(dataset.get("hash"), dict) else {}
+        )
+        if hash_blk.get("preview_tokens") is not None:
+            lines.append(f"- **Preview Tokens:** {hash_blk.get('preview_tokens'):,}")
+        if hash_blk.get("final_tokens") is not None:
+            lines.append(f"- **Final Tokens:** {hash_blk.get('final_tokens'):,}")
+        if hash_blk.get("total_tokens") is not None:
+            lines.append(f"- **Total Tokens:** {hash_blk.get('total_tokens'):,}")
+        if hash_blk.get("dataset"):
+            lines.append(f"- **Dataset Hash:** {hash_blk.get('dataset')}")
+        tokenizer = dataset.get("tokenizer", {})
+        if isinstance(tokenizer, dict) and (
+            tokenizer.get("name") or tokenizer.get("hash")
+        ):
+            vocab_size = tokenizer.get("vocab_size")
+            vocab_suffix = (
+                f" (vocab {vocab_size})" if isinstance(vocab_size, int) else ""
+            )
+            lines.append(
+                f"- **Tokenizer:** {tokenizer.get('name', 'unknown')}{vocab_suffix}"
+            )
+            if tokenizer.get("hash"):
+                lines.append(f" - Hash: {tokenizer['hash']}")
+            lines.append(
+                f" - BOS/EOS: {tokenizer.get('bos_token')} / {tokenizer.get('eos_token')}"
+            )
+            if tokenizer.get("pad_token") is not None:
+                lines.append(f" - PAD: {tokenizer.get('pad_token')}")
+            if tokenizer.get("add_prefix_space") is not None:
+                lines.append(
+                    f" - add_prefix_space: {tokenizer.get('add_prefix_space')}"
+                )
+
+    if has_provenance:
+        baseline_info = provenance_info.get("baseline", {}) or {}
+        edited_info = provenance_info.get("edited", {}) or {}
+
+        if baseline_info or edited_info:
+            lines.append("")
+            if baseline_info:
+                lines.append(f"- **Baseline Run ID:** {baseline_info.get('run_id')}")
+                if baseline_info.get("report_hash"):
+                    lines.append(f" - Report Hash: `{baseline_info.get('report_hash')}`")
+                if baseline_info.get("report_path"):
+                    lines.append(f" - Report Path: {baseline_info.get('report_path')}")
+            if edited_info:
+                lines.append(f"- **Edited Run ID:** {edited_info.get('run_id')}")
+                if edited_info.get("report_hash"):
+                    lines.append(f" - Report Hash: `{edited_info.get('report_hash')}`")
+                if edited_info.get("report_path"):
+                    lines.append(f" - Report Path: {edited_info.get('report_path')}")
+
+        provider_digest = provenance_info.get("provider_digest")
+        if isinstance(provider_digest, dict) and provider_digest:
+            ids_d = provider_digest.get("ids_sha256")
+            tok_d = provider_digest.get("tokenizer_sha256")
+            mask_d = provider_digest.get("masking_sha256")
+
+            lines.append("- **Provider Digest:**")
+            if tok_d:
+                lines.append(
+                    f" - tokenizer_sha256: `{_short_digest(tok_d)}` (full in JSON)"
+                )
+            if ids_d:
+                lines.append(f" - ids_sha256: `{_short_digest(ids_d)}` (full in JSON)")
+            if mask_d:
+                lines.append(
+                    f" - masking_sha256: `{_short_digest(mask_d)}` (full in JSON)"
+                )
+
+    try:
+        conf = evaluation_report.get("confidence", {}) or {}
+        if isinstance(conf, dict) and conf.get("label"):
+            lines.append(f"- **Confidence:** {conf.get('label')}")
+    except Exception:
+        pass
+
+    try:
+        pd = evaluation_report.get("policy_digest", {}) or {}
+        if isinstance(pd, dict) and pd:
+            pv = pd.get("policy_version")
+            th = pd.get("thresholds_hash")
+            if pv:
+                lines.append(f"- **Policy Version:** {pv}")
+            if isinstance(th, str) and th:
+                short = th if len(th) <= 16 else (th[:8] + "…" + th[-8:])
+                lines.append(f"- **Thresholds Digest:** `{short}` (full in JSON)")
+            if pd.get("changed"):
+                lines.append("- Note: policy changed")
+    except Exception:
+        pass
+
+    lines.append("")
+
+
 def _fmt_by_kind(x: Any, k: str) -> str:
     try:
         xv = float(x)
@@ -215,13 +672,13 @@ def _append_accuracy_subgroups(lines: list[str], subgroups: dict[str, Any]) -> N
     lines.append("")
 
 
-def
-"""Compute integrity hash for the
+def _compute_report_hash(evaluation_report: dict[str, Any]) -> str:
+    """Compute integrity hash for the evaluation_report.
 
     Hash ignores the `artifacts` section for stability across saves.
     """
     # Create a copy without the artifacts section for stable hashing
-    cert_copy = dict(
+    cert_copy = dict(evaluation_report or {})
     cert_copy.pop("artifacts", None)
 
     # Sort keys for deterministic hashing
@@ -231,8 +688,8 @@ def _compute_certificate_hash(certificate: dict[str, Any]) -> str:
     return _hash.sha256(cert_str.encode()).hexdigest()[:16]
 
 
-def build_console_summary_pack(
-    """Build a small, reusable console summary pack from a
+def build_console_summary_pack(evaluation_report: dict[str, Any]) -> dict[str, Any]:
+    """Build a small, reusable console summary pack from a evaluation_report.
 
     Returns a dict with:
     - overall_pass: bool
@@ -240,7 +697,7 @@ def build_console_summary_pack(certificate: dict[str, Any]) -> dict[str, Any]:
     - gate_lines: list of "<Label>: <Status>" strings for each evaluated gate
     - labels: the canonical label list used
     """
-    block = compute_console_validation_block(
+    block = compute_console_validation_block(evaluation_report)
     overall_pass = bool(block.get("overall_pass"))
     emoji = "✅" if overall_pass else "❌"
     overall_line = f"Overall Status: {emoji} {'PASS' if overall_pass else 'FAIL'}"
@@ -261,38 +718,38 @@ def build_console_summary_pack(certificate: dict[str, Any]) -> dict[str, Any]:
     }
 
 
-def
+def render_report_markdown(evaluation_report: dict[str, Any]) -> str:
     """
-    Render
+    Render an evaluation report as a formatted Markdown report with pretty tables.
 
-    This implementation is moved from
-    To avoid circular import issues, we alias helpers from the certificate
-    module inside the function body.
+    This implementation is moved from report_builder.py to keep that module lean.
     """
-
-
-
-    if not validate_certificate(certificate):
-        raise ValueError("Invalid certificate structure")
+    if not validate_report(evaluation_report):
+        raise ValueError("Invalid evaluation report structure")
 
-    lines = []
-
+    lines: list[str] = []
+    appendix_lines: list[str] = []
+    edit_name = str(evaluation_report.get("edit_name") or "").lower()
 
     # Header
-    lines.append("# InvarLock
+    lines.append("# InvarLock Evaluation Report")
     lines.append("")
     lines.append(
         "> *Basis: “point” gates check the point estimate; “upper” gates check the CI "
         "upper bound; “point & upper” requires both to pass.*"
     )
     lines.append("")
-    lines.append(f"**Schema Version:** {
-    lines.append(f"**Run ID:** `{
-    lines.append(f"**Generated:** {
-    lines.append(f"**Edit Type:** {
+    lines.append(f"**Schema Version:** {evaluation_report['schema_version']}")
+    lines.append(f"**Run ID:** `{evaluation_report['run_id']}`")
+    lines.append(f"**Generated:** {evaluation_report['artifacts']['generated_at']}")
+    lines.append(f"**Edit Type:** {evaluation_report.get('edit_name', 'Unknown')}")
+    lines.append("")
+    lines.append(
+        "> Full evidence: see [`evaluation.report.json`](evaluation.report.json) for complete provenance, digests, and raw measurements."
+    )
     lines.append("")
 
-    plugins =
+    plugins = evaluation_report.get("plugins", {})
     if isinstance(plugins, dict) and plugins:
         lines.append("## Plugin Provenance")
         lines.append("")
@@ -314,12 +771,12 @@ def render_certificate_markdown(certificate: dict[str, Any]) -> str:
         ]
         if guard_entries:
             lines.append("- Guards:\n - " + "\n - ".join(guard_entries))
-
+        lines.append("")
 
     # Executive Summary with validation status (canonical, from console block)
     lines.append("## Executive Summary")
     lines.append("")
-    _block = compute_console_validation_block(
+    _block = compute_console_validation_block(evaluation_report)
     overall_pass = bool(_block.get("overall_pass"))
     status_emoji = "✅" if overall_pass else "❌"
     lines.append(
@@ -328,13 +785,13 @@ def render_certificate_markdown(certificate: dict[str, Any]) -> str:
     # Window Plan one-liner for quick audit
     try:
         plan_ctx = (
-
-            or
-            or
+            evaluation_report.get("window_plan")
+            or evaluation_report.get("dataset", {}).get("windows", {})
+            or evaluation_report.get("ppl", {}).get("window_plan")
         )
-        seq_len =
-            "
-        ).get("sequence_length")
+        seq_len = evaluation_report.get("dataset", {}).get(
+            "seq_len"
+        ) or evaluation_report.get("dataset", {}).get("sequence_length")
         if isinstance(plan_ctx, dict):
             profile = plan_ctx.get("profile")
             preview_n = (
@@ -354,15 +811,34 @@ def render_certificate_markdown(certificate: dict[str, Any]) -> str:
         pass
     lines.append("")
 
+    dashboard = _render_executive_dashboard(evaluation_report)
+    if dashboard:
+        lines.extend(dashboard.splitlines())
+        lines.append("")
+
+    lines.append("## Contents")
+    lines.append("")
+    lines.append("- [Evaluation Dashboard](#evaluation-dashboard)")
+    lines.append("- [Quality Gates](#quality-gates)")
+    lines.append("- [Guard Check Details](#guard-check-details)")
+    lines.append("- [Primary Metric](#primary-metric)")
+    lines.append("- [Guard Observability](#guard-observability)")
+    lines.append("- [Model Information](#model-information)")
+    lines.append("- [Dataset and Provenance](#dataset-and-provenance)")
+    lines.append("- [Policy Configuration](#policy-configuration)")
+    lines.append("- [Appendix](#appendix)")
+    lines.append("- [Evaluation Report Integrity](#evaluation-report-integrity)")
+    lines.append("")
+
     # Validation table with canonical gates (mirrors console allow-list)
     lines.append("## Quality Gates")
     lines.append("")
     lines.append("| Gate | Status | Measured | Threshold | Basis | Description |")
     lines.append("|------|--------|----------|-----------|-------|-------------|")
 
-    pm_block =
+    pm_block = evaluation_report.get("primary_metric", {}) or {}
     has_pm = isinstance(pm_block, dict) and bool(pm_block)
-    auto_info =
+    auto_info = evaluation_report.get("auto", {})
     tier = (auto_info.get("tier") or "balanced").lower()
 
     # Helper to emit Primary Metric Acceptable row
@@ -371,7 +847,9 @@ def render_certificate_markdown(certificate: dict[str, Any]) -> str:
         value = pm_block.get("ratio_vs_baseline")
         gating_basis = pm_block.get("gating_basis") or "point"
         ok = bool(
-
+            evaluation_report.get("validation", {}).get(
+                "primary_metric_acceptable", True
+            )
         )
         status = "✅ PASS" if ok else "❌ FAIL"
         if pm_kind in {"accuracy", "vqa_accuracy"}:
@@ -405,11 +883,36 @@ def render_certificate_markdown(certificate: dict[str, Any]) -> str:
     # Helper to emit Preview Final Drift Acceptable row
     def _emit_drift_gate_row() -> None:
         ok = bool(
-
+            evaluation_report.get("validation", {}).get(
                 "preview_final_drift_acceptable", True
             )
         )
         status = "✅ PASS" if ok else "❌ FAIL"
+        drift_min = 0.95
+        drift_max = 1.05
+        try:
+            drift_band = (
+                pm_block.get("drift_band") if isinstance(pm_block, dict) else None
+            )
+            if isinstance(drift_band, dict):
+                lo = drift_band.get("min")
+                hi = drift_band.get("max")
+                if isinstance(lo, int | float) and isinstance(hi, int | float):
+                    lo_f = float(lo)
+                    hi_f = float(hi)
+                    if math.isfinite(lo_f) and math.isfinite(hi_f) and 0 < lo_f < hi_f:
+                        drift_min = lo_f
+                        drift_max = hi_f
+            elif isinstance(drift_band, list | tuple) and len(drift_band) == 2:
+                lo_raw, hi_raw = drift_band[0], drift_band[1]
+                if isinstance(lo_raw, int | float) and isinstance(hi_raw, int | float):
+                    lo_f = float(lo_raw)
+                    hi_f = float(hi_raw)
+                    if math.isfinite(lo_f) and math.isfinite(hi_f) and 0 < lo_f < hi_f:
+                        drift_min = lo_f
+                        drift_max = hi_f
+        except Exception:
+            pass
         # Compute drift from PM preview/final when available
         try:
             pv = (
@@ -430,18 +933,21 @@ def render_certificate_markdown(certificate: dict[str, Any]) -> str:
         except Exception:
             drift = float("nan")
         measured = f"{drift:.3f}x" if math.isfinite(drift) else "N/A"
+        band_label = f"{drift_min:.2f}–{drift_max:.2f}x"
         lines.append(
-            f"| Preview Final Drift Acceptable | {status} | {measured} |
+            f"| Preview Final Drift Acceptable | {status} | {measured} | {band_label} | point | Final/Preview ratio stability |"
         )
 
     # Helper to emit Guard Overhead Acceptable row (only when evaluated)
     def _emit_overhead_gate_row() -> None:
-        guard_overhead =
+        guard_overhead = evaluation_report.get("guard_overhead", {}) or {}
         evaluated = bool(guard_overhead.get("evaluated"))
         if not evaluated:
             return
         ok = bool(
-
+            evaluation_report.get("validation", {}).get(
+                "guard_overhead_acceptable", True
+            )
         )
         status = "✅ PASS" if ok else "❌ FAIL"
         overhead_pct = guard_overhead.get("overhead_percent")
@@ -469,7 +975,7 @@ def render_certificate_markdown(certificate: dict[str, Any]) -> str:
         )
 
     def _emit_pm_tail_gate_row() -> None:
-        pm_tail =
+        pm_tail = evaluation_report.get("primary_metric_tail", {}) or {}
         if not isinstance(pm_tail, dict) or not pm_tail:
             return
 
@@ -479,7 +985,7 @@ def render_certificate_markdown(certificate: dict[str, Any]) -> str:
         warned = bool(pm_tail.get("warned", False))
 
         if not evaluated:
-            status = "
+            status = "ℹ️ INFO"
         elif passed:
             status = "✅ PASS"
         elif mode == "fail":
@@ -536,17 +1042,17 @@ def render_certificate_markdown(certificate: dict[str, Any]) -> str:
     _emit_overhead_gate_row()
 
     # Annotate hysteresis usage if applied
-    if
+    if evaluation_report.get("validation", {}).get("hysteresis_applied"):
         lines.append("- Note: hysteresis applied to gate boundary")
 
     lines.append("")
-    lines.append("##
+    lines.append("## Guard Check Details")
     lines.append("")
-    lines.append("|
+    lines.append("| Guard Check | Status | Measured | Threshold | Description |")
     lines.append("|--------------|--------|----------|-----------|-------------|")
 
-    inv_summary =
-    validation =
+    inv_summary = evaluation_report["invariants"]
+    validation = evaluation_report.get("validation", {})
     inv_status = "✅ PASS" if validation.get("invariants_pass", False) else "❌ FAIL"
     inv_counts = inv_summary.get("summary", {}) or {}
     inv_measure = inv_summary.get("status", "pass").upper()
@@ -578,23 +1084,23 @@ def render_certificate_markdown(certificate: dict[str, Any]) -> str:
         lines.append(f"- Non-fatal: {non_fatal_message}")
 
     spec_status = "✅ PASS" if validation.get("spectral_stable", False) else "❌ FAIL"
-    caps_applied =
+    caps_applied = evaluation_report["spectral"]["caps_applied"]
     lines.append(
         f"| Spectral Stability | {spec_status} | {caps_applied} violations | < 5 | Weight matrix spectral norms |"
     )
 
     # Catastrophic spike safety stop row is now driven by primary metric flags
-    if isinstance(
+    if isinstance(evaluation_report.get("primary_metric"), dict):
         pm_ok = bool(validation.get("primary_metric_acceptable", True))
-        pm_ratio =
+        pm_ratio = evaluation_report.get("primary_metric", {}).get("ratio_vs_baseline")
         if isinstance(pm_ratio, int | float):
             lines.append(
-                f"| Catastrophic Spike Gate (
+                f"| Catastrophic Spike Gate (hard stop) | {'✅ PASS' if pm_ok else '❌ FAIL'} | {pm_ratio:.3f}x | ≤ 2.0x | Hard stop @ 2.0× |"
             )
 
     # Include RMT Health row for compatibility and clarity
     rmt_status = "✅ PASS" if validation.get("rmt_stable", False) else "❌ FAIL"
-    rmt_state =
+    rmt_state = evaluation_report.get("rmt", {}).get("status", "unknown").title()
     lines.append(
         f"| RMT Health | {rmt_status} | {rmt_state} | ε-rule | Random Matrix Theory guard status |"
     )
@@ -602,8 +1108,8 @@ def render_certificate_markdown(certificate: dict[str, Any]) -> str:
     # Pairing + Bootstrap snapshot (quick audit surface)
     try:
         stats = (
-
-            or
+            evaluation_report.get("dataset", {}).get("windows", {}).get("stats", {})
+            or evaluation_report.get("ppl", {}).get("stats", {})
             or {}
         )
         paired_windows = stats.get("paired_windows")
@@ -616,24 +1122,51 @@ def render_certificate_markdown(certificate: dict[str, Any]) -> str:
             or overlap_frac is not None
         ):
             lines.append("")
-
-
-
+            parts: list[str] = []
+            if paired_windows is not None:
+                try:
+                    parts.append(f"{int(paired_windows)} windows")
+                except Exception:
+                    parts.append(f"windows={paired_windows}")
+            if isinstance(match_frac, int | float) and math.isfinite(float(match_frac)):
+                parts.append(f"{float(match_frac) * 100.0:.1f}% match")
+            elif match_frac is not None:
+                parts.append(f"match={match_frac}")
+            if isinstance(overlap_frac, int | float) and math.isfinite(
+                float(overlap_frac)
+            ):
+                parts.append(f"{float(overlap_frac) * 100.0:.1f}% overlap")
+            elif overlap_frac is not None:
+                parts.append(f"overlap={overlap_frac}")
+            lines.append(f"- ✅ Pairing: {', '.join(parts) if parts else 'N/A'}")
         if isinstance(bootstrap, dict):
             reps = bootstrap.get("replicates")
             bseed = bootstrap.get("seed")
             if reps is not None or bseed is not None:
-
+                bits: list[str] = []
+                if reps is not None:
+                    try:
+                        bits.append(f"{int(reps)} replicates")
+                    except Exception:
+                        bits.append(f"replicates={reps}")
+                if bseed is not None:
+                    try:
+                        bits.append(f"seed={int(bseed)}")
+                    except Exception:
+                        bits.append(f"seed={bseed}")
+                lines.append(f"- ✅ Bootstrap: {', '.join(bits) if bits else 'N/A'}")
         # Optional: show log-space paired Δ CI next to ratio CI for clarity
-        delta_ci =
-            "
-        ).get("logloss_delta_ci")
+        delta_ci = evaluation_report.get("primary_metric", {}).get(
+            "ci"
+        ) or evaluation_report.get("ppl", {}).get("logloss_delta_ci")
         if (
             isinstance(delta_ci, tuple | list)
             and len(delta_ci) == 2
             and all(isinstance(x, int | float) for x in delta_ci)
         ):
-            lines.append(
+            lines.append(
+                f"- ℹ️ Log Δ (paired) CI: [{delta_ci[0]:.6f}, {delta_ci[1]:.6f}]"
+            )
     except Exception:
         pass
 
@@ -654,124 +1187,198 @@ def render_certificate_markdown(certificate: dict[str, Any]) -> str:
 
     lines.append("")
 
+    _append_primary_metric_section(lines, evaluation_report)
+
     # Guard observability snapshots
     lines.append("## Guard Observability")
     lines.append("")
 
-    spectral_info =
+    spectral_info = evaluation_report.get("spectral", {}) or {}
     if spectral_info:
-        lines.append("### Spectral Guard")
+        lines.append("### Spectral Guard Summary")
         lines.append("")
-
-
-
-
-
-
-
-
-
-
-
-        spec_sigma = spectral_info.get("sigma_quantile")
-        spec_deadband = spectral_info.get("deadband")
-        spec_max_caps = spectral_info.get("max_caps")
-        summary_yaml = {
-            "sigma_quantile": float(spec_sigma)
-            if isinstance(spec_sigma, int | float)
-            else None,
-            "deadband": float(spec_deadband)
-            if isinstance(spec_deadband, int | float)
-            else None,
-            "max_caps": int(spec_max_caps)
-            if isinstance(spec_max_caps, int | float)
-            else None,
-        }
-        # Drop Nones from summary
-        summary_yaml = {k: v for k, v in summary_yaml.items() if v is not None}
-        if summary_yaml:
-            lines.append("- **Spectral Summary:**")
-            lines.append(" ```yaml")
-            for line in (
-                yaml.safe_dump(summary_yaml, sort_keys=True, width=70)
-                .strip()
-                .splitlines()
-            ):
-                lines.append(f" {line}")
-            lines.append(" ```")
+        lines.append("| Metric | Value | Status |")
+        lines.append("|--------|-------|--------|")
+
+        spectral_ok = bool(validation.get("spectral_stable", False))
+        caps_applied = spectral_info.get("caps_applied")
+        max_caps = spectral_info.get("max_caps")
+        caps_val = (
+            f"{caps_applied}/{max_caps}"
+            if caps_applied is not None and max_caps is not None
+            else "-"
+        )
         lines.append(
-            f"
+            f"| Caps Applied | {caps_val} | {'✅ OK' if spectral_ok else '❌ FAIL'} |"
         )
+
         summary = spectral_info.get("summary", {}) or {}
-
-
+        caps_exceeded = summary.get("caps_exceeded")
+        if caps_exceeded is not None:
+            cap_status = "✅ OK" if not bool(caps_exceeded) else "⚠️ WARN"
+            lines.append(f"| Caps Exceeded | {caps_exceeded} | {cap_status} |")
+
+        top_scores = spectral_info.get("top_z_scores") or {}
+        max_family: str | None = None
+        max_module: str | None = None
+        max_abs_z: float | None = None
+        if isinstance(top_scores, dict):
+            for family, entries in top_scores.items():
+                if not isinstance(entries, list):
+                    continue
+                for entry in entries:
+                    if not isinstance(entry, dict):
+                        continue
+                    z_val = entry.get("z")
+                    if not (
+                        isinstance(z_val, int | float) and math.isfinite(float(z_val))
+                    ):
+                        continue
+                    z_abs = abs(float(z_val))
+                    if max_abs_z is None or z_abs > max_abs_z:
+                        max_abs_z = z_abs
+                        max_family = str(family)
+                        max_module = (
+                            str(entry.get("module")) if entry.get("module") else None
+                        )
+
         family_caps = spectral_info.get("family_caps") or {}
-
-
-
-
-
-        kappa =
-
-
-
-
-
-
+        kappa = None
+        if max_family and isinstance(family_caps, dict):
+            try:
+                kappa = (family_caps.get(max_family, {}) or {}).get("kappa")
+            except Exception:
+                kappa = None
+        kappa_f = (
+            float(kappa)
+            if isinstance(kappa, int | float) and math.isfinite(float(kappa))
+            else None
+        )
+
+        if max_abs_z is not None:
+            max_val = f"{max_abs_z:.3f}"
+            if max_family:
+                max_val += f" ({max_family})"
+            if max_module:
+                max_val += f" – {max_module}"
+            if kappa_f is None:
+                max_status = "ℹ️ No κ"
+            elif max_abs_z <= kappa_f:
+                max_status = f"✅ Within κ={kappa_f:.3f}"
+            else:
+                max_status = f"❌ Exceeds κ={kappa_f:.3f}"
+            lines.append(f"| Max |z| | {max_val} | {max_status} |")
+
+        mt_info = spectral_info.get("multiple_testing", {}) or {}
+        if isinstance(mt_info, dict) and mt_info:
+            mt_method = mt_info.get("method")
+            mt_alpha = mt_info.get("alpha")
+            mt_m = mt_info.get("m")
+            parts: list[str] = []
+            if mt_method:
+                parts.append(f"method={mt_method}")
+            if isinstance(mt_alpha, int | float) and math.isfinite(float(mt_alpha)):
+                parts.append(f"α={float(mt_alpha):.3g}")
+            if isinstance(mt_m, int | float) and math.isfinite(float(mt_m)):
+                parts.append(f"m={int(mt_m)}")
+            lines.append(
+                f"| Multiple Testing | {', '.join(parts) if parts else '—'} | ℹ️ INFO |"
+            )
+
+        lines.append("")
+
+        caps_by_family = spectral_info.get("caps_applied_by_family") or {}
         quantiles = spectral_info.get("family_z_quantiles") or {}
-        if
-
-
-
-
-
-
-
+        if any(
+            bool(x)
+            for x in (caps_by_family, quantiles, family_caps, top_scores)
+            if isinstance(x, dict)
+        ):
+            lines.append("<details>")
+            lines.append("<summary>Per-family details</summary>")
+            lines.append("")
+            lines.append("| Family | κ | q95 | Max |z| | Violations |")
+            lines.append("|--------|---|-----|--------|------------|")
+
+            families: set[str] = set()
+            for block in (caps_by_family, quantiles, family_caps, top_scores):
+                if isinstance(block, dict):
+                    families.update(str(k) for k in block.keys())
+
+            for family in sorted(families):
+                kappa = None
+                if isinstance(family_caps, dict):
+                    kappa = (family_caps.get(family, {}) or {}).get("kappa")
+                kappa_str = (
+                    f"{float(kappa):.3f}"
+                    if isinstance(kappa, int | float) and math.isfinite(float(kappa))
+                    else "-"
+                )
+
+                q95 = None
+                max_z = None
+                if isinstance(quantiles, dict):
+                    stats = quantiles.get(family) or {}
+                    if isinstance(stats, dict):
+                        q95 = stats.get("q95")
+                        max_z = stats.get("max")
                 q95_str = f"{q95:.3f}" if isinstance(q95, int | float) else "-"
-                q99_str = f"{q99:.3f}" if isinstance(q99, int | float) else "-"
                 max_str = f"{max_z:.3f}" if isinstance(max_z, int | float) else "-"
-
+
+                violations = None
+                if isinstance(caps_by_family, dict):
+                    violations = caps_by_family.get(family)
+                v_str = (
+                    str(int(violations)) if isinstance(violations, int | float) else "0"
+                )
+
                 lines.append(
-                    f"| {family} | {
+                    f"| {family} | {kappa_str} | {q95_str} | {max_str} | {v_str} |"
                 )
+
+            if isinstance(top_scores, dict) and top_scores:
+                lines.append("")
+                lines.append("Top |z| per family:")
+                for family in sorted(top_scores.keys()):
+                    entries = top_scores[family]
+                    if not isinstance(entries, list) or not entries:
+                        continue
+                    formatted_entries = []
+                    for entry in entries:
+                        if not isinstance(entry, dict):
+                            continue
+                        module_name = entry.get("module", "unknown")
+                        z_val = entry.get("z")
+                        if isinstance(z_val, int | float) and math.isfinite(
+                            float(z_val)
+                        ):
+                            z_str = f"{z_val:.3f}"
+                        else:
+                            z_str = "n/a"
+                        formatted_entries.append(f"{module_name} (|z|={z_str})")
+                    lines.append(f"- {family}: {', '.join(formatted_entries)}")
+
             lines.append("")
-
-        if policy_caps:
-            lines.append("- **Family κ (policy):**")
-            lines.append(" ```yaml")
-            caps_yaml = (
-                yaml.safe_dump(policy_caps, sort_keys=True, width=70)
-                .strip()
-                .splitlines()
-            )
-            for line in caps_yaml:
-                lines.append(f" {line}")
-            lines.append(" ```")
-        top_scores = spectral_info.get("top_z_scores") or {}
-        if top_scores:
-            lines.append("Top |z| per family:")
-            for family in sorted(top_scores.keys()):
-                entries = top_scores[family]
-                if not entries:
-                    continue
-                formatted_entries = []
-                for entry in entries:
-                    module_name = entry.get("module", "unknown")
-                    z_val = entry.get("z")
-                    if isinstance(z_val, int | float) and math.isfinite(float(z_val)):
-                        z_str = f"{z_val:.3f}"
-                    else:
-                        z_str = "n/a"
-                    formatted_entries.append(f"{module_name} (|z|={z_str})")
-                lines.append(f"- {family}: {', '.join(formatted_entries)}")
+            lines.append("</details>")
         lines.append("")
 
-    rmt_info =
+    rmt_info = evaluation_report.get("rmt", {}) or {}
     if rmt_info:
         lines.append("### RMT Guard")
         lines.append("")
         families = rmt_info.get("families") or {}
+        stable = bool(rmt_info.get("stable", True))
+        status = "✅ OK" if stable else "❌ FAIL"
+        delta_total = rmt_info.get("delta_total")
+        if isinstance(delta_total, int):
+            lines.append(f"- Δ total: {delta_total:+d}")
+        lines.append(f"- Status: {status}")
+        lines.append(f"- Families: {len(families)}")
         if families:
+            lines.append("")
+            lines.append("<details>")
+            lines.append("<summary>RMT family details</summary>")
+            lines.append("")
             lines.append("| Family | ε_f | Bare | Guarded | Δ |")
             lines.append("|--------|-----|------|---------|---|")
             for family, data in families.items():
@@ -801,14 +1408,12 @@ def render_certificate_markdown(certificate: dict[str, Any]) -> str:
                 f"| {family} | {epsilon_str} | {bare_str} | {guarded_str} | {delta_str} |"
             )
             lines.append("")
-
-
-
-        lines.append(
-        lines.append(f"- Stable: {rmt_info.get('stable', True)}")
-        lines.append("")
+            lines.append("</details>")
+            lines.append("")
+        else:
+            lines.append("")
 
-    guard_overhead_info =
+    guard_overhead_info = evaluation_report.get("guard_overhead", {}) or {}
     if guard_overhead_info:
         lines.append("### Guard Overhead")
         lines.append("")
@@ -836,7 +1441,7 @@ def render_certificate_markdown(certificate: dict[str, Any]) -> str:
         overhead_source = guard_overhead_info.get("source")
         if overhead_source:
             lines.append(f"- Source: {overhead_source}")
-        plan_ctx =
+        plan_ctx = evaluation_report.get("provenance", {}).get("window_plan", {})
         if isinstance(plan_ctx, dict) and plan_ctx:
             plan_preview = (
                 plan_ctx.get("preview_n")
@@ -855,34 +1460,34 @@ def render_certificate_markdown(certificate: dict[str, Any]) -> str:
     lines.append("")

     compression_diag = (
-
-        if isinstance(
+        evaluation_report.get("structure", {}).get("compression_diagnostics", {})
+        if isinstance(evaluation_report.get("structure"), dict)
         else {}
     )
     inference_flags = compression_diag.get("inferred") or {}
     inference_sources = compression_diag.get("inference_source") or {}
     inference_log = compression_diag.get("inference_log") or []
     if inference_flags or inference_sources or inference_log:
-
-
+        appendix_lines.append("### Inference Diagnostics")
+        appendix_lines.append("")
         if inference_flags:
-
+            appendix_lines.append("- **Fields Inferred:**")
             for field, flag in inference_flags.items():
-
+                appendix_lines.append(f" - {field}: {'yes' if flag else 'no'}")
         if inference_sources:
-
+            appendix_lines.append("- **Sources:**")
             for field, source in inference_sources.items():
-
+                appendix_lines.append(f" - {field}: {source}")
         if inference_log:
-
+            appendix_lines.append("- **Inference Log:**")
             for entry in inference_log:
-
-
+                appendix_lines.append(f" - {entry}")
+        appendix_lines.append("")

     # Model and Configuration
     lines.append("## Model Information")
     lines.append("")
-    meta =
+    meta = evaluation_report["meta"]
     lines.append(f"- **Model ID:** {meta.get('model_id')}")
     lines.append(f"- **Adapter:** {meta.get('adapter')}")
     lines.append(f"- **Device:** {meta.get('device')}")
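Editor's note: the hunk above starts routing informational diagnostics into an `appendix_lines` buffer instead of the main `lines` list; later hunks flush that buffer under an `## Appendix` heading. A small self-contained sketch of that deferral pattern follows; the section names and content are illustrative only.

    # Sketch of the deferred-appendix pattern used by the new renderer code:
    # informational sections are buffered and emitted at the end of the report.
    def build_report() -> str:
        lines: list[str] = ["# Report", ""]
        appendix_lines: list[str] = []

        # Main body sections go straight into `lines`.
        lines += ["## Summary", "", "- Status: pass", ""]

        # Lower-priority diagnostics are buffered for the appendix.
        appendix_lines += ["### Inference Diagnostics", "", "- fields inferred: none", ""]

        if appendix_lines:
            lines += ["## Appendix", ""]
            lines.extend(appendix_lines)
        return "\n".join(lines)

    print(build_report())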
@@ -906,34 +1511,54 @@ def render_certificate_markdown(certificate: dict[str, Any]) -> str:
     if invarlock_version:
         lines.append(f"- **InvarLock Version:** {invarlock_version}")
     env_flags = meta.get("env_flags")
-    if isinstance(env_flags, dict) and env_flags:
-        lines.append("- **Env Flags:**")
-        lines.append(" ```yaml")
-        for k, v in env_flags.items():
-            lines.append(f" {k}: {v}")
-        lines.append(" ```")
-    # Determinism flags (if present)
     cuda_flags = meta.get("cuda_flags")
+
+    # Compressed determinism/environment summary for readability
+    det_parts: list[str] = []
+    for label, keys in (
+        ("torch_det", ("torch_deterministic_algorithms", "deterministic_algorithms")),
+        ("cudnn_det", ("cudnn_deterministic",)),
+        ("cudnn_bench", ("cudnn_benchmark",)),
+        ("tf32_matmul", ("cuda_matmul_allow_tf32",)),
+        ("tf32_cudnn", ("cudnn_allow_tf32",)),
+        ("cublas_ws", ("CUBLAS_WORKSPACE_CONFIG",)),
+    ):
+        val = None
+        for key in keys:
+            if isinstance(env_flags, dict) and env_flags.get(key) is not None:
+                val = env_flags.get(key)
+                break
+            if isinstance(cuda_flags, dict) and cuda_flags.get(key) is not None:
+                val = cuda_flags.get(key)
+                break
+        if val is not None:
+            det_parts.append(f"{label}={val}")
+    if det_parts:
+        lines.append(f"- **Determinism:** {', '.join(det_parts)}")
+
+    full_flags: dict[str, Any] = {}
+    if isinstance(env_flags, dict) and env_flags:
+        full_flags["env_flags"] = env_flags
     if isinstance(cuda_flags, dict) and cuda_flags:
-
-
-
-
-
-
-
-
-    ):
-
-
-
-
+        full_flags["cuda_flags"] = cuda_flags
+    if full_flags:
+        lines.append("")
+        lines.append("<details>")
+        lines.append("<summary>Environment flags (full)</summary>")
+        lines.append("")
+        lines.append("```yaml")
+        flags_yaml = yaml.safe_dump(full_flags, sort_keys=True, width=80).strip()
+        for line in flags_yaml.splitlines():
+            lines.append(line)
+        lines.append("```")
+        lines.append("")
+        lines.append("</details>")
     lines.append("")

     # Edit Configuration (removed duplicate Edit Information section)

     # Auto-tuning Configuration
-    auto =
+    auto = evaluation_report["auto"]
     if auto["tier"] != "none":
         lines.append("## Auto-Tuning Configuration")
         lines.append("")
|
|
|
951
1576
|
pass
|
|
952
1577
|
lines.append("")
|
|
953
1578
|
|
|
954
|
-
|
|
955
|
-
if resolved_policy:
|
|
956
|
-
lines.append("## Resolved Policy")
|
|
957
|
-
lines.append("")
|
|
958
|
-
lines.append("```yaml")
|
|
959
|
-
resolved_yaml = yaml.safe_dump(
|
|
960
|
-
resolved_policy, sort_keys=True, width=80, default_flow_style=False
|
|
961
|
-
).strip()
|
|
962
|
-
for line in resolved_yaml.splitlines():
|
|
963
|
-
lines.append(line)
|
|
964
|
-
lines.append("```")
|
|
965
|
-
lines.append("")
|
|
966
|
-
|
|
967
|
-
policy_provenance = certificate.get("policy_provenance", {})
|
|
968
|
-
if policy_provenance:
|
|
969
|
-
lines.append("## Policy Provenance")
|
|
970
|
-
lines.append("")
|
|
971
|
-
lines.append(f"- **Tier:** {policy_provenance.get('tier')}")
|
|
972
|
-
overrides_list = policy_provenance.get("overrides") or []
|
|
973
|
-
if overrides_list:
|
|
974
|
-
lines.append(f"- **Overrides:** {', '.join(overrides_list)}")
|
|
975
|
-
else:
|
|
976
|
-
lines.append("- **Overrides:** (none)")
|
|
977
|
-
digest_value = policy_provenance.get("policy_digest")
|
|
978
|
-
if digest_value:
|
|
979
|
-
lines.append(f"- **Policy Digest:** `{digest_value}`")
|
|
980
|
-
else:
|
|
981
|
-
lines.append("- **Policy Digest:** (not recorded)")
|
|
982
|
-
if policy_provenance.get("resolved_at"):
|
|
983
|
-
lines.append(f"- **Resolved At:** {policy_provenance.get('resolved_at')}")
|
|
984
|
-
lines.append("")
|
|
985
|
-
|
|
986
|
-
# Dataset Information
|
|
987
|
-
lines.append("## Dataset Configuration")
|
|
988
|
-
lines.append("")
|
|
989
|
-
dataset = certificate.get("dataset", {}) or {}
|
|
990
|
-
prov = (
|
|
991
|
-
(dataset.get("provider") or "unknown")
|
|
992
|
-
if isinstance(dataset, dict)
|
|
993
|
-
else "unknown"
|
|
994
|
-
)
|
|
995
|
-
lines.append(f"- **Provider:** {prov}")
|
|
996
|
-
try:
|
|
997
|
-
seq_len_val = (
|
|
998
|
-
int(dataset.get("seq_len"))
|
|
999
|
-
if isinstance(dataset.get("seq_len"), int | float)
|
|
1000
|
-
else dataset.get("seq_len")
|
|
1001
|
-
)
|
|
1002
|
-
except Exception: # pragma: no cover - defensive
|
|
1003
|
-
seq_len_val = dataset.get("seq_len")
|
|
1004
|
-
if seq_len_val is not None:
|
|
1005
|
-
lines.append(f"- **Sequence Length:** {seq_len_val}")
|
|
1006
|
-
windows_blk = (
|
|
1007
|
-
dataset.get("windows", {}) if isinstance(dataset.get("windows"), dict) else {}
|
|
1008
|
-
)
|
|
1009
|
-
win_prev = windows_blk.get("preview")
|
|
1010
|
-
win_final = windows_blk.get("final")
|
|
1011
|
-
if win_prev is not None and win_final is not None:
|
|
1012
|
-
lines.append(f"- **Windows:** {win_prev} preview + {win_final} final")
|
|
1013
|
-
if windows_blk.get("seed") is not None:
|
|
1014
|
-
lines.append(f"- **Seed:** {windows_blk.get('seed')}")
|
|
1015
|
-
hash_blk = dataset.get("hash", {}) if isinstance(dataset.get("hash"), dict) else {}
|
|
1016
|
-
if hash_blk.get("preview_tokens") is not None:
|
|
1017
|
-
lines.append(f"- **Preview Tokens:** {hash_blk.get('preview_tokens'):,}")
|
|
1018
|
-
if hash_blk.get("final_tokens") is not None:
|
|
1019
|
-
lines.append(f"- **Final Tokens:** {hash_blk.get('final_tokens'):,}")
|
|
1020
|
-
if hash_blk.get("total_tokens") is not None:
|
|
1021
|
-
lines.append(f"- **Total Tokens:** {hash_blk.get('total_tokens'):,}")
|
|
1022
|
-
if hash_blk.get("dataset"):
|
|
1023
|
-
lines.append(f"- **Dataset Hash:** {hash_blk.get('dataset')}")
|
|
1024
|
-
tokenizer = dataset.get("tokenizer", {})
|
|
1025
|
-
if tokenizer.get("name") or tokenizer.get("hash"):
|
|
1026
|
-
vocab_size = tokenizer.get("vocab_size")
|
|
1027
|
-
vocab_suffix = f" (vocab {vocab_size})" if isinstance(vocab_size, int) else ""
|
|
1028
|
-
lines.append(
|
|
1029
|
-
f"- **Tokenizer:** {tokenizer.get('name', 'unknown')}{vocab_suffix}"
|
|
1030
|
-
)
|
|
1031
|
-
if tokenizer.get("hash"):
|
|
1032
|
-
lines.append(f" - Hash: {tokenizer['hash']}")
|
|
1033
|
-
lines.append(
|
|
1034
|
-
f" - BOS/EOS: {tokenizer.get('bos_token')} / {tokenizer.get('eos_token')}"
|
|
1035
|
-
)
|
|
1036
|
-
if tokenizer.get("pad_token") is not None:
|
|
1037
|
-
lines.append(f" - PAD: {tokenizer.get('pad_token')}")
|
|
1038
|
-
if tokenizer.get("add_prefix_space") is not None:
|
|
1039
|
-
lines.append(f" - add_prefix_space: {tokenizer.get('add_prefix_space')}")
|
|
1040
|
-
lines.append("")
|
|
1041
|
-
|
|
1042
|
-
provenance_info = certificate.get("provenance", {}) or {}
|
|
1043
|
-
if provenance_info:
|
|
1044
|
-
lines.append("## Run Provenance")
|
|
1045
|
-
lines.append("")
|
|
1046
|
-
baseline_info = provenance_info.get("baseline", {}) or {}
|
|
1047
|
-
if baseline_info:
|
|
1048
|
-
lines.append(f"- **Baseline Run ID:** {baseline_info.get('run_id')}")
|
|
1049
|
-
if baseline_info.get("report_hash"):
|
|
1050
|
-
lines.append(f" - Report Hash: `{baseline_info.get('report_hash')}`")
|
|
1051
|
-
if baseline_info.get("report_path"):
|
|
1052
|
-
lines.append(f" - Report Path: {baseline_info.get('report_path')}")
|
|
1053
|
-
edited_info = provenance_info.get("edited", {}) or {}
|
|
1054
|
-
if edited_info:
|
|
1055
|
-
lines.append(f"- **Edited Run ID:** {edited_info.get('run_id')}")
|
|
1056
|
-
if edited_info.get("report_hash"):
|
|
1057
|
-
lines.append(f" - Report Hash: `{edited_info.get('report_hash')}`")
|
|
1058
|
-
if edited_info.get("report_path"):
|
|
1059
|
-
lines.append(f" - Report Path: {edited_info.get('report_path')}")
|
|
1060
|
-
window_plan = provenance_info.get("window_plan")
|
|
1061
|
-
if isinstance(window_plan, dict) and window_plan:
|
|
1062
|
-
preview_val = window_plan.get(
|
|
1063
|
-
"preview_n", window_plan.get("actual_preview")
|
|
1064
|
-
)
|
|
1065
|
-
final_val = window_plan.get("final_n", window_plan.get("actual_final"))
|
|
1066
|
-
lines.append(
|
|
1067
|
-
f"- **Window Plan:** profile={window_plan.get('profile')}, preview={preview_val}, final={final_val}"
|
|
1068
|
-
)
|
|
1069
|
-
provider_digest = provenance_info.get("provider_digest")
|
|
1070
|
-
if isinstance(provider_digest, dict) and provider_digest:
|
|
1071
|
-
ids_d = provider_digest.get("ids_sha256")
|
|
1072
|
-
tok_d = provider_digest.get("tokenizer_sha256")
|
|
1073
|
-
mask_d = provider_digest.get("masking_sha256")
|
|
1074
|
-
|
|
1075
|
-
lines.append("- **Provider Digest:**")
|
|
1076
|
-
if tok_d:
|
|
1077
|
-
lines.append(
|
|
1078
|
-
f" - tokenizer_sha256: `{_short_digest(tok_d)}` (full in JSON)"
|
|
1079
|
-
)
|
|
1080
|
-
if ids_d:
|
|
1081
|
-
lines.append(f" - ids_sha256: `{_short_digest(ids_d)}` (full in JSON)")
|
|
1082
|
-
if mask_d:
|
|
1083
|
-
lines.append(
|
|
1084
|
-
f" - masking_sha256: `{_short_digest(mask_d)}` (full in JSON)"
|
|
1085
|
-
)
|
|
1086
|
-
# Surface confidence label prominently
|
|
1087
|
-
try:
|
|
1088
|
-
conf = certificate.get("confidence", {}) or {}
|
|
1089
|
-
if isinstance(conf, dict) and conf.get("label"):
|
|
1090
|
-
lines.append(f"- **Confidence:** {conf.get('label')}")
|
|
1091
|
-
except Exception:
|
|
1092
|
-
pass
|
|
1093
|
-
# Surface policy version + thresholds hash (short)
|
|
1094
|
-
try:
|
|
1095
|
-
pd = certificate.get("policy_digest", {}) or {}
|
|
1096
|
-
if isinstance(pd, dict) and pd:
|
|
1097
|
-
pv = pd.get("policy_version")
|
|
1098
|
-
th = pd.get("thresholds_hash")
|
|
1099
|
-
if pv:
|
|
1100
|
-
lines.append(f"- **Policy Version:** {pv}")
|
|
1101
|
-
if isinstance(th, str) and th:
|
|
1102
|
-
short = th if len(th) <= 16 else (th[:8] + "…" + th[-8:])
|
|
1103
|
-
lines.append(f"- **Thresholds Digest:** `{short}` (full in JSON)")
|
|
1104
|
-
if pd.get("changed"):
|
|
1105
|
-
lines.append("- Note: policy changed")
|
|
1106
|
-
except Exception:
|
|
1107
|
-
pass
|
|
1108
|
-
lines.append("")
|
|
1579
|
+
_append_dataset_and_provenance_section(lines, evaluation_report)
|
|
1109
1580
|
|
|
1110
1581
|
# Structural Changes heading is printed with content later; avoid empty header here
|
|
1111
1582
|
|
|
1112
|
-
# Primary Metric (metric-v1) snapshot, if present
|
|
1113
|
-
try:
|
|
1114
|
-
pm = certificate.get("primary_metric")
|
|
1115
|
-
if isinstance(pm, dict) and pm:
|
|
1116
|
-
kind = pm.get("kind", "unknown")
|
|
1117
|
-
lines.append(f"## Primary Metric ({kind})")
|
|
1118
|
-
lines.append("")
|
|
1119
|
-
unit = pm.get("unit", "-")
|
|
1120
|
-
paired = pm.get("paired", False)
|
|
1121
|
-
reps = None
|
|
1122
|
-
# Snapshot only; bootstrap reps live in ppl.stats.bootstrap for ppl metrics
|
|
1123
|
-
# Mark estimated metrics (e.g., pseudo accuracy counts) clearly
|
|
1124
|
-
estimated_flag = False
|
|
1125
|
-
try:
|
|
1126
|
-
if bool(pm.get("estimated")):
|
|
1127
|
-
estimated_flag = True
|
|
1128
|
-
elif str(pm.get("counts_source", "")).lower() == "pseudo_config":
|
|
1129
|
-
estimated_flag = True
|
|
1130
|
-
except Exception:
|
|
1131
|
-
estimated_flag = False
|
|
1132
|
-
est_suffix = " (estimated)" if estimated_flag else ""
|
|
1133
|
-
lines.append(f"- Kind: {kind} (unit: {unit}){est_suffix}")
|
|
1134
|
-
gating_basis = pm.get("gating_basis") or pm.get("basis")
|
|
1135
|
-
if gating_basis:
|
|
1136
|
-
lines.append(f"- Basis: {gating_basis}")
|
|
1137
|
-
if isinstance(paired, bool):
|
|
1138
|
-
lines.append(f"- Paired: {paired}")
|
|
1139
|
-
reps = pm.get("reps")
|
|
1140
|
-
if isinstance(reps, int | float):
|
|
1141
|
-
lines.append(f"- Bootstrap Reps: {int(reps)}")
|
|
1142
|
-
ci = pm.get("ci") or pm.get("display_ci")
|
|
1143
|
-
if (
|
|
1144
|
-
isinstance(ci, list | tuple)
|
|
1145
|
-
and len(ci) == 2
|
|
1146
|
-
and all(isinstance(x, int | float) for x in ci)
|
|
1147
|
-
):
|
|
1148
|
-
lines.append(f"- CI: {ci[0]:.3f}–{ci[1]:.3f}")
|
|
1149
|
-
prev = pm.get("preview")
|
|
1150
|
-
fin = pm.get("final")
|
|
1151
|
-
ratio = pm.get("ratio_vs_baseline")
|
|
1152
|
-
|
|
1153
|
-
lines.append("")
|
|
1154
|
-
if estimated_flag and str(kind).lower() in {"accuracy", "vqa_accuracy"}:
|
|
1155
|
-
lines.append(
|
|
1156
|
-
"- Note: Accuracy derived from pseudo counts (quick dev preset); use a labeled preset for measured accuracy."
|
|
1157
|
-
)
|
|
1158
|
-
lines.append("| Field | Value |")
|
|
1159
|
-
lines.append("|-------|-------|")
|
|
1160
|
-
lines.append(f"| Preview | {_fmt_by_kind(prev, str(kind))} |")
|
|
1161
|
-
lines.append(f"| Final | {_fmt_by_kind(fin, str(kind))} |")
|
|
1162
|
-
# For accuracy, ratio field is actually a delta (as per helper); clarify inline
|
|
1163
|
-
if kind in {"accuracy", "vqa_accuracy"}:
|
|
1164
|
-
lines.append(f"| Δ vs Baseline | {_fmt_by_kind(ratio, str(kind))} |")
|
|
1165
|
-
# When baseline accuracy is near-zero, clarify display rule
|
|
1166
|
-
try:
|
|
1167
|
-
base_pt = pm.get("baseline_point")
|
|
1168
|
-
if isinstance(base_pt, int | float) and base_pt < 0.05:
|
|
1169
|
-
lines.append(
|
|
1170
|
-
"- Note: baseline < 5%; ratio suppressed; showing Δpp"
|
|
1171
|
-
)
|
|
1172
|
-
except Exception:
|
|
1173
|
-
pass
|
|
1174
|
-
else:
|
|
1175
|
-
try:
|
|
1176
|
-
lines.append(f"| Ratio vs Baseline | {float(ratio):.3f} |")
|
|
1177
|
-
except Exception:
|
|
1178
|
-
lines.append("| Ratio vs Baseline | N/A |")
|
|
1179
|
-
lines.append("")
|
|
1180
|
-
# Secondary metrics (informational)
|
|
1181
|
-
try:
|
|
1182
|
-
secs = certificate.get("secondary_metrics")
|
|
1183
|
-
if isinstance(secs, list) and secs:
|
|
1184
|
-
lines.append("## Secondary Metrics (informational)")
|
|
1185
|
-
lines.append("")
|
|
1186
|
-
lines.append("| Kind | Preview | Final | vs Baseline | CI |")
|
|
1187
|
-
lines.append("|------|---------|-------|-------------|----|")
|
|
1188
|
-
for m in secs:
|
|
1189
|
-
if not isinstance(m, dict):
|
|
1190
|
-
continue
|
|
1191
|
-
k = m.get("kind", "?")
|
|
1192
|
-
pv = _fmt_by_kind(m.get("preview"), str(k))
|
|
1193
|
-
fv = _fmt_by_kind(m.get("final"), str(k))
|
|
1194
|
-
rb = m.get("ratio_vs_baseline")
|
|
1195
|
-
try:
|
|
1196
|
-
rb_str = (
|
|
1197
|
-
f"{float(rb):.3f}"
|
|
1198
|
-
if (str(k).startswith("ppl"))
|
|
1199
|
-
else _fmt_by_kind(rb, str(k))
|
|
1200
|
-
)
|
|
1201
|
-
except Exception:
|
|
1202
|
-
rb_str = "N/A"
|
|
1203
|
-
ci = m.get("display_ci") or m.get("ci")
|
|
1204
|
-
if isinstance(ci, tuple | list) and len(ci) == 2:
|
|
1205
|
-
ci_str = f"{float(ci[0]):.3f}-{float(ci[1]):.3f}"
|
|
1206
|
-
else:
|
|
1207
|
-
ci_str = "–"
|
|
1208
|
-
lines.append(f"| {k} | {pv} | {fv} | {rb_str} | {ci_str} |")
|
|
1209
|
-
lines.append("")
|
|
1210
|
-
except Exception:
|
|
1211
|
-
pass
|
|
1212
|
-
except Exception:
|
|
1213
|
-
pass
|
|
1214
|
-
|
|
1215
1583
|
# System Overhead section (latency/throughput)
|
|
1216
|
-
sys_over =
|
|
1584
|
+
sys_over = evaluation_report.get("system_overhead", {}) or {}
|
|
1217
1585
|
if isinstance(sys_over, dict) and sys_over:
|
|
1218
1586
|
_append_system_overhead_section(lines, sys_over)
|
|
1219
1587
|
|
|
1220
1588
|
# Accuracy Subgroups (informational)
|
|
1221
1589
|
try:
|
|
1222
|
-
cls =
|
|
1590
|
+
cls = evaluation_report.get("classification", {})
|
|
1223
1591
|
sub = cls.get("subgroups") if isinstance(cls, dict) else None
|
|
1224
1592
|
if isinstance(sub, dict) and sub:
|
|
1225
1593
|
_append_accuracy_subgroups(lines, sub)
|
|
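Editor's note: the large hunk above deletes the inline dataset, provenance, and metric rendering in favor of helper calls such as `_append_dataset_and_provenance_section(...)`. The helper's body is not shown in this diff; the sketch below only illustrates the general lines-mutating-helper style it appears to adopt, with hypothetical field handling, and is not the package's actual implementation.

    from typing import Any

    # Illustrative only: a lines-mutating helper in the style the diff switches to.
    # The real _append_dataset_and_provenance_section in invarlock may differ.
    def _append_dataset_section(lines: list[str], report: dict[str, Any]) -> None:
        dataset = report.get("dataset", {}) or {}
        lines.append("## Dataset Configuration")
        lines.append("")
        lines.append(f"- **Provider:** {dataset.get('provider', 'unknown')}")
        if dataset.get("seq_len") is not None:
            lines.append(f"- **Sequence Length:** {dataset['seq_len']}")
        lines.append("")

    lines: list[str] = []
    _append_dataset_section(lines, {"dataset": {"provider": "wikitext", "seq_len": 512}})
    print("\n".join(lines))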
@@ -1227,7 +1595,7 @@ def render_certificate_markdown(certificate: dict[str, Any]) -> str:
         pass
     # Structural Changes
     try:
-        structure =
+        structure = evaluation_report.get("structure", {}) or {}
         params_changed = int(structure.get("params_changed", 0) or 0)
         layers_modified = int(structure.get("layers_modified", 0) or 0)
         bitwidth_changes = 0
@@ -1239,7 +1607,7 @@ def render_certificate_markdown(certificate: dict[str, Any]) -> str:
         has_changes = any(
             v > 0 for v in (params_changed, layers_modified, bitwidth_changes)
         )
-        edit_name = str(
+        edit_name = str(evaluation_report.get("edit_name", "unknown"))
         if has_changes:
             lines.append("## Structural Changes")
             lines.append("")
@@ -1369,47 +1737,48 @@ def render_certificate_markdown(certificate: dict[str, Any]) -> str:
     lines.append("")

     # Variance Guard (Spectral/RMT summaries are already provided above)
-    variance =
-
+    variance = evaluation_report["variance"]
+    appendix_lines.append("### Variance Guard")
+    appendix_lines.append("")

     # Display whether VE was enabled after A/B test
-
+    appendix_lines.append(f"- **Enabled:** {'Yes' if variance['enabled'] else 'No'}")

     if variance["enabled"]:
         # VE was enabled - show the gain
         gain_value = variance.get("gain", "N/A")
         if isinstance(gain_value, int | float):
-
+            appendix_lines.append(f"- **Gain:** {gain_value:.3f}")
         else:
-
+            appendix_lines.append(f"- **Gain:** {gain_value}")
     else:
         # VE was not enabled - show succinct reason if available, else a clear disabled message
         ppl_no_ve = variance.get("ppl_no_ve")
         ppl_with_ve = variance.get("ppl_with_ve")
         ratio_ci = variance.get("ratio_ci")
         if ppl_no_ve is not None and ppl_with_ve is not None and ratio_ci:
-
-
+            appendix_lines.append(f"- **Primary metric without VE:** {ppl_no_ve:.3f}")
+            appendix_lines.append(f"- **Primary metric with VE:** {ppl_with_ve:.3f}")
             gain_value = variance.get("gain")
             if isinstance(gain_value, int | float):
-
+                appendix_lines.append(f"- **Gain (insufficient):** {gain_value:.3f}")
         else:
-
+            appendix_lines.append(
                 "- Variance Guard: Disabled (predictive gate not evaluated for this edit)."
             )
             # Add concise rationale aligned with Balanced predictive gate contract
             try:
-                ve_policy =
+                ve_policy = evaluation_report.get("policies", {}).get("variance", {})
                 min_effect = ve_policy.get("min_effect_lognll")
                 if isinstance(min_effect, int | float):
-
+                    appendix_lines.append(
                         f"- Predictive gate (Balanced): one-sided; enables only if CI excludes 0 and |mean Δ| ≥ {float(min_effect):.4g}."
                     )
                 else:
-
+                    appendix_lines.append(
                         "- Predictive gate (Balanced): one-sided; enables only if CI excludes 0 and |mean Δ| ≥ min_effect."
                     )
-
+                appendix_lines.append(
                     "- Predictive Gate: evaluated=false (disabled under current policy/edit)."
                 )
             except Exception:
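Editor's note: the rationale strings above describe the Balanced predictive gate: variance equalization is enabled only when the confidence interval on the mean delta excludes zero and the absolute mean delta clears a minimum effect size. A self-contained sketch of that decision rule follows; the threshold and input values are illustrative, not invarlock defaults.

    # Sketch of the one-sided predictive-gate decision described in the diff:
    # enable only if the CI excludes 0 and |mean delta| >= min_effect.
    # The numbers below are made-up examples.
    def predictive_gate(ci_low: float, ci_high: float, mean_delta: float,
                        min_effect: float = 0.002) -> bool:
        ci_excludes_zero = ci_low > 0.0 or ci_high < 0.0
        return ci_excludes_zero and abs(mean_delta) >= min_effect

    print(predictive_gate(0.0011, 0.0042, 0.0026))   # True: CI excludes 0, effect large enough
    print(predictive_gate(-0.0005, 0.0042, 0.0026))  # False: CI includes 0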
@@ -1417,19 +1786,26 @@ def render_certificate_markdown(certificate: dict[str, Any]) -> str:

     if variance.get("ratio_ci"):
         ratio_lo, ratio_hi = variance["ratio_ci"]
-
+        appendix_lines.append(f"- **Ratio CI:** [{ratio_lo:.3f}, {ratio_hi:.3f}]")

     if variance.get("calibration") and variance.get("enabled"):
         calib = variance["calibration"]
         coverage = calib.get("coverage")
         requested = calib.get("requested")
         status = calib.get("status", "unknown")
-
+        appendix_lines.append(
+            f"- **Calibration:** {coverage}/{requested} windows ({status})"
+        )
+    appendix_lines.append("")

     lines.append("")

     # MoE Observability (non-gating)
-    moe =
+    moe = (
+        evaluation_report.get("moe", {})
+        if isinstance(evaluation_report.get("moe"), dict)
+        else {}
+    )
     if moe:
         lines.append("## MoE Observability")
         lines.append("")
@@ -1458,46 +1834,36 @@ def render_certificate_markdown(certificate: dict[str, Any]) -> str:
             lines.append(f"- **{label}:** {float(moe[key]):+.4f}")
         lines.append("")

-
-    lines.append("## Applied Policies")
-    lines.append("")
-    policies = certificate["policies"]
-    for guard_name, policy in policies.items():
-        lines.append(f"### {guard_name.title()}")
-        lines.append("")
-        policy_yaml = (
-            yaml.safe_dump(policy, sort_keys=True, width=80).strip().splitlines()
-        )
-        lines.append("```yaml")
-        for line in policy_yaml:
-            lines.append(line)
-        lines.append("```")
-        lines.append("")
+    _append_policy_configuration_section(lines, evaluation_report)

-
-
-
-    artifacts = certificate["artifacts"]
+    appendix_lines.append("### Artifacts")
+    appendix_lines.append("")
+    artifacts = evaluation_report["artifacts"]
     if artifacts.get("events_path"):
-
+        appendix_lines.append(f"- **Events Log:** `{artifacts['events_path']}`")
     if artifacts.get("report_path"):
-
-
-
+        appendix_lines.append(f"- **Full Report:** `{artifacts['report_path']}`")
+    appendix_lines.append(f"- **Report Generated:** {artifacts['generated_at']}")
+    appendix_lines.append("")
+
+    if appendix_lines:
+        lines.append("## Appendix")
+        lines.append("")
+        lines.extend(appendix_lines)

-    #
-    cert_hash =
-    lines.append("##
+    # Report Hash for Integrity
+    cert_hash = _compute_report_hash(evaluation_report)
+    lines.append("## Evaluation Report Integrity")
     lines.append("")
-    lines.append(f"**
+    lines.append(f"**Report Hash:** `{cert_hash}`")
     lines.append("")
     lines.append("---")
     lines.append("")
     lines.append(
-        "*This InvarLock
+        "*This InvarLock Evaluation Report summarizes baseline‑paired evaluation results for a subject model relative to the provided baseline snapshot under the configured profile/preset.*"
     )
     lines.append(
-        "*
+        "*It reports regression-risk indicators for the measured signals; it is not a broad AI safety, alignment, or content-safety guarantee.*"
     )

     return "\n".join(lines)
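Editor's note: the new `_compute_report_hash(evaluation_report)` call replaces the old certificate hash, but its implementation is not part of this hunk. A common way to fingerprint a report like this is to hash a canonical JSON serialization; the sketch below illustrates that approach under that assumption and is not necessarily how invarlock computes the hash.

    import hashlib
    import json
    from typing import Any

    # Assumption: the report hash is a SHA-256 digest over a canonical JSON form.
    # Illustrative sketch only, not invarlock's actual _compute_report_hash.
    def compute_report_hash(report: dict[str, Any]) -> str:
        canonical = json.dumps(report, sort_keys=True, separators=(",", ":"), default=str)
        return hashlib.sha256(canonical.encode("utf-8")).hexdigest()

    print(compute_report_hash({"meta": {"model_id": "demo"}, "auto": {"tier": "none"}}))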