invarlock 0.3.7__py3-none-any.whl → 0.3.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. invarlock/__init__.py +3 -3
  2. invarlock/adapters/auto.py +2 -10
  3. invarlock/adapters/hf_loading.py +7 -7
  4. invarlock/adapters/hf_mixin.py +28 -5
  5. invarlock/assurance/__init__.py +15 -23
  6. invarlock/calibration/spectral_null.py +1 -1
  7. invarlock/cli/adapter_auto.py +1 -5
  8. invarlock/cli/app.py +57 -27
  9. invarlock/cli/commands/__init__.py +2 -2
  10. invarlock/cli/commands/calibrate.py +48 -4
  11. invarlock/cli/commands/{certify.py → evaluate.py} +69 -46
  12. invarlock/cli/commands/explain_gates.py +94 -51
  13. invarlock/cli/commands/export_html.py +11 -9
  14. invarlock/cli/commands/report.py +121 -47
  15. invarlock/cli/commands/run.py +274 -66
  16. invarlock/cli/commands/verify.py +84 -89
  17. invarlock/cli/determinism.py +1 -1
  18. invarlock/cli/provenance.py +3 -3
  19. invarlock/core/bootstrap.py +1 -1
  20. invarlock/core/retry.py +14 -14
  21. invarlock/core/runner.py +1 -1
  22. invarlock/edits/noop.py +2 -2
  23. invarlock/edits/quant_rtn.py +2 -2
  24. invarlock/eval/__init__.py +1 -1
  25. invarlock/eval/bench.py +11 -7
  26. invarlock/eval/primary_metric.py +1 -1
  27. invarlock/guards/spectral.py +2 -2
  28. invarlock/guards_ref/spectral_ref.py +1 -1
  29. invarlock/model_profile.py +16 -35
  30. invarlock/observability/health.py +38 -20
  31. invarlock/plugins/hf_bnb_adapter.py +32 -21
  32. invarlock/reporting/__init__.py +18 -4
  33. invarlock/reporting/html.py +7 -7
  34. invarlock/reporting/normalizer.py +2 -2
  35. invarlock/reporting/policy_utils.py +1 -1
  36. invarlock/reporting/primary_metric_utils.py +11 -11
  37. invarlock/reporting/render.py +126 -120
  38. invarlock/reporting/report.py +43 -37
  39. invarlock/reporting/{certificate.py → report_builder.py} +103 -99
  40. invarlock/reporting/{certificate_schema.py → report_schema.py} +22 -22
  41. invarlock-0.3.9.dist-info/METADATA +303 -0
  42. {invarlock-0.3.7.dist-info → invarlock-0.3.9.dist-info}/RECORD +46 -46
  43. {invarlock-0.3.7.dist-info → invarlock-0.3.9.dist-info}/WHEEL +1 -1
  44. invarlock-0.3.7.dist-info/METADATA +0 -602
  45. {invarlock-0.3.7.dist-info → invarlock-0.3.9.dist-info}/entry_points.txt +0 -0
  46. {invarlock-0.3.7.dist-info → invarlock-0.3.9.dist-info}/licenses/LICENSE +0 -0
  47. {invarlock-0.3.7.dist-info → invarlock-0.3.9.dist-info}/top_level.txt +0 -0
@@ -9,8 +9,7 @@ from typing import Any
9
9
 
10
10
  import yaml
11
11
 
12
- # Import certificate module for helper access without creating hard cycles
13
- from . import certificate as C
12
+ from .report_schema import validate_report
14
13
 
15
14
  # Console Validation Block helpers (allow-list driven)
16
15
  _CONSOLE_LABELS_DEFAULT = [
@@ -37,8 +36,10 @@ def _load_console_labels() -> list[str]:
37
36
  return list(_CONSOLE_LABELS_DEFAULT)
38
37
 
39
38
 
40
- def compute_console_validation_block(certificate: dict[str, Any]) -> dict[str, Any]:
41
- """Produce a normalized console validation block from a certificate.
39
+ def compute_console_validation_block(
40
+ evaluation_report: dict[str, Any],
41
+ ) -> dict[str, Any]:
42
+ """Produce a normalized console validation block from an evaluation report.
42
43
 
43
44
  Returns a dict with keys:
44
45
  - labels: the canonical label list
@@ -47,8 +48,8 @@ def compute_console_validation_block(certificate: dict[str, Any]) -> dict[str, A
47
48
  counted only when evaluated.
48
49
  """
49
50
  labels = _load_console_labels()
50
- validation = certificate.get("validation", {}) or {}
51
- guard_ctx = certificate.get("guard_overhead", {}) or {}
51
+ validation = evaluation_report.get("validation", {}) or {}
52
+ guard_ctx = evaluation_report.get("guard_overhead", {}) or {}
52
53
  guard_evaluated = (
53
54
  bool(guard_ctx.get("evaluated")) if isinstance(guard_ctx, dict) else False
54
55
  )
@@ -121,18 +122,18 @@ def _render_executive_dashboard(cert: dict[str, Any]) -> str:
121
122
 
122
123
 
123
124
  def _append_safety_dashboard_section(
124
- lines: list[str], certificate: dict[str, Any]
125
+ lines: list[str], evaluation_report: dict[str, Any]
125
126
  ) -> None:
126
- """Append a concise, first-screen dashboard for the certificate."""
127
- block = compute_console_validation_block(certificate)
127
+ """Append a concise, first-screen dashboard for the evaluation report."""
128
+ block = compute_console_validation_block(evaluation_report)
128
129
  overall_pass = bool(block.get("overall_pass"))
129
130
  overall_status = (
130
131
  f"{'✅' if overall_pass else '❌'} {'PASS' if overall_pass else 'FAIL'}"
131
132
  )
132
133
 
133
- validation = certificate.get("validation", {}) or {}
134
- pm = certificate.get("primary_metric", {}) or {}
135
- auto = certificate.get("auto", {}) or {}
134
+ validation = evaluation_report.get("validation", {}) or {}
135
+ pm = evaluation_report.get("primary_metric", {}) or {}
136
+ auto = evaluation_report.get("auto", {}) or {}
136
137
  tier = str(auto.get("tier") or "balanced").lower()
137
138
 
138
139
  # Primary metric summary
@@ -172,7 +173,7 @@ def _append_safety_dashboard_section(
172
173
  pm_status = (
173
174
  f"{'✅' if pm_ok else '❌'} {measured}"
174
175
  if isinstance(pm_ok, bool)
175
- else f"🛈 {measured}"
176
+ else f"ℹ️ {measured}"
176
177
  )
177
178
 
178
179
  # Drift summary (final/preview ratio) when preview/final are numeric
@@ -205,7 +206,7 @@ def _append_safety_dashboard_section(
205
206
  drift_status = (
206
207
  f"{'✅' if drift_ok else '❌'} {drift_val}"
207
208
  if isinstance(drift_ok, bool)
208
- else f"🛈 {drift_val}"
209
+ else f"ℹ️ {drift_val}"
209
210
  )
210
211
 
211
212
  def _gate_cell(key: str, ok_default: bool | None = None) -> str:
@@ -217,10 +218,10 @@ def _append_safety_dashboard_section(
217
218
  else:
218
219
  ok = bool(validation.get(key))
219
220
  if ok is None:
220
- return "🛈 N/A"
221
+ return "ℹ️ N/A"
221
222
  return "✅ PASS" if ok else "❌ FAIL"
222
223
 
223
- overhead_ctx = certificate.get("guard_overhead", {}) or {}
224
+ overhead_ctx = evaluation_report.get("guard_overhead", {}) or {}
224
225
  overhead_evaluated = (
225
226
  bool(overhead_ctx.get("evaluated")) if isinstance(overhead_ctx, dict) else False
226
227
  )
@@ -247,11 +248,11 @@ def _append_safety_dashboard_section(
247
248
  "Overhead",
248
249
  f"{'✅' if bool(validation.get('guard_overhead_acceptable', True)) else '❌'} {overhead_measured}"
249
250
  if isinstance(validation, dict)
250
- else f"🛈 {overhead_measured}",
251
+ else f"ℹ️ {overhead_measured}",
251
252
  threshold_str,
252
253
  )
253
254
 
254
- lines.append("## Safety Dashboard")
255
+ lines.append("## Evaluation Dashboard")
255
256
  lines.append("")
256
257
  lines.append("| Check | Status | Quick Summary |")
257
258
  lines.append("|-------|--------|---------------|")
@@ -271,10 +272,10 @@ def _append_safety_dashboard_section(
271
272
 
272
273
 
273
274
  def _append_primary_metric_section(
274
- lines: list[str], certificate: dict[str, Any]
275
+ lines: list[str], evaluation_report: dict[str, Any]
275
276
  ) -> None:
276
277
  """Append the Primary Metric section early for quick triage."""
277
- pm = certificate.get("primary_metric")
278
+ pm = evaluation_report.get("primary_metric")
278
279
  if not isinstance(pm, dict) or not pm:
279
280
  return
280
281
 
@@ -342,7 +343,7 @@ def _append_primary_metric_section(
342
343
 
343
344
  # Secondary metrics (informational)
344
345
  try:
345
- secs = certificate.get("secondary_metrics")
346
+ secs = evaluation_report.get("secondary_metrics")
346
347
  if isinstance(secs, list) and secs:
347
348
  lines.append("## Secondary Metrics (informational)")
348
349
  lines.append("")
@@ -375,10 +376,10 @@ def _append_primary_metric_section(
375
376
 
376
377
 
377
378
  def _append_policy_configuration_section(
378
- lines: list[str], certificate: dict[str, Any]
379
+ lines: list[str], evaluation_report: dict[str, Any]
379
380
  ) -> None:
380
- resolved_policy = certificate.get("resolved_policy")
381
- policy_provenance = certificate.get("policy_provenance", {}) or {}
381
+ resolved_policy = evaluation_report.get("resolved_policy")
382
+ policy_provenance = evaluation_report.get("policy_provenance", {}) or {}
382
383
  has_prov = isinstance(policy_provenance, dict) and bool(policy_provenance)
383
384
  has_resolved = isinstance(resolved_policy, dict) and bool(resolved_policy)
384
385
  if not (has_prov or has_resolved):
@@ -391,12 +392,12 @@ def _append_policy_configuration_section(
391
392
  if has_prov:
392
393
  tier = policy_provenance.get("tier")
393
394
  if not tier:
394
- tier = (certificate.get("auto", {}) or {}).get("tier")
395
+ tier = (evaluation_report.get("auto", {}) or {}).get("tier")
395
396
  digest_value = None
396
397
  if has_prov:
397
398
  digest_value = policy_provenance.get("policy_digest")
398
399
  if not digest_value:
399
- digest_value = (certificate.get("policy_digest", {}) or {}).get(
400
+ digest_value = (evaluation_report.get("policy_digest", {}) or {}).get(
400
401
  "thresholds_hash"
401
402
  )
402
403
 
@@ -436,10 +437,10 @@ def _append_policy_configuration_section(
436
437
 
437
438
 
438
439
  def _append_dataset_and_provenance_section(
439
- lines: list[str], certificate: dict[str, Any]
440
+ lines: list[str], evaluation_report: dict[str, Any]
440
441
  ) -> None:
441
- dataset = certificate.get("dataset", {}) or {}
442
- provenance_info = certificate.get("provenance", {}) or {}
442
+ dataset = evaluation_report.get("dataset", {}) or {}
443
+ provenance_info = evaluation_report.get("provenance", {}) or {}
443
444
 
444
445
  has_dataset = isinstance(dataset, dict) and bool(dataset)
445
446
  has_provenance = isinstance(provenance_info, dict) and bool(provenance_info)
@@ -545,14 +546,14 @@ def _append_dataset_and_provenance_section(
545
546
  )
546
547
 
547
548
  try:
548
- conf = certificate.get("confidence", {}) or {}
549
+ conf = evaluation_report.get("confidence", {}) or {}
549
550
  if isinstance(conf, dict) and conf.get("label"):
550
551
  lines.append(f"- **Confidence:** {conf.get('label')}")
551
552
  except Exception:
552
553
  pass
553
554
 
554
555
  try:
555
- pd = certificate.get("policy_digest", {}) or {}
556
+ pd = evaluation_report.get("policy_digest", {}) or {}
556
557
  if isinstance(pd, dict) and pd:
557
558
  pv = pd.get("policy_version")
558
559
  th = pd.get("thresholds_hash")
@@ -671,13 +672,13 @@ def _append_accuracy_subgroups(lines: list[str], subgroups: dict[str, Any]) -> N
671
672
  lines.append("")
672
673
 
673
674
 
674
- def _compute_certificate_hash(certificate: dict[str, Any]) -> str:
675
- """Compute integrity hash for the certificate.
675
+ def _compute_report_hash(evaluation_report: dict[str, Any]) -> str:
676
+ """Compute integrity hash for the evaluation_report.
676
677
 
677
678
  Hash ignores the `artifacts` section for stability across saves.
678
679
  """
679
680
  # Create a copy without the artifacts section for stable hashing
680
- cert_copy = dict(certificate or {})
681
+ cert_copy = dict(evaluation_report or {})
681
682
  cert_copy.pop("artifacts", None)
682
683
 
683
684
  # Sort keys for deterministic hashing
@@ -687,8 +688,8 @@ def _compute_certificate_hash(certificate: dict[str, Any]) -> str:
687
688
  return _hash.sha256(cert_str.encode()).hexdigest()[:16]
688
689
 
689
690
 
690
- def build_console_summary_pack(certificate: dict[str, Any]) -> dict[str, Any]:
691
- """Build a small, reusable console summary pack from a certificate.
691
+ def build_console_summary_pack(evaluation_report: dict[str, Any]) -> dict[str, Any]:
692
+ """Build a small, reusable console summary pack from a evaluation_report.
692
693
 
693
694
  Returns a dict with:
694
695
  - overall_pass: bool
@@ -696,7 +697,7 @@ def build_console_summary_pack(certificate: dict[str, Any]) -> dict[str, Any]:
696
697
  - gate_lines: list of "<Label>: <Status>" strings for each evaluated gate
697
698
  - labels: the canonical label list used
698
699
  """
699
- block = compute_console_validation_block(certificate)
700
+ block = compute_console_validation_block(evaluation_report)
700
701
  overall_pass = bool(block.get("overall_pass"))
701
702
  emoji = "✅" if overall_pass else "❌"
702
703
  overall_line = f"Overall Status: {emoji} {'PASS' if overall_pass else 'FAIL'}"
@@ -717,43 +718,38 @@ def build_console_summary_pack(certificate: dict[str, Any]) -> dict[str, Any]:
717
718
  }
718
719
 
719
720
 
720
- def render_certificate_markdown(certificate: dict[str, Any]) -> str:
721
+ def render_report_markdown(evaluation_report: dict[str, Any]) -> str:
721
722
  """
722
- Render a certificate as a formatted Markdown report with pretty tables.
723
+ Render an evaluation report as a formatted Markdown report with pretty tables.
723
724
 
724
- This implementation is moved from certificate.py to keep that module lean.
725
- To avoid circular import issues, we alias helpers from the certificate
726
- module inside the function body.
725
+ This implementation is moved from report_builder.py to keep that module lean.
727
726
  """
728
- # Alias frequently used helpers locally to avoid editing the large body
729
- validate_certificate = C.validate_certificate
730
-
731
- if not validate_certificate(certificate):
732
- raise ValueError("Invalid certificate structure")
727
+ if not validate_report(evaluation_report):
728
+ raise ValueError("Invalid evaluation report structure")
733
729
 
734
730
  lines: list[str] = []
735
731
  appendix_lines: list[str] = []
736
- edit_name = str(certificate.get("edit_name") or "").lower()
732
+ edit_name = str(evaluation_report.get("edit_name") or "").lower()
737
733
 
738
734
  # Header
739
- lines.append("# InvarLock Evaluation Certificate")
735
+ lines.append("# InvarLock Evaluation Report")
740
736
  lines.append("")
741
737
  lines.append(
742
738
  "> *Basis: “point” gates check the point estimate; “upper” gates check the CI "
743
739
  "upper bound; “point & upper” requires both to pass.*"
744
740
  )
745
741
  lines.append("")
746
- lines.append(f"**Schema Version:** {certificate['schema_version']}")
747
- lines.append(f"**Run ID:** `{certificate['run_id']}`")
748
- lines.append(f"**Generated:** {certificate['artifacts']['generated_at']}")
749
- lines.append(f"**Edit Type:** {certificate.get('edit_name', 'Unknown')}")
742
+ lines.append(f"**Schema Version:** {evaluation_report['schema_version']}")
743
+ lines.append(f"**Run ID:** `{evaluation_report['run_id']}`")
744
+ lines.append(f"**Generated:** {evaluation_report['artifacts']['generated_at']}")
745
+ lines.append(f"**Edit Type:** {evaluation_report.get('edit_name', 'Unknown')}")
750
746
  lines.append("")
751
747
  lines.append(
752
- "> Full evidence: see [`evaluation.cert.json`](evaluation.cert.json) for complete provenance, digests, and raw measurements."
748
+ "> Full evidence: see [`evaluation.report.json`](evaluation.report.json) for complete provenance, digests, and raw measurements."
753
749
  )
754
750
  lines.append("")
755
751
 
756
- plugins = certificate.get("plugins", {})
752
+ plugins = evaluation_report.get("plugins", {})
757
753
  if isinstance(plugins, dict) and plugins:
758
754
  lines.append("## Plugin Provenance")
759
755
  lines.append("")
@@ -780,7 +776,7 @@ def render_certificate_markdown(certificate: dict[str, Any]) -> str:
780
776
  # Executive Summary with validation status (canonical, from console block)
781
777
  lines.append("## Executive Summary")
782
778
  lines.append("")
783
- _block = compute_console_validation_block(certificate)
779
+ _block = compute_console_validation_block(evaluation_report)
784
780
  overall_pass = bool(_block.get("overall_pass"))
785
781
  status_emoji = "✅" if overall_pass else "❌"
786
782
  lines.append(
@@ -789,13 +785,13 @@ def render_certificate_markdown(certificate: dict[str, Any]) -> str:
789
785
  # Window Plan one-liner for quick audit
790
786
  try:
791
787
  plan_ctx = (
792
- certificate.get("window_plan")
793
- or certificate.get("dataset", {}).get("windows", {})
794
- or certificate.get("ppl", {}).get("window_plan")
788
+ evaluation_report.get("window_plan")
789
+ or evaluation_report.get("dataset", {}).get("windows", {})
790
+ or evaluation_report.get("ppl", {}).get("window_plan")
795
791
  )
796
- seq_len = certificate.get("dataset", {}).get("seq_len") or certificate.get(
797
- "dataset", {}
798
- ).get("sequence_length")
792
+ seq_len = evaluation_report.get("dataset", {}).get(
793
+ "seq_len"
794
+ ) or evaluation_report.get("dataset", {}).get("sequence_length")
799
795
  if isinstance(plan_ctx, dict):
800
796
  profile = plan_ctx.get("profile")
801
797
  preview_n = (
@@ -815,23 +811,23 @@ def render_certificate_markdown(certificate: dict[str, Any]) -> str:
815
811
  pass
816
812
  lines.append("")
817
813
 
818
- dashboard = _render_executive_dashboard(certificate)
814
+ dashboard = _render_executive_dashboard(evaluation_report)
819
815
  if dashboard:
820
816
  lines.extend(dashboard.splitlines())
821
817
  lines.append("")
822
818
 
823
819
  lines.append("## Contents")
824
820
  lines.append("")
825
- lines.append("- [Safety Dashboard](#safety-dashboard)")
821
+ lines.append("- [Evaluation Dashboard](#evaluation-dashboard)")
826
822
  lines.append("- [Quality Gates](#quality-gates)")
827
- lines.append("- [Safety Check Details](#safety-check-details)")
823
+ lines.append("- [Guard Check Details](#guard-check-details)")
828
824
  lines.append("- [Primary Metric](#primary-metric)")
829
825
  lines.append("- [Guard Observability](#guard-observability)")
830
826
  lines.append("- [Model Information](#model-information)")
831
827
  lines.append("- [Dataset and Provenance](#dataset-and-provenance)")
832
828
  lines.append("- [Policy Configuration](#policy-configuration)")
833
829
  lines.append("- [Appendix](#appendix)")
834
- lines.append("- [Certificate Integrity](#certificate-integrity)")
830
+ lines.append("- [Evaluation Report Integrity](#evaluation-report-integrity)")
835
831
  lines.append("")
836
832
 
837
833
  # Validation table with canonical gates (mirrors console allow-list)
@@ -840,9 +836,9 @@ def render_certificate_markdown(certificate: dict[str, Any]) -> str:
840
836
  lines.append("| Gate | Status | Measured | Threshold | Basis | Description |")
841
837
  lines.append("|------|--------|----------|-----------|-------|-------------|")
842
838
 
843
- pm_block = certificate.get("primary_metric", {}) or {}
839
+ pm_block = evaluation_report.get("primary_metric", {}) or {}
844
840
  has_pm = isinstance(pm_block, dict) and bool(pm_block)
845
- auto_info = certificate.get("auto", {})
841
+ auto_info = evaluation_report.get("auto", {})
846
842
  tier = (auto_info.get("tier") or "balanced").lower()
847
843
 
848
844
  # Helper to emit Primary Metric Acceptable row
@@ -851,7 +847,9 @@ def render_certificate_markdown(certificate: dict[str, Any]) -> str:
851
847
  value = pm_block.get("ratio_vs_baseline")
852
848
  gating_basis = pm_block.get("gating_basis") or "point"
853
849
  ok = bool(
854
- certificate.get("validation", {}).get("primary_metric_acceptable", True)
850
+ evaluation_report.get("validation", {}).get(
851
+ "primary_metric_acceptable", True
852
+ )
855
853
  )
856
854
  status = "✅ PASS" if ok else "❌ FAIL"
857
855
  if pm_kind in {"accuracy", "vqa_accuracy"}:
@@ -885,7 +883,7 @@ def render_certificate_markdown(certificate: dict[str, Any]) -> str:
885
883
  # Helper to emit Preview Final Drift Acceptable row
886
884
  def _emit_drift_gate_row() -> None:
887
885
  ok = bool(
888
- certificate.get("validation", {}).get(
886
+ evaluation_report.get("validation", {}).get(
889
887
  "preview_final_drift_acceptable", True
890
888
  )
891
889
  )
@@ -942,12 +940,14 @@ def render_certificate_markdown(certificate: dict[str, Any]) -> str:
942
940
 
943
941
  # Helper to emit Guard Overhead Acceptable row (only when evaluated)
944
942
  def _emit_overhead_gate_row() -> None:
945
- guard_overhead = certificate.get("guard_overhead", {}) or {}
943
+ guard_overhead = evaluation_report.get("guard_overhead", {}) or {}
946
944
  evaluated = bool(guard_overhead.get("evaluated"))
947
945
  if not evaluated:
948
946
  return
949
947
  ok = bool(
950
- certificate.get("validation", {}).get("guard_overhead_acceptable", True)
948
+ evaluation_report.get("validation", {}).get(
949
+ "guard_overhead_acceptable", True
950
+ )
951
951
  )
952
952
  status = "✅ PASS" if ok else "❌ FAIL"
953
953
  overhead_pct = guard_overhead.get("overhead_percent")
@@ -975,7 +975,7 @@ def render_certificate_markdown(certificate: dict[str, Any]) -> str:
975
975
  )
976
976
 
977
977
  def _emit_pm_tail_gate_row() -> None:
978
- pm_tail = certificate.get("primary_metric_tail", {}) or {}
978
+ pm_tail = evaluation_report.get("primary_metric_tail", {}) or {}
979
979
  if not isinstance(pm_tail, dict) or not pm_tail:
980
980
  return
981
981
 
@@ -985,7 +985,7 @@ def render_certificate_markdown(certificate: dict[str, Any]) -> str:
985
985
  warned = bool(pm_tail.get("warned", False))
986
986
 
987
987
  if not evaluated:
988
- status = "🛈 INFO"
988
+ status = "ℹ️ INFO"
989
989
  elif passed:
990
990
  status = "✅ PASS"
991
991
  elif mode == "fail":
@@ -1042,17 +1042,17 @@ def render_certificate_markdown(certificate: dict[str, Any]) -> str:
1042
1042
  _emit_overhead_gate_row()
1043
1043
 
1044
1044
  # Annotate hysteresis usage if applied
1045
- if certificate.get("validation", {}).get("hysteresis_applied"):
1045
+ if evaluation_report.get("validation", {}).get("hysteresis_applied"):
1046
1046
  lines.append("- Note: hysteresis applied to gate boundary")
1047
1047
 
1048
1048
  lines.append("")
1049
- lines.append("## Safety Check Details")
1049
+ lines.append("## Guard Check Details")
1050
1050
  lines.append("")
1051
- lines.append("| Safety Check | Status | Measured | Threshold | Description |")
1051
+ lines.append("| Guard Check | Status | Measured | Threshold | Description |")
1052
1052
  lines.append("|--------------|--------|----------|-----------|-------------|")
1053
1053
 
1054
- inv_summary = certificate["invariants"]
1055
- validation = certificate.get("validation", {})
1054
+ inv_summary = evaluation_report["invariants"]
1055
+ validation = evaluation_report.get("validation", {})
1056
1056
  inv_status = "✅ PASS" if validation.get("invariants_pass", False) else "❌ FAIL"
1057
1057
  inv_counts = inv_summary.get("summary", {}) or {}
1058
1058
  inv_measure = inv_summary.get("status", "pass").upper()
@@ -1084,23 +1084,23 @@ def render_certificate_markdown(certificate: dict[str, Any]) -> str:
1084
1084
  lines.append(f"- Non-fatal: {non_fatal_message}")
1085
1085
 
1086
1086
  spec_status = "✅ PASS" if validation.get("spectral_stable", False) else "❌ FAIL"
1087
- caps_applied = certificate["spectral"]["caps_applied"]
1087
+ caps_applied = evaluation_report["spectral"]["caps_applied"]
1088
1088
  lines.append(
1089
1089
  f"| Spectral Stability | {spec_status} | {caps_applied} violations | < 5 | Weight matrix spectral norms |"
1090
1090
  )
1091
1091
 
1092
1092
  # Catastrophic spike safety stop row is now driven by primary metric flags
1093
- if isinstance(certificate.get("primary_metric"), dict):
1093
+ if isinstance(evaluation_report.get("primary_metric"), dict):
1094
1094
  pm_ok = bool(validation.get("primary_metric_acceptable", True))
1095
- pm_ratio = certificate.get("primary_metric", {}).get("ratio_vs_baseline")
1095
+ pm_ratio = evaluation_report.get("primary_metric", {}).get("ratio_vs_baseline")
1096
1096
  if isinstance(pm_ratio, int | float):
1097
1097
  lines.append(
1098
- f"| Catastrophic Spike Gate (safety stop) | {'✅ PASS' if pm_ok else '❌ FAIL'} | {pm_ratio:.3f}x | ≤ 2.0x | Hard stop @ 2.0× |"
1098
+ f"| Catastrophic Spike Gate (hard stop) | {'✅ PASS' if pm_ok else '❌ FAIL'} | {pm_ratio:.3f}x | ≤ 2.0x | Hard stop @ 2.0× |"
1099
1099
  )
1100
1100
 
1101
1101
  # Include RMT Health row for compatibility and clarity
1102
1102
  rmt_status = "✅ PASS" if validation.get("rmt_stable", False) else "❌ FAIL"
1103
- rmt_state = certificate.get("rmt", {}).get("status", "unknown").title()
1103
+ rmt_state = evaluation_report.get("rmt", {}).get("status", "unknown").title()
1104
1104
  lines.append(
1105
1105
  f"| RMT Health | {rmt_status} | {rmt_state} | ε-rule | Random Matrix Theory guard status |"
1106
1106
  )
@@ -1108,8 +1108,8 @@ def render_certificate_markdown(certificate: dict[str, Any]) -> str:
1108
1108
  # Pairing + Bootstrap snapshot (quick audit surface)
1109
1109
  try:
1110
1110
  stats = (
1111
- certificate.get("dataset", {}).get("windows", {}).get("stats", {})
1112
- or certificate.get("ppl", {}).get("stats", {})
1111
+ evaluation_report.get("dataset", {}).get("windows", {}).get("stats", {})
1112
+ or evaluation_report.get("ppl", {}).get("stats", {})
1113
1113
  or {}
1114
1114
  )
1115
1115
  paired_windows = stats.get("paired_windows")
@@ -1138,7 +1138,7 @@ def render_certificate_markdown(certificate: dict[str, Any]) -> str:
1138
1138
  parts.append(f"{float(overlap_frac) * 100.0:.1f}% overlap")
1139
1139
  elif overlap_frac is not None:
1140
1140
  parts.append(f"overlap={overlap_frac}")
1141
- lines.append(f"✅ Pairing: {', '.join(parts) if parts else 'N/A'}")
1141
+ lines.append(f"- ✅ Pairing: {', '.join(parts) if parts else 'N/A'}")
1142
1142
  if isinstance(bootstrap, dict):
1143
1143
  reps = bootstrap.get("replicates")
1144
1144
  bseed = bootstrap.get("seed")
@@ -1154,17 +1154,19 @@ def render_certificate_markdown(certificate: dict[str, Any]) -> str:
1154
1154
  bits.append(f"seed={int(bseed)}")
1155
1155
  except Exception:
1156
1156
  bits.append(f"seed={bseed}")
1157
- lines.append(f"✅ Bootstrap: {', '.join(bits) if bits else 'N/A'}")
1157
+ lines.append(f"- ✅ Bootstrap: {', '.join(bits) if bits else 'N/A'}")
1158
1158
  # Optional: show log-space paired Δ CI next to ratio CI for clarity
1159
- delta_ci = certificate.get("primary_metric", {}).get("ci") or certificate.get(
1160
- "ppl", {}
1161
- ).get("logloss_delta_ci")
1159
+ delta_ci = evaluation_report.get("primary_metric", {}).get(
1160
+ "ci"
1161
+ ) or evaluation_report.get("ppl", {}).get("logloss_delta_ci")
1162
1162
  if (
1163
1163
  isinstance(delta_ci, tuple | list)
1164
1164
  and len(delta_ci) == 2
1165
1165
  and all(isinstance(x, int | float) for x in delta_ci)
1166
1166
  ):
1167
- lines.append(f"🛈 Log Δ (paired) CI: [{delta_ci[0]:.6f}, {delta_ci[1]:.6f}]")
1167
+ lines.append(
1168
+ f"- ℹ️ Log Δ (paired) CI: [{delta_ci[0]:.6f}, {delta_ci[1]:.6f}]"
1169
+ )
1168
1170
  except Exception:
1169
1171
  pass
1170
1172
 
@@ -1185,13 +1187,13 @@ def render_certificate_markdown(certificate: dict[str, Any]) -> str:
1185
1187
 
1186
1188
  lines.append("")
1187
1189
 
1188
- _append_primary_metric_section(lines, certificate)
1190
+ _append_primary_metric_section(lines, evaluation_report)
1189
1191
 
1190
1192
  # Guard observability snapshots
1191
1193
  lines.append("## Guard Observability")
1192
1194
  lines.append("")
1193
1195
 
1194
- spectral_info = certificate.get("spectral", {}) or {}
1196
+ spectral_info = evaluation_report.get("spectral", {}) or {}
1195
1197
  if spectral_info:
1196
1198
  lines.append("### Spectral Guard Summary")
1197
1199
  lines.append("")
@@ -1260,7 +1262,7 @@ def render_certificate_markdown(certificate: dict[str, Any]) -> str:
1260
1262
  if max_module:
1261
1263
  max_val += f" – {max_module}"
1262
1264
  if kappa_f is None:
1263
- max_status = "🛈 No κ"
1265
+ max_status = "ℹ️ No κ"
1264
1266
  elif max_abs_z <= kappa_f:
1265
1267
  max_status = f"✅ Within κ={kappa_f:.3f}"
1266
1268
  else:
@@ -1280,7 +1282,7 @@ def render_certificate_markdown(certificate: dict[str, Any]) -> str:
1280
1282
  if isinstance(mt_m, int | float) and math.isfinite(float(mt_m)):
1281
1283
  parts.append(f"m={int(mt_m)}")
1282
1284
  lines.append(
1283
- f"| Multiple Testing | {', '.join(parts) if parts else '—'} | 🛈 INFO |"
1285
+ f"| Multiple Testing | {', '.join(parts) if parts else '—'} | ℹ️ INFO |"
1284
1286
  )
1285
1287
 
1286
1288
  lines.append("")
@@ -1360,7 +1362,7 @@ def render_certificate_markdown(certificate: dict[str, Any]) -> str:
1360
1362
  lines.append("</details>")
1361
1363
  lines.append("")
1362
1364
 
1363
- rmt_info = certificate.get("rmt", {}) or {}
1365
+ rmt_info = evaluation_report.get("rmt", {}) or {}
1364
1366
  if rmt_info:
1365
1367
  lines.append("### RMT Guard")
1366
1368
  lines.append("")
@@ -1411,7 +1413,7 @@ def render_certificate_markdown(certificate: dict[str, Any]) -> str:
1411
1413
  else:
1412
1414
  lines.append("")
1413
1415
 
1414
- guard_overhead_info = certificate.get("guard_overhead", {}) or {}
1416
+ guard_overhead_info = evaluation_report.get("guard_overhead", {}) or {}
1415
1417
  if guard_overhead_info:
1416
1418
  lines.append("### Guard Overhead")
1417
1419
  lines.append("")
@@ -1439,7 +1441,7 @@ def render_certificate_markdown(certificate: dict[str, Any]) -> str:
1439
1441
  overhead_source = guard_overhead_info.get("source")
1440
1442
  if overhead_source:
1441
1443
  lines.append(f"- Source: {overhead_source}")
1442
- plan_ctx = certificate.get("provenance", {}).get("window_plan", {})
1444
+ plan_ctx = evaluation_report.get("provenance", {}).get("window_plan", {})
1443
1445
  if isinstance(plan_ctx, dict) and plan_ctx:
1444
1446
  plan_preview = (
1445
1447
  plan_ctx.get("preview_n")
@@ -1458,8 +1460,8 @@ def render_certificate_markdown(certificate: dict[str, Any]) -> str:
1458
1460
  lines.append("")
1459
1461
 
1460
1462
  compression_diag = (
1461
- certificate.get("structure", {}).get("compression_diagnostics", {})
1462
- if isinstance(certificate.get("structure"), dict)
1463
+ evaluation_report.get("structure", {}).get("compression_diagnostics", {})
1464
+ if isinstance(evaluation_report.get("structure"), dict)
1463
1465
  else {}
1464
1466
  )
1465
1467
  inference_flags = compression_diag.get("inferred") or {}
@@ -1485,7 +1487,7 @@ def render_certificate_markdown(certificate: dict[str, Any]) -> str:
1485
1487
  # Model and Configuration
1486
1488
  lines.append("## Model Information")
1487
1489
  lines.append("")
1488
- meta = certificate["meta"]
1490
+ meta = evaluation_report["meta"]
1489
1491
  lines.append(f"- **Model ID:** {meta.get('model_id')}")
1490
1492
  lines.append(f"- **Adapter:** {meta.get('adapter')}")
1491
1493
  lines.append(f"- **Device:** {meta.get('device')}")
@@ -1556,7 +1558,7 @@ def render_certificate_markdown(certificate: dict[str, Any]) -> str:
1556
1558
  # Edit Configuration (removed duplicate Edit Information section)
1557
1559
 
1558
1560
  # Auto-tuning Configuration
1559
- auto = certificate["auto"]
1561
+ auto = evaluation_report["auto"]
1560
1562
  if auto["tier"] != "none":
1561
1563
  lines.append("## Auto-Tuning Configuration")
1562
1564
  lines.append("")
@@ -1574,18 +1576,18 @@ def render_certificate_markdown(certificate: dict[str, Any]) -> str:
1574
1576
  pass
1575
1577
  lines.append("")
1576
1578
 
1577
- _append_dataset_and_provenance_section(lines, certificate)
1579
+ _append_dataset_and_provenance_section(lines, evaluation_report)
1578
1580
 
1579
1581
  # Structural Changes heading is printed with content later; avoid empty header here
1580
1582
 
1581
1583
  # System Overhead section (latency/throughput)
1582
- sys_over = certificate.get("system_overhead", {}) or {}
1584
+ sys_over = evaluation_report.get("system_overhead", {}) or {}
1583
1585
  if isinstance(sys_over, dict) and sys_over:
1584
1586
  _append_system_overhead_section(lines, sys_over)
1585
1587
 
1586
1588
  # Accuracy Subgroups (informational)
1587
1589
  try:
1588
- cls = certificate.get("classification", {})
1590
+ cls = evaluation_report.get("classification", {})
1589
1591
  sub = cls.get("subgroups") if isinstance(cls, dict) else None
1590
1592
  if isinstance(sub, dict) and sub:
1591
1593
  _append_accuracy_subgroups(lines, sub)
@@ -1593,7 +1595,7 @@ def render_certificate_markdown(certificate: dict[str, Any]) -> str:
1593
1595
  pass
1594
1596
  # Structural Changes
1595
1597
  try:
1596
- structure = certificate.get("structure", {}) or {}
1598
+ structure = evaluation_report.get("structure", {}) or {}
1597
1599
  params_changed = int(structure.get("params_changed", 0) or 0)
1598
1600
  layers_modified = int(structure.get("layers_modified", 0) or 0)
1599
1601
  bitwidth_changes = 0
@@ -1605,7 +1607,7 @@ def render_certificate_markdown(certificate: dict[str, Any]) -> str:
1605
1607
  has_changes = any(
1606
1608
  v > 0 for v in (params_changed, layers_modified, bitwidth_changes)
1607
1609
  )
1608
- edit_name = str(certificate.get("edit_name", "unknown"))
1610
+ edit_name = str(evaluation_report.get("edit_name", "unknown"))
1609
1611
  if has_changes:
1610
1612
  lines.append("## Structural Changes")
1611
1613
  lines.append("")
@@ -1735,7 +1737,7 @@ def render_certificate_markdown(certificate: dict[str, Any]) -> str:
1735
1737
  lines.append("")
1736
1738
 
1737
1739
  # Variance Guard (Spectral/RMT summaries are already provided above)
1738
- variance = certificate["variance"]
1740
+ variance = evaluation_report["variance"]
1739
1741
  appendix_lines.append("### Variance Guard")
1740
1742
  appendix_lines.append("")
1741
1743
 
@@ -1766,7 +1768,7 @@ def render_certificate_markdown(certificate: dict[str, Any]) -> str:
1766
1768
  )
1767
1769
  # Add concise rationale aligned with Balanced predictive gate contract
1768
1770
  try:
1769
- ve_policy = certificate.get("policies", {}).get("variance", {})
1771
+ ve_policy = evaluation_report.get("policies", {}).get("variance", {})
1770
1772
  min_effect = ve_policy.get("min_effect_lognll")
1771
1773
  if isinstance(min_effect, int | float):
1772
1774
  appendix_lines.append(
@@ -1799,7 +1801,11 @@ def render_certificate_markdown(certificate: dict[str, Any]) -> str:
1799
1801
  lines.append("")
1800
1802
 
1801
1803
  # MoE Observability (non-gating)
1802
- moe = certificate.get("moe", {}) if isinstance(certificate.get("moe"), dict) else {}
1804
+ moe = (
1805
+ evaluation_report.get("moe", {})
1806
+ if isinstance(evaluation_report.get("moe"), dict)
1807
+ else {}
1808
+ )
1803
1809
  if moe:
1804
1810
  lines.append("## MoE Observability")
1805
1811
  lines.append("")
@@ -1828,16 +1834,16 @@ def render_certificate_markdown(certificate: dict[str, Any]) -> str:
1828
1834
  lines.append(f"- **{label}:** {float(moe[key]):+.4f}")
1829
1835
  lines.append("")
1830
1836
 
1831
- _append_policy_configuration_section(lines, certificate)
1837
+ _append_policy_configuration_section(lines, evaluation_report)
1832
1838
 
1833
1839
  appendix_lines.append("### Artifacts")
1834
1840
  appendix_lines.append("")
1835
- artifacts = certificate["artifacts"]
1841
+ artifacts = evaluation_report["artifacts"]
1836
1842
  if artifacts.get("events_path"):
1837
1843
  appendix_lines.append(f"- **Events Log:** `{artifacts['events_path']}`")
1838
1844
  if artifacts.get("report_path"):
1839
1845
  appendix_lines.append(f"- **Full Report:** `{artifacts['report_path']}`")
1840
- appendix_lines.append(f"- **Certificate Generated:** {artifacts['generated_at']}")
1846
+ appendix_lines.append(f"- **Report Generated:** {artifacts['generated_at']}")
1841
1847
  appendix_lines.append("")
1842
1848
 
1843
1849
  if appendix_lines:
@@ -1845,19 +1851,19 @@ def render_certificate_markdown(certificate: dict[str, Any]) -> str:
1845
1851
  lines.append("")
1846
1852
  lines.extend(appendix_lines)
1847
1853
 
1848
- # Certificate Hash for Integrity
1849
- cert_hash = _compute_certificate_hash(certificate)
1850
- lines.append("## Certificate Integrity")
1854
+ # Report Hash for Integrity
1855
+ cert_hash = _compute_report_hash(evaluation_report)
1856
+ lines.append("## Evaluation Report Integrity")
1851
1857
  lines.append("")
1852
- lines.append(f"**Certificate Hash:** `{cert_hash}`")
1858
+ lines.append(f"**Report Hash:** `{cert_hash}`")
1853
1859
  lines.append("")
1854
1860
  lines.append("---")
1855
1861
  lines.append("")
1856
1862
  lines.append(
1857
- "*This InvarLock evaluation certificate provides a comprehensive assessment of model compression safety.*"
1863
+ "*This InvarLock Evaluation Report summarizes baseline‑paired evaluation results for a subject model relative to the provided baseline snapshot under the configured profile/preset.*"
1858
1864
  )
1859
1865
  lines.append(
1860
- "*All metrics are compared against the uncompressed baseline model for safety validation.*"
1866
+ "*It reports regression-risk indicators for the measured signals; it is not a broad AI safety, alignment, or content-safety guarantee.*"
1861
1867
  )
1862
1868
 
1863
1869
  return "\n".join(lines)