invarlock-0.2.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (132)
  1. invarlock/__init__.py +33 -0
  2. invarlock/__main__.py +10 -0
  3. invarlock/_data/runtime/profiles/ci_cpu.yaml +15 -0
  4. invarlock/_data/runtime/profiles/release.yaml +23 -0
  5. invarlock/_data/runtime/tiers.yaml +76 -0
  6. invarlock/adapters/__init__.py +102 -0
  7. invarlock/adapters/_capabilities.py +45 -0
  8. invarlock/adapters/auto.py +99 -0
  9. invarlock/adapters/base.py +530 -0
  10. invarlock/adapters/base_types.py +85 -0
  11. invarlock/adapters/hf_bert.py +852 -0
  12. invarlock/adapters/hf_gpt2.py +403 -0
  13. invarlock/adapters/hf_llama.py +485 -0
  14. invarlock/adapters/hf_mixin.py +383 -0
  15. invarlock/adapters/hf_onnx.py +112 -0
  16. invarlock/adapters/hf_t5.py +137 -0
  17. invarlock/adapters/py.typed +1 -0
  18. invarlock/assurance/__init__.py +43 -0
  19. invarlock/cli/__init__.py +8 -0
  20. invarlock/cli/__main__.py +8 -0
  21. invarlock/cli/_evidence.py +25 -0
  22. invarlock/cli/_json.py +75 -0
  23. invarlock/cli/adapter_auto.py +162 -0
  24. invarlock/cli/app.py +287 -0
  25. invarlock/cli/commands/__init__.py +26 -0
  26. invarlock/cli/commands/certify.py +403 -0
  27. invarlock/cli/commands/doctor.py +1358 -0
  28. invarlock/cli/commands/explain_gates.py +151 -0
  29. invarlock/cli/commands/export_html.py +100 -0
  30. invarlock/cli/commands/plugins.py +1331 -0
  31. invarlock/cli/commands/report.py +354 -0
  32. invarlock/cli/commands/run.py +4146 -0
  33. invarlock/cli/commands/verify.py +1040 -0
  34. invarlock/cli/config.py +396 -0
  35. invarlock/cli/constants.py +68 -0
  36. invarlock/cli/device.py +92 -0
  37. invarlock/cli/doctor_helpers.py +74 -0
  38. invarlock/cli/errors.py +6 -0
  39. invarlock/cli/overhead_utils.py +60 -0
  40. invarlock/cli/provenance.py +66 -0
  41. invarlock/cli/utils.py +41 -0
  42. invarlock/config.py +56 -0
  43. invarlock/core/__init__.py +62 -0
  44. invarlock/core/abi.py +15 -0
  45. invarlock/core/api.py +274 -0
  46. invarlock/core/auto_tuning.py +317 -0
  47. invarlock/core/bootstrap.py +226 -0
  48. invarlock/core/checkpoint.py +221 -0
  49. invarlock/core/contracts.py +73 -0
  50. invarlock/core/error_utils.py +64 -0
  51. invarlock/core/events.py +298 -0
  52. invarlock/core/exceptions.py +95 -0
  53. invarlock/core/registry.py +481 -0
  54. invarlock/core/retry.py +146 -0
  55. invarlock/core/runner.py +2041 -0
  56. invarlock/core/types.py +154 -0
  57. invarlock/edits/__init__.py +12 -0
  58. invarlock/edits/_edit_utils.py +249 -0
  59. invarlock/edits/_external_utils.py +268 -0
  60. invarlock/edits/noop.py +47 -0
  61. invarlock/edits/py.typed +1 -0
  62. invarlock/edits/quant_rtn.py +801 -0
  63. invarlock/edits/registry.py +166 -0
  64. invarlock/eval/__init__.py +23 -0
  65. invarlock/eval/bench.py +1207 -0
  66. invarlock/eval/bootstrap.py +50 -0
  67. invarlock/eval/data.py +2052 -0
  68. invarlock/eval/metrics.py +2167 -0
  69. invarlock/eval/primary_metric.py +767 -0
  70. invarlock/eval/probes/__init__.py +24 -0
  71. invarlock/eval/probes/fft.py +139 -0
  72. invarlock/eval/probes/mi.py +213 -0
  73. invarlock/eval/probes/post_attention.py +323 -0
  74. invarlock/eval/providers/base.py +67 -0
  75. invarlock/eval/providers/seq2seq.py +111 -0
  76. invarlock/eval/providers/text_lm.py +113 -0
  77. invarlock/eval/providers/vision_text.py +93 -0
  78. invarlock/eval/py.typed +1 -0
  79. invarlock/guards/__init__.py +18 -0
  80. invarlock/guards/_contracts.py +9 -0
  81. invarlock/guards/invariants.py +640 -0
  82. invarlock/guards/policies.py +805 -0
  83. invarlock/guards/py.typed +1 -0
  84. invarlock/guards/rmt.py +2097 -0
  85. invarlock/guards/spectral.py +1419 -0
  86. invarlock/guards/tier_config.py +354 -0
  87. invarlock/guards/variance.py +3298 -0
  88. invarlock/guards_ref/__init__.py +15 -0
  89. invarlock/guards_ref/rmt_ref.py +40 -0
  90. invarlock/guards_ref/spectral_ref.py +135 -0
  91. invarlock/guards_ref/variance_ref.py +60 -0
  92. invarlock/model_profile.py +353 -0
  93. invarlock/model_utils.py +221 -0
  94. invarlock/observability/__init__.py +10 -0
  95. invarlock/observability/alerting.py +535 -0
  96. invarlock/observability/core.py +546 -0
  97. invarlock/observability/exporters.py +565 -0
  98. invarlock/observability/health.py +588 -0
  99. invarlock/observability/metrics.py +457 -0
  100. invarlock/observability/py.typed +1 -0
  101. invarlock/observability/utils.py +553 -0
  102. invarlock/plugins/__init__.py +12 -0
  103. invarlock/plugins/hello_guard.py +33 -0
  104. invarlock/plugins/hf_awq_adapter.py +82 -0
  105. invarlock/plugins/hf_bnb_adapter.py +79 -0
  106. invarlock/plugins/hf_gptq_adapter.py +78 -0
  107. invarlock/plugins/py.typed +1 -0
  108. invarlock/py.typed +1 -0
  109. invarlock/reporting/__init__.py +7 -0
  110. invarlock/reporting/certificate.py +3221 -0
  111. invarlock/reporting/certificate_schema.py +244 -0
  112. invarlock/reporting/dataset_hashing.py +215 -0
  113. invarlock/reporting/guards_analysis.py +948 -0
  114. invarlock/reporting/html.py +32 -0
  115. invarlock/reporting/normalizer.py +235 -0
  116. invarlock/reporting/policy_utils.py +517 -0
  117. invarlock/reporting/primary_metric_utils.py +265 -0
  118. invarlock/reporting/render.py +1442 -0
  119. invarlock/reporting/report.py +903 -0
  120. invarlock/reporting/report_types.py +278 -0
  121. invarlock/reporting/utils.py +175 -0
  122. invarlock/reporting/validate.py +631 -0
  123. invarlock/security.py +176 -0
  124. invarlock/sparsity_utils.py +323 -0
  125. invarlock/utils/__init__.py +150 -0
  126. invarlock/utils/digest.py +45 -0
  127. invarlock-0.2.0.dist-info/METADATA +586 -0
  128. invarlock-0.2.0.dist-info/RECORD +132 -0
  129. invarlock-0.2.0.dist-info/WHEEL +5 -0
  130. invarlock-0.2.0.dist-info/entry_points.txt +20 -0
  131. invarlock-0.2.0.dist-info/licenses/LICENSE +201 -0
  132. invarlock-0.2.0.dist-info/top_level.txt +1 -0
invarlock/reporting/render.py
@@ -0,0 +1,1442 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import math
5
+ from pathlib import Path
6
+
7
+ # mypy: ignore-errors
8
+ from typing import Any
9
+
10
+ import yaml
11
+
12
+ # Import certificate module for helper access without creating hard cycles
13
+ from . import certificate as C
14
+
15
+ # Console Validation Block helpers (allow-list driven)
16
+ _CONSOLE_LABELS_DEFAULT = [
17
+ "Primary Metric Acceptable",
18
+ "Preview Final Drift Acceptable",
19
+ "Guard Overhead Acceptable",
20
+ "Invariants Pass",
21
+ "Spectral Stable",
22
+ "Rmt Stable",
23
+ ]
24
+
25
+
26
+ def _load_console_labels() -> list[str]:
27
+ """Load console labels allow-list from contracts with a safe fallback."""
28
+ try:
29
+ root = Path(__file__).resolve().parents[3]
30
+ path = root / "contracts" / "console_labels.json"
31
+ if path.exists():
32
+ data = json.loads(path.read_text(encoding="utf-8"))
33
+ if isinstance(data, list) and all(isinstance(x, str) for x in data):
34
+ return list(data)
35
+ except Exception:
36
+ pass
37
+ return list(_CONSOLE_LABELS_DEFAULT)
38
+
39
+
40
+ def compute_console_validation_block(certificate: dict[str, Any]) -> dict[str, Any]:
41
+ """Produce a normalized console validation block from a certificate.
42
+
43
+ Returns a dict with keys:
44
+ - labels: the canonical label list
45
+ - rows: list of {label, status, evaluated, ok}
46
+ - overall_pass: boolean computed from canonical rows only. Guard Overhead is
47
+ counted only when evaluated.
48
+ """
49
+ labels = _load_console_labels()
50
+ validation = certificate.get("validation", {}) or {}
51
+ guard_ctx = certificate.get("guard_overhead", {}) or {}
52
+ guard_evaluated = (
53
+ bool(guard_ctx.get("evaluated")) if isinstance(guard_ctx, dict) else False
54
+ )
55
+
56
+ # Map label → validation key
57
+ def _to_key(label: str) -> str:
58
+ return label.strip().lower().replace(" ", "_")
59
+
60
+ rows: list[dict[str, Any]] = []
61
+ ok_map: dict[str, bool] = {}
62
+ effective_labels: list[str] = []
63
+ for label in labels:
64
+ key = _to_key(label)
65
+ ok = bool(validation.get(key, False))
66
+ status: str
67
+ evaluated = True
68
+ if key == "guard_overhead_acceptable":
69
+ evaluated = guard_evaluated
70
+ if not evaluated:
71
+ # Omit row entirely when not evaluated (policy/profile skipped)
72
+ continue
73
+ status = "✅ PASS" if ok else "❌ FAIL"
74
+ else:
75
+ status = "✅ PASS" if ok else "❌ FAIL"
76
+ rows.append(
77
+ {"label": label, "status": status, "evaluated": evaluated, "ok": ok}
78
+ )
79
+ effective_labels.append(label)
80
+ ok_map[key] = ok
81
+
82
+ # Overall policy from canonical rows only; exclude guard when not evaluated
83
+ keys_for_overall = [
84
+ "primary_metric_acceptable",
85
+ "preview_final_drift_acceptable",
86
+ "invariants_pass",
87
+ "spectral_stable",
88
+ "rmt_stable",
89
+ ]
90
+ # Include guard overhead only if evaluated
91
+ if guard_evaluated:
92
+ keys_for_overall.append("guard_overhead_acceptable")
93
+
94
+ overall_pass = all(ok_map.get(k, False) for k in keys_for_overall)
95
+
96
+ return {"labels": effective_labels, "rows": rows, "overall_pass": overall_pass}
97
+
98
+
99
+ def _format_plugin(plugin: dict[str, Any]) -> str:
100
+ """Format a plugin entry for markdown list rendering."""
101
+ name = plugin.get("name", "unknown")
102
+ version = plugin.get("version") or "-"
103
+ module = plugin.get("module") or "unknown"
104
+ entry = plugin.get("entry_point")
105
+ pieces = [f"**{name}** v{version}", f"`{module}`"]
106
+ if entry:
107
+ pieces.append(f"[{entry}]")
108
+ return " ".join(pieces)
109
+
110
+
111
+ def _short_digest(v: str) -> str:
112
+ v = str(v)
113
+ return v if len(v) <= 16 else (v[:8] + "…" + v[-8:])
114
+
115
+
116
+ def _fmt_by_kind(x: Any, k: str) -> str:
117
+ try:
118
+ xv = float(x)
119
+ except Exception:
120
+ return "N/A"
121
+ k = str(k).lower()
122
+ if k in {"accuracy", "vqa_accuracy"}:
123
+ return f"{xv * 100.0:.1f}"
124
+ if k.startswith("ppl"):
125
+ return f"{xv:.3g}"
126
+ return f"{xv:.3f}"
127
+
128
+
129
+ def _fmtv(key: str, v: Any) -> str:
130
+ if not (isinstance(v, int | float) and math.isfinite(float(v))):
131
+ return "-"
132
+ if key.startswith("latency_ms_"):
133
+ return f"{float(v):.0f}"
134
+ if key.startswith("throughput_"):
135
+ return f"{float(v):.1f}"
136
+ return f"{float(v):.3f}"
137
+
138
+
139
+ def _p(x: Any) -> str:
140
+ try:
141
+ return f"{float(x) * 100.0:.1f}%"
142
+ except Exception:
143
+ return "N/A"
144
+
145
+
146
+ def _append_system_overhead_section(lines: list[str], sys_over: dict[str, Any]) -> None:
147
+ """Append the System Overhead markdown section to lines given a payload."""
148
+ if not (isinstance(sys_over, dict) and sys_over):
149
+ return
150
+ lines.append("## System Overhead")
151
+ lines.append("")
152
+ lines.append("| Metric | Baseline | Edited | Δ | Ratio |")
153
+ lines.append("|--------|----------|--------|---|-------|")
154
+
155
+ mapping = {
156
+ "latency_ms_p50": "Latency p50 (ms)",
157
+ "latency_ms_p95": "Latency p95 (ms)",
158
+ "throughput_sps": "Throughput (samples/s)",
159
+ }
160
+ for key, label in mapping.items():
161
+ ent = sys_over.get(key)
162
+ if not isinstance(ent, dict):
163
+ continue
164
+ b_raw = ent.get("baseline")
165
+ e_raw = ent.get("edited")
166
+ # If both baseline and edited are missing or zero, present N/A to avoid implying measured zeros
167
+ try:
168
+ b_val = float(b_raw)
169
+ except Exception:
170
+ b_val = float("nan")
171
+ try:
172
+ e_val = float(e_raw)
173
+ except Exception:
174
+ e_val = float("nan")
175
+ if (not math.isfinite(b_val) or b_val == 0.0) and (
176
+ not math.isfinite(e_val) or e_val == 0.0
177
+ ):
178
+ b_str = e_str = d_str = r_str = "N/A"
179
+ else:
180
+ b_str = _fmtv(key, b_val)
181
+ e_str = _fmtv(key, e_val)
182
+ d = ent.get("delta")
183
+ r = ent.get("ratio")
184
+ d_str = _fmtv(key, d) if isinstance(d, int | float) else "-"
185
+ r_str = _fmtv(key, r) if isinstance(r, int | float) else "-"
186
+ lines.append(f"| {label} | {b_str} | {e_str} | {d_str} | {r_str} |")
187
+ lines.append("")
188
+
189
+
190
+ def _append_accuracy_subgroups(lines: list[str], subgroups: dict[str, Any]) -> None:
191
+ """Append the Accuracy Subgroups markdown table given a subgroups payload."""
192
+ if not (isinstance(subgroups, dict) and subgroups):
193
+ return
194
+ lines.append("## Accuracy Subgroups (informational)")
195
+ lines.append("")
196
+ lines.append("| Group | n(prev) | n(final) | Acc(prev) | Acc(final) | Δpp |")
197
+ lines.append("|-------|---------|----------|-----------|------------|-----|")
198
+ for g, rec in subgroups.items():
199
+ try:
200
+ npv = int(rec.get("n_preview", 0))
201
+ except Exception:
202
+ npv = 0
203
+ try:
204
+ nfi = int(rec.get("n_final", 0))
205
+ except Exception:
206
+ nfi = 0
207
+ dp = rec.get("delta_pp")
208
+ try:
209
+ dp_str = f"{float(dp):+.1f} pp"
210
+ except Exception:
211
+ dp_str = "N/A"
212
+ lines.append(
213
+ f"| {g} | {npv} | {nfi} | {_p(rec.get('preview'))} | {_p(rec.get('final'))} | {dp_str} |"
214
+ )
215
+ lines.append("")
216
+
217
+
218
+ def _compute_certificate_hash(certificate: dict[str, Any]) -> str:
219
+ """Compute integrity hash for the certificate.
220
+
221
+ Hash ignores the `artifacts` section for stability across saves.
222
+ """
223
+ # Create a copy without the artifacts section for stable hashing
224
+ cert_copy = dict(certificate or {})
225
+ cert_copy.pop("artifacts", None)
226
+
227
+ # Sort keys for deterministic hashing
228
+ cert_str = json.dumps(cert_copy, sort_keys=True)
229
+ import hashlib as _hash
230
+
231
+ return _hash.sha256(cert_str.encode()).hexdigest()[:16]
232
+
233
+
234
+ def build_console_summary_pack(certificate: dict[str, Any]) -> dict[str, Any]:
235
+ """Build a small, reusable console summary pack from a certificate.
236
+
237
+ Returns a dict with:
238
+ - overall_pass: bool
239
+ - overall_line: human-friendly overall status line
240
+ - gate_lines: list of "<Label>: <Status>" strings for each evaluated gate
241
+ - labels: the canonical label list used
242
+ """
243
+ block = compute_console_validation_block(certificate)
244
+ overall_pass = bool(block.get("overall_pass"))
245
+ emoji = "✅" if overall_pass else "❌"
246
+ overall_line = f"Overall Status: {emoji} {'PASS' if overall_pass else 'FAIL'}"
247
+
248
+ gate_lines: list[str] = []
249
+ for row in block.get("rows", []) or []:
250
+ if not isinstance(row, dict):
251
+ continue
252
+ label = row.get("label", "Gate")
253
+ status = row.get("status", "")
254
+ gate_lines.append(f"{label}: {status}")
255
+
256
+ return {
257
+ "overall_pass": overall_pass,
258
+ "overall_line": overall_line,
259
+ "gate_lines": gate_lines,
260
+ "labels": block.get("labels", []),
261
+ }
262
+
263
+
264
+ def render_certificate_markdown(certificate: dict[str, Any]) -> str:
265
+ """
266
+ Render a certificate as a formatted Markdown report with pretty tables.
267
+
268
+ This implementation is moved from certificate.py to keep that module lean.
269
+ To avoid circular import issues, we alias helpers from the certificate
270
+ module inside the function body.
271
+ """
272
+ # Alias frequently used helpers locally to avoid editing the large body
273
+ validate_certificate = C.validate_certificate
274
+
275
+ if not validate_certificate(certificate):
276
+ raise ValueError("Invalid certificate structure")
277
+
278
+ lines = []
279
+ edit_name = str(certificate.get("edit_name") or "").lower()
280
+
281
+ # Header
282
+ lines.append("# InvarLock Safety Certificate")
283
+ lines.append("")
284
+ lines.append(
285
+ "> *Basis: “point” gates check the point estimate; “upper” gates check the CI "
286
+ "upper bound; “point & upper” requires both to pass.*"
287
+ )
288
+ lines.append("")
289
+ lines.append(f"**Schema Version:** {certificate['schema_version']}")
290
+ lines.append(f"**Run ID:** `{certificate['run_id']}`")
291
+ lines.append(f"**Generated:** {certificate['artifacts']['generated_at']}")
292
+ lines.append(f"**Edit Type:** {certificate.get('edit_name', 'Unknown')}")
293
+ lines.append("")
294
+
295
+ plugins = certificate.get("plugins", {})
296
+ if isinstance(plugins, dict) and plugins:
297
+ lines.append("## Plugin Provenance")
298
+ lines.append("")
299
+
300
+ adapter_plugin = plugins.get("adapter")
301
+ if isinstance(adapter_plugin, dict):
302
+ lines.append(f"- Adapter: {_format_plugin(adapter_plugin)}")
303
+
304
+ edit_plugin = plugins.get("edit")
305
+ if isinstance(edit_plugin, dict):
306
+ lines.append(f"- Edit: {_format_plugin(edit_plugin)}")
307
+
308
+ guard_plugins = plugins.get("guards")
309
+ if isinstance(guard_plugins, list) and guard_plugins:
310
+ guard_entries = [
311
+ _format_plugin(plugin)
312
+ for plugin in guard_plugins
313
+ if isinstance(plugin, dict)
314
+ ]
315
+ if guard_entries:
316
+ lines.append("- Guards:\n - " + "\n - ".join(guard_entries))
317
+ lines.append("")
318
+
319
+ # Executive Summary with validation status (canonical, from console block)
320
+ lines.append("## Executive Summary")
321
+ lines.append("")
322
+ _block = compute_console_validation_block(certificate)
323
+ overall_pass = bool(_block.get("overall_pass"))
324
+ status_emoji = "✅" if overall_pass else "❌"
325
+ lines.append(
326
+ f"**Overall Status:** {status_emoji} {'PASS' if overall_pass else 'FAIL'}"
327
+ )
328
+ # Window Plan one-liner for quick audit
329
+ try:
330
+ plan_ctx = (
331
+ certificate.get("window_plan")
332
+ or certificate.get("dataset", {}).get("windows", {})
333
+ or certificate.get("ppl", {}).get("window_plan")
334
+ )
335
+ seq_len = certificate.get("dataset", {}).get("seq_len") or certificate.get(
336
+ "dataset", {}
337
+ ).get("sequence_length")
338
+ if isinstance(plan_ctx, dict):
339
+ profile = plan_ctx.get("profile")
340
+ preview_n = (
341
+ plan_ctx.get("preview_n")
342
+ if plan_ctx.get("preview_n") is not None
343
+ else plan_ctx.get("actual_preview")
344
+ )
345
+ final_n = (
346
+ plan_ctx.get("final_n")
347
+ if plan_ctx.get("final_n") is not None
348
+ else plan_ctx.get("actual_final")
349
+ )
350
+ lines.append(
351
+ f"- Window Plan: {profile}, {preview_n}/{final_n}{', seq_len=' + str(seq_len) if seq_len else ''}"
352
+ )
353
+ except Exception:
354
+ pass
355
+ lines.append("")
356
+
357
+ # Validation table with canonical gates (mirrors console allow-list)
358
+ lines.append("## Quality Gates")
359
+ lines.append("")
360
+ lines.append("| Gate | Status | Measured | Threshold | Basis | Description |")
361
+ lines.append("|------|--------|----------|-----------|-------|-------------|")
362
+
363
+ pm_block = certificate.get("primary_metric", {}) or {}
364
+ has_pm = isinstance(pm_block, dict) and bool(pm_block)
365
+ auto_info = certificate.get("auto", {})
366
+ tier = (auto_info.get("tier") or "balanced").lower()
367
+
368
+ # Helper to emit Primary Metric Acceptable row
369
+ def _emit_pm_gate_row() -> None:
370
+ pm_kind = str(pm_block.get("kind", "")).lower()
371
+ value = pm_block.get("ratio_vs_baseline")
372
+ gating_basis = pm_block.get("gating_basis") or "point"
373
+ ok = bool(
374
+ certificate.get("validation", {}).get("primary_metric_acceptable", True)
375
+ )
376
+ status = "✅ PASS" if ok else "❌ FAIL"
377
+ if pm_kind in {"accuracy", "vqa_accuracy"}:
378
+ measured = f"{value:+.2f} pp" if isinstance(value, int | float) else "N/A"
379
+ th_map = {
380
+ "conservative": -0.5,
381
+ "balanced": -1.0,
382
+ "aggressive": -2.0,
383
+ "none": -1.0,
384
+ }
385
+ th = th_map.get(tier, -1.0)
386
+ lines.append(
387
+ f"| Primary Metric Acceptable | {status} | {measured} | ≥ {th:+.2f} pp | {gating_basis} | Δ accuracy vs baseline |"
388
+ )
389
+ else:
390
+ tier_thresholds = {
391
+ "conservative": 1.05,
392
+ "balanced": 1.10,
393
+ "aggressive": 1.20,
394
+ "none": 1.10,
395
+ }
396
+ ratio_limit = tier_thresholds.get(tier, 1.10)
397
+ target_ratio = auto_info.get("target_pm_ratio")
398
+ if isinstance(target_ratio, int | float) and target_ratio > 0:
399
+ ratio_limit = min(ratio_limit, float(target_ratio))
400
+ measured = f"{value:.3f}x" if isinstance(value, int | float) else "N/A"
401
+ lines.append(
402
+ f"| Primary Metric Acceptable | {status} | {measured} | ≤ {ratio_limit:.2f}x | {gating_basis} | Ratio vs baseline |"
403
+ )
404
+
405
+ # Helper to emit Preview Final Drift Acceptable row
406
+ def _emit_drift_gate_row() -> None:
407
+ ok = bool(
408
+ certificate.get("validation", {}).get(
409
+ "preview_final_drift_acceptable", True
410
+ )
411
+ )
412
+ status = "✅ PASS" if ok else "❌ FAIL"
413
+ # Compute drift from PM preview/final when available
414
+ try:
415
+ pv = (
416
+ float(pm_block.get("preview"))
417
+ if isinstance(pm_block.get("preview"), int | float)
418
+ else float("nan")
419
+ )
420
+ fv = (
421
+ float(pm_block.get("final"))
422
+ if isinstance(pm_block.get("final"), int | float)
423
+ else float("nan")
424
+ )
425
+ drift = (
426
+ fv / pv
427
+ if (math.isfinite(pv) and pv > 0 and math.isfinite(fv))
428
+ else float("nan")
429
+ )
430
+ except Exception:
431
+ drift = float("nan")
432
+ measured = f"{drift:.3f}x" if math.isfinite(drift) else "N/A"
433
+ lines.append(
434
+ f"| Preview Final Drift Acceptable | {status} | {measured} | 0.95–1.05x | point | Final/Preview ratio stability |"
435
+ )
436
+
437
+ # Helper to emit Guard Overhead Acceptable row (only when evaluated)
438
+ def _emit_overhead_gate_row() -> None:
439
+ guard_overhead = certificate.get("guard_overhead", {}) or {}
440
+ evaluated = bool(guard_overhead.get("evaluated"))
441
+ if not evaluated:
442
+ return
443
+ ok = bool(
444
+ certificate.get("validation", {}).get("guard_overhead_acceptable", True)
445
+ )
446
+ status = "✅ PASS" if ok else "❌ FAIL"
447
+ overhead_pct = guard_overhead.get("overhead_percent")
448
+ overhead_ratio = guard_overhead.get("overhead_ratio")
449
+ if isinstance(overhead_pct, int | float) and math.isfinite(float(overhead_pct)):
450
+ measured = f"{float(overhead_pct):+.2f}%"
451
+ elif isinstance(overhead_ratio, int | float) and math.isfinite(
452
+ float(overhead_ratio)
453
+ ):
454
+ measured = f"{float(overhead_ratio):.3f}x"
455
+ else:
456
+ measured = "N/A"
457
+ threshold_pct = guard_overhead.get("threshold_percent")
458
+ if not (
459
+ isinstance(threshold_pct, int | float)
460
+ and math.isfinite(float(threshold_pct))
461
+ ):
462
+ threshold_val = guard_overhead.get("overhead_threshold", 0.01)
463
+ try:
464
+ threshold_pct = float(threshold_val) * 100.0
465
+ except Exception:
466
+ threshold_pct = 1.0
467
+ lines.append(
468
+ f"| Guard Overhead Acceptable | {status} | {measured} | ≤ +{threshold_pct:.1f}% | point | Guarded vs bare PM overhead |"
469
+ )
470
+
471
+ # Emit canonical gate rows
472
+ if has_pm:
473
+ _emit_pm_gate_row()
474
+ _emit_drift_gate_row()
475
+ _emit_overhead_gate_row()
476
+
477
+ # Annotate hysteresis usage if applied
478
+ if certificate.get("validation", {}).get("hysteresis_applied"):
479
+ lines.append("- Note: hysteresis applied to gate boundary")
480
+
481
+ lines.append("")
482
+ lines.append("## Safety Check Details")
483
+ lines.append("")
484
+ lines.append("| Safety Check | Status | Measured | Threshold | Description |")
485
+ lines.append("|--------------|--------|----------|-----------|-------------|")
486
+
487
+ inv_summary = certificate["invariants"]
488
+ validation = certificate.get("validation", {})
489
+ inv_status = "✅ PASS" if validation.get("invariants_pass", False) else "❌ FAIL"
490
+ inv_counts = inv_summary.get("summary", {}) or {}
491
+ inv_measure = inv_summary.get("status", "pass").upper()
492
+ fatal_violations = inv_counts.get("fatal_violations") or 0
493
+ warning_violations = (
494
+ inv_counts.get("warning_violations") or inv_counts.get("violations_found") or 0
495
+ )
496
+ if fatal_violations:
497
+ suffix = f"{fatal_violations} fatal"
498
+ if warning_violations:
499
+ suffix += f", {warning_violations} warning"
500
+ inv_measure = f"{inv_measure} ({suffix})"
501
+ elif warning_violations:
502
+ inv_measure = f"{inv_measure} ({warning_violations} warning)"
503
+ lines.append(
504
+ f"| Invariants | {inv_status} | {inv_measure} | pass | Model integrity checks |"
505
+ )
506
+ invariants_failures = inv_summary.get("failures") or []
507
+ if warning_violations and not fatal_violations:
508
+ non_fatal_message = None
509
+ for failure in invariants_failures:
510
+ if isinstance(failure, dict):
511
+ msg = failure.get("message") or failure.get("type")
512
+ if msg:
513
+ non_fatal_message = msg
514
+ break
515
+ if not non_fatal_message:
516
+ non_fatal_message = "Non-fatal invariant warnings present."
517
+ lines.append(f"- Non-fatal: {non_fatal_message}")
518
+
519
+ spec_status = "✅ PASS" if validation.get("spectral_stable", False) else "❌ FAIL"
520
+ caps_applied = certificate["spectral"]["caps_applied"]
521
+ lines.append(
522
+ f"| Spectral Stability | {spec_status} | {caps_applied} violations | < 5 | Weight matrix spectral norms |"
523
+ )
524
+
525
+ # Catastrophic spike safety stop row is now driven by primary metric flags
526
+ if isinstance(certificate.get("primary_metric"), dict):
527
+ pm_ok = bool(validation.get("primary_metric_acceptable", True))
528
+ pm_ratio = certificate.get("primary_metric", {}).get("ratio_vs_baseline")
529
+ if isinstance(pm_ratio, int | float):
530
+ lines.append(
531
+ f"| Catastrophic Spike Gate (safety stop) | {'✅ PASS' if pm_ok else '❌ FAIL'} | {pm_ratio:.3f}x | ≤ 2.0x | Hard stop @ 2.0× |"
532
+ )
533
+
534
+ # Include RMT Health row for compatibility and clarity
535
+ rmt_status = "✅ PASS" if validation.get("rmt_stable", False) else "❌ FAIL"
536
+ rmt_state = certificate.get("rmt", {}).get("status", "unknown").title()
537
+ lines.append(
538
+ f"| RMT Health | {rmt_status} | {rmt_state} | ε-rule | Random Matrix Theory guard status |"
539
+ )
540
+
541
+ # Pairing + Bootstrap snapshot (quick audit surface)
542
+ try:
543
+ stats = (
544
+ certificate.get("dataset", {}).get("windows", {}).get("stats", {})
545
+ or certificate.get("ppl", {}).get("stats", {})
546
+ or {}
547
+ )
548
+ paired_windows = stats.get("paired_windows")
549
+ match_frac = stats.get("window_match_fraction")
550
+ overlap_frac = stats.get("window_overlap_fraction")
551
+ bootstrap = stats.get("bootstrap") or {}
552
+ if (
553
+ paired_windows is not None
554
+ or match_frac is not None
555
+ or overlap_frac is not None
556
+ ):
557
+ lines.append("")
558
+ lines.append(
559
+ f"- Pairing: paired={paired_windows}, match={match_frac:.3f}, overlap={overlap_frac:.3f}"
560
+ )
561
+ if isinstance(bootstrap, dict):
562
+ reps = bootstrap.get("replicates")
563
+ bseed = bootstrap.get("seed")
564
+ if reps is not None or bseed is not None:
565
+ lines.append(f"- Bootstrap: replicates={reps}, seed={bseed}")
566
+ # Optional: show log-space paired Δ CI next to ratio CI for clarity
567
+ delta_ci = certificate.get("primary_metric", {}).get("ci") or certificate.get(
568
+ "ppl", {}
569
+ ).get("logloss_delta_ci")
570
+ if (
571
+ isinstance(delta_ci, tuple | list)
572
+ and len(delta_ci) == 2
573
+ and all(isinstance(x, int | float) for x in delta_ci)
574
+ ):
575
+ lines.append(f"- Log Δ (paired) CI: [{delta_ci[0]:.6f}, {delta_ci[1]:.6f}]")
576
+ except Exception:
577
+ pass
578
+
579
+ if invariants_failures:
580
+ lines.append("")
581
+ lines.append("**Invariant Notes**")
582
+ lines.append("")
583
+ for failure in invariants_failures:
584
+ severity = failure.get("severity", "warning")
585
+ detail = failure.get("detail", {})
586
+ detail_str = ""
587
+ if isinstance(detail, dict) and detail:
588
+ detail_str = ", ".join(f"{k}={v}" for k, v in detail.items())
589
+ detail_str = f" ({detail_str})"
590
+ lines.append(
591
+ f"- {failure.get('check', 'unknown')} [{severity}]: {failure.get('type', 'violation')}{detail_str}"
592
+ )
593
+
594
+ lines.append("")
595
+
596
+ # Guard observability snapshots
597
+ lines.append("## Guard Observability")
598
+ lines.append("")
599
+
600
+ spectral_info = certificate.get("spectral", {}) or {}
601
+ if spectral_info:
602
+ lines.append("### Spectral Guard")
603
+ lines.append("")
604
+ mt_info = spectral_info.get("multiple_testing", {}) or {}
605
+ if mt_info:
606
+ lines.append("- **Multiple Testing:**")
607
+ lines.append(" ```yaml")
608
+ mt_yaml = (
609
+ yaml.safe_dump(mt_info, sort_keys=True, width=70).strip().splitlines()
610
+ )
611
+ for line in mt_yaml:
612
+ lines.append(f" {line}")
613
+ lines.append(" ```")
614
+ # Spectral summary (place key knobs together for quick scan)
615
+ spec_sigma = spectral_info.get("sigma_quantile")
616
+ spec_deadband = spectral_info.get("deadband")
617
+ spec_max_caps = spectral_info.get("max_caps")
618
+ summary_yaml = {
619
+ "sigma_quantile": float(spec_sigma)
620
+ if isinstance(spec_sigma, int | float)
621
+ else None,
622
+ "deadband": float(spec_deadband)
623
+ if isinstance(spec_deadband, int | float)
624
+ else None,
625
+ "max_caps": int(spec_max_caps)
626
+ if isinstance(spec_max_caps, int | float)
627
+ else None,
628
+ }
629
+ # Drop Nones from summary
630
+ summary_yaml = {k: v for k, v in summary_yaml.items() if v is not None}
631
+ if summary_yaml:
632
+ lines.append("- **Spectral Summary:**")
633
+ lines.append(" ```yaml")
634
+ for line in (
635
+ yaml.safe_dump(summary_yaml, sort_keys=True, width=70)
636
+ .strip()
637
+ .splitlines()
638
+ ):
639
+ lines.append(f" {line}")
640
+ lines.append(" ```")
641
+ lines.append(
642
+ f"- Caps Applied: {spectral_info.get('caps_applied')} / {spectral_info.get('max_caps')}"
643
+ )
644
+ summary = spectral_info.get("summary", {}) or {}
645
+ lines.append(f"- Caps Exceeded: {summary.get('caps_exceeded', False)}")
646
+ caps_by_family = spectral_info.get("caps_applied_by_family") or {}
647
+ family_caps = spectral_info.get("family_caps") or {}
648
+ if caps_by_family:
649
+ lines.append("")
650
+ lines.append("| Family | κ | Violations |")
651
+ lines.append("|--------|---|------------|")
652
+ for family, count in caps_by_family.items():
653
+ kappa = family_caps.get(family, {}).get("kappa")
654
+ if isinstance(kappa, int | float) and math.isfinite(float(kappa)):
655
+ kappa_str = f"{kappa:.3f}"
656
+ else:
657
+ kappa_str = "-"
658
+ lines.append(f"| {family} | {kappa_str} | {count} |")
659
+ lines.append("")
660
+ quantiles = spectral_info.get("family_z_quantiles") or {}
661
+ if quantiles:
662
+ lines.append("| Family | q95 | q99 | Max | Samples |")
663
+ lines.append("|--------|-----|-----|-----|---------|")
664
+ for family, stats in quantiles.items():
665
+ q95 = stats.get("q95")
666
+ q99 = stats.get("q99")
667
+ max_z = stats.get("max")
668
+ count = stats.get("count")
669
+ q95_str = f"{q95:.3f}" if isinstance(q95, int | float) else "-"
670
+ q99_str = f"{q99:.3f}" if isinstance(q99, int | float) else "-"
671
+ max_str = f"{max_z:.3f}" if isinstance(max_z, int | float) else "-"
672
+ count_str = str(count) if isinstance(count, int | float) else "-"
673
+ lines.append(
674
+ f"| {family} | {q95_str} | {q99_str} | {max_str} | {count_str} |"
675
+ )
676
+ lines.append("")
677
+ policy_caps = spectral_info.get("policy", {}).get("family_caps")
678
+ if policy_caps:
679
+ lines.append("- **Family κ (policy):**")
680
+ lines.append(" ```yaml")
681
+ caps_yaml = (
682
+ yaml.safe_dump(policy_caps, sort_keys=True, width=70)
683
+ .strip()
684
+ .splitlines()
685
+ )
686
+ for line in caps_yaml:
687
+ lines.append(f" {line}")
688
+ lines.append(" ```")
689
+ top_scores = spectral_info.get("top_z_scores") or {}
690
+ if top_scores:
691
+ lines.append("Top |z| per family:")
692
+ for family in sorted(top_scores.keys()):
693
+ entries = top_scores[family]
694
+ if not entries:
695
+ continue
696
+ formatted_entries = []
697
+ for entry in entries:
698
+ module_name = entry.get("module", "unknown")
699
+ z_val = entry.get("z")
700
+ if isinstance(z_val, int | float) and math.isfinite(float(z_val)):
701
+ z_str = f"{z_val:.3f}"
702
+ else:
703
+ z_str = "n/a"
704
+ formatted_entries.append(f"{module_name} (|z|={z_str})")
705
+ lines.append(f"- {family}: {', '.join(formatted_entries)}")
706
+ lines.append("")
707
+
708
+ rmt_info = certificate.get("rmt", {}) or {}
709
+ if rmt_info:
710
+ lines.append("### RMT Guard")
711
+ lines.append("")
712
+ families = rmt_info.get("families") or {}
713
+ if families:
714
+ lines.append("| Family | ε_f | Bare | Guarded | Δ |")
715
+ lines.append("|--------|-----|------|---------|---|")
716
+ for family, data in families.items():
717
+ epsilon_val = data.get("epsilon")
718
+ epsilon_str = (
719
+ f"{epsilon_val:.3f}"
720
+ if isinstance(epsilon_val, int | float)
721
+ else "-"
722
+ )
723
+ bare_count = data.get("bare", 0)
724
+ guarded_count = data.get("guarded", 0)
725
+ delta_val = None
726
+ try:
727
+ bare_str = str(int(bare_count))
728
+ except (TypeError, ValueError):
729
+ bare_str = "-"
730
+ try:
731
+ guarded_str = str(int(guarded_count))
732
+ except (TypeError, ValueError):
733
+ guarded_str = "-"
734
+ try:
735
+ delta_val = int(guarded_count) - int(bare_count) # type: ignore[arg-type]
736
+ except Exception:
737
+ delta_val = None
738
+ delta_str = f"{delta_val:+d}" if isinstance(delta_val, int) else "-"
739
+ lines.append(
740
+ f"| {family} | {epsilon_str} | {bare_str} | {guarded_str} | {delta_str} |"
741
+ )
742
+ lines.append("")
743
+ # Delta total and stability flags
744
+ delta_total = rmt_info.get("delta_total")
745
+ if isinstance(delta_total, int):
746
+ lines.append(f"- Δ total: {delta_total:+d}")
747
+ lines.append(f"- Stable: {rmt_info.get('stable', True)}")
748
+ lines.append("")
749
+
750
+ guard_overhead_info = certificate.get("guard_overhead", {}) or {}
751
+ if guard_overhead_info:
752
+ lines.append("### Guard Overhead")
753
+ lines.append("")
754
+ evaluated_flag = bool(guard_overhead_info.get("evaluated", True))
755
+ if not evaluated_flag:
756
+ # Make explicit when overhead was not evaluated by policy/profile
757
+ lines.append("- Evaluated: false (skipped by policy/profile)")
758
+ bare_ppl = guard_overhead_info.get("bare_ppl")
759
+ guarded_ppl = guard_overhead_info.get("guarded_ppl")
760
+ if isinstance(bare_ppl, int | float) and math.isfinite(float(bare_ppl)):
761
+ lines.append(f"- Bare Primary Metric: {bare_ppl:.3f}")
762
+ if isinstance(guarded_ppl, int | float) and math.isfinite(float(guarded_ppl)):
763
+ lines.append(f"- Guarded Primary Metric: {guarded_ppl:.3f}")
764
+ ratio = guard_overhead_info.get("overhead_ratio")
765
+ percent = guard_overhead_info.get("overhead_percent")
766
+ if (
767
+ isinstance(ratio, int | float)
768
+ and math.isfinite(float(ratio))
769
+ and isinstance(percent, int | float)
770
+ and math.isfinite(float(percent))
771
+ ):
772
+ lines.append(f"- Overhead: {ratio:.4f}x ({percent:+.2f}%)")
773
+ elif isinstance(ratio, int | float) and math.isfinite(float(ratio)):
774
+ lines.append(f"- Overhead: {ratio:.4f}x")
775
+ overhead_source = guard_overhead_info.get("source")
776
+ if overhead_source:
777
+ lines.append(f"- Source: {overhead_source}")
778
+ plan_ctx = certificate.get("provenance", {}).get("window_plan", {})
779
+ if isinstance(plan_ctx, dict) and plan_ctx:
780
+ plan_preview = (
781
+ plan_ctx.get("preview_n")
782
+ if plan_ctx.get("preview_n") is not None
783
+ else plan_ctx.get("actual_preview")
784
+ )
785
+ plan_final = (
786
+ plan_ctx.get("final_n")
787
+ if plan_ctx.get("final_n") is not None
788
+ else plan_ctx.get("actual_final")
789
+ )
790
+ plan_profile = plan_ctx.get("profile")
791
+ lines.append(
792
+ f"- Window Plan Used: profile={plan_profile}, preview={plan_preview}, final={plan_final}"
793
+ )
794
+ lines.append("")
795
+
796
+ compression_diag = (
797
+ certificate.get("structure", {}).get("compression_diagnostics", {})
798
+ if isinstance(certificate.get("structure"), dict)
799
+ else {}
800
+ )
801
+ inference_flags = compression_diag.get("inferred") or {}
802
+ inference_sources = compression_diag.get("inference_source") or {}
803
+ inference_log = compression_diag.get("inference_log") or []
804
+ if inference_flags or inference_sources or inference_log:
805
+ lines.append("## Inference")
806
+ lines.append("")
807
+ if inference_flags:
808
+ lines.append("- **Fields Inferred:**")
809
+ for field, flag in inference_flags.items():
810
+ lines.append(f" - {field}: {'yes' if flag else 'no'}")
811
+ if inference_sources:
812
+ lines.append("- **Sources:**")
813
+ for field, source in inference_sources.items():
814
+ lines.append(f" - {field}: {source}")
815
+ if inference_log:
816
+ lines.append("- **Inference Log:**")
817
+ for entry in inference_log:
818
+ lines.append(f" - {entry}")
819
+ lines.append("")
820
+
821
+ # Model and Configuration
822
+ lines.append("## Model Information")
823
+ lines.append("")
824
+ meta = certificate["meta"]
825
+ lines.append(f"- **Model ID:** {meta.get('model_id')}")
826
+ lines.append(f"- **Adapter:** {meta.get('adapter')}")
827
+ lines.append(f"- **Device:** {meta.get('device')}")
828
+ lines.append(f"- **Timestamp:** {meta.get('ts')}")
829
+ commit_value = meta.get("commit") or ""
830
+ if commit_value:
831
+ short_sha = str(commit_value)[:12]
832
+ lines.append(f"- **Commit:** {short_sha}")
833
+ else:
834
+ lines.append("- **Commit:** (not set)")
835
+ lines.append(f"- **Seed:** {meta.get('seed')}")
836
+ seeds_map = meta.get("seeds", {})
837
+ if isinstance(seeds_map, dict) and seeds_map:
838
+ lines.append(
839
+ "- **Seeds:** "
840
+ f"python={seeds_map.get('python')}, "
841
+ f"numpy={seeds_map.get('numpy')}, "
842
+ f"torch={seeds_map.get('torch')}"
843
+ )
844
+ invarlock_version = meta.get("invarlock_version")
845
+ if invarlock_version:
846
+ lines.append(f"- **InvarLock Version:** {invarlock_version}")
847
+ env_flags = meta.get("env_flags")
848
+ if isinstance(env_flags, dict) and env_flags:
849
+ lines.append("- **Env Flags:**")
850
+ lines.append(" ```yaml")
851
+ for k, v in env_flags.items():
852
+ lines.append(f" {k}: {v}")
853
+ lines.append(" ```")
854
+ # Determinism flags (if present)
855
+ cuda_flags = meta.get("cuda_flags")
856
+ if isinstance(cuda_flags, dict) and cuda_flags:
857
+ parts = []
858
+ for key in (
859
+ "deterministic_algorithms",
860
+ "cudnn_deterministic",
861
+ "cudnn_benchmark",
862
+ "cudnn_allow_tf32",
863
+ "cuda_matmul_allow_tf32",
864
+ "CUBLAS_WORKSPACE_CONFIG",
865
+ ):
866
+ if key in cuda_flags and cuda_flags[key] is not None:
867
+ parts.append(f"{key}={cuda_flags[key]}")
868
+ if parts:
869
+ lines.append(f"- **Determinism Flags:** {', '.join(parts)}")
870
+ lines.append("")
871
+
872
+ # Edit Configuration (removed duplicate Edit Information section)
873
+
874
+ # Auto-tuning Configuration
875
+ auto = certificate["auto"]
876
+ if auto["tier"] != "none":
877
+ lines.append("## Auto-Tuning Configuration")
878
+ lines.append("")
879
+ lines.append(f"- **Tier:** {auto['tier']}")
880
+ lines.append(f"- **Probes Used:** {auto['probes_used']}")
881
+ if auto.get("target_pm_ratio"):
882
+ lines.append(
883
+ f"- **Auto Policy Target Ratio (informational):** {auto['target_pm_ratio']:.3f}"
884
+ )
885
+ # Tiny relax breadcrumb for dev-only demos
886
+ try:
887
+ if bool(auto.get("tiny_relax")):
888
+ lines.append("- Tiny relax: enabled (dev-only)")
889
+ except Exception:
890
+ pass
891
+ lines.append("")
892
+
893
+ resolved_policy = certificate.get("resolved_policy")
894
+ if resolved_policy:
895
+ lines.append("## Resolved Policy")
896
+ lines.append("")
897
+ lines.append("```yaml")
898
+ resolved_yaml = yaml.safe_dump(
899
+ resolved_policy, sort_keys=True, width=80, default_flow_style=False
900
+ ).strip()
901
+ for line in resolved_yaml.splitlines():
902
+ lines.append(line)
903
+ lines.append("```")
904
+ lines.append("")
905
+
906
+ policy_provenance = certificate.get("policy_provenance", {})
907
+ if policy_provenance:
908
+ lines.append("## Policy Provenance")
909
+ lines.append("")
910
+ lines.append(f"- **Tier:** {policy_provenance.get('tier')}")
911
+ overrides_list = policy_provenance.get("overrides") or []
912
+ if overrides_list:
913
+ lines.append(f"- **Overrides:** {', '.join(overrides_list)}")
914
+ else:
915
+ lines.append("- **Overrides:** (none)")
916
+ digest_value = policy_provenance.get("policy_digest")
917
+ if digest_value:
918
+ lines.append(f"- **Policy Digest:** `{digest_value}`")
919
+ else:
920
+ lines.append("- **Policy Digest:** (not recorded)")
921
+ if policy_provenance.get("resolved_at"):
922
+ lines.append(f"- **Resolved At:** {policy_provenance.get('resolved_at')}")
923
+ lines.append("")
924
+
925
+ # Dataset Information
926
+ lines.append("## Dataset Configuration")
927
+ lines.append("")
928
+ dataset = certificate.get("dataset", {}) or {}
929
+ prov = (
930
+ (dataset.get("provider") or "unknown")
931
+ if isinstance(dataset, dict)
932
+ else "unknown"
933
+ )
934
+ lines.append(f"- **Provider:** {prov}")
935
+ try:
936
+ seq_len_val = (
937
+ int(dataset.get("seq_len"))
938
+ if isinstance(dataset.get("seq_len"), int | float)
939
+ else dataset.get("seq_len")
940
+ )
941
+ except Exception: # pragma: no cover - defensive
942
+ seq_len_val = dataset.get("seq_len")
943
+ if seq_len_val is not None:
944
+ lines.append(f"- **Sequence Length:** {seq_len_val}")
945
+ windows_blk = (
946
+ dataset.get("windows", {}) if isinstance(dataset.get("windows"), dict) else {}
947
+ )
948
+ win_prev = windows_blk.get("preview")
949
+ win_final = windows_blk.get("final")
950
+ if win_prev is not None and win_final is not None:
951
+ lines.append(f"- **Windows:** {win_prev} preview + {win_final} final")
952
+ if windows_blk.get("seed") is not None:
953
+ lines.append(f"- **Seed:** {windows_blk.get('seed')}")
954
+ hash_blk = dataset.get("hash", {}) if isinstance(dataset.get("hash"), dict) else {}
955
+ if hash_blk.get("preview_tokens") is not None:
956
+ lines.append(f"- **Preview Tokens:** {hash_blk.get('preview_tokens'):,}")
957
+ if hash_blk.get("final_tokens") is not None:
958
+ lines.append(f"- **Final Tokens:** {hash_blk.get('final_tokens'):,}")
959
+ if hash_blk.get("total_tokens") is not None:
960
+ lines.append(f"- **Total Tokens:** {hash_blk.get('total_tokens'):,}")
961
+ if hash_blk.get("dataset"):
962
+ lines.append(f"- **Dataset Hash:** {hash_blk.get('dataset')}")
963
+ tokenizer = dataset.get("tokenizer", {})
964
+ if tokenizer.get("name") or tokenizer.get("hash"):
965
+ vocab_size = tokenizer.get("vocab_size")
966
+ vocab_suffix = f" (vocab {vocab_size})" if isinstance(vocab_size, int) else ""
967
+ lines.append(
968
+ f"- **Tokenizer:** {tokenizer.get('name', 'unknown')}{vocab_suffix}"
969
+ )
970
+ if tokenizer.get("hash"):
971
+ lines.append(f" - Hash: {tokenizer['hash']}")
972
+ lines.append(
973
+ f" - BOS/EOS: {tokenizer.get('bos_token')} / {tokenizer.get('eos_token')}"
974
+ )
975
+ if tokenizer.get("pad_token") is not None:
976
+ lines.append(f" - PAD: {tokenizer.get('pad_token')}")
977
+ if tokenizer.get("add_prefix_space") is not None:
978
+ lines.append(f" - add_prefix_space: {tokenizer.get('add_prefix_space')}")
979
+ lines.append("")
980
+
981
+ provenance_info = certificate.get("provenance", {}) or {}
982
+ if provenance_info:
983
+ lines.append("## Run Provenance")
984
+ lines.append("")
985
+ baseline_info = provenance_info.get("baseline", {}) or {}
986
+ if baseline_info:
987
+ lines.append(f"- **Baseline Run ID:** {baseline_info.get('run_id')}")
988
+ if baseline_info.get("report_hash"):
989
+ lines.append(f" - Report Hash: `{baseline_info.get('report_hash')}`")
990
+ if baseline_info.get("report_path"):
991
+ lines.append(f" - Report Path: {baseline_info.get('report_path')}")
992
+ edited_info = provenance_info.get("edited", {}) or {}
993
+ if edited_info:
994
+ lines.append(f"- **Edited Run ID:** {edited_info.get('run_id')}")
995
+ if edited_info.get("report_hash"):
996
+ lines.append(f" - Report Hash: `{edited_info.get('report_hash')}`")
997
+ if edited_info.get("report_path"):
998
+ lines.append(f" - Report Path: {edited_info.get('report_path')}")
999
+ window_plan = provenance_info.get("window_plan")
1000
+ if isinstance(window_plan, dict) and window_plan:
1001
+ preview_val = window_plan.get(
1002
+ "preview_n", window_plan.get("actual_preview")
1003
+ )
1004
+ final_val = window_plan.get("final_n", window_plan.get("actual_final"))
1005
+ lines.append(
1006
+ f"- **Window Plan:** profile={window_plan.get('profile')}, preview={preview_val}, final={final_val}"
1007
+ )
1008
+ provider_digest = provenance_info.get("provider_digest")
1009
+ if isinstance(provider_digest, dict) and provider_digest:
1010
+ ids_d = provider_digest.get("ids_sha256")
1011
+ tok_d = provider_digest.get("tokenizer_sha256")
1012
+ mask_d = provider_digest.get("masking_sha256")
1013
+
1014
+ lines.append("- **Provider Digest:**")
1015
+ if tok_d:
1016
+ lines.append(
1017
+ f" - tokenizer_sha256: `{_short_digest(tok_d)}` (full in JSON)"
1018
+ )
1019
+ if ids_d:
1020
+ lines.append(f" - ids_sha256: `{_short_digest(ids_d)}` (full in JSON)")
1021
+ if mask_d:
1022
+ lines.append(
1023
+ f" - masking_sha256: `{_short_digest(mask_d)}` (full in JSON)"
1024
+ )
1025
+ # Surface confidence label prominently
1026
+ try:
1027
+ conf = certificate.get("confidence", {}) or {}
1028
+ if isinstance(conf, dict) and conf.get("label"):
1029
+ lines.append(f"- **Confidence:** {conf.get('label')}")
1030
+ except Exception:
1031
+ pass
1032
+ # Surface policy version + thresholds hash (short)
1033
+ try:
1034
+ pd = certificate.get("policy_digest", {}) or {}
1035
+ if isinstance(pd, dict) and pd:
1036
+ pv = pd.get("policy_version")
1037
+ th = pd.get("thresholds_hash")
1038
+ if pv:
1039
+ lines.append(f"- **Policy Version:** {pv}")
1040
+ if isinstance(th, str) and th:
1041
+ short = th if len(th) <= 16 else (th[:8] + "…" + th[-8:])
1042
+ lines.append(f"- **Thresholds Digest:** `{short}` (full in JSON)")
1043
+ if pd.get("changed"):
1044
+ lines.append("- Note: policy changed")
1045
+ except Exception:
1046
+ pass
1047
+ lines.append("")
1048
+
1049
+ # Structural Changes heading is printed with content later; avoid empty header here
1050
+
1051
+ # Primary Metric (metric-v1) snapshot, if present
1052
+ try:
1053
+ pm = certificate.get("primary_metric")
1054
+ if isinstance(pm, dict) and pm:
1055
+ kind = pm.get("kind", "unknown")
1056
+ lines.append(f"## Primary Metric ({kind})")
1057
+ lines.append("")
1058
+ unit = pm.get("unit", "-")
1059
+ paired = pm.get("paired", False)
1060
+ reps = None
1061
+ # Snapshot only; bootstrap reps live in ppl.stats.bootstrap for ppl metrics
1062
+ # Mark estimated metrics (e.g., pseudo accuracy counts) clearly
1063
+ estimated_flag = False
1064
+ try:
1065
+ if bool(pm.get("estimated")):
1066
+ estimated_flag = True
1067
+ elif str(pm.get("counts_source", "")).lower() == "pseudo_config":
1068
+ estimated_flag = True
1069
+ except Exception:
1070
+ estimated_flag = False
1071
+ est_suffix = " (estimated)" if estimated_flag else ""
1072
+ lines.append(f"- Kind: {kind} (unit: {unit}){est_suffix}")
1073
+ gating_basis = pm.get("gating_basis") or pm.get("basis")
1074
+ if gating_basis:
1075
+ lines.append(f"- Basis: {gating_basis}")
1076
+ if isinstance(paired, bool):
1077
+ lines.append(f"- Paired: {paired}")
1078
+ reps = pm.get("reps")
1079
+ if isinstance(reps, int | float):
1080
+ lines.append(f"- Bootstrap Reps: {int(reps)}")
1081
+ ci = pm.get("ci") or pm.get("display_ci")
1082
+ if (
1083
+ isinstance(ci, list | tuple)
1084
+ and len(ci) == 2
1085
+ and all(isinstance(x, int | float) for x in ci)
1086
+ ):
1087
+ lines.append(f"- CI: {ci[0]:.3f}–{ci[1]:.3f}")
1088
+ prev = pm.get("preview")
1089
+ fin = pm.get("final")
1090
+ ratio = pm.get("ratio_vs_baseline")
1091
+
1092
+ lines.append("")
1093
+ if estimated_flag and str(kind).lower() in {"accuracy", "vqa_accuracy"}:
1094
+ lines.append(
1095
+ "- Note: Accuracy derived from pseudo counts (quick dev preset); use a labeled preset for measured accuracy."
1096
+ )
1097
+ lines.append("| Field | Value |")
1098
+ lines.append("|-------|-------|")
1099
+ lines.append(f"| Preview | {_fmt_by_kind(prev, str(kind))} |")
1100
+ lines.append(f"| Final | {_fmt_by_kind(fin, str(kind))} |")
1101
+ # For accuracy, ratio field is actually a delta (as per helper); clarify inline
1102
+ if kind in {"accuracy", "vqa_accuracy"}:
1103
+ lines.append(f"| Δ vs Baseline | {_fmt_by_kind(ratio, str(kind))} |")
1104
+ # When baseline accuracy is near-zero, clarify display rule
1105
+ try:
1106
+ base_pt = pm.get("baseline_point")
1107
+ if isinstance(base_pt, int | float) and base_pt < 0.05:
1108
+ lines.append(
1109
+ "- Note: baseline < 5%; ratio suppressed; showing Δpp"
1110
+ )
1111
+ except Exception:
1112
+ pass
1113
+ else:
1114
+ try:
1115
+ lines.append(f"| Ratio vs Baseline | {float(ratio):.3f} |")
1116
+ except Exception:
1117
+ lines.append("| Ratio vs Baseline | N/A |")
1118
+ lines.append("")
1119
+ # Secondary metrics (informational)
1120
+ try:
1121
+ secs = certificate.get("secondary_metrics")
1122
+ if isinstance(secs, list) and secs:
1123
+ lines.append("## Secondary Metrics (informational)")
1124
+ lines.append("")
1125
+ lines.append("| Kind | Preview | Final | vs Baseline | CI |")
1126
+ lines.append("|------|---------|-------|-------------|----|")
1127
+ for m in secs:
1128
+ if not isinstance(m, dict):
1129
+ continue
1130
+ k = m.get("kind", "?")
1131
+ pv = _fmt_by_kind(m.get("preview"), str(k))
1132
+ fv = _fmt_by_kind(m.get("final"), str(k))
1133
+ rb = m.get("ratio_vs_baseline")
1134
+ try:
1135
+ rb_str = (
1136
+ f"{float(rb):.3f}"
1137
+ if (str(k).startswith("ppl"))
1138
+ else _fmt_by_kind(rb, str(k))
1139
+ )
1140
+ except Exception:
1141
+ rb_str = "N/A"
1142
+ ci = m.get("display_ci") or m.get("ci")
1143
+ if isinstance(ci, tuple | list) and len(ci) == 2:
1144
+ ci_str = f"{float(ci[0]):.3f}-{float(ci[1]):.3f}"
1145
+ else:
1146
+ ci_str = "–"
1147
+ lines.append(f"| {k} | {pv} | {fv} | {rb_str} | {ci_str} |")
1148
+ lines.append("")
1149
+ except Exception:
1150
+ pass
1151
+ except Exception:
1152
+ pass
1153
+
1154
+ # System Overhead section (latency/throughput)
1155
+ sys_over = certificate.get("system_overhead", {}) or {}
1156
+ if isinstance(sys_over, dict) and sys_over:
1157
+ _append_system_overhead_section(lines, sys_over)
1158
+
1159
+ # Accuracy Subgroups (informational)
1160
+ try:
1161
+ cls = certificate.get("classification", {})
1162
+ sub = cls.get("subgroups") if isinstance(cls, dict) else None
1163
+ if isinstance(sub, dict) and sub:
1164
+ _append_accuracy_subgroups(lines, sub)
1165
+ except Exception:
1166
+ pass
1167
+ # Structural Changes
1168
+ try:
1169
+ structure = certificate.get("structure", {}) or {}
1170
+ params_changed = int(structure.get("params_changed", 0) or 0)
1171
+ layers_modified = int(structure.get("layers_modified", 0) or 0)
1172
+ bitwidth_changes = 0
1173
+ try:
1174
+ bitwidth_changes = int(len(structure.get("bitwidths", []) or []))
1175
+ except Exception:
1176
+ bitwidth_changes = 0
1177
+ # Decide whether to show the section
1178
+ has_changes = any(
1179
+ v > 0 for v in (params_changed, layers_modified, bitwidth_changes)
1180
+ )
1181
+ edit_name = str(certificate.get("edit_name", "unknown"))
1182
+ if has_changes:
1183
+ lines.append("## Structural Changes")
1184
+ lines.append("")
1185
+ lines.append("| Change Type | Count |")
1186
+ lines.append("|-------------|-------|")
1187
+ lines.append(f"| Parameters Changed | {params_changed:,} |")
1188
+ if edit_name == "quant_rtn":
1189
+ # For quantization: prefer a single clear line reconciling target vs applied
1190
+ # using diagnostics when available. Fallback to bitwidth-change count.
1191
+ try:
1192
+ t_an = (structure.get("compression_diagnostics", {}) or {}).get(
1193
+ "target_analysis", {}
1194
+ )
1195
+ except Exception:
1196
+ t_an = {}
1197
+ eligible = None
1198
+ modified = None
1199
+ if isinstance(t_an, dict) and t_an:
1200
+ eligible = t_an.get("modules_eligible")
1201
+ modified = t_an.get("modules_modified")
1202
+ if isinstance(modified, int) and isinstance(eligible, int):
1203
+ lines.append(
1204
+ f"| Linear Modules Quantized | {modified} of {eligible} targeted |"
1205
+ )
1206
+ else:
1207
+ total_bitwidth_changes = bitwidth_changes
1208
+ if total_bitwidth_changes > 0 and layers_modified > 0:
1209
+ modules_per_layer = total_bitwidth_changes // max(
1210
+ layers_modified, 1
1211
+ )
1212
+ lines.append(
1213
+ f"| Linear Modules Quantized | {total_bitwidth_changes} ({modules_per_layer} per block × {layers_modified} blocks) |"
1214
+ )
1215
+ elif total_bitwidth_changes > 0:
1216
+ lines.append(
1217
+ f"| Linear Modules Quantized | {total_bitwidth_changes} |"
1218
+ )
1219
+ else:
1220
+ lines.append(f"| Layers Modified | {layers_modified} |")
1221
+ lines.append("")
1222
+ except Exception:
1223
+ # Best-effort; omit section on error
1224
+ pass
1225
+
1226
+ # Add detailed breakdowns if available
1227
+ if structure.get("bitwidths") and edit_name != "quant_rtn":
1228
+ lines.append(f"| Bit-width Changes | {len(structure['bitwidths'])} layers |")
1229
+ if structure.get("ranks"):
1230
+ lines.append(f"| Rank Changes | {len(structure['ranks'])} layers |")
1231
+
1232
+ lines.append("")
1233
+
1234
+ # Compression Diagnostics
1235
+ compression_diag = structure.get("compression_diagnostics", {})
1236
+ if edit_name == "noop":
1237
+ lines.append("### Compression Diagnostics")
1238
+ lines.append("")
1239
+ lines.append("Not applicable (no parameters modified).")
1240
+ lines.append("")
1241
+ elif compression_diag:
1242
+ lines.append("### Compression Diagnostics")
1243
+ lines.append("")
1244
+
1245
+ # Algorithm execution status
1246
+ status = compression_diag.get("execution_status", "unknown")
1247
+ status_emoji = (
1248
+ "✅" if status == "successful" else "❌" if status == "failed" else "⚠️"
1249
+ )
1250
+ lines.append(f"**Execution Status:** {status_emoji} {status.upper()}")
1251
+ lines.append("")
1252
+
1253
+ # Target module analysis
1254
+ target_analysis = compression_diag.get("target_analysis", {})
1255
+ if target_analysis:
1256
+ lines.append("**Target Module Analysis:**")
1257
+ lines.append("")
1258
+ lines.append("| Metric | Value |")
1259
+ lines.append("|--------|-------|")
1260
+ lines.append(
1261
+ f"| Modules Found | {target_analysis.get('modules_found', 0)} |"
1262
+ )
1263
+ lines.append(
1264
+ f"| Modules Eligible | {target_analysis.get('modules_eligible', 0)} |"
1265
+ )
1266
+ lines.append(
1267
+ f"| Modules Modified | {target_analysis.get('modules_modified', 0)} |"
1268
+ )
1269
+ try:
1270
+ _eligible = int(target_analysis.get("modules_eligible", 0))
1271
+ _modified = int(target_analysis.get("modules_modified", 0))
1272
+ lines.append(f"| Targets → Applied | {_eligible} → {_modified} |")
1273
+ except Exception:
1274
+ pass
1275
+ lines.append(f"| Scope | {target_analysis.get('scope', 'unknown')} |")
1276
+ lines.append("")
1277
+
1278
+ # Parameter effectiveness
1279
+ param_analysis = compression_diag.get("parameter_analysis", {})
1280
+ if param_analysis:
1281
+ lines.append("**Parameter Effectiveness:**")
1282
+ lines.append("")
1283
+ for param, info in param_analysis.items():
1284
+ if isinstance(info, dict):
1285
+ lines.append(
1286
+ f"- **{param}:** {info.get('value', 'N/A')} ({info.get('effectiveness', 'unknown')})"
1287
+ )
1288
+ else:
1289
+ lines.append(f"- **{param}:** {info}")
1290
+ lines.append("")
1291
+
1292
+ # Algorithm-specific details
1293
+ algo_details = compression_diag.get("algorithm_details", {})
1294
+ if algo_details:
1295
+ lines.append("**Algorithm Details:**")
1296
+ lines.append("")
1297
+ for key, value in algo_details.items():
1298
+ lines.append(f"- **{key}:** {value}")
1299
+ lines.append("")
1300
+
1301
+ # Informational recommendations (non-normative)
1302
+ warnings = compression_diag.get("warnings", [])
1303
+ if warnings:
1304
+ lines.append("**ℹ️ Informational:**")
1305
+ lines.append("")
1306
+ for warning in warnings:
1307
+ lines.append(f"- {warning}")
1308
+ lines.append("")
1309
+
1310
+ # Variance Guard (Spectral/RMT summaries are already provided above)
1311
+ variance = certificate["variance"]
1312
+ lines.append("## Variance Guard")
1313
+
1314
+ # Display whether VE was enabled after A/B test
1315
+ lines.append(f"- **Enabled:** {'Yes' if variance['enabled'] else 'No'}")
1316
+
1317
+ if variance["enabled"]:
1318
+ # VE was enabled - show the gain
1319
+ gain_value = variance.get("gain", "N/A")
1320
+ if isinstance(gain_value, int | float):
1321
+ lines.append(f"- **Gain:** {gain_value:.3f}")
1322
+ else:
1323
+ lines.append(f"- **Gain:** {gain_value}")
1324
+ else:
1325
+ # VE was not enabled - show succinct reason if available, else a clear disabled message
1326
+ ppl_no_ve = variance.get("ppl_no_ve")
1327
+ ppl_with_ve = variance.get("ppl_with_ve")
1328
+ ratio_ci = variance.get("ratio_ci")
1329
+ if ppl_no_ve is not None and ppl_with_ve is not None and ratio_ci:
1330
+ lines.append(f"- **Primary metric without VE:** {ppl_no_ve:.3f}")
1331
+ lines.append(f"- **Primary metric with VE:** {ppl_with_ve:.3f}")
1332
+ gain_value = variance.get("gain")
1333
+ if isinstance(gain_value, int | float):
1334
+ lines.append(f"- **Gain (insufficient):** {gain_value:.3f}")
1335
+ else:
1336
+ lines.append(
1337
+ "- Variance Guard: Disabled (predictive gate not evaluated for this edit)."
1338
+ )
1339
+ # Add concise rationale aligned with Balanced predictive gate contract
1340
+ try:
1341
+ ve_policy = certificate.get("policies", {}).get("variance", {})
1342
+ min_effect = ve_policy.get("min_effect_lognll")
1343
+ if isinstance(min_effect, int | float):
1344
+ lines.append(
1345
+ f"- Predictive gate (Balanced): one-sided; enables only if CI excludes 0 and |mean Δ| ≥ {float(min_effect):.4g}."
1346
+ )
1347
+ else:
1348
+ lines.append(
1349
+ "- Predictive gate (Balanced): one-sided; enables only if CI excludes 0 and |mean Δ| ≥ min_effect."
1350
+ )
1351
+ lines.append(
1352
+ "- Predictive Gate: evaluated=false (disabled under current policy/edit)."
1353
+ )
1354
+ except Exception:
1355
+ pass
1356
+
1357
+ if variance.get("ratio_ci"):
1358
+ ratio_lo, ratio_hi = variance["ratio_ci"]
1359
+ lines.append(f"- **Ratio CI:** [{ratio_lo:.3f}, {ratio_hi:.3f}]")
1360
+
1361
+ if variance.get("calibration") and variance.get("enabled"):
1362
+ calib = variance["calibration"]
1363
+ coverage = calib.get("coverage")
1364
+ requested = calib.get("requested")
1365
+ status = calib.get("status", "unknown")
1366
+ lines.append(f"- **Calibration:** {coverage}/{requested} windows ({status})")
1367
+
1368
+ lines.append("")
1369
+
1370
+ # MoE Observability (non-gating)
1371
+ moe = certificate.get("moe", {}) if isinstance(certificate.get("moe"), dict) else {}
1372
+ if moe:
1373
+ lines.append("## MoE Observability")
1374
+ lines.append("")
1375
+ # Core router fields
1376
+ for key in ("top_k", "capacity_factor", "expert_drop_rate"):
1377
+ if key in moe:
1378
+ lines.append(f"- **{key}:** {moe[key]}")
1379
+ # Utilization summary
1380
+ if "utilization_count" in moe or "utilization_mean" in moe:
1381
+ uc = moe.get("utilization_count")
1382
+ um = moe.get("utilization_mean")
1383
+ parts = []
1384
+ if uc is not None:
1385
+ parts.append(f"N={int(uc)}")
1386
+ if isinstance(um, int | float):
1387
+ parts.append(f"mean={um:.3f}")
1388
+ if parts:
1389
+ lines.append(f"- **Utilization:** {'; '.join(parts)}")
1390
+ # Delta summaries when available
1391
+ for key, label in (
1392
+ ("delta_load_balance_loss", "Δ load_balance_loss"),
1393
+ ("delta_router_entropy", "Δ router_entropy"),
1394
+ ("delta_utilization_mean", "Δ utilization mean"),
1395
+ ):
1396
+ if key in moe and isinstance(moe.get(key), int | float):
1397
+ lines.append(f"- **{label}:** {float(moe[key]):+.4f}")
1398
+ lines.append("")
1399
+
1400
+ # Policy Summary
1401
+ lines.append("## Applied Policies")
1402
+ lines.append("")
1403
+ policies = certificate["policies"]
1404
+ for guard_name, policy in policies.items():
1405
+ lines.append(f"### {guard_name.title()}")
1406
+ lines.append("")
1407
+ policy_yaml = (
1408
+ yaml.safe_dump(policy, sort_keys=True, width=80).strip().splitlines()
1409
+ )
1410
+ lines.append("```yaml")
1411
+ for line in policy_yaml:
1412
+ lines.append(line)
1413
+ lines.append("```")
1414
+ lines.append("")
1415
+
1416
+ # Artifacts
1417
+ lines.append("## Artifacts")
1418
+ lines.append("")
1419
+ artifacts = certificate["artifacts"]
1420
+ if artifacts.get("events_path"):
1421
+ lines.append(f"- **Events Log:** `{artifacts['events_path']}`")
1422
+ if artifacts.get("report_path"):
1423
+ lines.append(f"- **Full Report:** `{artifacts['report_path']}`")
1424
+ lines.append(f"- **Certificate Generated:** {artifacts['generated_at']}")
1425
+ lines.append("")
1426
+
1427
+ # Certificate Hash for Integrity
1428
+ cert_hash = _compute_certificate_hash(certificate)
1429
+ lines.append("## Certificate Integrity")
1430
+ lines.append("")
1431
+ lines.append(f"**Certificate Hash:** `{cert_hash}`")
1432
+ lines.append("")
1433
+ lines.append("---")
1434
+ lines.append("")
1435
+ lines.append(
1436
+ "*This InvarLock safety certificate provides a comprehensive assessment of model compression safety.*"
1437
+ )
1438
+ lines.append(
1439
+ "*All metrics are compared against the uncompressed baseline model for safety validation.*"
1440
+ )
1441
+
1442
+ return "\n".join(lines)
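
For orientation, a minimal usage sketch of the two public helpers defined in this hunk, build_console_summary_pack and render_certificate_markdown. The import path assumes the hunk is invarlock/reporting/render.py (per the file list above), and the certificate path is a placeholder, not something prescribed by the package.

import json
from pathlib import Path

from invarlock.reporting.render import (
    build_console_summary_pack,
    render_certificate_markdown,
)

# Load a certificate produced elsewhere by the invarlock reporting pipeline
# (placeholder path; substitute a real certificate JSON file).
cert = json.loads(Path("certificate.json").read_text(encoding="utf-8"))

# Console summary: one overall status line plus one line per evaluated gate.
pack = build_console_summary_pack(cert)
print(pack["overall_line"])
for gate_line in pack["gate_lines"]:
    print(gate_line)

# Full Markdown report; raises ValueError if the certificate fails validation.
markdown = render_certificate_markdown(cert)
Path("certificate.md").write_text(markdown, encoding="utf-8")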