invarlock 0.3.5__py3-none-any.whl → 0.3.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74) hide show
  1. invarlock/__init__.py +2 -2
  2. invarlock/_data/runtime/tiers.yaml +57 -30
  3. invarlock/adapters/__init__.py +11 -15
  4. invarlock/adapters/auto.py +35 -40
  5. invarlock/adapters/capabilities.py +2 -2
  6. invarlock/adapters/hf_causal.py +418 -0
  7. invarlock/adapters/{hf_onnx.py → hf_causal_onnx.py} +3 -3
  8. invarlock/adapters/hf_mixin.py +25 -4
  9. invarlock/adapters/{hf_bert.py → hf_mlm.py} +4 -11
  10. invarlock/adapters/{hf_t5.py → hf_seq2seq.py} +9 -9
  11. invarlock/calibration/spectral_null.py +15 -10
  12. invarlock/calibration/variance_ve.py +0 -2
  13. invarlock/cli/adapter_auto.py +31 -21
  14. invarlock/cli/app.py +73 -2
  15. invarlock/cli/commands/calibrate.py +6 -2
  16. invarlock/cli/commands/certify.py +651 -91
  17. invarlock/cli/commands/doctor.py +11 -11
  18. invarlock/cli/commands/explain_gates.py +57 -8
  19. invarlock/cli/commands/plugins.py +13 -9
  20. invarlock/cli/commands/report.py +233 -69
  21. invarlock/cli/commands/run.py +1066 -244
  22. invarlock/cli/commands/verify.py +154 -15
  23. invarlock/cli/config.py +22 -6
  24. invarlock/cli/doctor_helpers.py +4 -5
  25. invarlock/cli/output.py +193 -0
  26. invarlock/cli/provenance.py +1 -1
  27. invarlock/core/api.py +45 -5
  28. invarlock/core/auto_tuning.py +65 -20
  29. invarlock/core/bootstrap.py +1 -1
  30. invarlock/core/contracts.py +7 -1
  31. invarlock/core/registry.py +11 -13
  32. invarlock/core/runner.py +425 -75
  33. invarlock/edits/quant_rtn.py +65 -37
  34. invarlock/eval/bench.py +3 -16
  35. invarlock/eval/data.py +82 -51
  36. invarlock/eval/metrics.py +63 -2
  37. invarlock/eval/primary_metric.py +23 -0
  38. invarlock/eval/tail_stats.py +230 -0
  39. invarlock/eval/tasks/__init__.py +12 -0
  40. invarlock/eval/tasks/classification.py +48 -0
  41. invarlock/eval/tasks/qa.py +36 -0
  42. invarlock/eval/tasks/text_generation.py +102 -0
  43. invarlock/guards/_estimators.py +154 -0
  44. invarlock/guards/invariants.py +19 -10
  45. invarlock/guards/policies.py +16 -6
  46. invarlock/guards/rmt.py +627 -546
  47. invarlock/guards/spectral.py +348 -110
  48. invarlock/guards/tier_config.py +32 -30
  49. invarlock/guards/variance.py +7 -31
  50. invarlock/guards_ref/rmt_ref.py +23 -23
  51. invarlock/model_profile.py +90 -42
  52. invarlock/observability/health.py +6 -6
  53. invarlock/observability/metrics.py +108 -0
  54. invarlock/reporting/certificate.py +384 -55
  55. invarlock/reporting/certificate_schema.py +3 -2
  56. invarlock/reporting/dataset_hashing.py +15 -2
  57. invarlock/reporting/guards_analysis.py +350 -277
  58. invarlock/reporting/html.py +55 -5
  59. invarlock/reporting/normalizer.py +13 -0
  60. invarlock/reporting/policy_utils.py +38 -36
  61. invarlock/reporting/primary_metric_utils.py +71 -17
  62. invarlock/reporting/render.py +852 -431
  63. invarlock/reporting/report.py +40 -4
  64. invarlock/reporting/report_types.py +11 -3
  65. invarlock/reporting/telemetry.py +86 -0
  66. invarlock/reporting/validate.py +1 -18
  67. {invarlock-0.3.5.dist-info → invarlock-0.3.7.dist-info}/METADATA +27 -13
  68. {invarlock-0.3.5.dist-info → invarlock-0.3.7.dist-info}/RECORD +72 -65
  69. {invarlock-0.3.5.dist-info → invarlock-0.3.7.dist-info}/WHEEL +1 -1
  70. {invarlock-0.3.5.dist-info → invarlock-0.3.7.dist-info}/entry_points.txt +5 -3
  71. invarlock/adapters/hf_gpt2.py +0 -404
  72. invarlock/adapters/hf_llama.py +0 -487
  73. {invarlock-0.3.5.dist-info → invarlock-0.3.7.dist-info}/licenses/LICENSE +0 -0
  74. {invarlock-0.3.5.dist-info → invarlock-0.3.7.dist-info}/top_level.txt +0 -0
@@ -113,6 +113,462 @@ def _short_digest(v: str) -> str:
113
113
  return v if len(v) <= 16 else (v[:8] + "…" + v[-8:])
114
114
 
115
115
 
116
def _render_executive_dashboard(cert: dict[str, Any]) -> str:
    """Build the executive summary dashboard and return it as Markdown text.

    The rows are produced by ``_append_safety_dashboard_section``; they are
    joined with newlines and trailing whitespace is stripped from the result.
    """
    dashboard_rows: list[str] = []
    _append_safety_dashboard_section(dashboard_rows, cert)
    rendered = "\n".join(dashboard_rows)
    return rendered.rstrip()
121
+
122
+
123
def _append_safety_dashboard_section(
    lines: list[str], certificate: dict[str, Any]
) -> None:
    """Append a concise, first-screen dashboard for the certificate.

    Appends a ``## Safety Dashboard`` Markdown table to ``lines`` with one row
    each for the overall verdict, primary metric, preview/final drift,
    invariants, spectral and RMT gates, plus an overhead row when
    ``guard_overhead.evaluated`` is truthy. A trailing blank line is appended.

    Args:
        lines: Output buffer; mutated in place.
        certificate: Certificate mapping. Reads the ``validation``,
            ``primary_metric``, ``auto`` and ``guard_overhead`` entries and
            passes the whole mapping to ``compute_console_validation_block``.

    Measured values that are missing or non-finite (NaN/inf) render as
    ``N/A``. Primary-metric, drift and guard gates that are absent from
    ``validation`` are shown with the informational marker instead of a
    pass/fail marker.
    """
    block = compute_console_validation_block(certificate)
    overall_pass = bool(block.get("overall_pass"))
    overall_status = "✅ PASS" if overall_pass else "❌ FAIL"

    validation = certificate.get("validation", {}) or {}
    pm = certificate.get("primary_metric", {}) or {}
    auto = certificate.get("auto", {}) or {}
    tier = str(auto.get("tier") or "balanced").lower()

    def _finite_or_none(value: Any) -> float | None:
        """Return ``value`` as a finite float, or None when it is not one."""
        if not isinstance(value, int | float):
            return None
        try:
            number = float(value)
        except OverflowError:  # int too large to be represented as a float
            return None
        return number if math.isfinite(number) else None

    def _gate_flag(key: str) -> bool | None:
        """Return the recorded outcome of gate ``key``, or None if absent."""
        if isinstance(validation, dict) and key in validation:
            return bool(validation.get(key))
        return None

    def _measured_cell(ok: bool | None, measured: str) -> str:
        """Prefix a measured value with a pass/fail/info marker."""
        if ok is None:
            return f"🛈 {measured}"
        return f"{'✅' if ok else '❌'} {measured}"

    def _gate_cell(key: str) -> str:
        """Render a plain PASS/FAIL/N/A cell for gate ``key``."""
        ok = _gate_flag(key)
        if ok is None:
            return "🛈 N/A"
        return "✅ PASS" if ok else "❌ FAIL"

    # Primary metric summary
    pm_kind = str(pm.get("kind", "")).lower()
    pm_basis = pm.get("gating_basis") or pm.get("basis") or "point"
    # Non-finite values are treated like missing ones so the cell never shows
    # "nan×" / "+inf pp" (same guard the overhead cell uses).
    pm_value = _finite_or_none(pm.get("ratio_vs_baseline"))

    if pm_kind in {"accuracy", "vqa_accuracy"}:
        measured = f"{pm_value:+.2f} pp" if pm_value is not None else "N/A"
        th_map = {
            "conservative": -0.5,
            "balanced": -1.0,
            "aggressive": -2.0,
            "none": -1.0,
        }
        th = th_map.get(tier, -1.0)
        threshold = f"≥ {th:+.2f} pp ({pm_basis})"
    else:
        measured = f"{pm_value:.3f}×" if pm_value is not None else "N/A"
        tier_thresholds = {
            "conservative": 1.05,
            "balanced": 1.10,
            "aggressive": 1.20,
            "none": 1.10,
        }
        ratio_limit = tier_thresholds.get(tier, 1.10)
        # An explicit target can only tighten the tier limit, never relax it.
        target_ratio = auto.get("target_pm_ratio")
        if isinstance(target_ratio, int | float) and target_ratio > 0:
            ratio_limit = min(ratio_limit, float(target_ratio))
        threshold = f"≤ {ratio_limit:.2f}× ({pm_basis})"

    pm_status = _measured_cell(_gate_flag("primary_metric_acceptable"), measured)

    # Drift summary (final/preview ratio) when preview/final are numeric
    drift_val = "N/A"
    preview = _finite_or_none(pm.get("preview"))
    final = _finite_or_none(pm.get("final"))
    if preview is not None and final is not None and preview > 0:
        drift = final / preview
        if math.isfinite(drift):
            drift_val = f"{drift:.3f}×"
    drift_status = _measured_cell(
        _gate_flag("preview_final_drift_acceptable"), drift_val
    )

    # Overhead row is only shown when the overhead gate was actually evaluated.
    overhead_ctx = certificate.get("guard_overhead", {}) or {}
    overhead_row: tuple[str, str, str] | None = None
    if isinstance(overhead_ctx, dict) and bool(overhead_ctx.get("evaluated")):
        overhead_pct = _finite_or_none(overhead_ctx.get("overhead_percent"))
        overhead_ratio = _finite_or_none(overhead_ctx.get("overhead_ratio"))
        if overhead_pct is not None:
            overhead_measured = f"{overhead_pct:+.2f}%"
        elif overhead_ratio is not None:
            overhead_measured = f"{overhead_ratio:.3f}×"
        else:
            overhead_measured = "N/A"

        threshold_pct = _finite_or_none(overhead_ctx.get("threshold_percent"))
        if threshold_pct is not None:
            threshold_str = f"≤ +{threshold_pct:.1f}%"
        else:
            threshold_str = "≤ +1.0%"

        if isinstance(validation, dict):
            # A missing overhead gate entry counts as acceptable here.
            overhead_ok = bool(validation.get("guard_overhead_acceptable", True))
            overhead_status = f"{'✅' if overhead_ok else '❌'} {overhead_measured}"
        else:
            overhead_status = f"🛈 {overhead_measured}"
        overhead_row = ("Overhead", overhead_status, threshold_str)

    lines.append("## Safety Dashboard")
    lines.append("")
    lines.append("| Check | Status | Quick Summary |")
    lines.append("|-------|--------|---------------|")
    lines.append(f"| Overall | {overall_status} | Canonical gate outcomes |")
    lines.append(f"| Primary Metric | {pm_status} | {threshold} |")
    lines.append(f"| Drift | {drift_status} | 0.95–1.05× band |")
    lines.append(
        f"| Invariants | {_gate_cell('invariants_pass')} | Model integrity checks |"
    )
    lines.append(
        f"| Spectral | {_gate_cell('spectral_stable')} | Weight matrix spectral norms |"
    )
    lines.append(f"| RMT | {_gate_cell('rmt_stable')} | Random Matrix Theory guard |")
    if overhead_row:
        lines.append(f"| {overhead_row[0]} | {overhead_row[1]} | {overhead_row[2]} |")
    lines.append("")
271
+
272
+
273
def _append_primary_metric_section(
    lines: list[str], certificate: dict[str, Any]
) -> None:
    """Append the Primary Metric section early for quick triage.

    Appends a ``## Primary Metric`` section (summary bullets plus a
    preview/final/baseline table) and, when ``secondary_metrics`` is a
    non-empty list, a ``## Secondary Metrics (informational)`` table.

    Nothing is appended (including secondary metrics) when
    ``certificate["primary_metric"]`` is missing, empty or not a dict.

    Args:
        lines: Output buffer; mutated in place.
        certificate: Certificate mapping; reads ``primary_metric`` and
            ``secondary_metrics``.
    """
    pm = certificate.get("primary_metric")
    if not isinstance(pm, dict) or not pm:
        return

    kind = pm.get("kind", "unknown")
    kind_label = str(kind)
    # Decide "is this an accuracy metric" once, case-insensitively, so the
    # estimated-accuracy note and the baseline row always agree.
    is_accuracy = kind_label.lower() in {"accuracy", "vqa_accuracy"}

    lines.append("## Primary Metric")
    lines.append("")
    unit = pm.get("unit", "-")
    paired = pm.get("paired", False)

    try:
        estimated_flag = bool(pm.get("estimated")) or (
            str(pm.get("counts_source", "")).lower() == "pseudo_config"
        )
    except Exception:
        # Best effort: an un-coercible flag is treated as "not estimated".
        estimated_flag = False
    est_suffix = " (estimated)" if estimated_flag else ""

    lines.append(f"- Kind: {kind} (unit: {unit}){est_suffix}")
    gating_basis = pm.get("gating_basis") or pm.get("basis")
    if gating_basis:
        lines.append(f"- Basis: {gating_basis}")
    if isinstance(paired, bool):
        lines.append(f"- Paired: {paired}")
    reps = pm.get("reps")
    if isinstance(reps, int | float):
        lines.append(f"- Bootstrap Reps: {int(reps)}")
    ci = pm.get("ci") or pm.get("display_ci")
    if (
        isinstance(ci, list | tuple)
        and len(ci) == 2
        and all(isinstance(x, int | float) for x in ci)
    ):
        lines.append(f"- CI: {ci[0]:.3f}–{ci[1]:.3f}")

    prev = pm.get("preview")
    fin = pm.get("final")
    ratio = pm.get("ratio_vs_baseline")

    lines.append("")
    if estimated_flag and is_accuracy:
        lines.append(
            "- Note: Accuracy derived from pseudo counts (quick dev preset); use a labeled preset for measured accuracy."
        )
    lines.append("| Field | Value |")
    lines.append("|-------|-------|")
    lines.append(f"| Preview | {_fmt_by_kind(prev, kind_label)} |")
    lines.append(f"| Final | {_fmt_by_kind(fin, kind_label)} |")

    if is_accuracy:
        lines.append(f"| Δ vs Baseline | {_fmt_by_kind(ratio, kind_label)} |")
        base_pt = pm.get("baseline_point")
        if isinstance(base_pt, int | float) and base_pt < 0.05:
            lines.append("- Note: baseline < 5%; ratio suppressed; showing Δpp")
    else:
        try:
            lines.append(f"| Ratio vs Baseline | {float(ratio):.3f} |")
        except (TypeError, ValueError, OverflowError):
            lines.append("| Ratio vs Baseline | N/A |")
    lines.append("")

    # Secondary metrics (informational)
    secs = certificate.get("secondary_metrics")
    if not (isinstance(secs, list) and secs):
        return

    section: list[str] = [
        "## Secondary Metrics (informational)",
        "",
        "| Kind | Preview | Final | vs Baseline | CI |",
        "|------|---------|-------|-------------|----|",
    ]
    for m in secs:
        if not isinstance(m, dict):
            continue
        try:
            k = m.get("kind", "?")
            k_label = str(k)
            pv = _fmt_by_kind(m.get("preview"), k_label)
            fv = _fmt_by_kind(m.get("final"), k_label)
            rb = m.get("ratio_vs_baseline")
            try:
                rb_str = (
                    f"{float(rb):.3f}"
                    if k_label.startswith("ppl")
                    else _fmt_by_kind(rb, k_label)
                )
            except Exception:
                rb_str = "N/A"
            ci_str = "–"
            sec_ci = m.get("display_ci") or m.get("ci")
            if isinstance(sec_ci, tuple | list) and len(sec_ci) == 2:
                try:
                    ci_str = f"{float(sec_ci[0]):.3f}-{float(sec_ci[1]):.3f}"
                except (TypeError, ValueError, OverflowError):
                    # Unparseable bounds: show the placeholder, keep the row.
                    ci_str = "–"
            section.append(f"| {k} | {pv} | {fv} | {rb_str} | {ci_str} |")
        except Exception:
            # Best effort per row: one malformed metric must not truncate
            # the table or drop the remaining metrics.
            continue
    lines.extend(section)
    lines.append("")
375
+
376
+
377
+ def _append_policy_configuration_section(
378
+ lines: list[str], certificate: dict[str, Any]
379
+ ) -> None:
380
+ resolved_policy = certificate.get("resolved_policy")
381
+ policy_provenance = certificate.get("policy_provenance", {}) or {}
382
+ has_prov = isinstance(policy_provenance, dict) and bool(policy_provenance)
383
+ has_resolved = isinstance(resolved_policy, dict) and bool(resolved_policy)
384
+ if not (has_prov or has_resolved):
385
+ return
386
+
387
+ lines.append("## Policy Configuration")
388
+ lines.append("")
389
+
390
+ tier = None
391
+ if has_prov:
392
+ tier = policy_provenance.get("tier")
393
+ if not tier:
394
+ tier = (certificate.get("auto", {}) or {}).get("tier")
395
+ digest_value = None
396
+ if has_prov:
397
+ digest_value = policy_provenance.get("policy_digest")
398
+ if not digest_value:
399
+ digest_value = (certificate.get("policy_digest", {}) or {}).get(
400
+ "thresholds_hash"
401
+ )
402
+
403
+ summary_parts: list[str] = []
404
+ if tier:
405
+ summary_parts.append(f"**Tier:** {tier}")
406
+ if digest_value:
407
+ summary_parts.append(f"**Digest:** `{_short_digest(str(digest_value))}`")
408
+ if summary_parts:
409
+ lines.append(" | ".join(summary_parts))
410
+
411
+ if has_prov:
412
+ overrides_list = policy_provenance.get("overrides") or []
413
+ if overrides_list:
414
+ lines.append(f"- **Overrides:** {', '.join(overrides_list)}")
415
+ else:
416
+ lines.append("- **Overrides:** (none)")
417
+ if policy_provenance.get("resolved_at"):
418
+ lines.append(f"- **Resolved At:** {policy_provenance.get('resolved_at')}")
419
+
420
+ if has_resolved:
421
+ lines.append("")
422
+ lines.append("<details>")
423
+ lines.append("<summary>Resolved Policy YAML</summary>")
424
+ lines.append("")
425
+ lines.append("```yaml")
426
+ resolved_yaml = yaml.safe_dump(
427
+ resolved_policy, sort_keys=True, width=80, default_flow_style=False
428
+ ).strip()
429
+ for line in resolved_yaml.splitlines():
430
+ lines.append(line)
431
+ lines.append("```")
432
+ lines.append("")
433
+ lines.append("</details>")
434
+
435
+ lines.append("")
436
+
437
+
438
+ def _append_dataset_and_provenance_section(
439
+ lines: list[str], certificate: dict[str, Any]
440
+ ) -> None:
441
+ dataset = certificate.get("dataset", {}) or {}
442
+ provenance_info = certificate.get("provenance", {}) or {}
443
+
444
+ has_dataset = isinstance(dataset, dict) and bool(dataset)
445
+ has_provenance = isinstance(provenance_info, dict) and bool(provenance_info)
446
+ if not (has_dataset or has_provenance):
447
+ return
448
+
449
+ lines.append("## Dataset and Provenance")
450
+ lines.append("")
451
+
452
+ if has_dataset:
453
+ prov = dataset.get("provider") or "unknown"
454
+ lines.append(f"- **Provider:** {prov}")
455
+ try:
456
+ seq_len_val = (
457
+ int(dataset.get("seq_len"))
458
+ if isinstance(dataset.get("seq_len"), int | float)
459
+ else dataset.get("seq_len")
460
+ )
461
+ except Exception: # pragma: no cover - defensive
462
+ seq_len_val = dataset.get("seq_len")
463
+ if seq_len_val is not None:
464
+ lines.append(f"- **Sequence Length:** {seq_len_val}")
465
+ windows_blk = (
466
+ dataset.get("windows", {})
467
+ if isinstance(dataset.get("windows"), dict)
468
+ else {}
469
+ )
470
+ win_prev = windows_blk.get("preview")
471
+ win_final = windows_blk.get("final")
472
+ if win_prev is not None and win_final is not None:
473
+ lines.append(f"- **Windows:** {win_prev} preview + {win_final} final")
474
+ if windows_blk.get("seed") is not None:
475
+ lines.append(f"- **Seed:** {windows_blk.get('seed')}")
476
+ hash_blk = (
477
+ dataset.get("hash", {}) if isinstance(dataset.get("hash"), dict) else {}
478
+ )
479
+ if hash_blk.get("preview_tokens") is not None:
480
+ lines.append(f"- **Preview Tokens:** {hash_blk.get('preview_tokens'):,}")
481
+ if hash_blk.get("final_tokens") is not None:
482
+ lines.append(f"- **Final Tokens:** {hash_blk.get('final_tokens'):,}")
483
+ if hash_blk.get("total_tokens") is not None:
484
+ lines.append(f"- **Total Tokens:** {hash_blk.get('total_tokens'):,}")
485
+ if hash_blk.get("dataset"):
486
+ lines.append(f"- **Dataset Hash:** {hash_blk.get('dataset')}")
487
+ tokenizer = dataset.get("tokenizer", {})
488
+ if isinstance(tokenizer, dict) and (
489
+ tokenizer.get("name") or tokenizer.get("hash")
490
+ ):
491
+ vocab_size = tokenizer.get("vocab_size")
492
+ vocab_suffix = (
493
+ f" (vocab {vocab_size})" if isinstance(vocab_size, int) else ""
494
+ )
495
+ lines.append(
496
+ f"- **Tokenizer:** {tokenizer.get('name', 'unknown')}{vocab_suffix}"
497
+ )
498
+ if tokenizer.get("hash"):
499
+ lines.append(f" - Hash: {tokenizer['hash']}")
500
+ lines.append(
501
+ f" - BOS/EOS: {tokenizer.get('bos_token')} / {tokenizer.get('eos_token')}"
502
+ )
503
+ if tokenizer.get("pad_token") is not None:
504
+ lines.append(f" - PAD: {tokenizer.get('pad_token')}")
505
+ if tokenizer.get("add_prefix_space") is not None:
506
+ lines.append(
507
+ f" - add_prefix_space: {tokenizer.get('add_prefix_space')}"
508
+ )
509
+
510
+ if has_provenance:
511
+ baseline_info = provenance_info.get("baseline", {}) or {}
512
+ edited_info = provenance_info.get("edited", {}) or {}
513
+
514
+ if baseline_info or edited_info:
515
+ lines.append("")
516
+ if baseline_info:
517
+ lines.append(f"- **Baseline Run ID:** {baseline_info.get('run_id')}")
518
+ if baseline_info.get("report_hash"):
519
+ lines.append(f" - Report Hash: `{baseline_info.get('report_hash')}`")
520
+ if baseline_info.get("report_path"):
521
+ lines.append(f" - Report Path: {baseline_info.get('report_path')}")
522
+ if edited_info:
523
+ lines.append(f"- **Edited Run ID:** {edited_info.get('run_id')}")
524
+ if edited_info.get("report_hash"):
525
+ lines.append(f" - Report Hash: `{edited_info.get('report_hash')}`")
526
+ if edited_info.get("report_path"):
527
+ lines.append(f" - Report Path: {edited_info.get('report_path')}")
528
+
529
+ provider_digest = provenance_info.get("provider_digest")
530
+ if isinstance(provider_digest, dict) and provider_digest:
531
+ ids_d = provider_digest.get("ids_sha256")
532
+ tok_d = provider_digest.get("tokenizer_sha256")
533
+ mask_d = provider_digest.get("masking_sha256")
534
+
535
+ lines.append("- **Provider Digest:**")
536
+ if tok_d:
537
+ lines.append(
538
+ f" - tokenizer_sha256: `{_short_digest(tok_d)}` (full in JSON)"
539
+ )
540
+ if ids_d:
541
+ lines.append(f" - ids_sha256: `{_short_digest(ids_d)}` (full in JSON)")
542
+ if mask_d:
543
+ lines.append(
544
+ f" - masking_sha256: `{_short_digest(mask_d)}` (full in JSON)"
545
+ )
546
+
547
+ try:
548
+ conf = certificate.get("confidence", {}) or {}
549
+ if isinstance(conf, dict) and conf.get("label"):
550
+ lines.append(f"- **Confidence:** {conf.get('label')}")
551
+ except Exception:
552
+ pass
553
+
554
+ try:
555
+ pd = certificate.get("policy_digest", {}) or {}
556
+ if isinstance(pd, dict) and pd:
557
+ pv = pd.get("policy_version")
558
+ th = pd.get("thresholds_hash")
559
+ if pv:
560
+ lines.append(f"- **Policy Version:** {pv}")
561
+ if isinstance(th, str) and th:
562
+ short = th if len(th) <= 16 else (th[:8] + "…" + th[-8:])
563
+ lines.append(f"- **Thresholds Digest:** `{short}` (full in JSON)")
564
+ if pd.get("changed"):
565
+ lines.append("- Note: policy changed")
566
+ except Exception:
567
+ pass
568
+
569
+ lines.append("")
570
+
571
+
116
572
  def _fmt_by_kind(x: Any, k: str) -> str:
117
573
  try:
118
574
  xv = float(x)
@@ -275,11 +731,12 @@ def render_certificate_markdown(certificate: dict[str, Any]) -> str:
275
731
  if not validate_certificate(certificate):
276
732
  raise ValueError("Invalid certificate structure")
277
733
 
278
- lines = []
734
+ lines: list[str] = []
735
+ appendix_lines: list[str] = []
279
736
  edit_name = str(certificate.get("edit_name") or "").lower()
280
737
 
281
738
  # Header
282
- lines.append("# InvarLock Safety Certificate")
739
+ lines.append("# InvarLock Evaluation Certificate")
283
740
  lines.append("")
284
741
  lines.append(
285
742
  "> *Basis: “point” gates check the point estimate; “upper” gates check the CI "
@@ -291,6 +748,10 @@ def render_certificate_markdown(certificate: dict[str, Any]) -> str:
291
748
  lines.append(f"**Generated:** {certificate['artifacts']['generated_at']}")
292
749
  lines.append(f"**Edit Type:** {certificate.get('edit_name', 'Unknown')}")
293
750
  lines.append("")
751
+ lines.append(
752
+ "> Full evidence: see [`evaluation.cert.json`](evaluation.cert.json) for complete provenance, digests, and raw measurements."
753
+ )
754
+ lines.append("")
294
755
 
295
756
  plugins = certificate.get("plugins", {})
296
757
  if isinstance(plugins, dict) and plugins:
@@ -314,7 +775,7 @@ def render_certificate_markdown(certificate: dict[str, Any]) -> str:
314
775
  ]
315
776
  if guard_entries:
316
777
  lines.append("- Guards:\n - " + "\n - ".join(guard_entries))
317
- lines.append("")
778
+ lines.append("")
318
779
 
319
780
  # Executive Summary with validation status (canonical, from console block)
320
781
  lines.append("## Executive Summary")
@@ -354,6 +815,25 @@ def render_certificate_markdown(certificate: dict[str, Any]) -> str:
354
815
  pass
355
816
  lines.append("")
356
817
 
818
+ dashboard = _render_executive_dashboard(certificate)
819
+ if dashboard:
820
+ lines.extend(dashboard.splitlines())
821
+ lines.append("")
822
+
823
+ lines.append("## Contents")
824
+ lines.append("")
825
+ lines.append("- [Safety Dashboard](#safety-dashboard)")
826
+ lines.append("- [Quality Gates](#quality-gates)")
827
+ lines.append("- [Safety Check Details](#safety-check-details)")
828
+ lines.append("- [Primary Metric](#primary-metric)")
829
+ lines.append("- [Guard Observability](#guard-observability)")
830
+ lines.append("- [Model Information](#model-information)")
831
+ lines.append("- [Dataset and Provenance](#dataset-and-provenance)")
832
+ lines.append("- [Policy Configuration](#policy-configuration)")
833
+ lines.append("- [Appendix](#appendix)")
834
+ lines.append("- [Certificate Integrity](#certificate-integrity)")
835
+ lines.append("")
836
+
357
837
  # Validation table with canonical gates (mirrors console allow-list)
358
838
  lines.append("## Quality Gates")
359
839
  lines.append("")
@@ -410,6 +890,31 @@ def render_certificate_markdown(certificate: dict[str, Any]) -> str:
410
890
  )
411
891
  )
412
892
  status = "✅ PASS" if ok else "❌ FAIL"
893
+ drift_min = 0.95
894
+ drift_max = 1.05
895
+ try:
896
+ drift_band = (
897
+ pm_block.get("drift_band") if isinstance(pm_block, dict) else None
898
+ )
899
+ if isinstance(drift_band, dict):
900
+ lo = drift_band.get("min")
901
+ hi = drift_band.get("max")
902
+ if isinstance(lo, int | float) and isinstance(hi, int | float):
903
+ lo_f = float(lo)
904
+ hi_f = float(hi)
905
+ if math.isfinite(lo_f) and math.isfinite(hi_f) and 0 < lo_f < hi_f:
906
+ drift_min = lo_f
907
+ drift_max = hi_f
908
+ elif isinstance(drift_band, list | tuple) and len(drift_band) == 2:
909
+ lo_raw, hi_raw = drift_band[0], drift_band[1]
910
+ if isinstance(lo_raw, int | float) and isinstance(hi_raw, int | float):
911
+ lo_f = float(lo_raw)
912
+ hi_f = float(hi_raw)
913
+ if math.isfinite(lo_f) and math.isfinite(hi_f) and 0 < lo_f < hi_f:
914
+ drift_min = lo_f
915
+ drift_max = hi_f
916
+ except Exception:
917
+ pass
413
918
  # Compute drift from PM preview/final when available
414
919
  try:
415
920
  pv = (
@@ -430,8 +935,9 @@ def render_certificate_markdown(certificate: dict[str, Any]) -> str:
430
935
  except Exception:
431
936
  drift = float("nan")
432
937
  measured = f"{drift:.3f}x" if math.isfinite(drift) else "N/A"
938
+ band_label = f"{drift_min:.2f}–{drift_max:.2f}x"
433
939
  lines.append(
434
- f"| Preview Final Drift Acceptable | {status} | {measured} | 0.95–1.05x | point | Final/Preview ratio stability |"
940
+ f"| Preview Final Drift Acceptable | {status} | {measured} | {band_label} | point | Final/Preview ratio stability |"
435
941
  )
436
942
 
437
943
  # Helper to emit Guard Overhead Acceptable row (only when evaluated)
@@ -468,9 +974,70 @@ def render_certificate_markdown(certificate: dict[str, Any]) -> str:
468
974
  f"| Guard Overhead Acceptable | {status} | {measured} | ≤ +{threshold_pct:.1f}% | point | Guarded vs bare PM overhead |"
469
975
  )
470
976
 
977
+ def _emit_pm_tail_gate_row() -> None:
978
+ pm_tail = certificate.get("primary_metric_tail", {}) or {}
979
+ if not isinstance(pm_tail, dict) or not pm_tail:
980
+ return
981
+
982
+ evaluated = bool(pm_tail.get("evaluated", False))
983
+ mode = str(pm_tail.get("mode", "warn") or "warn").strip().lower()
984
+ passed = bool(pm_tail.get("passed", True))
985
+ warned = bool(pm_tail.get("warned", False))
986
+
987
+ if not evaluated:
988
+ status = "🛈 INFO"
989
+ elif passed:
990
+ status = "✅ PASS"
991
+ elif mode == "fail":
992
+ status = "❌ FAIL"
993
+ else:
994
+ status = "⚠️ WARN" if warned else "⚠️ WARN"
995
+
996
+ policy = (
997
+ pm_tail.get("policy", {}) if isinstance(pm_tail.get("policy"), dict) else {}
998
+ )
999
+ stats = (
1000
+ pm_tail.get("stats", {}) if isinstance(pm_tail.get("stats"), dict) else {}
1001
+ )
1002
+
1003
+ q = policy.get("quantile", 0.95)
1004
+ try:
1005
+ qf = float(q)
1006
+ except Exception:
1007
+ qf = 0.95
1008
+ qf = max(0.0, min(1.0, qf))
1009
+ q_key = f"q{int(round(100.0 * qf))}"
1010
+ q_name = f"P{int(round(100.0 * qf))}"
1011
+ q_val = stats.get(q_key)
1012
+ mass_val = stats.get("tail_mass")
1013
+ eps = policy.get("epsilon", stats.get("epsilon"))
1014
+
1015
+ measured_parts: list[str] = []
1016
+ if isinstance(q_val, int | float) and math.isfinite(float(q_val)):
1017
+ measured_parts.append(f"{q_name}={float(q_val):.3f}")
1018
+ if isinstance(mass_val, int | float) and math.isfinite(float(mass_val)):
1019
+ measured_parts.append(f"mass={float(mass_val):.3f}")
1020
+ measured = ", ".join(measured_parts) if measured_parts else "N/A"
1021
+
1022
+ thr_parts: list[str] = []
1023
+ qmax = policy.get("quantile_max")
1024
+ if isinstance(qmax, int | float) and math.isfinite(float(qmax)):
1025
+ thr_parts.append(f"{q_name}≤{float(qmax):.3f}")
1026
+ mmax = policy.get("mass_max")
1027
+ if isinstance(mmax, int | float) and math.isfinite(float(mmax)):
1028
+ thr_parts.append(f"mass≤{float(mmax):.3f}")
1029
+ if isinstance(eps, int | float) and math.isfinite(float(eps)):
1030
+ thr_parts.append(f"ε={float(eps):.1e}")
1031
+ threshold = "; ".join(thr_parts) if thr_parts else "policy"
1032
+
1033
+ lines.append(
1034
+ f"| Primary Metric Tail | {status} | {measured} | {threshold} | {q_name.lower()} | Tail regression vs baseline (ΔlogNLL) |"
1035
+ )
1036
+
471
1037
  # Emit canonical gate rows
472
1038
  if has_pm:
473
1039
  _emit_pm_gate_row()
1040
+ _emit_pm_tail_gate_row()
474
1041
  _emit_drift_gate_row()
475
1042
  _emit_overhead_gate_row()
476
1043
 
@@ -555,14 +1122,39 @@ def render_certificate_markdown(certificate: dict[str, Any]) -> str:
555
1122
  or overlap_frac is not None
556
1123
  ):
557
1124
  lines.append("")
558
- lines.append(
559
- f"- Pairing: paired={paired_windows}, match={match_frac:.3f}, overlap={overlap_frac:.3f}"
560
- )
1125
+ parts: list[str] = []
1126
+ if paired_windows is not None:
1127
+ try:
1128
+ parts.append(f"{int(paired_windows)} windows")
1129
+ except Exception:
1130
+ parts.append(f"windows={paired_windows}")
1131
+ if isinstance(match_frac, int | float) and math.isfinite(float(match_frac)):
1132
+ parts.append(f"{float(match_frac) * 100.0:.1f}% match")
1133
+ elif match_frac is not None:
1134
+ parts.append(f"match={match_frac}")
1135
+ if isinstance(overlap_frac, int | float) and math.isfinite(
1136
+ float(overlap_frac)
1137
+ ):
1138
+ parts.append(f"{float(overlap_frac) * 100.0:.1f}% overlap")
1139
+ elif overlap_frac is not None:
1140
+ parts.append(f"overlap={overlap_frac}")
1141
+ lines.append(f"✅ Pairing: {', '.join(parts) if parts else 'N/A'}")
561
1142
  if isinstance(bootstrap, dict):
562
1143
  reps = bootstrap.get("replicates")
563
1144
  bseed = bootstrap.get("seed")
564
1145
  if reps is not None or bseed is not None:
565
- lines.append(f"- Bootstrap: replicates={reps}, seed={bseed}")
1146
+ bits: list[str] = []
1147
+ if reps is not None:
1148
+ try:
1149
+ bits.append(f"{int(reps)} replicates")
1150
+ except Exception:
1151
+ bits.append(f"replicates={reps}")
1152
+ if bseed is not None:
1153
+ try:
1154
+ bits.append(f"seed={int(bseed)}")
1155
+ except Exception:
1156
+ bits.append(f"seed={bseed}")
1157
+ lines.append(f"✅ Bootstrap: {', '.join(bits) if bits else 'N/A'}")
566
1158
  # Optional: show log-space paired Δ CI next to ratio CI for clarity
567
1159
  delta_ci = certificate.get("primary_metric", {}).get("ci") or certificate.get(
568
1160
  "ppl", {}
@@ -572,7 +1164,7 @@ def render_certificate_markdown(certificate: dict[str, Any]) -> str:
572
1164
  and len(delta_ci) == 2
573
1165
  and all(isinstance(x, int | float) for x in delta_ci)
574
1166
  ):
575
- lines.append(f"- Log Δ (paired) CI: [{delta_ci[0]:.6f}, {delta_ci[1]:.6f}]")
1167
+ lines.append(f"🛈 Log Δ (paired) CI: [{delta_ci[0]:.6f}, {delta_ci[1]:.6f}]")
576
1168
  except Exception:
577
1169
  pass
578
1170
 
@@ -593,116 +1185,179 @@ def render_certificate_markdown(certificate: dict[str, Any]) -> str:
593
1185
 
594
1186
  lines.append("")
595
1187
 
1188
+ _append_primary_metric_section(lines, certificate)
1189
+
596
1190
  # Guard observability snapshots
597
1191
  lines.append("## Guard Observability")
598
1192
  lines.append("")
599
1193
 
600
1194
  spectral_info = certificate.get("spectral", {}) or {}
601
1195
  if spectral_info:
602
- lines.append("### Spectral Guard")
1196
+ lines.append("### Spectral Guard Summary")
603
1197
  lines.append("")
604
- mt_info = spectral_info.get("multiple_testing", {}) or {}
605
- if mt_info:
606
- lines.append("- **Multiple Testing:**")
607
- lines.append(" ```yaml")
608
- mt_yaml = (
609
- yaml.safe_dump(mt_info, sort_keys=True, width=70).strip().splitlines()
610
- )
611
- for line in mt_yaml:
612
- lines.append(f" {line}")
613
- lines.append(" ```")
614
- # Spectral summary (place key knobs together for quick scan)
615
- spec_sigma = spectral_info.get("sigma_quantile")
616
- spec_deadband = spectral_info.get("deadband")
617
- spec_max_caps = spectral_info.get("max_caps")
618
- summary_yaml = {
619
- "sigma_quantile": float(spec_sigma)
620
- if isinstance(spec_sigma, int | float)
621
- else None,
622
- "deadband": float(spec_deadband)
623
- if isinstance(spec_deadband, int | float)
624
- else None,
625
- "max_caps": int(spec_max_caps)
626
- if isinstance(spec_max_caps, int | float)
627
- else None,
628
- }
629
- # Drop Nones from summary
630
- summary_yaml = {k: v for k, v in summary_yaml.items() if v is not None}
631
- if summary_yaml:
632
- lines.append("- **Spectral Summary:**")
633
- lines.append(" ```yaml")
634
- for line in (
635
- yaml.safe_dump(summary_yaml, sort_keys=True, width=70)
636
- .strip()
637
- .splitlines()
638
- ):
639
- lines.append(f" {line}")
640
- lines.append(" ```")
1198
+ lines.append("| Metric | Value | Status |")
1199
+ lines.append("|--------|-------|--------|")
1200
+
1201
+ spectral_ok = bool(validation.get("spectral_stable", False))
1202
+ caps_applied = spectral_info.get("caps_applied")
1203
+ max_caps = spectral_info.get("max_caps")
1204
+ caps_val = (
1205
+ f"{caps_applied}/{max_caps}"
1206
+ if caps_applied is not None and max_caps is not None
1207
+ else "-"
1208
+ )
641
1209
  lines.append(
642
- f"- Caps Applied: {spectral_info.get('caps_applied')} / {spectral_info.get('max_caps')}"
1210
+ f"| Caps Applied | {caps_val} | {'✅ OK' if spectral_ok else '❌ FAIL'} |"
643
1211
  )
1212
+
644
1213
  summary = spectral_info.get("summary", {}) or {}
645
- lines.append(f"- Caps Exceeded: {summary.get('caps_exceeded', False)}")
646
- caps_by_family = spectral_info.get("caps_applied_by_family") or {}
1214
+ caps_exceeded = summary.get("caps_exceeded")
1215
+ if caps_exceeded is not None:
1216
+ cap_status = "✅ OK" if not bool(caps_exceeded) else "⚠️ WARN"
1217
+ lines.append(f"| Caps Exceeded | {caps_exceeded} | {cap_status} |")
1218
+
1219
+ top_scores = spectral_info.get("top_z_scores") or {}
1220
+ max_family: str | None = None
1221
+ max_module: str | None = None
1222
+ max_abs_z: float | None = None
1223
+ if isinstance(top_scores, dict):
1224
+ for family, entries in top_scores.items():
1225
+ if not isinstance(entries, list):
1226
+ continue
1227
+ for entry in entries:
1228
+ if not isinstance(entry, dict):
1229
+ continue
1230
+ z_val = entry.get("z")
1231
+ if not (
1232
+ isinstance(z_val, int | float) and math.isfinite(float(z_val))
1233
+ ):
1234
+ continue
1235
+ z_abs = abs(float(z_val))
1236
+ if max_abs_z is None or z_abs > max_abs_z:
1237
+ max_abs_z = z_abs
1238
+ max_family = str(family)
1239
+ max_module = (
1240
+ str(entry.get("module")) if entry.get("module") else None
1241
+ )
1242
+
647
1243
  family_caps = spectral_info.get("family_caps") or {}
648
- if caps_by_family:
649
- lines.append("")
650
- lines.append("| Family | κ | Violations |")
651
- lines.append("|--------|---|------------|")
652
- for family, count in caps_by_family.items():
653
- kappa = family_caps.get(family, {}).get("kappa")
654
- if isinstance(kappa, int | float) and math.isfinite(float(kappa)):
655
- kappa_str = f"{kappa:.3f}"
656
- else:
657
- kappa_str = "-"
658
- lines.append(f"| {family} | {kappa_str} | {count} |")
659
- lines.append("")
1244
+ kappa = None
1245
+ if max_family and isinstance(family_caps, dict):
1246
+ try:
1247
+ kappa = (family_caps.get(max_family, {}) or {}).get("kappa")
1248
+ except Exception:
1249
+ kappa = None
1250
+ kappa_f = (
1251
+ float(kappa)
1252
+ if isinstance(kappa, int | float) and math.isfinite(float(kappa))
1253
+ else None
1254
+ )
1255
+
1256
+ if max_abs_z is not None:
1257
+ max_val = f"{max_abs_z:.3f}"
1258
+ if max_family:
1259
+ max_val += f" ({max_family})"
1260
+ if max_module:
1261
+ max_val += f" – {max_module}"
1262
+ if kappa_f is None:
1263
+ max_status = "🛈 No κ"
1264
+ elif max_abs_z <= kappa_f:
1265
+ max_status = f"✅ Within κ={kappa_f:.3f}"
1266
+ else:
1267
+ max_status = f"❌ Exceeds κ={kappa_f:.3f}"
1268
+ lines.append(f"| Max |z| | {max_val} | {max_status} |")
1269
+
1270
+ mt_info = spectral_info.get("multiple_testing", {}) or {}
1271
+ if isinstance(mt_info, dict) and mt_info:
1272
+ mt_method = mt_info.get("method")
1273
+ mt_alpha = mt_info.get("alpha")
1274
+ mt_m = mt_info.get("m")
1275
+ parts: list[str] = []
1276
+ if mt_method:
1277
+ parts.append(f"method={mt_method}")
1278
+ if isinstance(mt_alpha, int | float) and math.isfinite(float(mt_alpha)):
1279
+ parts.append(f"α={float(mt_alpha):.3g}")
1280
+ if isinstance(mt_m, int | float) and math.isfinite(float(mt_m)):
1281
+ parts.append(f"m={int(mt_m)}")
1282
+ lines.append(
1283
+ f"| Multiple Testing | {', '.join(parts) if parts else '—'} | 🛈 INFO |"
1284
+ )
1285
+
1286
+ lines.append("")
1287
+
1288
+ caps_by_family = spectral_info.get("caps_applied_by_family") or {}
660
1289
  quantiles = spectral_info.get("family_z_quantiles") or {}
661
- if quantiles:
662
- lines.append("| Family | q95 | q99 | Max | Samples |")
663
- lines.append("|--------|-----|-----|-----|---------|")
664
- for family, stats in quantiles.items():
665
- q95 = stats.get("q95")
666
- q99 = stats.get("q99")
667
- max_z = stats.get("max")
668
- count = stats.get("count")
1290
+ if any(
1291
+ bool(x)
1292
+ for x in (caps_by_family, quantiles, family_caps, top_scores)
1293
+ if isinstance(x, dict)
1294
+ ):
1295
+ lines.append("<details>")
1296
+ lines.append("<summary>Per-family details</summary>")
1297
+ lines.append("")
1298
+ lines.append("| Family | κ | q95 | Max |z| | Violations |")
1299
+ lines.append("|--------|---|-----|--------|------------|")
1300
+
1301
+ families: set[str] = set()
1302
+ for block in (caps_by_family, quantiles, family_caps, top_scores):
1303
+ if isinstance(block, dict):
1304
+ families.update(str(k) for k in block.keys())
1305
+
1306
+ for family in sorted(families):
1307
+ kappa = None
1308
+ if isinstance(family_caps, dict):
1309
+ kappa = (family_caps.get(family, {}) or {}).get("kappa")
1310
+ kappa_str = (
1311
+ f"{float(kappa):.3f}"
1312
+ if isinstance(kappa, int | float) and math.isfinite(float(kappa))
1313
+ else "-"
1314
+ )
1315
+
1316
+ q95 = None
1317
+ max_z = None
1318
+ if isinstance(quantiles, dict):
1319
+ stats = quantiles.get(family) or {}
1320
+ if isinstance(stats, dict):
1321
+ q95 = stats.get("q95")
1322
+ max_z = stats.get("max")
669
1323
  q95_str = f"{q95:.3f}" if isinstance(q95, int | float) else "-"
670
- q99_str = f"{q99:.3f}" if isinstance(q99, int | float) else "-"
671
1324
  max_str = f"{max_z:.3f}" if isinstance(max_z, int | float) else "-"
672
- count_str = str(count) if isinstance(count, int | float) else "-"
1325
+
1326
+ violations = None
1327
+ if isinstance(caps_by_family, dict):
1328
+ violations = caps_by_family.get(family)
1329
+ v_str = (
1330
+ str(int(violations)) if isinstance(violations, int | float) else "0"
1331
+ )
1332
+
673
1333
  lines.append(
674
- f"| {family} | {q95_str} | {q99_str} | {max_str} | {count_str} |"
1334
+ f"| {family} | {kappa_str} | {q95_str} | {max_str} | {v_str} |"
675
1335
  )
1336
+
1337
+ if isinstance(top_scores, dict) and top_scores:
1338
+ lines.append("")
1339
+ lines.append("Top |z| per family:")
1340
+ for family in sorted(top_scores.keys()):
1341
+ entries = top_scores[family]
1342
+ if not isinstance(entries, list) or not entries:
1343
+ continue
1344
+ formatted_entries = []
1345
+ for entry in entries:
1346
+ if not isinstance(entry, dict):
1347
+ continue
1348
+ module_name = entry.get("module", "unknown")
1349
+ z_val = entry.get("z")
1350
+ if isinstance(z_val, int | float) and math.isfinite(
1351
+ float(z_val)
1352
+ ):
1353
+ z_str = f"{z_val:.3f}"
1354
+ else:
1355
+ z_str = "n/a"
1356
+ formatted_entries.append(f"{module_name} (|z|={z_str})")
1357
+ lines.append(f"- {family}: {', '.join(formatted_entries)}")
1358
+
676
1359
  lines.append("")
677
- policy_caps = spectral_info.get("policy", {}).get("family_caps")
678
- if policy_caps:
679
- lines.append("- **Family κ (policy):**")
680
- lines.append(" ```yaml")
681
- caps_yaml = (
682
- yaml.safe_dump(policy_caps, sort_keys=True, width=70)
683
- .strip()
684
- .splitlines()
685
- )
686
- for line in caps_yaml:
687
- lines.append(f" {line}")
688
- lines.append(" ```")
689
- top_scores = spectral_info.get("top_z_scores") or {}
690
- if top_scores:
691
- lines.append("Top |z| per family:")
692
- for family in sorted(top_scores.keys()):
693
- entries = top_scores[family]
694
- if not entries:
695
- continue
696
- formatted_entries = []
697
- for entry in entries:
698
- module_name = entry.get("module", "unknown")
699
- z_val = entry.get("z")
700
- if isinstance(z_val, int | float) and math.isfinite(float(z_val)):
701
- z_str = f"{z_val:.3f}"
702
- else:
703
- z_str = "n/a"
704
- formatted_entries.append(f"{module_name} (|z|={z_str})")
705
- lines.append(f"- {family}: {', '.join(formatted_entries)}")
1360
+ lines.append("</details>")
706
1361
  lines.append("")
707
1362
 
708
1363
  rmt_info = certificate.get("rmt", {}) or {}
@@ -710,7 +1365,18 @@ def render_certificate_markdown(certificate: dict[str, Any]) -> str:
710
1365
  lines.append("### RMT Guard")
711
1366
  lines.append("")
712
1367
  families = rmt_info.get("families") or {}
1368
+ stable = bool(rmt_info.get("stable", True))
1369
+ status = "✅ OK" if stable else "❌ FAIL"
1370
+ delta_total = rmt_info.get("delta_total")
1371
+ if isinstance(delta_total, int):
1372
+ lines.append(f"- Δ total: {delta_total:+d}")
1373
+ lines.append(f"- Status: {status}")
1374
+ lines.append(f"- Families: {len(families)}")
713
1375
  if families:
1376
+ lines.append("")
1377
+ lines.append("<details>")
1378
+ lines.append("<summary>RMT family details</summary>")
1379
+ lines.append("")
714
1380
  lines.append("| Family | ε_f | Bare | Guarded | Δ |")
715
1381
  lines.append("|--------|-----|------|---------|---|")
716
1382
  for family, data in families.items():
@@ -740,12 +1406,10 @@ def render_certificate_markdown(certificate: dict[str, Any]) -> str:
740
1406
  f"| {family} | {epsilon_str} | {bare_str} | {guarded_str} | {delta_str} |"
741
1407
  )
742
1408
  lines.append("")
743
- # Delta total and stability flags
744
- delta_total = rmt_info.get("delta_total")
745
- if isinstance(delta_total, int):
746
- lines.append(f"- Δ total: {delta_total:+d}")
747
- lines.append(f"- Stable: {rmt_info.get('stable', True)}")
748
- lines.append("")
1409
+ lines.append("</details>")
1410
+ lines.append("")
1411
+ else:
1412
+ lines.append("")
749
1413
 
750
1414
  guard_overhead_info = certificate.get("guard_overhead", {}) or {}
751
1415
  if guard_overhead_info:
@@ -802,21 +1466,21 @@ def render_certificate_markdown(certificate: dict[str, Any]) -> str:
802
1466
  inference_sources = compression_diag.get("inference_source") or {}
803
1467
  inference_log = compression_diag.get("inference_log") or []
804
1468
  if inference_flags or inference_sources or inference_log:
805
- lines.append("## Inference")
806
- lines.append("")
1469
+ appendix_lines.append("### Inference Diagnostics")
1470
+ appendix_lines.append("")
807
1471
  if inference_flags:
808
- lines.append("- **Fields Inferred:**")
1472
+ appendix_lines.append("- **Fields Inferred:**")
809
1473
  for field, flag in inference_flags.items():
810
- lines.append(f" - {field}: {'yes' if flag else 'no'}")
1474
+ appendix_lines.append(f" - {field}: {'yes' if flag else 'no'}")
811
1475
  if inference_sources:
812
- lines.append("- **Sources:**")
1476
+ appendix_lines.append("- **Sources:**")
813
1477
  for field, source in inference_sources.items():
814
- lines.append(f" - {field}: {source}")
1478
+ appendix_lines.append(f" - {field}: {source}")
815
1479
  if inference_log:
816
- lines.append("- **Inference Log:**")
1480
+ appendix_lines.append("- **Inference Log:**")
817
1481
  for entry in inference_log:
818
- lines.append(f" - {entry}")
819
- lines.append("")
1482
+ appendix_lines.append(f" - {entry}")
1483
+ appendix_lines.append("")
820
1484
 
821
1485
  # Model and Configuration
822
1486
  lines.append("## Model Information")
@@ -845,28 +1509,48 @@ def render_certificate_markdown(certificate: dict[str, Any]) -> str:
845
1509
  if invarlock_version:
846
1510
  lines.append(f"- **InvarLock Version:** {invarlock_version}")
847
1511
  env_flags = meta.get("env_flags")
848
- if isinstance(env_flags, dict) and env_flags:
849
- lines.append("- **Env Flags:**")
850
- lines.append(" ```yaml")
851
- for k, v in env_flags.items():
852
- lines.append(f" {k}: {v}")
853
- lines.append(" ```")
854
- # Determinism flags (if present)
855
1512
  cuda_flags = meta.get("cuda_flags")
1513
+
1514
+ # Compressed determinism/environment summary for readability
1515
+ det_parts: list[str] = []
1516
+ for label, keys in (
1517
+ ("torch_det", ("torch_deterministic_algorithms", "deterministic_algorithms")),
1518
+ ("cudnn_det", ("cudnn_deterministic",)),
1519
+ ("cudnn_bench", ("cudnn_benchmark",)),
1520
+ ("tf32_matmul", ("cuda_matmul_allow_tf32",)),
1521
+ ("tf32_cudnn", ("cudnn_allow_tf32",)),
1522
+ ("cublas_ws", ("CUBLAS_WORKSPACE_CONFIG",)),
1523
+ ):
1524
+ val = None
1525
+ for key in keys:
1526
+ if isinstance(env_flags, dict) and env_flags.get(key) is not None:
1527
+ val = env_flags.get(key)
1528
+ break
1529
+ if isinstance(cuda_flags, dict) and cuda_flags.get(key) is not None:
1530
+ val = cuda_flags.get(key)
1531
+ break
1532
+ if val is not None:
1533
+ det_parts.append(f"{label}={val}")
1534
+ if det_parts:
1535
+ lines.append(f"- **Determinism:** {', '.join(det_parts)}")
1536
+
1537
+ full_flags: dict[str, Any] = {}
1538
+ if isinstance(env_flags, dict) and env_flags:
1539
+ full_flags["env_flags"] = env_flags
856
1540
  if isinstance(cuda_flags, dict) and cuda_flags:
857
- parts = []
858
- for key in (
859
- "deterministic_algorithms",
860
- "cudnn_deterministic",
861
- "cudnn_benchmark",
862
- "cudnn_allow_tf32",
863
- "cuda_matmul_allow_tf32",
864
- "CUBLAS_WORKSPACE_CONFIG",
865
- ):
866
- if key in cuda_flags and cuda_flags[key] is not None:
867
- parts.append(f"{key}={cuda_flags[key]}")
868
- if parts:
869
- lines.append(f"- **Determinism Flags:** {', '.join(parts)}")
1541
+ full_flags["cuda_flags"] = cuda_flags
1542
+ if full_flags:
1543
+ lines.append("")
1544
+ lines.append("<details>")
1545
+ lines.append("<summary>Environment flags (full)</summary>")
1546
+ lines.append("")
1547
+ lines.append("```yaml")
1548
+ flags_yaml = yaml.safe_dump(full_flags, sort_keys=True, width=80).strip()
1549
+ for line in flags_yaml.splitlines():
1550
+ lines.append(line)
1551
+ lines.append("```")
1552
+ lines.append("")
1553
+ lines.append("</details>")
870
1554
  lines.append("")
871
1555
 
872
1556
  # Edit Configuration (removed duplicate Edit Information section)
@@ -890,267 +1574,10 @@ def render_certificate_markdown(certificate: dict[str, Any]) -> str:
890
1574
  pass
891
1575
  lines.append("")
892
1576
 
893
- resolved_policy = certificate.get("resolved_policy")
894
- if resolved_policy:
895
- lines.append("## Resolved Policy")
896
- lines.append("")
897
- lines.append("```yaml")
898
- resolved_yaml = yaml.safe_dump(
899
- resolved_policy, sort_keys=True, width=80, default_flow_style=False
900
- ).strip()
901
- for line in resolved_yaml.splitlines():
902
- lines.append(line)
903
- lines.append("```")
904
- lines.append("")
905
-
906
- policy_provenance = certificate.get("policy_provenance", {})
907
- if policy_provenance:
908
- lines.append("## Policy Provenance")
909
- lines.append("")
910
- lines.append(f"- **Tier:** {policy_provenance.get('tier')}")
911
- overrides_list = policy_provenance.get("overrides") or []
912
- if overrides_list:
913
- lines.append(f"- **Overrides:** {', '.join(overrides_list)}")
914
- else:
915
- lines.append("- **Overrides:** (none)")
916
- digest_value = policy_provenance.get("policy_digest")
917
- if digest_value:
918
- lines.append(f"- **Policy Digest:** `{digest_value}`")
919
- else:
920
- lines.append("- **Policy Digest:** (not recorded)")
921
- if policy_provenance.get("resolved_at"):
922
- lines.append(f"- **Resolved At:** {policy_provenance.get('resolved_at')}")
923
- lines.append("")
924
-
925
- # Dataset Information
926
- lines.append("## Dataset Configuration")
927
- lines.append("")
928
- dataset = certificate.get("dataset", {}) or {}
929
- prov = (
930
- (dataset.get("provider") or "unknown")
931
- if isinstance(dataset, dict)
932
- else "unknown"
933
- )
934
- lines.append(f"- **Provider:** {prov}")
935
- try:
936
- seq_len_val = (
937
- int(dataset.get("seq_len"))
938
- if isinstance(dataset.get("seq_len"), int | float)
939
- else dataset.get("seq_len")
940
- )
941
- except Exception: # pragma: no cover - defensive
942
- seq_len_val = dataset.get("seq_len")
943
- if seq_len_val is not None:
944
- lines.append(f"- **Sequence Length:** {seq_len_val}")
945
- windows_blk = (
946
- dataset.get("windows", {}) if isinstance(dataset.get("windows"), dict) else {}
947
- )
948
- win_prev = windows_blk.get("preview")
949
- win_final = windows_blk.get("final")
950
- if win_prev is not None and win_final is not None:
951
- lines.append(f"- **Windows:** {win_prev} preview + {win_final} final")
952
- if windows_blk.get("seed") is not None:
953
- lines.append(f"- **Seed:** {windows_blk.get('seed')}")
954
- hash_blk = dataset.get("hash", {}) if isinstance(dataset.get("hash"), dict) else {}
955
- if hash_blk.get("preview_tokens") is not None:
956
- lines.append(f"- **Preview Tokens:** {hash_blk.get('preview_tokens'):,}")
957
- if hash_blk.get("final_tokens") is not None:
958
- lines.append(f"- **Final Tokens:** {hash_blk.get('final_tokens'):,}")
959
- if hash_blk.get("total_tokens") is not None:
960
- lines.append(f"- **Total Tokens:** {hash_blk.get('total_tokens'):,}")
961
- if hash_blk.get("dataset"):
962
- lines.append(f"- **Dataset Hash:** {hash_blk.get('dataset')}")
963
- tokenizer = dataset.get("tokenizer", {})
964
- if tokenizer.get("name") or tokenizer.get("hash"):
965
- vocab_size = tokenizer.get("vocab_size")
966
- vocab_suffix = f" (vocab {vocab_size})" if isinstance(vocab_size, int) else ""
967
- lines.append(
968
- f"- **Tokenizer:** {tokenizer.get('name', 'unknown')}{vocab_suffix}"
969
- )
970
- if tokenizer.get("hash"):
971
- lines.append(f" - Hash: {tokenizer['hash']}")
972
- lines.append(
973
- f" - BOS/EOS: {tokenizer.get('bos_token')} / {tokenizer.get('eos_token')}"
974
- )
975
- if tokenizer.get("pad_token") is not None:
976
- lines.append(f" - PAD: {tokenizer.get('pad_token')}")
977
- if tokenizer.get("add_prefix_space") is not None:
978
- lines.append(f" - add_prefix_space: {tokenizer.get('add_prefix_space')}")
979
- lines.append("")
980
-
981
- provenance_info = certificate.get("provenance", {}) or {}
982
- if provenance_info:
983
- lines.append("## Run Provenance")
984
- lines.append("")
985
- baseline_info = provenance_info.get("baseline", {}) or {}
986
- if baseline_info:
987
- lines.append(f"- **Baseline Run ID:** {baseline_info.get('run_id')}")
988
- if baseline_info.get("report_hash"):
989
- lines.append(f" - Report Hash: `{baseline_info.get('report_hash')}`")
990
- if baseline_info.get("report_path"):
991
- lines.append(f" - Report Path: {baseline_info.get('report_path')}")
992
- edited_info = provenance_info.get("edited", {}) or {}
993
- if edited_info:
994
- lines.append(f"- **Edited Run ID:** {edited_info.get('run_id')}")
995
- if edited_info.get("report_hash"):
996
- lines.append(f" - Report Hash: `{edited_info.get('report_hash')}`")
997
- if edited_info.get("report_path"):
998
- lines.append(f" - Report Path: {edited_info.get('report_path')}")
999
- window_plan = provenance_info.get("window_plan")
1000
- if isinstance(window_plan, dict) and window_plan:
1001
- preview_val = window_plan.get(
1002
- "preview_n", window_plan.get("actual_preview")
1003
- )
1004
- final_val = window_plan.get("final_n", window_plan.get("actual_final"))
1005
- lines.append(
1006
- f"- **Window Plan:** profile={window_plan.get('profile')}, preview={preview_val}, final={final_val}"
1007
- )
1008
- provider_digest = provenance_info.get("provider_digest")
1009
- if isinstance(provider_digest, dict) and provider_digest:
1010
- ids_d = provider_digest.get("ids_sha256")
1011
- tok_d = provider_digest.get("tokenizer_sha256")
1012
- mask_d = provider_digest.get("masking_sha256")
1013
-
1014
- lines.append("- **Provider Digest:**")
1015
- if tok_d:
1016
- lines.append(
1017
- f" - tokenizer_sha256: `{_short_digest(tok_d)}` (full in JSON)"
1018
- )
1019
- if ids_d:
1020
- lines.append(f" - ids_sha256: `{_short_digest(ids_d)}` (full in JSON)")
1021
- if mask_d:
1022
- lines.append(
1023
- f" - masking_sha256: `{_short_digest(mask_d)}` (full in JSON)"
1024
- )
1025
- # Surface confidence label prominently
1026
- try:
1027
- conf = certificate.get("confidence", {}) or {}
1028
- if isinstance(conf, dict) and conf.get("label"):
1029
- lines.append(f"- **Confidence:** {conf.get('label')}")
1030
- except Exception:
1031
- pass
1032
- # Surface policy version + thresholds hash (short)
1033
- try:
1034
- pd = certificate.get("policy_digest", {}) or {}
1035
- if isinstance(pd, dict) and pd:
1036
- pv = pd.get("policy_version")
1037
- th = pd.get("thresholds_hash")
1038
- if pv:
1039
- lines.append(f"- **Policy Version:** {pv}")
1040
- if isinstance(th, str) and th:
1041
- short = th if len(th) <= 16 else (th[:8] + "…" + th[-8:])
1042
- lines.append(f"- **Thresholds Digest:** `{short}` (full in JSON)")
1043
- if pd.get("changed"):
1044
- lines.append("- Note: policy changed")
1045
- except Exception:
1046
- pass
1047
- lines.append("")
1577
+ _append_dataset_and_provenance_section(lines, certificate)
1048
1578
 
1049
1579
  # Structural Changes heading is printed with content later; avoid empty header here
1050
1580
 
1051
- # Primary Metric (metric-v1) snapshot, if present
1052
- try:
1053
- pm = certificate.get("primary_metric")
1054
- if isinstance(pm, dict) and pm:
1055
- kind = pm.get("kind", "unknown")
1056
- lines.append(f"## Primary Metric ({kind})")
1057
- lines.append("")
1058
- unit = pm.get("unit", "-")
1059
- paired = pm.get("paired", False)
1060
- reps = None
1061
- # Snapshot only; bootstrap reps live in ppl.stats.bootstrap for ppl metrics
1062
- # Mark estimated metrics (e.g., pseudo accuracy counts) clearly
1063
- estimated_flag = False
1064
- try:
1065
- if bool(pm.get("estimated")):
1066
- estimated_flag = True
1067
- elif str(pm.get("counts_source", "")).lower() == "pseudo_config":
1068
- estimated_flag = True
1069
- except Exception:
1070
- estimated_flag = False
1071
- est_suffix = " (estimated)" if estimated_flag else ""
1072
- lines.append(f"- Kind: {kind} (unit: {unit}){est_suffix}")
1073
- gating_basis = pm.get("gating_basis") or pm.get("basis")
1074
- if gating_basis:
1075
- lines.append(f"- Basis: {gating_basis}")
1076
- if isinstance(paired, bool):
1077
- lines.append(f"- Paired: {paired}")
1078
- reps = pm.get("reps")
1079
- if isinstance(reps, int | float):
1080
- lines.append(f"- Bootstrap Reps: {int(reps)}")
1081
- ci = pm.get("ci") or pm.get("display_ci")
1082
- if (
1083
- isinstance(ci, list | tuple)
1084
- and len(ci) == 2
1085
- and all(isinstance(x, int | float) for x in ci)
1086
- ):
1087
- lines.append(f"- CI: {ci[0]:.3f}–{ci[1]:.3f}")
1088
- prev = pm.get("preview")
1089
- fin = pm.get("final")
1090
- ratio = pm.get("ratio_vs_baseline")
1091
-
1092
- lines.append("")
1093
- if estimated_flag and str(kind).lower() in {"accuracy", "vqa_accuracy"}:
1094
- lines.append(
1095
- "- Note: Accuracy derived from pseudo counts (quick dev preset); use a labeled preset for measured accuracy."
1096
- )
1097
- lines.append("| Field | Value |")
1098
- lines.append("|-------|-------|")
1099
- lines.append(f"| Preview | {_fmt_by_kind(prev, str(kind))} |")
1100
- lines.append(f"| Final | {_fmt_by_kind(fin, str(kind))} |")
1101
- # For accuracy, ratio field is actually a delta (as per helper); clarify inline
1102
- if kind in {"accuracy", "vqa_accuracy"}:
1103
- lines.append(f"| Δ vs Baseline | {_fmt_by_kind(ratio, str(kind))} |")
1104
- # When baseline accuracy is near-zero, clarify display rule
1105
- try:
1106
- base_pt = pm.get("baseline_point")
1107
- if isinstance(base_pt, int | float) and base_pt < 0.05:
1108
- lines.append(
1109
- "- Note: baseline < 5%; ratio suppressed; showing Δpp"
1110
- )
1111
- except Exception:
1112
- pass
1113
- else:
1114
- try:
1115
- lines.append(f"| Ratio vs Baseline | {float(ratio):.3f} |")
1116
- except Exception:
1117
- lines.append("| Ratio vs Baseline | N/A |")
1118
- lines.append("")
1119
- # Secondary metrics (informational)
1120
- try:
1121
- secs = certificate.get("secondary_metrics")
1122
- if isinstance(secs, list) and secs:
1123
- lines.append("## Secondary Metrics (informational)")
1124
- lines.append("")
1125
- lines.append("| Kind | Preview | Final | vs Baseline | CI |")
1126
- lines.append("|------|---------|-------|-------------|----|")
1127
- for m in secs:
1128
- if not isinstance(m, dict):
1129
- continue
1130
- k = m.get("kind", "?")
1131
- pv = _fmt_by_kind(m.get("preview"), str(k))
1132
- fv = _fmt_by_kind(m.get("final"), str(k))
1133
- rb = m.get("ratio_vs_baseline")
1134
- try:
1135
- rb_str = (
1136
- f"{float(rb):.3f}"
1137
- if (str(k).startswith("ppl"))
1138
- else _fmt_by_kind(rb, str(k))
1139
- )
1140
- except Exception:
1141
- rb_str = "N/A"
1142
- ci = m.get("display_ci") or m.get("ci")
1143
- if isinstance(ci, tuple | list) and len(ci) == 2:
1144
- ci_str = f"{float(ci[0]):.3f}-{float(ci[1]):.3f}"
1145
- else:
1146
- ci_str = "–"
1147
- lines.append(f"| {k} | {pv} | {fv} | {rb_str} | {ci_str} |")
1148
- lines.append("")
1149
- except Exception:
1150
- pass
1151
- except Exception:
1152
- pass
1153
-
1154
1581
  # System Overhead section (latency/throughput)
1155
1582
  sys_over = certificate.get("system_overhead", {}) or {}
1156
1583
  if isinstance(sys_over, dict) and sys_over:
@@ -1309,31 +1736,32 @@ def render_certificate_markdown(certificate: dict[str, Any]) -> str:
1309
1736
 
1310
1737
  # Variance Guard (Spectral/RMT summaries are already provided above)
1311
1738
  variance = certificate["variance"]
1312
- lines.append("## Variance Guard")
1739
+ appendix_lines.append("### Variance Guard")
1740
+ appendix_lines.append("")
1313
1741
 
1314
1742
  # Display whether VE was enabled after A/B test
1315
- lines.append(f"- **Enabled:** {'Yes' if variance['enabled'] else 'No'}")
1743
+ appendix_lines.append(f"- **Enabled:** {'Yes' if variance['enabled'] else 'No'}")
1316
1744
 
1317
1745
  if variance["enabled"]:
1318
1746
  # VE was enabled - show the gain
1319
1747
  gain_value = variance.get("gain", "N/A")
1320
1748
  if isinstance(gain_value, int | float):
1321
- lines.append(f"- **Gain:** {gain_value:.3f}")
1749
+ appendix_lines.append(f"- **Gain:** {gain_value:.3f}")
1322
1750
  else:
1323
- lines.append(f"- **Gain:** {gain_value}")
1751
+ appendix_lines.append(f"- **Gain:** {gain_value}")
1324
1752
  else:
1325
1753
  # VE was not enabled - show succinct reason if available, else a clear disabled message
1326
1754
  ppl_no_ve = variance.get("ppl_no_ve")
1327
1755
  ppl_with_ve = variance.get("ppl_with_ve")
1328
1756
  ratio_ci = variance.get("ratio_ci")
1329
1757
  if ppl_no_ve is not None and ppl_with_ve is not None and ratio_ci:
1330
- lines.append(f"- **Primary metric without VE:** {ppl_no_ve:.3f}")
1331
- lines.append(f"- **Primary metric with VE:** {ppl_with_ve:.3f}")
1758
+ appendix_lines.append(f"- **Primary metric without VE:** {ppl_no_ve:.3f}")
1759
+ appendix_lines.append(f"- **Primary metric with VE:** {ppl_with_ve:.3f}")
1332
1760
  gain_value = variance.get("gain")
1333
1761
  if isinstance(gain_value, int | float):
1334
- lines.append(f"- **Gain (insufficient):** {gain_value:.3f}")
1762
+ appendix_lines.append(f"- **Gain (insufficient):** {gain_value:.3f}")
1335
1763
  else:
1336
- lines.append(
1764
+ appendix_lines.append(
1337
1765
  "- Variance Guard: Disabled (predictive gate not evaluated for this edit)."
1338
1766
  )
1339
1767
  # Add concise rationale aligned with Balanced predictive gate contract
@@ -1341,14 +1769,14 @@ def render_certificate_markdown(certificate: dict[str, Any]) -> str:
1341
1769
  ve_policy = certificate.get("policies", {}).get("variance", {})
1342
1770
  min_effect = ve_policy.get("min_effect_lognll")
1343
1771
  if isinstance(min_effect, int | float):
1344
- lines.append(
1772
+ appendix_lines.append(
1345
1773
  f"- Predictive gate (Balanced): one-sided; enables only if CI excludes 0 and |mean Δ| ≥ {float(min_effect):.4g}."
1346
1774
  )
1347
1775
  else:
1348
- lines.append(
1776
+ appendix_lines.append(
1349
1777
  "- Predictive gate (Balanced): one-sided; enables only if CI excludes 0 and |mean Δ| ≥ min_effect."
1350
1778
  )
1351
- lines.append(
1779
+ appendix_lines.append(
1352
1780
  "- Predictive Gate: evaluated=false (disabled under current policy/edit)."
1353
1781
  )
1354
1782
  except Exception:
@@ -1356,14 +1784,17 @@ def render_certificate_markdown(certificate: dict[str, Any]) -> str:
1356
1784
 
1357
1785
  if variance.get("ratio_ci"):
1358
1786
  ratio_lo, ratio_hi = variance["ratio_ci"]
1359
- lines.append(f"- **Ratio CI:** [{ratio_lo:.3f}, {ratio_hi:.3f}]")
1787
+ appendix_lines.append(f"- **Ratio CI:** [{ratio_lo:.3f}, {ratio_hi:.3f}]")
1360
1788
 
1361
1789
  if variance.get("calibration") and variance.get("enabled"):
1362
1790
  calib = variance["calibration"]
1363
1791
  coverage = calib.get("coverage")
1364
1792
  requested = calib.get("requested")
1365
1793
  status = calib.get("status", "unknown")
1366
- lines.append(f"- **Calibration:** {coverage}/{requested} windows ({status})")
1794
+ appendix_lines.append(
1795
+ f"- **Calibration:** {coverage}/{requested} windows ({status})"
1796
+ )
1797
+ appendix_lines.append("")
1367
1798
 
1368
1799
  lines.append("")
1369
1800
 
@@ -1397,32 +1828,22 @@ def render_certificate_markdown(certificate: dict[str, Any]) -> str:
1397
1828
  lines.append(f"- **{label}:** {float(moe[key]):+.4f}")
1398
1829
  lines.append("")
1399
1830
 
1400
- # Policy Summary
1401
- lines.append("## Applied Policies")
1402
- lines.append("")
1403
- policies = certificate["policies"]
1404
- for guard_name, policy in policies.items():
1405
- lines.append(f"### {guard_name.title()}")
1406
- lines.append("")
1407
- policy_yaml = (
1408
- yaml.safe_dump(policy, sort_keys=True, width=80).strip().splitlines()
1409
- )
1410
- lines.append("```yaml")
1411
- for line in policy_yaml:
1412
- lines.append(line)
1413
- lines.append("```")
1414
- lines.append("")
1831
+ _append_policy_configuration_section(lines, certificate)
1415
1832
 
1416
- # Artifacts
1417
- lines.append("## Artifacts")
1418
- lines.append("")
1833
+ appendix_lines.append("### Artifacts")
1834
+ appendix_lines.append("")
1419
1835
  artifacts = certificate["artifacts"]
1420
1836
  if artifacts.get("events_path"):
1421
- lines.append(f"- **Events Log:** `{artifacts['events_path']}`")
1837
+ appendix_lines.append(f"- **Events Log:** `{artifacts['events_path']}`")
1422
1838
  if artifacts.get("report_path"):
1423
- lines.append(f"- **Full Report:** `{artifacts['report_path']}`")
1424
- lines.append(f"- **Certificate Generated:** {artifacts['generated_at']}")
1425
- lines.append("")
1839
+ appendix_lines.append(f"- **Full Report:** `{artifacts['report_path']}`")
1840
+ appendix_lines.append(f"- **Certificate Generated:** {artifacts['generated_at']}")
1841
+ appendix_lines.append("")
1842
+
1843
+ if appendix_lines:
1844
+ lines.append("## Appendix")
1845
+ lines.append("")
1846
+ lines.extend(appendix_lines)
1426
1847
 
1427
1848
  # Certificate Hash for Integrity
1428
1849
  cert_hash = _compute_certificate_hash(certificate)
@@ -1433,7 +1854,7 @@ def render_certificate_markdown(certificate: dict[str, Any]) -> str:
1433
1854
  lines.append("---")
1434
1855
  lines.append("")
1435
1856
  lines.append(
1436
- "*This InvarLock safety certificate provides a comprehensive assessment of model compression safety.*"
1857
+ "*This InvarLock evaluation certificate provides a comprehensive assessment of model compression safety.*"
1437
1858
  )
1438
1859
  lines.append(
1439
1860
  "*All metrics are compared against the uncompressed baseline model for safety validation.*"