invarlock 0.3.6__py3-none-any.whl → 0.3.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55):
  1. invarlock/__init__.py +2 -2
  2. invarlock/adapters/__init__.py +10 -14
  3. invarlock/adapters/auto.py +35 -40
  4. invarlock/adapters/capabilities.py +2 -2
  5. invarlock/adapters/hf_causal.py +418 -0
  6. invarlock/adapters/{hf_onnx.py → hf_causal_onnx.py} +3 -3
  7. invarlock/adapters/hf_mixin.py +25 -4
  8. invarlock/adapters/{hf_bert.py → hf_mlm.py} +4 -11
  9. invarlock/adapters/{hf_t5.py → hf_seq2seq.py} +9 -9
  10. invarlock/cli/adapter_auto.py +31 -21
  11. invarlock/cli/app.py +73 -2
  12. invarlock/cli/commands/certify.py +600 -59
  13. invarlock/cli/commands/doctor.py +8 -10
  14. invarlock/cli/commands/plugins.py +13 -9
  15. invarlock/cli/commands/report.py +233 -69
  16. invarlock/cli/commands/run.py +907 -183
  17. invarlock/cli/commands/verify.py +76 -11
  18. invarlock/cli/config.py +1 -1
  19. invarlock/cli/doctor_helpers.py +4 -5
  20. invarlock/cli/output.py +193 -0
  21. invarlock/cli/provenance.py +1 -1
  22. invarlock/core/bootstrap.py +1 -1
  23. invarlock/core/registry.py +9 -11
  24. invarlock/core/runner.py +111 -25
  25. invarlock/edits/quant_rtn.py +65 -37
  26. invarlock/eval/bench.py +3 -3
  27. invarlock/eval/data.py +68 -23
  28. invarlock/eval/metrics.py +59 -1
  29. invarlock/eval/tasks/__init__.py +12 -0
  30. invarlock/eval/tasks/classification.py +48 -0
  31. invarlock/eval/tasks/qa.py +36 -0
  32. invarlock/eval/tasks/text_generation.py +102 -0
  33. invarlock/guards/invariants.py +19 -10
  34. invarlock/guards/rmt.py +2 -2
  35. invarlock/guards/variance.py +2 -2
  36. invarlock/model_profile.py +48 -27
  37. invarlock/observability/health.py +6 -6
  38. invarlock/observability/metrics.py +108 -0
  39. invarlock/reporting/certificate.py +159 -9
  40. invarlock/reporting/certificate_schema.py +1 -1
  41. invarlock/reporting/guards_analysis.py +154 -4
  42. invarlock/reporting/html.py +55 -5
  43. invarlock/reporting/normalizer.py +7 -0
  44. invarlock/reporting/render.py +791 -431
  45. invarlock/reporting/report.py +39 -3
  46. invarlock/reporting/report_types.py +6 -1
  47. invarlock/reporting/telemetry.py +86 -0
  48. {invarlock-0.3.6.dist-info → invarlock-0.3.7.dist-info}/METADATA +23 -9
  49. {invarlock-0.3.6.dist-info → invarlock-0.3.7.dist-info}/RECORD +53 -48
  50. {invarlock-0.3.6.dist-info → invarlock-0.3.7.dist-info}/WHEEL +1 -1
  51. {invarlock-0.3.6.dist-info → invarlock-0.3.7.dist-info}/entry_points.txt +5 -3
  52. invarlock/adapters/hf_gpt2.py +0 -404
  53. invarlock/adapters/hf_llama.py +0 -487
  54. {invarlock-0.3.6.dist-info → invarlock-0.3.7.dist-info}/licenses/LICENSE +0 -0
  55. {invarlock-0.3.6.dist-info → invarlock-0.3.7.dist-info}/top_level.txt +0 -0
@@ -113,6 +113,462 @@ def _short_digest(v: str) -> str:
113
113
  return v if len(v) <= 16 else (v[:8] + "…" + v[-8:])
114
114
 
115
115
 
116
def _render_executive_dashboard(cert: dict[str, Any]) -> str:
    """Return the Safety Dashboard markdown for ``cert`` as a single string.

    The rows are produced by ``_append_safety_dashboard_section``; they are
    joined with newlines and trailing whitespace (including the blank line
    that closes the section) is stripped.
    """
    buffer: list[str] = []
    _append_safety_dashboard_section(buffer, cert)
    rendered = "\n".join(buffer)
    return rendered.rstrip()
121
+
122
+
123
def _dashboard_finite_float(value: Any) -> float | None:
    """Return ``value`` as a finite float, or ``None`` if it is not one."""
    if not isinstance(value, int | float):
        return None
    try:
        as_float = float(value)
    except OverflowError:
        # Integers too large for a float cannot be rendered meaningfully.
        return None
    return as_float if math.isfinite(as_float) else None


def _dashboard_drift_band(pm: Any) -> tuple[float, float]:
    """Return the ``(min, max)`` preview→final drift band to display.

    Reads ``pm["drift_band"]`` as either a ``{"min": ..., "max": ...}``
    mapping or a two-item sequence. Falls back to ``(0.95, 1.05)`` when the
    entry is missing, non-numeric, non-finite, or not ``0 < min < max``.

    NOTE(review): this mirrors the ``drift_band`` lookup used for the
    "Preview Final Drift Acceptable" row in ``render_certificate_markdown``;
    confirm both read the same primary-metric block.
    """
    default = (0.95, 1.05)
    band = pm.get("drift_band") if isinstance(pm, dict) else None
    if isinstance(band, dict):
        lo, hi = band.get("min"), band.get("max")
    elif isinstance(band, list | tuple) and len(band) == 2:
        lo, hi = band[0], band[1]
    else:
        return default
    lo_f = _dashboard_finite_float(lo)
    hi_f = _dashboard_finite_float(hi)
    if lo_f is None or hi_f is None or not (0 < lo_f < hi_f):
        return default
    return lo_f, hi_f


def _append_safety_dashboard_section(
    lines: list[str], certificate: dict[str, Any]
) -> None:
    """Append a concise, first-screen "Safety Dashboard" table to ``lines``.

    Rows: Overall, Primary Metric, Drift, Invariants, Spectral, RMT and, when
    ``certificate["guard_overhead"]["evaluated"]`` is truthy, Overhead.

    Args:
        lines: Output buffer of markdown lines; mutated in place.
        certificate: Certificate mapping. Reads ``validation``,
            ``primary_metric``, ``auto`` and ``guard_overhead``.

    Status cells use ✅/❌ when the matching flag is present in
    ``validation`` and 🛈 when it is absent. Non-finite measurements render
    as ``N/A``. The drift band shown is taken from
    ``primary_metric["drift_band"]`` when valid, else ``0.95–1.05×``.
    """
    block = compute_console_validation_block(certificate)
    overall_status = "✅ PASS" if bool(block.get("overall_pass")) else "❌ FAIL"

    validation = certificate.get("validation", {}) or {}
    validation_is_dict = isinstance(validation, dict)
    # Tolerate malformed (non-mapping) sections instead of raising.
    pm = certificate.get("primary_metric")
    if not isinstance(pm, dict):
        pm = {}
    auto = certificate.get("auto")
    if not isinstance(auto, dict):
        auto = {}
    tier = str(auto.get("tier") or "balanced").lower()

    def _gate(key: str) -> bool | None:
        """Return the validation flag for ``key``; ``None`` when not recorded."""
        if validation_is_dict and key in validation:
            return bool(validation.get(key))
        return None

    def _mark(ok: bool | None, text: str) -> str:
        """Prefix ``text`` with the icon matching a tri-state outcome."""
        if ok is None:
            return f"🛈 {text}"
        return f"{'✅' if ok else '❌'} {text}"

    def _gate_cell(key: str) -> str:
        ok = _gate(key)
        if ok is None:
            return _mark(None, "N/A")
        return _mark(ok, "PASS" if ok else "FAIL")

    # Primary metric summary
    pm_kind = str(pm.get("kind", "")).lower()
    pm_basis = pm.get("gating_basis") or pm.get("basis") or "point"
    pm_value = _dashboard_finite_float(pm.get("ratio_vs_baseline"))

    if pm_kind in {"accuracy", "vqa_accuracy"}:
        # Accuracy-style metrics are shown as a signed percentage-point delta.
        measured = f"{pm_value:+.2f} pp" if pm_value is not None else "N/A"
        floor_pp = {
            "conservative": -0.5,
            "balanced": -1.0,
            "aggressive": -2.0,
            "none": -1.0,
        }.get(tier, -1.0)
        threshold = f"≥ {floor_pp:+.2f} pp ({pm_basis})"
    else:
        measured = f"{pm_value:.3f}×" if pm_value is not None else "N/A"
        ratio_limit = {
            "conservative": 1.05,
            "balanced": 1.10,
            "aggressive": 1.20,
            "none": 1.10,
        }.get(tier, 1.10)
        target_ratio = auto.get("target_pm_ratio")
        # An explicit positive target can only tighten the tier limit.
        if isinstance(target_ratio, int | float) and target_ratio > 0:
            ratio_limit = min(ratio_limit, float(target_ratio))
        threshold = f"≤ {ratio_limit:.2f}× ({pm_basis})"

    pm_status = _mark(_gate("primary_metric_acceptable"), measured)

    # Drift summary (final/preview ratio) when preview/final are numeric
    drift_val = "N/A"
    preview = _dashboard_finite_float(pm.get("preview"))
    final = _dashboard_finite_float(pm.get("final"))
    if preview is not None and final is not None and preview > 0:
        drift = final / preview
        if math.isfinite(drift):
            drift_val = f"{drift:.3f}×"
    drift_status = _mark(_gate("preview_final_drift_acceptable"), drift_val)
    drift_min, drift_max = _dashboard_drift_band(pm)
    drift_summary = f"{drift_min:.2f}–{drift_max:.2f}× band"

    # Guard overhead row (only when the overhead gate was evaluated)
    overhead_ctx = certificate.get("guard_overhead", {}) or {}
    overhead_row: tuple[str, str, str] | None = None
    if isinstance(overhead_ctx, dict) and bool(overhead_ctx.get("evaluated")):
        overhead_pct = _dashboard_finite_float(overhead_ctx.get("overhead_percent"))
        overhead_ratio = _dashboard_finite_float(overhead_ctx.get("overhead_ratio"))
        if overhead_pct is not None:
            overhead_measured = f"{overhead_pct:+.2f}%"
        elif overhead_ratio is not None:
            overhead_measured = f"{overhead_ratio:.3f}×"
        else:
            overhead_measured = "N/A"

        threshold_pct = _dashboard_finite_float(overhead_ctx.get("threshold_percent"))
        if threshold_pct is not None:
            threshold_str = f"≤ +{threshold_pct:.1f}%"
        else:
            threshold_str = "≤ +1.0%"

        if validation_is_dict:
            # A missing flag counts as acceptable for this row.
            overhead_ok: bool | None = bool(
                validation.get("guard_overhead_acceptable", True)
            )
        else:
            overhead_ok = None
        overhead_row = (
            "Overhead",
            _mark(overhead_ok, overhead_measured),
            threshold_str,
        )

    lines.append("## Safety Dashboard")
    lines.append("")
    lines.append("| Check | Status | Quick Summary |")
    lines.append("|-------|--------|---------------|")
    lines.append(f"| Overall | {overall_status} | Canonical gate outcomes |")
    lines.append(f"| Primary Metric | {pm_status} | {threshold} |")
    lines.append(f"| Drift | {drift_status} | {drift_summary} |")
    lines.append(
        f"| Invariants | {_gate_cell('invariants_pass')} | Model integrity checks |"
    )
    lines.append(
        f"| Spectral | {_gate_cell('spectral_stable')} | Weight matrix spectral norms |"
    )
    lines.append(f"| RMT | {_gate_cell('rmt_stable')} | Random Matrix Theory guard |")
    if overhead_row:
        lines.append(f"| {overhead_row[0]} | {overhead_row[1]} | {overhead_row[2]} |")
    lines.append("")
271
+
272
+
273
def _secondary_metric_row(metric: dict[str, Any]) -> str:
    """Render one "Secondary Metrics" table row for ``metric``."""
    kind = str(metric.get("kind", "?"))
    preview_cell = _fmt_by_kind(metric.get("preview"), kind)
    final_cell = _fmt_by_kind(metric.get("final"), kind)

    ratio = metric.get("ratio_vs_baseline")
    try:
        # Perplexity-style kinds show a plain ratio; others use kind formatting.
        ratio_cell = (
            f"{float(ratio):.3f}"
            if kind.startswith("ppl")
            else _fmt_by_kind(ratio, kind)
        )
    except Exception:
        ratio_cell = "N/A"

    ci = metric.get("display_ci") or metric.get("ci")
    ci_cell = "–"
    if isinstance(ci, tuple | list) and len(ci) == 2:
        try:
            ci_cell = f"{float(ci[0]):.3f}-{float(ci[1]):.3f}"
        except (TypeError, ValueError, OverflowError):
            # A malformed bound only blanks this cell, not the whole table.
            ci_cell = "–"
    return f"| {kind} | {preview_cell} | {final_cell} | {ratio_cell} | {ci_cell} |"


def _append_primary_metric_section(
    lines: list[str], certificate: dict[str, Any]
) -> None:
    """Append the "Primary Metric" section (and secondary metrics) to ``lines``.

    Does nothing when ``certificate["primary_metric"]`` is missing, empty or
    not a mapping. Otherwise emits a bullet summary (kind/unit, basis,
    pairing, bootstrap reps, CI), a Preview/Final table with either a
    "Δ vs Baseline" row (accuracy kinds) or a "Ratio vs Baseline" row, and,
    when ``certificate["secondary_metrics"]`` is a non-empty list, an
    informational secondary-metrics table.

    Args:
        lines: Output buffer of markdown lines; mutated in place.
        certificate: Certificate mapping.
    """
    pm = certificate.get("primary_metric")
    if not isinstance(pm, dict) or not pm:
        return

    kind = pm.get("kind", "unknown")
    kind_str = str(kind)
    # Normalise once so the note and the Δ/ratio row agree on the metric kind.
    is_accuracy = kind_str.lower() in {"accuracy", "vqa_accuracy"}
    unit = pm.get("unit", "-")
    paired = pm.get("paired", False)

    try:
        estimated_flag = bool(pm.get("estimated")) or (
            str(pm.get("counts_source", "")).lower() == "pseudo_config"
        )
    except Exception:
        estimated_flag = False
    est_suffix = " (estimated)" if estimated_flag else ""

    lines.append("## Primary Metric")
    lines.append("")
    lines.append(f"- Kind: {kind} (unit: {unit}){est_suffix}")
    gating_basis = pm.get("gating_basis") or pm.get("basis")
    if gating_basis:
        lines.append(f"- Basis: {gating_basis}")
    if isinstance(paired, bool):
        lines.append(f"- Paired: {paired}")
    reps = pm.get("reps")
    # int() raises on NaN/inf, so only finite counts are rendered.
    if isinstance(reps, int) or (isinstance(reps, float) and math.isfinite(reps)):
        lines.append(f"- Bootstrap Reps: {int(reps)}")
    ci = pm.get("ci") or pm.get("display_ci")
    if (
        isinstance(ci, list | tuple)
        and len(ci) == 2
        and all(isinstance(bound, int | float) for bound in ci)
    ):
        lines.append(f"- CI: {ci[0]:.3f}–{ci[1]:.3f}")

    prev = pm.get("preview")
    fin = pm.get("final")
    ratio = pm.get("ratio_vs_baseline")

    lines.append("")
    if estimated_flag and is_accuracy:
        lines.append(
            "- Note: Accuracy derived from pseudo counts (quick dev preset); use a labeled preset for measured accuracy."
        )
    lines.append("| Field | Value |")
    lines.append("|-------|-------|")
    lines.append(f"| Preview | {_fmt_by_kind(prev, kind_str)} |")
    lines.append(f"| Final | {_fmt_by_kind(fin, kind_str)} |")

    if is_accuracy:
        lines.append(f"| Δ vs Baseline | {_fmt_by_kind(ratio, kind_str)} |")
        base_pt = pm.get("baseline_point")
        if isinstance(base_pt, int | float) and base_pt < 0.05:
            lines.append("- Note: baseline < 5%; ratio suppressed; showing Δpp")
    else:
        try:
            ratio_cell = f"{float(ratio):.3f}"
        except (TypeError, ValueError, OverflowError):
            ratio_cell = "N/A"
        lines.append(f"| Ratio vs Baseline | {ratio_cell} |")
    lines.append("")

    # Secondary metrics (informational)
    secs = certificate.get("secondary_metrics")
    if not (isinstance(secs, list) and secs):
        return
    lines.append("## Secondary Metrics (informational)")
    lines.append("")
    lines.append("| Kind | Preview | Final | vs Baseline | CI |")
    lines.append("|------|---------|-------|-------------|----|")
    for metric in secs:
        if not isinstance(metric, dict):
            continue
        try:
            lines.append(_secondary_metric_row(metric))
        except Exception:
            # Best-effort rendering: skip a row that cannot be formatted
            # rather than truncating the rest of the table.
            continue
    lines.append("")
375
+
376
+
377
+ def _append_policy_configuration_section(
378
+ lines: list[str], certificate: dict[str, Any]
379
+ ) -> None:
380
+ resolved_policy = certificate.get("resolved_policy")
381
+ policy_provenance = certificate.get("policy_provenance", {}) or {}
382
+ has_prov = isinstance(policy_provenance, dict) and bool(policy_provenance)
383
+ has_resolved = isinstance(resolved_policy, dict) and bool(resolved_policy)
384
+ if not (has_prov or has_resolved):
385
+ return
386
+
387
+ lines.append("## Policy Configuration")
388
+ lines.append("")
389
+
390
+ tier = None
391
+ if has_prov:
392
+ tier = policy_provenance.get("tier")
393
+ if not tier:
394
+ tier = (certificate.get("auto", {}) or {}).get("tier")
395
+ digest_value = None
396
+ if has_prov:
397
+ digest_value = policy_provenance.get("policy_digest")
398
+ if not digest_value:
399
+ digest_value = (certificate.get("policy_digest", {}) or {}).get(
400
+ "thresholds_hash"
401
+ )
402
+
403
+ summary_parts: list[str] = []
404
+ if tier:
405
+ summary_parts.append(f"**Tier:** {tier}")
406
+ if digest_value:
407
+ summary_parts.append(f"**Digest:** `{_short_digest(str(digest_value))}`")
408
+ if summary_parts:
409
+ lines.append(" | ".join(summary_parts))
410
+
411
+ if has_prov:
412
+ overrides_list = policy_provenance.get("overrides") or []
413
+ if overrides_list:
414
+ lines.append(f"- **Overrides:** {', '.join(overrides_list)}")
415
+ else:
416
+ lines.append("- **Overrides:** (none)")
417
+ if policy_provenance.get("resolved_at"):
418
+ lines.append(f"- **Resolved At:** {policy_provenance.get('resolved_at')}")
419
+
420
+ if has_resolved:
421
+ lines.append("")
422
+ lines.append("<details>")
423
+ lines.append("<summary>Resolved Policy YAML</summary>")
424
+ lines.append("")
425
+ lines.append("```yaml")
426
+ resolved_yaml = yaml.safe_dump(
427
+ resolved_policy, sort_keys=True, width=80, default_flow_style=False
428
+ ).strip()
429
+ for line in resolved_yaml.splitlines():
430
+ lines.append(line)
431
+ lines.append("```")
432
+ lines.append("")
433
+ lines.append("</details>")
434
+
435
+ lines.append("")
436
+
437
+
438
+ def _append_dataset_and_provenance_section(
439
+ lines: list[str], certificate: dict[str, Any]
440
+ ) -> None:
441
+ dataset = certificate.get("dataset", {}) or {}
442
+ provenance_info = certificate.get("provenance", {}) or {}
443
+
444
+ has_dataset = isinstance(dataset, dict) and bool(dataset)
445
+ has_provenance = isinstance(provenance_info, dict) and bool(provenance_info)
446
+ if not (has_dataset or has_provenance):
447
+ return
448
+
449
+ lines.append("## Dataset and Provenance")
450
+ lines.append("")
451
+
452
+ if has_dataset:
453
+ prov = dataset.get("provider") or "unknown"
454
+ lines.append(f"- **Provider:** {prov}")
455
+ try:
456
+ seq_len_val = (
457
+ int(dataset.get("seq_len"))
458
+ if isinstance(dataset.get("seq_len"), int | float)
459
+ else dataset.get("seq_len")
460
+ )
461
+ except Exception: # pragma: no cover - defensive
462
+ seq_len_val = dataset.get("seq_len")
463
+ if seq_len_val is not None:
464
+ lines.append(f"- **Sequence Length:** {seq_len_val}")
465
+ windows_blk = (
466
+ dataset.get("windows", {})
467
+ if isinstance(dataset.get("windows"), dict)
468
+ else {}
469
+ )
470
+ win_prev = windows_blk.get("preview")
471
+ win_final = windows_blk.get("final")
472
+ if win_prev is not None and win_final is not None:
473
+ lines.append(f"- **Windows:** {win_prev} preview + {win_final} final")
474
+ if windows_blk.get("seed") is not None:
475
+ lines.append(f"- **Seed:** {windows_blk.get('seed')}")
476
+ hash_blk = (
477
+ dataset.get("hash", {}) if isinstance(dataset.get("hash"), dict) else {}
478
+ )
479
+ if hash_blk.get("preview_tokens") is not None:
480
+ lines.append(f"- **Preview Tokens:** {hash_blk.get('preview_tokens'):,}")
481
+ if hash_blk.get("final_tokens") is not None:
482
+ lines.append(f"- **Final Tokens:** {hash_blk.get('final_tokens'):,}")
483
+ if hash_blk.get("total_tokens") is not None:
484
+ lines.append(f"- **Total Tokens:** {hash_blk.get('total_tokens'):,}")
485
+ if hash_blk.get("dataset"):
486
+ lines.append(f"- **Dataset Hash:** {hash_blk.get('dataset')}")
487
+ tokenizer = dataset.get("tokenizer", {})
488
+ if isinstance(tokenizer, dict) and (
489
+ tokenizer.get("name") or tokenizer.get("hash")
490
+ ):
491
+ vocab_size = tokenizer.get("vocab_size")
492
+ vocab_suffix = (
493
+ f" (vocab {vocab_size})" if isinstance(vocab_size, int) else ""
494
+ )
495
+ lines.append(
496
+ f"- **Tokenizer:** {tokenizer.get('name', 'unknown')}{vocab_suffix}"
497
+ )
498
+ if tokenizer.get("hash"):
499
+ lines.append(f" - Hash: {tokenizer['hash']}")
500
+ lines.append(
501
+ f" - BOS/EOS: {tokenizer.get('bos_token')} / {tokenizer.get('eos_token')}"
502
+ )
503
+ if tokenizer.get("pad_token") is not None:
504
+ lines.append(f" - PAD: {tokenizer.get('pad_token')}")
505
+ if tokenizer.get("add_prefix_space") is not None:
506
+ lines.append(
507
+ f" - add_prefix_space: {tokenizer.get('add_prefix_space')}"
508
+ )
509
+
510
+ if has_provenance:
511
+ baseline_info = provenance_info.get("baseline", {}) or {}
512
+ edited_info = provenance_info.get("edited", {}) or {}
513
+
514
+ if baseline_info or edited_info:
515
+ lines.append("")
516
+ if baseline_info:
517
+ lines.append(f"- **Baseline Run ID:** {baseline_info.get('run_id')}")
518
+ if baseline_info.get("report_hash"):
519
+ lines.append(f" - Report Hash: `{baseline_info.get('report_hash')}`")
520
+ if baseline_info.get("report_path"):
521
+ lines.append(f" - Report Path: {baseline_info.get('report_path')}")
522
+ if edited_info:
523
+ lines.append(f"- **Edited Run ID:** {edited_info.get('run_id')}")
524
+ if edited_info.get("report_hash"):
525
+ lines.append(f" - Report Hash: `{edited_info.get('report_hash')}`")
526
+ if edited_info.get("report_path"):
527
+ lines.append(f" - Report Path: {edited_info.get('report_path')}")
528
+
529
+ provider_digest = provenance_info.get("provider_digest")
530
+ if isinstance(provider_digest, dict) and provider_digest:
531
+ ids_d = provider_digest.get("ids_sha256")
532
+ tok_d = provider_digest.get("tokenizer_sha256")
533
+ mask_d = provider_digest.get("masking_sha256")
534
+
535
+ lines.append("- **Provider Digest:**")
536
+ if tok_d:
537
+ lines.append(
538
+ f" - tokenizer_sha256: `{_short_digest(tok_d)}` (full in JSON)"
539
+ )
540
+ if ids_d:
541
+ lines.append(f" - ids_sha256: `{_short_digest(ids_d)}` (full in JSON)")
542
+ if mask_d:
543
+ lines.append(
544
+ f" - masking_sha256: `{_short_digest(mask_d)}` (full in JSON)"
545
+ )
546
+
547
+ try:
548
+ conf = certificate.get("confidence", {}) or {}
549
+ if isinstance(conf, dict) and conf.get("label"):
550
+ lines.append(f"- **Confidence:** {conf.get('label')}")
551
+ except Exception:
552
+ pass
553
+
554
+ try:
555
+ pd = certificate.get("policy_digest", {}) or {}
556
+ if isinstance(pd, dict) and pd:
557
+ pv = pd.get("policy_version")
558
+ th = pd.get("thresholds_hash")
559
+ if pv:
560
+ lines.append(f"- **Policy Version:** {pv}")
561
+ if isinstance(th, str) and th:
562
+ short = th if len(th) <= 16 else (th[:8] + "…" + th[-8:])
563
+ lines.append(f"- **Thresholds Digest:** `{short}` (full in JSON)")
564
+ if pd.get("changed"):
565
+ lines.append("- Note: policy changed")
566
+ except Exception:
567
+ pass
568
+
569
+ lines.append("")
570
+
571
+
116
572
  def _fmt_by_kind(x: Any, k: str) -> str:
117
573
  try:
118
574
  xv = float(x)
@@ -275,11 +731,12 @@ def render_certificate_markdown(certificate: dict[str, Any]) -> str:
275
731
  if not validate_certificate(certificate):
276
732
  raise ValueError("Invalid certificate structure")
277
733
 
278
- lines = []
734
+ lines: list[str] = []
735
+ appendix_lines: list[str] = []
279
736
  edit_name = str(certificate.get("edit_name") or "").lower()
280
737
 
281
738
  # Header
282
- lines.append("# InvarLock Safety Certificate")
739
+ lines.append("# InvarLock Evaluation Certificate")
283
740
  lines.append("")
284
741
  lines.append(
285
742
  "> *Basis: “point” gates check the point estimate; “upper” gates check the CI "
@@ -291,6 +748,10 @@ def render_certificate_markdown(certificate: dict[str, Any]) -> str:
291
748
  lines.append(f"**Generated:** {certificate['artifacts']['generated_at']}")
292
749
  lines.append(f"**Edit Type:** {certificate.get('edit_name', 'Unknown')}")
293
750
  lines.append("")
751
+ lines.append(
752
+ "> Full evidence: see [`evaluation.cert.json`](evaluation.cert.json) for complete provenance, digests, and raw measurements."
753
+ )
754
+ lines.append("")
294
755
 
295
756
  plugins = certificate.get("plugins", {})
296
757
  if isinstance(plugins, dict) and plugins:
@@ -314,7 +775,7 @@ def render_certificate_markdown(certificate: dict[str, Any]) -> str:
314
775
  ]
315
776
  if guard_entries:
316
777
  lines.append("- Guards:\n - " + "\n - ".join(guard_entries))
317
- lines.append("")
778
+ lines.append("")
318
779
 
319
780
  # Executive Summary with validation status (canonical, from console block)
320
781
  lines.append("## Executive Summary")
@@ -354,6 +815,25 @@ def render_certificate_markdown(certificate: dict[str, Any]) -> str:
354
815
  pass
355
816
  lines.append("")
356
817
 
818
+ dashboard = _render_executive_dashboard(certificate)
819
+ if dashboard:
820
+ lines.extend(dashboard.splitlines())
821
+ lines.append("")
822
+
823
+ lines.append("## Contents")
824
+ lines.append("")
825
+ lines.append("- [Safety Dashboard](#safety-dashboard)")
826
+ lines.append("- [Quality Gates](#quality-gates)")
827
+ lines.append("- [Safety Check Details](#safety-check-details)")
828
+ lines.append("- [Primary Metric](#primary-metric)")
829
+ lines.append("- [Guard Observability](#guard-observability)")
830
+ lines.append("- [Model Information](#model-information)")
831
+ lines.append("- [Dataset and Provenance](#dataset-and-provenance)")
832
+ lines.append("- [Policy Configuration](#policy-configuration)")
833
+ lines.append("- [Appendix](#appendix)")
834
+ lines.append("- [Certificate Integrity](#certificate-integrity)")
835
+ lines.append("")
836
+
357
837
  # Validation table with canonical gates (mirrors console allow-list)
358
838
  lines.append("## Quality Gates")
359
839
  lines.append("")
@@ -410,6 +890,31 @@ def render_certificate_markdown(certificate: dict[str, Any]) -> str:
410
890
  )
411
891
  )
412
892
  status = "✅ PASS" if ok else "❌ FAIL"
893
+ drift_min = 0.95
894
+ drift_max = 1.05
895
+ try:
896
+ drift_band = (
897
+ pm_block.get("drift_band") if isinstance(pm_block, dict) else None
898
+ )
899
+ if isinstance(drift_band, dict):
900
+ lo = drift_band.get("min")
901
+ hi = drift_band.get("max")
902
+ if isinstance(lo, int | float) and isinstance(hi, int | float):
903
+ lo_f = float(lo)
904
+ hi_f = float(hi)
905
+ if math.isfinite(lo_f) and math.isfinite(hi_f) and 0 < lo_f < hi_f:
906
+ drift_min = lo_f
907
+ drift_max = hi_f
908
+ elif isinstance(drift_band, list | tuple) and len(drift_band) == 2:
909
+ lo_raw, hi_raw = drift_band[0], drift_band[1]
910
+ if isinstance(lo_raw, int | float) and isinstance(hi_raw, int | float):
911
+ lo_f = float(lo_raw)
912
+ hi_f = float(hi_raw)
913
+ if math.isfinite(lo_f) and math.isfinite(hi_f) and 0 < lo_f < hi_f:
914
+ drift_min = lo_f
915
+ drift_max = hi_f
916
+ except Exception:
917
+ pass
413
918
  # Compute drift from PM preview/final when available
414
919
  try:
415
920
  pv = (
@@ -430,8 +935,9 @@ def render_certificate_markdown(certificate: dict[str, Any]) -> str:
430
935
  except Exception:
431
936
  drift = float("nan")
432
937
  measured = f"{drift:.3f}x" if math.isfinite(drift) else "N/A"
938
+ band_label = f"{drift_min:.2f}–{drift_max:.2f}x"
433
939
  lines.append(
434
- f"| Preview Final Drift Acceptable | {status} | {measured} | 0.95–1.05x | point | Final/Preview ratio stability |"
940
+ f"| Preview Final Drift Acceptable | {status} | {measured} | {band_label} | point | Final/Preview ratio stability |"
435
941
  )
436
942
 
437
943
  # Helper to emit Guard Overhead Acceptable row (only when evaluated)
@@ -616,14 +1122,39 @@ def render_certificate_markdown(certificate: dict[str, Any]) -> str:
616
1122
  or overlap_frac is not None
617
1123
  ):
618
1124
  lines.append("")
619
- lines.append(
620
- f"- Pairing: paired={paired_windows}, match={match_frac:.3f}, overlap={overlap_frac:.3f}"
621
- )
1125
+ parts: list[str] = []
1126
+ if paired_windows is not None:
1127
+ try:
1128
+ parts.append(f"{int(paired_windows)} windows")
1129
+ except Exception:
1130
+ parts.append(f"windows={paired_windows}")
1131
+ if isinstance(match_frac, int | float) and math.isfinite(float(match_frac)):
1132
+ parts.append(f"{float(match_frac) * 100.0:.1f}% match")
1133
+ elif match_frac is not None:
1134
+ parts.append(f"match={match_frac}")
1135
+ if isinstance(overlap_frac, int | float) and math.isfinite(
1136
+ float(overlap_frac)
1137
+ ):
1138
+ parts.append(f"{float(overlap_frac) * 100.0:.1f}% overlap")
1139
+ elif overlap_frac is not None:
1140
+ parts.append(f"overlap={overlap_frac}")
1141
+ lines.append(f"✅ Pairing: {', '.join(parts) if parts else 'N/A'}")
622
1142
  if isinstance(bootstrap, dict):
623
1143
  reps = bootstrap.get("replicates")
624
1144
  bseed = bootstrap.get("seed")
625
1145
  if reps is not None or bseed is not None:
626
- lines.append(f"- Bootstrap: replicates={reps}, seed={bseed}")
1146
+ bits: list[str] = []
1147
+ if reps is not None:
1148
+ try:
1149
+ bits.append(f"{int(reps)} replicates")
1150
+ except Exception:
1151
+ bits.append(f"replicates={reps}")
1152
+ if bseed is not None:
1153
+ try:
1154
+ bits.append(f"seed={int(bseed)}")
1155
+ except Exception:
1156
+ bits.append(f"seed={bseed}")
1157
+ lines.append(f"✅ Bootstrap: {', '.join(bits) if bits else 'N/A'}")
627
1158
  # Optional: show log-space paired Δ CI next to ratio CI for clarity
628
1159
  delta_ci = certificate.get("primary_metric", {}).get("ci") or certificate.get(
629
1160
  "ppl", {}
@@ -633,7 +1164,7 @@ def render_certificate_markdown(certificate: dict[str, Any]) -> str:
633
1164
  and len(delta_ci) == 2
634
1165
  and all(isinstance(x, int | float) for x in delta_ci)
635
1166
  ):
636
- lines.append(f"- Log Δ (paired) CI: [{delta_ci[0]:.6f}, {delta_ci[1]:.6f}]")
1167
+ lines.append(f"🛈 Log Δ (paired) CI: [{delta_ci[0]:.6f}, {delta_ci[1]:.6f}]")
637
1168
  except Exception:
638
1169
  pass
639
1170
 
@@ -654,116 +1185,179 @@ def render_certificate_markdown(certificate: dict[str, Any]) -> str:
654
1185
 
655
1186
  lines.append("")
656
1187
 
1188
+ _append_primary_metric_section(lines, certificate)
1189
+
657
1190
  # Guard observability snapshots
658
1191
  lines.append("## Guard Observability")
659
1192
  lines.append("")
660
1193
 
661
1194
  spectral_info = certificate.get("spectral", {}) or {}
662
1195
  if spectral_info:
663
- lines.append("### Spectral Guard")
1196
+ lines.append("### Spectral Guard Summary")
664
1197
  lines.append("")
665
- mt_info = spectral_info.get("multiple_testing", {}) or {}
666
- if mt_info:
667
- lines.append("- **Multiple Testing:**")
668
- lines.append(" ```yaml")
669
- mt_yaml = (
670
- yaml.safe_dump(mt_info, sort_keys=True, width=70).strip().splitlines()
671
- )
672
- for line in mt_yaml:
673
- lines.append(f" {line}")
674
- lines.append(" ```")
675
- # Spectral summary (place key knobs together for quick scan)
676
- spec_sigma = spectral_info.get("sigma_quantile")
677
- spec_deadband = spectral_info.get("deadband")
678
- spec_max_caps = spectral_info.get("max_caps")
679
- summary_yaml = {
680
- "sigma_quantile": float(spec_sigma)
681
- if isinstance(spec_sigma, int | float)
682
- else None,
683
- "deadband": float(spec_deadband)
684
- if isinstance(spec_deadband, int | float)
685
- else None,
686
- "max_caps": int(spec_max_caps)
687
- if isinstance(spec_max_caps, int | float)
688
- else None,
689
- }
690
- # Drop Nones from summary
691
- summary_yaml = {k: v for k, v in summary_yaml.items() if v is not None}
692
- if summary_yaml:
693
- lines.append("- **Spectral Summary:**")
694
- lines.append(" ```yaml")
695
- for line in (
696
- yaml.safe_dump(summary_yaml, sort_keys=True, width=70)
697
- .strip()
698
- .splitlines()
699
- ):
700
- lines.append(f" {line}")
701
- lines.append(" ```")
1198
+ lines.append("| Metric | Value | Status |")
1199
+ lines.append("|--------|-------|--------|")
1200
+
1201
+ spectral_ok = bool(validation.get("spectral_stable", False))
1202
+ caps_applied = spectral_info.get("caps_applied")
1203
+ max_caps = spectral_info.get("max_caps")
1204
+ caps_val = (
1205
+ f"{caps_applied}/{max_caps}"
1206
+ if caps_applied is not None and max_caps is not None
1207
+ else "-"
1208
+ )
702
1209
  lines.append(
703
- f"- Caps Applied: {spectral_info.get('caps_applied')} / {spectral_info.get('max_caps')}"
1210
+ f"| Caps Applied | {caps_val} | {'✅ OK' if spectral_ok else '❌ FAIL'} |"
704
1211
  )
1212
+
705
1213
  summary = spectral_info.get("summary", {}) or {}
706
- lines.append(f"- Caps Exceeded: {summary.get('caps_exceeded', False)}")
707
- caps_by_family = spectral_info.get("caps_applied_by_family") or {}
1214
+ caps_exceeded = summary.get("caps_exceeded")
1215
+ if caps_exceeded is not None:
1216
+ cap_status = "✅ OK" if not bool(caps_exceeded) else "⚠️ WARN"
1217
+ lines.append(f"| Caps Exceeded | {caps_exceeded} | {cap_status} |")
1218
+
1219
+ top_scores = spectral_info.get("top_z_scores") or {}
1220
+ max_family: str | None = None
1221
+ max_module: str | None = None
1222
+ max_abs_z: float | None = None
1223
+ if isinstance(top_scores, dict):
1224
+ for family, entries in top_scores.items():
1225
+ if not isinstance(entries, list):
1226
+ continue
1227
+ for entry in entries:
1228
+ if not isinstance(entry, dict):
1229
+ continue
1230
+ z_val = entry.get("z")
1231
+ if not (
1232
+ isinstance(z_val, int | float) and math.isfinite(float(z_val))
1233
+ ):
1234
+ continue
1235
+ z_abs = abs(float(z_val))
1236
+ if max_abs_z is None or z_abs > max_abs_z:
1237
+ max_abs_z = z_abs
1238
+ max_family = str(family)
1239
+ max_module = (
1240
+ str(entry.get("module")) if entry.get("module") else None
1241
+ )
1242
+
708
1243
  family_caps = spectral_info.get("family_caps") or {}
709
- if caps_by_family:
710
- lines.append("")
711
- lines.append("| Family | κ | Violations |")
712
- lines.append("|--------|---|------------|")
713
- for family, count in caps_by_family.items():
714
- kappa = family_caps.get(family, {}).get("kappa")
715
- if isinstance(kappa, int | float) and math.isfinite(float(kappa)):
716
- kappa_str = f"{kappa:.3f}"
717
- else:
718
- kappa_str = "-"
719
- lines.append(f"| {family} | {kappa_str} | {count} |")
720
- lines.append("")
1244
+ kappa = None
1245
+ if max_family and isinstance(family_caps, dict):
1246
+ try:
1247
+ kappa = (family_caps.get(max_family, {}) or {}).get("kappa")
1248
+ except Exception:
1249
+ kappa = None
1250
+ kappa_f = (
1251
+ float(kappa)
1252
+ if isinstance(kappa, int | float) and math.isfinite(float(kappa))
1253
+ else None
1254
+ )
1255
+
1256
+ if max_abs_z is not None:
1257
+ max_val = f"{max_abs_z:.3f}"
1258
+ if max_family:
1259
+ max_val += f" ({max_family})"
1260
+ if max_module:
1261
+ max_val += f" – {max_module}"
1262
+ if kappa_f is None:
1263
+ max_status = "🛈 No κ"
1264
+ elif max_abs_z <= kappa_f:
1265
+ max_status = f"✅ Within κ={kappa_f:.3f}"
1266
+ else:
1267
+ max_status = f"❌ Exceeds κ={kappa_f:.3f}"
1268
+ lines.append(f"| Max |z| | {max_val} | {max_status} |")
1269
+
1270
+ mt_info = spectral_info.get("multiple_testing", {}) or {}
1271
+ if isinstance(mt_info, dict) and mt_info:
1272
+ mt_method = mt_info.get("method")
1273
+ mt_alpha = mt_info.get("alpha")
1274
+ mt_m = mt_info.get("m")
1275
+ parts: list[str] = []
1276
+ if mt_method:
1277
+ parts.append(f"method={mt_method}")
1278
+ if isinstance(mt_alpha, int | float) and math.isfinite(float(mt_alpha)):
1279
+ parts.append(f"α={float(mt_alpha):.3g}")
1280
+ if isinstance(mt_m, int | float) and math.isfinite(float(mt_m)):
1281
+ parts.append(f"m={int(mt_m)}")
1282
+ lines.append(
1283
+ f"| Multiple Testing | {', '.join(parts) if parts else '—'} | 🛈 INFO |"
1284
+ )
1285
+
1286
+ lines.append("")
1287
+
1288
+ caps_by_family = spectral_info.get("caps_applied_by_family") or {}
721
1289
  quantiles = spectral_info.get("family_z_quantiles") or {}
722
- if quantiles:
723
- lines.append("| Family | q95 | q99 | Max | Samples |")
724
- lines.append("|--------|-----|-----|-----|---------|")
725
- for family, stats in quantiles.items():
726
- q95 = stats.get("q95")
727
- q99 = stats.get("q99")
728
- max_z = stats.get("max")
729
- count = stats.get("count")
1290
+ if any(
1291
+ bool(x)
1292
+ for x in (caps_by_family, quantiles, family_caps, top_scores)
1293
+ if isinstance(x, dict)
1294
+ ):
1295
+ lines.append("<details>")
1296
+ lines.append("<summary>Per-family details</summary>")
1297
+ lines.append("")
1298
+ lines.append("| Family | κ | q95 | Max |z| | Violations |")
1299
+ lines.append("|--------|---|-----|--------|------------|")
1300
+
1301
+ families: set[str] = set()
1302
+ for block in (caps_by_family, quantiles, family_caps, top_scores):
1303
+ if isinstance(block, dict):
1304
+ families.update(str(k) for k in block.keys())
1305
+
1306
+ for family in sorted(families):
1307
+ kappa = None
1308
+ if isinstance(family_caps, dict):
1309
+ kappa = (family_caps.get(family, {}) or {}).get("kappa")
1310
+ kappa_str = (
1311
+ f"{float(kappa):.3f}"
1312
+ if isinstance(kappa, int | float) and math.isfinite(float(kappa))
1313
+ else "-"
1314
+ )
1315
+
1316
+ q95 = None
1317
+ max_z = None
1318
+ if isinstance(quantiles, dict):
1319
+ stats = quantiles.get(family) or {}
1320
+ if isinstance(stats, dict):
1321
+ q95 = stats.get("q95")
1322
+ max_z = stats.get("max")
730
1323
  q95_str = f"{q95:.3f}" if isinstance(q95, int | float) else "-"
731
- q99_str = f"{q99:.3f}" if isinstance(q99, int | float) else "-"
732
1324
  max_str = f"{max_z:.3f}" if isinstance(max_z, int | float) else "-"
733
- count_str = str(count) if isinstance(count, int | float) else "-"
1325
+
1326
+ violations = None
1327
+ if isinstance(caps_by_family, dict):
1328
+ violations = caps_by_family.get(family)
1329
+ v_str = (
1330
+ str(int(violations)) if isinstance(violations, int | float) else "0"
1331
+ )
1332
+
734
1333
  lines.append(
735
- f"| {family} | {q95_str} | {q99_str} | {max_str} | {count_str} |"
1334
+ f"| {family} | {kappa_str} | {q95_str} | {max_str} | {v_str} |"
736
1335
  )
1336
+
1337
+ if isinstance(top_scores, dict) and top_scores:
1338
+ lines.append("")
1339
+ lines.append("Top |z| per family:")
1340
+ for family in sorted(top_scores.keys()):
1341
+ entries = top_scores[family]
1342
+ if not isinstance(entries, list) or not entries:
1343
+ continue
1344
+ formatted_entries = []
1345
+ for entry in entries:
1346
+ if not isinstance(entry, dict):
1347
+ continue
1348
+ module_name = entry.get("module", "unknown")
1349
+ z_val = entry.get("z")
1350
+ if isinstance(z_val, int | float) and math.isfinite(
1351
+ float(z_val)
1352
+ ):
1353
+ z_str = f"{z_val:.3f}"
1354
+ else:
1355
+ z_str = "n/a"
1356
+ formatted_entries.append(f"{module_name} (|z|={z_str})")
1357
+ lines.append(f"- {family}: {', '.join(formatted_entries)}")
1358
+
737
1359
  lines.append("")
738
- policy_caps = spectral_info.get("policy", {}).get("family_caps")
739
- if policy_caps:
740
- lines.append("- **Family κ (policy):**")
741
- lines.append(" ```yaml")
742
- caps_yaml = (
743
- yaml.safe_dump(policy_caps, sort_keys=True, width=70)
744
- .strip()
745
- .splitlines()
746
- )
747
- for line in caps_yaml:
748
- lines.append(f" {line}")
749
- lines.append(" ```")
750
- top_scores = spectral_info.get("top_z_scores") or {}
751
- if top_scores:
752
- lines.append("Top |z| per family:")
753
- for family in sorted(top_scores.keys()):
754
- entries = top_scores[family]
755
- if not entries:
756
- continue
757
- formatted_entries = []
758
- for entry in entries:
759
- module_name = entry.get("module", "unknown")
760
- z_val = entry.get("z")
761
- if isinstance(z_val, int | float) and math.isfinite(float(z_val)):
762
- z_str = f"{z_val:.3f}"
763
- else:
764
- z_str = "n/a"
765
- formatted_entries.append(f"{module_name} (|z|={z_str})")
766
- lines.append(f"- {family}: {', '.join(formatted_entries)}")
1360
+ lines.append("</details>")
767
1361
  lines.append("")
768
1362
 
769
1363
  rmt_info = certificate.get("rmt", {}) or {}
@@ -771,7 +1365,18 @@ def render_certificate_markdown(certificate: dict[str, Any]) -> str:
771
1365
  lines.append("### RMT Guard")
772
1366
  lines.append("")
773
1367
  families = rmt_info.get("families") or {}
1368
+ stable = bool(rmt_info.get("stable", True))
1369
+ status = "✅ OK" if stable else "❌ FAIL"
1370
+ delta_total = rmt_info.get("delta_total")
1371
+ if isinstance(delta_total, int):
1372
+ lines.append(f"- Δ total: {delta_total:+d}")
1373
+ lines.append(f"- Status: {status}")
1374
+ lines.append(f"- Families: {len(families)}")
774
1375
  if families:
1376
+ lines.append("")
1377
+ lines.append("<details>")
1378
+ lines.append("<summary>RMT family details</summary>")
1379
+ lines.append("")
775
1380
  lines.append("| Family | ε_f | Bare | Guarded | Δ |")
776
1381
  lines.append("|--------|-----|------|---------|---|")
777
1382
  for family, data in families.items():
@@ -801,12 +1406,10 @@ def render_certificate_markdown(certificate: dict[str, Any]) -> str:
801
1406
  f"| {family} | {epsilon_str} | {bare_str} | {guarded_str} | {delta_str} |"
802
1407
  )
803
1408
  lines.append("")
804
- # Delta total and stability flags
805
- delta_total = rmt_info.get("delta_total")
806
- if isinstance(delta_total, int):
807
- lines.append(f"- Δ total: {delta_total:+d}")
808
- lines.append(f"- Stable: {rmt_info.get('stable', True)}")
809
- lines.append("")
1409
+ lines.append("</details>")
1410
+ lines.append("")
1411
+ else:
1412
+ lines.append("")
810
1413
 
811
1414
  guard_overhead_info = certificate.get("guard_overhead", {}) or {}
812
1415
  if guard_overhead_info:
@@ -863,21 +1466,21 @@ def render_certificate_markdown(certificate: dict[str, Any]) -> str:
863
1466
  inference_sources = compression_diag.get("inference_source") or {}
864
1467
  inference_log = compression_diag.get("inference_log") or []
865
1468
  if inference_flags or inference_sources or inference_log:
866
- lines.append("## Inference")
867
- lines.append("")
1469
+ appendix_lines.append("### Inference Diagnostics")
1470
+ appendix_lines.append("")
868
1471
  if inference_flags:
869
- lines.append("- **Fields Inferred:**")
1472
+ appendix_lines.append("- **Fields Inferred:**")
870
1473
  for field, flag in inference_flags.items():
871
- lines.append(f" - {field}: {'yes' if flag else 'no'}")
1474
+ appendix_lines.append(f" - {field}: {'yes' if flag else 'no'}")
872
1475
  if inference_sources:
873
- lines.append("- **Sources:**")
1476
+ appendix_lines.append("- **Sources:**")
874
1477
  for field, source in inference_sources.items():
875
- lines.append(f" - {field}: {source}")
1478
+ appendix_lines.append(f" - {field}: {source}")
876
1479
  if inference_log:
877
- lines.append("- **Inference Log:**")
1480
+ appendix_lines.append("- **Inference Log:**")
878
1481
  for entry in inference_log:
879
- lines.append(f" - {entry}")
880
- lines.append("")
1482
+ appendix_lines.append(f" - {entry}")
1483
+ appendix_lines.append("")
881
1484
 
882
1485
  # Model and Configuration
883
1486
  lines.append("## Model Information")
@@ -906,28 +1509,48 @@ def render_certificate_markdown(certificate: dict[str, Any]) -> str:
906
1509
  if invarlock_version:
907
1510
  lines.append(f"- **InvarLock Version:** {invarlock_version}")
908
1511
  env_flags = meta.get("env_flags")
909
- if isinstance(env_flags, dict) and env_flags:
910
- lines.append("- **Env Flags:**")
911
- lines.append(" ```yaml")
912
- for k, v in env_flags.items():
913
- lines.append(f" {k}: {v}")
914
- lines.append(" ```")
915
- # Determinism flags (if present)
916
1512
  cuda_flags = meta.get("cuda_flags")
1513
+
1514
+ # Compressed determinism/environment summary for readability
1515
+ det_parts: list[str] = []
1516
+ for label, keys in (
1517
+ ("torch_det", ("torch_deterministic_algorithms", "deterministic_algorithms")),
1518
+ ("cudnn_det", ("cudnn_deterministic",)),
1519
+ ("cudnn_bench", ("cudnn_benchmark",)),
1520
+ ("tf32_matmul", ("cuda_matmul_allow_tf32",)),
1521
+ ("tf32_cudnn", ("cudnn_allow_tf32",)),
1522
+ ("cublas_ws", ("CUBLAS_WORKSPACE_CONFIG",)),
1523
+ ):
1524
+ val = None
1525
+ for key in keys:
1526
+ if isinstance(env_flags, dict) and env_flags.get(key) is not None:
1527
+ val = env_flags.get(key)
1528
+ break
1529
+ if isinstance(cuda_flags, dict) and cuda_flags.get(key) is not None:
1530
+ val = cuda_flags.get(key)
1531
+ break
1532
+ if val is not None:
1533
+ det_parts.append(f"{label}={val}")
1534
+ if det_parts:
1535
+ lines.append(f"- **Determinism:** {', '.join(det_parts)}")
1536
+
1537
+ full_flags: dict[str, Any] = {}
1538
+ if isinstance(env_flags, dict) and env_flags:
1539
+ full_flags["env_flags"] = env_flags
917
1540
  if isinstance(cuda_flags, dict) and cuda_flags:
918
- parts = []
919
- for key in (
920
- "deterministic_algorithms",
921
- "cudnn_deterministic",
922
- "cudnn_benchmark",
923
- "cudnn_allow_tf32",
924
- "cuda_matmul_allow_tf32",
925
- "CUBLAS_WORKSPACE_CONFIG",
926
- ):
927
- if key in cuda_flags and cuda_flags[key] is not None:
928
- parts.append(f"{key}={cuda_flags[key]}")
929
- if parts:
930
- lines.append(f"- **Determinism Flags:** {', '.join(parts)}")
1541
+ full_flags["cuda_flags"] = cuda_flags
1542
+ if full_flags:
1543
+ lines.append("")
1544
+ lines.append("<details>")
1545
+ lines.append("<summary>Environment flags (full)</summary>")
1546
+ lines.append("")
1547
+ lines.append("```yaml")
1548
+ flags_yaml = yaml.safe_dump(full_flags, sort_keys=True, width=80).strip()
1549
+ for line in flags_yaml.splitlines():
1550
+ lines.append(line)
1551
+ lines.append("```")
1552
+ lines.append("")
1553
+ lines.append("</details>")
931
1554
  lines.append("")
932
1555
 
933
1556
  # Edit Configuration (removed duplicate Edit Information section)
@@ -951,267 +1574,10 @@ def render_certificate_markdown(certificate: dict[str, Any]) -> str:
951
1574
  pass
952
1575
  lines.append("")
953
1576
 
954
- resolved_policy = certificate.get("resolved_policy")
955
- if resolved_policy:
956
- lines.append("## Resolved Policy")
957
- lines.append("")
958
- lines.append("```yaml")
959
- resolved_yaml = yaml.safe_dump(
960
- resolved_policy, sort_keys=True, width=80, default_flow_style=False
961
- ).strip()
962
- for line in resolved_yaml.splitlines():
963
- lines.append(line)
964
- lines.append("```")
965
- lines.append("")
966
-
967
- policy_provenance = certificate.get("policy_provenance", {})
968
- if policy_provenance:
969
- lines.append("## Policy Provenance")
970
- lines.append("")
971
- lines.append(f"- **Tier:** {policy_provenance.get('tier')}")
972
- overrides_list = policy_provenance.get("overrides") or []
973
- if overrides_list:
974
- lines.append(f"- **Overrides:** {', '.join(overrides_list)}")
975
- else:
976
- lines.append("- **Overrides:** (none)")
977
- digest_value = policy_provenance.get("policy_digest")
978
- if digest_value:
979
- lines.append(f"- **Policy Digest:** `{digest_value}`")
980
- else:
981
- lines.append("- **Policy Digest:** (not recorded)")
982
- if policy_provenance.get("resolved_at"):
983
- lines.append(f"- **Resolved At:** {policy_provenance.get('resolved_at')}")
984
- lines.append("")
985
-
986
- # Dataset Information
987
- lines.append("## Dataset Configuration")
988
- lines.append("")
989
- dataset = certificate.get("dataset", {}) or {}
990
- prov = (
991
- (dataset.get("provider") or "unknown")
992
- if isinstance(dataset, dict)
993
- else "unknown"
994
- )
995
- lines.append(f"- **Provider:** {prov}")
996
- try:
997
- seq_len_val = (
998
- int(dataset.get("seq_len"))
999
- if isinstance(dataset.get("seq_len"), int | float)
1000
- else dataset.get("seq_len")
1001
- )
1002
- except Exception: # pragma: no cover - defensive
1003
- seq_len_val = dataset.get("seq_len")
1004
- if seq_len_val is not None:
1005
- lines.append(f"- **Sequence Length:** {seq_len_val}")
1006
- windows_blk = (
1007
- dataset.get("windows", {}) if isinstance(dataset.get("windows"), dict) else {}
1008
- )
1009
- win_prev = windows_blk.get("preview")
1010
- win_final = windows_blk.get("final")
1011
- if win_prev is not None and win_final is not None:
1012
- lines.append(f"- **Windows:** {win_prev} preview + {win_final} final")
1013
- if windows_blk.get("seed") is not None:
1014
- lines.append(f"- **Seed:** {windows_blk.get('seed')}")
1015
- hash_blk = dataset.get("hash", {}) if isinstance(dataset.get("hash"), dict) else {}
1016
- if hash_blk.get("preview_tokens") is not None:
1017
- lines.append(f"- **Preview Tokens:** {hash_blk.get('preview_tokens'):,}")
1018
- if hash_blk.get("final_tokens") is not None:
1019
- lines.append(f"- **Final Tokens:** {hash_blk.get('final_tokens'):,}")
1020
- if hash_blk.get("total_tokens") is not None:
1021
- lines.append(f"- **Total Tokens:** {hash_blk.get('total_tokens'):,}")
1022
- if hash_blk.get("dataset"):
1023
- lines.append(f"- **Dataset Hash:** {hash_blk.get('dataset')}")
1024
- tokenizer = dataset.get("tokenizer", {})
1025
- if tokenizer.get("name") or tokenizer.get("hash"):
1026
- vocab_size = tokenizer.get("vocab_size")
1027
- vocab_suffix = f" (vocab {vocab_size})" if isinstance(vocab_size, int) else ""
1028
- lines.append(
1029
- f"- **Tokenizer:** {tokenizer.get('name', 'unknown')}{vocab_suffix}"
1030
- )
1031
- if tokenizer.get("hash"):
1032
- lines.append(f" - Hash: {tokenizer['hash']}")
1033
- lines.append(
1034
- f" - BOS/EOS: {tokenizer.get('bos_token')} / {tokenizer.get('eos_token')}"
1035
- )
1036
- if tokenizer.get("pad_token") is not None:
1037
- lines.append(f" - PAD: {tokenizer.get('pad_token')}")
1038
- if tokenizer.get("add_prefix_space") is not None:
1039
- lines.append(f" - add_prefix_space: {tokenizer.get('add_prefix_space')}")
1040
- lines.append("")
1041
-
1042
- provenance_info = certificate.get("provenance", {}) or {}
1043
- if provenance_info:
1044
- lines.append("## Run Provenance")
1045
- lines.append("")
1046
- baseline_info = provenance_info.get("baseline", {}) or {}
1047
- if baseline_info:
1048
- lines.append(f"- **Baseline Run ID:** {baseline_info.get('run_id')}")
1049
- if baseline_info.get("report_hash"):
1050
- lines.append(f" - Report Hash: `{baseline_info.get('report_hash')}`")
1051
- if baseline_info.get("report_path"):
1052
- lines.append(f" - Report Path: {baseline_info.get('report_path')}")
1053
- edited_info = provenance_info.get("edited", {}) or {}
1054
- if edited_info:
1055
- lines.append(f"- **Edited Run ID:** {edited_info.get('run_id')}")
1056
- if edited_info.get("report_hash"):
1057
- lines.append(f" - Report Hash: `{edited_info.get('report_hash')}`")
1058
- if edited_info.get("report_path"):
1059
- lines.append(f" - Report Path: {edited_info.get('report_path')}")
1060
- window_plan = provenance_info.get("window_plan")
1061
- if isinstance(window_plan, dict) and window_plan:
1062
- preview_val = window_plan.get(
1063
- "preview_n", window_plan.get("actual_preview")
1064
- )
1065
- final_val = window_plan.get("final_n", window_plan.get("actual_final"))
1066
- lines.append(
1067
- f"- **Window Plan:** profile={window_plan.get('profile')}, preview={preview_val}, final={final_val}"
1068
- )
1069
- provider_digest = provenance_info.get("provider_digest")
1070
- if isinstance(provider_digest, dict) and provider_digest:
1071
- ids_d = provider_digest.get("ids_sha256")
1072
- tok_d = provider_digest.get("tokenizer_sha256")
1073
- mask_d = provider_digest.get("masking_sha256")
1074
-
1075
- lines.append("- **Provider Digest:**")
1076
- if tok_d:
1077
- lines.append(
1078
- f" - tokenizer_sha256: `{_short_digest(tok_d)}` (full in JSON)"
1079
- )
1080
- if ids_d:
1081
- lines.append(f" - ids_sha256: `{_short_digest(ids_d)}` (full in JSON)")
1082
- if mask_d:
1083
- lines.append(
1084
- f" - masking_sha256: `{_short_digest(mask_d)}` (full in JSON)"
1085
- )
1086
- # Surface confidence label prominently
1087
- try:
1088
- conf = certificate.get("confidence", {}) or {}
1089
- if isinstance(conf, dict) and conf.get("label"):
1090
- lines.append(f"- **Confidence:** {conf.get('label')}")
1091
- except Exception:
1092
- pass
1093
- # Surface policy version + thresholds hash (short)
1094
- try:
1095
- pd = certificate.get("policy_digest", {}) or {}
1096
- if isinstance(pd, dict) and pd:
1097
- pv = pd.get("policy_version")
1098
- th = pd.get("thresholds_hash")
1099
- if pv:
1100
- lines.append(f"- **Policy Version:** {pv}")
1101
- if isinstance(th, str) and th:
1102
- short = th if len(th) <= 16 else (th[:8] + "…" + th[-8:])
1103
- lines.append(f"- **Thresholds Digest:** `{short}` (full in JSON)")
1104
- if pd.get("changed"):
1105
- lines.append("- Note: policy changed")
1106
- except Exception:
1107
- pass
1108
- lines.append("")
1577
+ _append_dataset_and_provenance_section(lines, certificate)
1109
1578
 
1110
1579
  # Structural Changes heading is printed with content later; avoid empty header here
1111
1580
 
1112
- # Primary Metric (metric-v1) snapshot, if present
1113
- try:
1114
- pm = certificate.get("primary_metric")
1115
- if isinstance(pm, dict) and pm:
1116
- kind = pm.get("kind", "unknown")
1117
- lines.append(f"## Primary Metric ({kind})")
1118
- lines.append("")
1119
- unit = pm.get("unit", "-")
1120
- paired = pm.get("paired", False)
1121
- reps = None
1122
- # Snapshot only; bootstrap reps live in ppl.stats.bootstrap for ppl metrics
1123
- # Mark estimated metrics (e.g., pseudo accuracy counts) clearly
1124
- estimated_flag = False
1125
- try:
1126
- if bool(pm.get("estimated")):
1127
- estimated_flag = True
1128
- elif str(pm.get("counts_source", "")).lower() == "pseudo_config":
1129
- estimated_flag = True
1130
- except Exception:
1131
- estimated_flag = False
1132
- est_suffix = " (estimated)" if estimated_flag else ""
1133
- lines.append(f"- Kind: {kind} (unit: {unit}){est_suffix}")
1134
- gating_basis = pm.get("gating_basis") or pm.get("basis")
1135
- if gating_basis:
1136
- lines.append(f"- Basis: {gating_basis}")
1137
- if isinstance(paired, bool):
1138
- lines.append(f"- Paired: {paired}")
1139
- reps = pm.get("reps")
1140
- if isinstance(reps, int | float):
1141
- lines.append(f"- Bootstrap Reps: {int(reps)}")
1142
- ci = pm.get("ci") or pm.get("display_ci")
1143
- if (
1144
- isinstance(ci, list | tuple)
1145
- and len(ci) == 2
1146
- and all(isinstance(x, int | float) for x in ci)
1147
- ):
1148
- lines.append(f"- CI: {ci[0]:.3f}–{ci[1]:.3f}")
1149
- prev = pm.get("preview")
1150
- fin = pm.get("final")
1151
- ratio = pm.get("ratio_vs_baseline")
1152
-
1153
- lines.append("")
1154
- if estimated_flag and str(kind).lower() in {"accuracy", "vqa_accuracy"}:
1155
- lines.append(
1156
- "- Note: Accuracy derived from pseudo counts (quick dev preset); use a labeled preset for measured accuracy."
1157
- )
1158
- lines.append("| Field | Value |")
1159
- lines.append("|-------|-------|")
1160
- lines.append(f"| Preview | {_fmt_by_kind(prev, str(kind))} |")
1161
- lines.append(f"| Final | {_fmt_by_kind(fin, str(kind))} |")
1162
- # For accuracy, ratio field is actually a delta (as per helper); clarify inline
1163
- if kind in {"accuracy", "vqa_accuracy"}:
1164
- lines.append(f"| Δ vs Baseline | {_fmt_by_kind(ratio, str(kind))} |")
1165
- # When baseline accuracy is near-zero, clarify display rule
1166
- try:
1167
- base_pt = pm.get("baseline_point")
1168
- if isinstance(base_pt, int | float) and base_pt < 0.05:
1169
- lines.append(
1170
- "- Note: baseline < 5%; ratio suppressed; showing Δpp"
1171
- )
1172
- except Exception:
1173
- pass
1174
- else:
1175
- try:
1176
- lines.append(f"| Ratio vs Baseline | {float(ratio):.3f} |")
1177
- except Exception:
1178
- lines.append("| Ratio vs Baseline | N/A |")
1179
- lines.append("")
1180
- # Secondary metrics (informational)
1181
- try:
1182
- secs = certificate.get("secondary_metrics")
1183
- if isinstance(secs, list) and secs:
1184
- lines.append("## Secondary Metrics (informational)")
1185
- lines.append("")
1186
- lines.append("| Kind | Preview | Final | vs Baseline | CI |")
1187
- lines.append("|------|---------|-------|-------------|----|")
1188
- for m in secs:
1189
- if not isinstance(m, dict):
1190
- continue
1191
- k = m.get("kind", "?")
1192
- pv = _fmt_by_kind(m.get("preview"), str(k))
1193
- fv = _fmt_by_kind(m.get("final"), str(k))
1194
- rb = m.get("ratio_vs_baseline")
1195
- try:
1196
- rb_str = (
1197
- f"{float(rb):.3f}"
1198
- if (str(k).startswith("ppl"))
1199
- else _fmt_by_kind(rb, str(k))
1200
- )
1201
- except Exception:
1202
- rb_str = "N/A"
1203
- ci = m.get("display_ci") or m.get("ci")
1204
- if isinstance(ci, tuple | list) and len(ci) == 2:
1205
- ci_str = f"{float(ci[0]):.3f}-{float(ci[1]):.3f}"
1206
- else:
1207
- ci_str = "–"
1208
- lines.append(f"| {k} | {pv} | {fv} | {rb_str} | {ci_str} |")
1209
- lines.append("")
1210
- except Exception:
1211
- pass
1212
- except Exception:
1213
- pass
1214
-
1215
1581
  # System Overhead section (latency/throughput)
1216
1582
  sys_over = certificate.get("system_overhead", {}) or {}
1217
1583
  if isinstance(sys_over, dict) and sys_over:
@@ -1370,31 +1736,32 @@ def render_certificate_markdown(certificate: dict[str, Any]) -> str:
1370
1736
 
1371
1737
  # Variance Guard (Spectral/RMT summaries are already provided above)
1372
1738
  variance = certificate["variance"]
1373
- lines.append("## Variance Guard")
1739
+ appendix_lines.append("### Variance Guard")
1740
+ appendix_lines.append("")
1374
1741
 
1375
1742
  # Display whether VE was enabled after A/B test
1376
- lines.append(f"- **Enabled:** {'Yes' if variance['enabled'] else 'No'}")
1743
+ appendix_lines.append(f"- **Enabled:** {'Yes' if variance['enabled'] else 'No'}")
1377
1744
 
1378
1745
  if variance["enabled"]:
1379
1746
  # VE was enabled - show the gain
1380
1747
  gain_value = variance.get("gain", "N/A")
1381
1748
  if isinstance(gain_value, int | float):
1382
- lines.append(f"- **Gain:** {gain_value:.3f}")
1749
+ appendix_lines.append(f"- **Gain:** {gain_value:.3f}")
1383
1750
  else:
1384
- lines.append(f"- **Gain:** {gain_value}")
1751
+ appendix_lines.append(f"- **Gain:** {gain_value}")
1385
1752
  else:
1386
1753
  # VE was not enabled - show succinct reason if available, else a clear disabled message
1387
1754
  ppl_no_ve = variance.get("ppl_no_ve")
1388
1755
  ppl_with_ve = variance.get("ppl_with_ve")
1389
1756
  ratio_ci = variance.get("ratio_ci")
1390
1757
  if ppl_no_ve is not None and ppl_with_ve is not None and ratio_ci:
1391
- lines.append(f"- **Primary metric without VE:** {ppl_no_ve:.3f}")
1392
- lines.append(f"- **Primary metric with VE:** {ppl_with_ve:.3f}")
1758
+ appendix_lines.append(f"- **Primary metric without VE:** {ppl_no_ve:.3f}")
1759
+ appendix_lines.append(f"- **Primary metric with VE:** {ppl_with_ve:.3f}")
1393
1760
  gain_value = variance.get("gain")
1394
1761
  if isinstance(gain_value, int | float):
1395
- lines.append(f"- **Gain (insufficient):** {gain_value:.3f}")
1762
+ appendix_lines.append(f"- **Gain (insufficient):** {gain_value:.3f}")
1396
1763
  else:
1397
- lines.append(
1764
+ appendix_lines.append(
1398
1765
  "- Variance Guard: Disabled (predictive gate not evaluated for this edit)."
1399
1766
  )
1400
1767
  # Add concise rationale aligned with Balanced predictive gate contract
@@ -1402,14 +1769,14 @@ def render_certificate_markdown(certificate: dict[str, Any]) -> str:
1402
1769
  ve_policy = certificate.get("policies", {}).get("variance", {})
1403
1770
  min_effect = ve_policy.get("min_effect_lognll")
1404
1771
  if isinstance(min_effect, int | float):
1405
- lines.append(
1772
+ appendix_lines.append(
1406
1773
  f"- Predictive gate (Balanced): one-sided; enables only if CI excludes 0 and |mean Δ| ≥ {float(min_effect):.4g}."
1407
1774
  )
1408
1775
  else:
1409
- lines.append(
1776
+ appendix_lines.append(
1410
1777
  "- Predictive gate (Balanced): one-sided; enables only if CI excludes 0 and |mean Δ| ≥ min_effect."
1411
1778
  )
1412
- lines.append(
1779
+ appendix_lines.append(
1413
1780
  "- Predictive Gate: evaluated=false (disabled under current policy/edit)."
1414
1781
  )
1415
1782
  except Exception:
@@ -1417,14 +1784,17 @@ def render_certificate_markdown(certificate: dict[str, Any]) -> str:
1417
1784
 
1418
1785
  if variance.get("ratio_ci"):
1419
1786
  ratio_lo, ratio_hi = variance["ratio_ci"]
1420
- lines.append(f"- **Ratio CI:** [{ratio_lo:.3f}, {ratio_hi:.3f}]")
1787
+ appendix_lines.append(f"- **Ratio CI:** [{ratio_lo:.3f}, {ratio_hi:.3f}]")
1421
1788
 
1422
1789
  if variance.get("calibration") and variance.get("enabled"):
1423
1790
  calib = variance["calibration"]
1424
1791
  coverage = calib.get("coverage")
1425
1792
  requested = calib.get("requested")
1426
1793
  status = calib.get("status", "unknown")
1427
- lines.append(f"- **Calibration:** {coverage}/{requested} windows ({status})")
1794
+ appendix_lines.append(
1795
+ f"- **Calibration:** {coverage}/{requested} windows ({status})"
1796
+ )
1797
+ appendix_lines.append("")
1428
1798
 
1429
1799
  lines.append("")
1430
1800
 
@@ -1458,32 +1828,22 @@ def render_certificate_markdown(certificate: dict[str, Any]) -> str:
1458
1828
  lines.append(f"- **{label}:** {float(moe[key]):+.4f}")
1459
1829
  lines.append("")
1460
1830
 
1461
- # Policy Summary
1462
- lines.append("## Applied Policies")
1463
- lines.append("")
1464
- policies = certificate["policies"]
1465
- for guard_name, policy in policies.items():
1466
- lines.append(f"### {guard_name.title()}")
1467
- lines.append("")
1468
- policy_yaml = (
1469
- yaml.safe_dump(policy, sort_keys=True, width=80).strip().splitlines()
1470
- )
1471
- lines.append("```yaml")
1472
- for line in policy_yaml:
1473
- lines.append(line)
1474
- lines.append("```")
1475
- lines.append("")
1831
+ _append_policy_configuration_section(lines, certificate)
1476
1832
 
1477
- # Artifacts
1478
- lines.append("## Artifacts")
1479
- lines.append("")
1833
+ appendix_lines.append("### Artifacts")
1834
+ appendix_lines.append("")
1480
1835
  artifacts = certificate["artifacts"]
1481
1836
  if artifacts.get("events_path"):
1482
- lines.append(f"- **Events Log:** `{artifacts['events_path']}`")
1837
+ appendix_lines.append(f"- **Events Log:** `{artifacts['events_path']}`")
1483
1838
  if artifacts.get("report_path"):
1484
- lines.append(f"- **Full Report:** `{artifacts['report_path']}`")
1485
- lines.append(f"- **Certificate Generated:** {artifacts['generated_at']}")
1486
- lines.append("")
1839
+ appendix_lines.append(f"- **Full Report:** `{artifacts['report_path']}`")
1840
+ appendix_lines.append(f"- **Certificate Generated:** {artifacts['generated_at']}")
1841
+ appendix_lines.append("")
1842
+
1843
+ if appendix_lines:
1844
+ lines.append("## Appendix")
1845
+ lines.append("")
1846
+ lines.extend(appendix_lines)
1487
1847
 
1488
1848
  # Certificate Hash for Integrity
1489
1849
  cert_hash = _compute_certificate_hash(certificate)
@@ -1494,7 +1854,7 @@ def render_certificate_markdown(certificate: dict[str, Any]) -> str:
1494
1854
  lines.append("---")
1495
1855
  lines.append("")
1496
1856
  lines.append(
1497
- "*This InvarLock safety certificate provides a comprehensive assessment of model compression safety.*"
1857
+ "*This InvarLock evaluation certificate provides a comprehensive assessment of model compression safety.*"
1498
1858
  )
1499
1859
  lines.append(
1500
1860
  "*All metrics are compared against the uncompressed baseline model for safety validation.*"